Merge pull request #967 from NagaYZ/fix-reaper

Fix ReaperScans parser after site structure change
master
devi 2 years ago committed by GitHub
commit 5f771973a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -4,33 +4,27 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONArray
import org.json.JSONObject
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.json.*
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.*
import kotlin.random.Random
private const val MAX_RETRY_COUNT = 5
private val JSON_MEDIA_TYPE get() = "application/json; charset=utf-8".toMediaType()
@MangaSourceParser("REAPERCOMICS", "ReaperComics", "en")
internal class ReaperComics(context: MangaLoaderContext) :
PagedMangaParser(context, MangaParserSource.REAPERCOMICS, pageSize = 32) {
PagedMangaParser(context, MangaParserSource.REAPERCOMICS, pageSize = 20) {
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL)
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL, SortOrder.POPULARITY, SortOrder.NEWEST, SortOrder.ALPHABETICAL_DESC)
override val configKeyDomain = ConfigKey.Domain("reaperscans.com")
@ -42,12 +36,6 @@ internal class ReaperComics(context: MangaLoaderContext) :
override val headers
get() = getApiHeaders()
private val selectTotalChapter = "dl.mt-2 div:nth-child(5) > dd"
private val selectState = "dl.mt-2 div:nth-child(4) > dd"
private val searchCache = mutableSetOf<Manga>() // Cache search results
private val chapterCache = mutableMapOf<String, Manga>() // Cache chapter lists
private fun getApiHeaders(): Headers {
val userCookie = context.cookieJar.getCookies(domain).find {
it.name == "user"
@ -58,245 +46,198 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
if(page > 1) return emptyList()
val url = buildString {
append("https://")
append(domain)
append("api.$domain")
append("/query?page=$page&perPage=9999&series_type=Comic")
when (filter) {
is MangaListFilter.Search -> {
val searchTitle = filter.query.trim()
if (searchCache.isNotEmpty()) {
if (page > 1) {
return emptyList()
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
} else {
return searchAllPage(page, searchTitle)
}
append("&query_string=")
append(filter.query.urlEncoded())
}
is MangaListFilter.Advanced -> {
append("/")
if (filter.sortOrder == SortOrder.UPDATED) {
append("latest/")
append("&orderBy=")
val order = when (filter.sortOrder) {
SortOrder.UPDATED -> "updated_at"
SortOrder.POPULARITY -> "total_views"
SortOrder.NEWEST -> "created_at"
SortOrder.ALPHABETICAL -> "title"
SortOrder.ALPHABETICAL_DESC -> "title"
else -> "updated_at"
}
append(order)
val sortOrder = if (filter.sortOrder == SortOrder.ALPHABETICAL_DESC) "desc" else "asc"
append("&order=$sortOrder")
filter.states.oneOrThrowIfMany()?.let {
append("&status=")
append(
when (it) {
MangaState.ONGOING -> "Ongoing"
MangaState.FINISHED -> "Completed"
MangaState.ABANDONED -> "Dropped"
MangaState.PAUSED -> "Hiatus"
else -> "All"
},
)
}
if (filter.tags.isNotEmpty()) {
append("&tags_ids=")
append(filter.tags.joinToString(separator = "%") { it.key })
}
append("comics?page=")
append(page.toString())
}
null -> {
append("/latest/comics?page=")
append(page.toString())
append("&orderBy=updated_at")
append("&order=asc")
append("&adult=true")
append("&status=All")
}
}
}
return parseMangaList(webClient.httpGet(url).parseHtml())
return parseMangaList(webClient.httpGet(url).parseJson())
}
/**
* Searches all pages once and stores the results in the cache.
*
* @param page the page to start from
* @param searchTitle the title to search for
* @return the list of manga
*/
private suspend fun searchAllPage(page: Int, searchTitle: String): List<Manga> {
var currentPage = page
val url = buildString {
append("https://")
append(domain)
append("/comics?page=")
}
while (true) {
try {
val allEntries = parseMangaList(webClient.httpGet(url + currentPage).parseHtml())
if (allEntries.isEmpty()) {
break
}
searchCache.addAll(allEntries)
currentPage++
} catch (e: Exception) {
println("Error parsing page $currentPage: ${e.message}")
break
}
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }.toList()
}
/**
* Parse the list of manga from the given document
*
* @param docs the document to parse
* @return the list of manga
*/
private fun parseMangaList(docs: Document): List<Manga> {
return docs.select("main div.relative, main li.col-span-1").map {
val a = it.selectFirstOrThrow("a")
val url = a.attrAsAbsoluteUrl("href")
private fun parseMangaList(response: JSONObject): List<Manga> {
return response.getJSONArray("data").mapJSON { it ->
val id = it.getLong("id")
val url = "/comic/${it.getString("series_slug")}"
val title = it.getString("title")
val thumbnailPath = it.getString("thumbnail")
Manga(
id = generateUid(url),
id = id,
url = url,
title = (it.selectFirst("p a") ?: it.selectLast("a"))?.text().orEmpty(),
altTitle = null,
publicUrl = url,
rating = RATING_UNKNOWN,
title = title,
altTitle = it.getString("alternative_names").takeIf { it.isNotBlank() },
publicUrl = url.toAbsoluteUrl(domain),
description = it.getString("description"),
rating = it.getFloatOrDefault("rating", RATING_UNKNOWN) / 5f,
isNsfw = isNsfwSource,
coverUrl = it.selectFirstOrThrow("img").src().orEmpty(),
coverUrl = "https://media.reaperscans.com/file/4SRBHm//$thumbnailPath",
tags = emptySet(),
state = null,
state = when (it.getString("status")) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
"Dropped" -> MangaState.ABANDONED
"Hiatus" -> MangaState.PAUSED
else -> null
},
author = null,
source = source,
)
}
}
override suspend fun getAvailableTags(): Set<MangaTag> = emptySet()
/**
 * Scrapes the available genre tags from the /comics listing page.
 *
 * The site inlines its page state in a script tag; the tag list is not clean
 * JSON at the point we need it, so it is carved out with string surgery.
 * NOTE(review): this parsing is brittle — it assumes the `push(...)` payload,
 * the `tags:[...]` segment, and the `"id"`/`"name"` key order stay stable;
 * verify against the live page when the site changes.
 *
 * @return the set of parsed tags, or an empty set when the script is missing.
 */
override suspend fun getAvailableTags(): Set<MangaTag> {
val doc = webClient.httpGet("https://$domain/comics").parseHtml()
// Find the first inline script that mentions "tags" — that holds the page state.
val scriptContent = doc.select("script").find {
it.data().contains("tags")
}?.data()
if (scriptContent != null) {
// Strip the surrounding `push( ... )` call to get at the JSON-ish payload.
val jsonString = scriptContent.substringAfter("push(").substringBeforeLast(")")
val jsonArray = JSONArray(jsonString)
// Element 1 is a string blob containing the serialized children, including the tag list.
val childrenArray = jsonArray.getString(1)
// Carve out the `tags:[ ... ]` array body; entries are split on the `},{` boundary.
val tagsString = childrenArray.substringAfter("tags:[").substringBeforeLast("]")
val tagObjects = tagsString.split("},{")
return tagObjects.mapNotNullTo(mutableSetOf()) { tagString ->
// Extract the numeric id and display name from each raw entry.
val id = tagString.substringAfter("\"id\":").substringBefore(",")
val name = tagString.substringAfter("\"name\":\"").substringBefore("\"")
if (id.isNotEmpty() && name.isNotEmpty()) {
MangaTag(
key = id,
title = name.toTitleCase(sourceLocale),
source = source
)
} else {
// Malformed entry — skip it rather than fail the whole tag list.
null
}
}
}
// No matching script found: report no tags rather than erroring.
return emptySet()
}
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys)
keys.add(userAgentKey)
}
private val chapterListNextPageSelector: String = "button[wire:click*=nextPage]"
private val chapterListSelector: String = "div[wire:id] > div > ul[role=list] > li"
private val dateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", sourceLocale)
override suspend fun getDetails(manga: Manga): Manga {
val cachedChapters = chapterCache[manga.url]
if (cachedChapters != null) {
return cachedChapters
}
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val simpleDateFormat = SimpleDateFormat("dd/MM/yyyy", sourceLocale)
var totalChapters = (doc.selectFirst(selectTotalChapter)?.text()?.toFloatOrNull() ?: 0f) - 1f
val chapters = mutableSetOf<MangaChapter>()
var hasNextPage = doc.selectFirst(chapterListNextPageSelector) != null
chapters.addAll(
doc.select(chapterListSelector).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
val seriesid = manga.id
val url = "https://api.$domain/chapter/query?page=1&perPage=9999&series_id=$seriesid"
val response = makeRequest(url)
val data = response.getJSONArray("data")
return manga.copy(
chapters = data.mapJSONIndexed { index, it ->
val chapterUrl = "/series/${it.getJSONObject("series").getString("series_slug")}/${it.getString("chapter_slug")}"
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
id = it.getLong("id"),
name = it.getString("chapter_name"),
number = (data.length() - index).toFloat(),
volume = 0,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
uploadDate = parseChapterDate(dateFormat, it.getString("created_at")),
branch = null,
source = source,
)
},
)
if (!hasNextPage) {
return manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
}
)
}
val csrfToken = doc.selectFirst("meta[name=csrf-token]")?.attr("content") ?: error("Couldn't find csrf-token")
val livewareData = doc.selectFirst("div[wire:initial-data*=Models\\\\Comic]")?.attr("wire:initial-data")
?.let { JSONObject(it) } ?: error("Couldn't find LiveWireData")
val routeName =
livewareData.getJSONObject("fingerprint").getStringOrNull("name") ?: error("Couldn't find routeName")
val fingerprint = livewareData.getJSONObject("fingerprint")
var serverMemo = livewareData.getJSONObject("serverMemo")
var pageToQuery = 2
// Javascript: (Math.random() + 1).toString(36).substring(8)
val generateId = {
"1.${
Random.nextLong().toString(36)
}".substring(10)
} // Not exactly the same, but results in a 3-5 character string
while (hasNextPage) {
//need to format the payload to the expected response format since org.json.JSONObject are not ordered, and the server seems to care about the order of the keys
val payload = String.format(
responseTemplate,
fingerprint.getString("id"),
fingerprint.getString("path"),
serverMemo.getString("htmlHash"),
pageToQuery - 1,
pageToQuery - 1,
serverMemo.getJSONObject("dataMeta").getJSONObject("models").getJSONObject("comic").getString("id"),
serverMemo.getString("checksum"),
generateId(),
pageToQuery,
).toRequestBody(JSON_MEDIA_TYPE)
val headers = Headers.Builder().add("x-csrf-token", csrfToken).add("x-livewire", "true").build()
val responseData =
makeRequest("https://$domain/livewire/message/$routeName", payload, headers)
// response contains state that we need to preserve
serverMemo = mergeLeft(serverMemo, responseData.serverMemo)
val chaptersHtml = Jsoup.parse(responseData.effects.html, "https://$domain")
chapters.addAll(
chaptersHtml.select(chapterListSelector).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
volume = 0,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null,
source = source,
)
},
)
hasNextPage = chaptersHtml.selectFirst(chapterListNextPageSelector) != null
pageToQuery++
/**
 * Parses a chapter timestamp with [dateFormat] into epoch milliseconds.
 * Any parse failure is swallowed and reported as 0L.
 */
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long = try {
    dateFormat.tryParse(date)
} catch (e: Exception) {
    0L
}
val copy = manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
private val pageSelector = "div#content div.container img"
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
val processedUrls = mutableSetOf<String>()
chapterCache[manga.url] = copy
return copy
return doc.select(pageSelector).mapNotNull { img ->
val url = img.attr("data-cfsrc").takeIf { it.isNotBlank() }
?: img.attr("src").takeIf { it.isNotBlank() }
?: img.selectFirst("noscript img")?.attr("src")
?: return@mapNotNull null
val relativeUrl = url.toRelativeUrl(domain)
if (relativeUrl !in processedUrls) {
processedUrls.add(relativeUrl)
MangaPage(
id = generateUid(relativeUrl),
url = relativeUrl,
preview = null,
source = source,
)
} else {
null
}
}
}
private suspend fun makeRequest(url: String, payload: RequestBody, headers: Headers): LiveWireResponseDto {
private suspend fun makeRequest(url: String): JSONObject {
var retryCount = 0
val backoffDelay = 2000L // Initial delay (milliseconds)
val request = Request.Builder().url(url).post(payload).headers(headers).build()
val request = Request.Builder().url(url).headers(headers).build()
while (true) {
try {
val response = context.httpClient.newCall(request).execute().parseJson()
val effectsJson = response.getJSONObject("effects")
val serverMemoJson = response.getJSONObject("serverMemo")
val effects = LiveWireEffectsDto(effectsJson.getString("html"))
return LiveWireResponseDto(effects, serverMemoJson)
return response
} catch (e: Exception) {
// Log or handle the exception as needed
@ -310,72 +251,5 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
}
}
/**
* Recursively merges j2 onto j1 in place
* If j1 and j2 both contain keys whose values aren't both jsonObjects, j2's value overwrites j1's
*
*/
private fun mergeLeft(j1: JSONObject, j2: JSONObject): JSONObject {
for (key in j2.keys()) {
val j1Value = j1.opt(key)
if (j1Value !is JSONObject) {
j1.put(key, j2[key])
} else if (j2[key] is JSONObject) {
j1.put(key, mergeLeft(j1Value, j2.getJSONObject(key)))
}
}
return j1
}
/**
 * Parses a chapter date that is either a relative phrase ("3 days ago")
 * or an absolute date matching [dateFormat].
 *
 * @return epoch milliseconds, or 0 when [date] is null.
 */
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
    if (date == null) return 0
    return if (date.lowercase().endsWith(" ago")) {
        parseRelativeDate(date)
    } else {
        dateFormat.tryParse(date)
    }
}
/**
 * Converts a relative date phrase (e.g. "3 days ago") into epoch milliseconds
 * by subtracting the extracted amount from the current time.
 *
 * @param date the relative phrase; the first number found is used as the amount.
 * @return epoch milliseconds, or 0 when no number or no known unit is present.
 */
private fun parseRelativeDate(date: String): Long {
// First run of digits in the phrase is the quantity; bail out if absent.
val number = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0
val cal = Calendar.getInstance()
// Match the time unit word and step the calendar back by `number` of that unit.
// NOTE(review): relies on project `WordSet.anyWordIn` matching whole words — verify
// that singular forms ("second", "year") also cover their plurals where omitted.
return when {
WordSet("second").anyWordIn(date) -> cal.apply { add(Calendar.SECOND, -number) }.timeInMillis
WordSet("minute", "minutes").anyWordIn(date) -> cal.apply { add(Calendar.MINUTE, -number) }.timeInMillis
WordSet("hour", "hours").anyWordIn(date) -> cal.apply { add(Calendar.HOUR, -number) }.timeInMillis
WordSet("day", "days").anyWordIn(date) -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis
WordSet("week", "weeks").anyWordIn(date) -> cal.apply { add(Calendar.WEEK_OF_YEAR, -number) }.timeInMillis
WordSet("month", "months").anyWordIn(date) -> cal.apply { add(Calendar.MONTH, -number) }.timeInMillis
WordSet("year").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis
// Unknown unit: report the epoch sentinel rather than guessing.
else -> 0
}
}
/**
 * Loads the page images of a chapter by scraping its reader page.
 *
 * @throws Exception via [parseFailed] when an image element has no src.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val document = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
    return document.select("img.max-w-full").map { image ->
        // Each full-width image is one manga page; a missing src is a hard failure.
        val pageUrl = image.src()?.toRelativeUrl(domain) ?: image.parseFailed("Image src not found")
        MangaPage(
            id = generateUid(pageUrl),
            url = pageUrl,
            preview = null,
            source = source,
        )
    }
}
// Response of the Livewire "message" endpoint: rendering effects plus the
// serverMemo state blob that must be merged back into the next request.
private class LiveWireResponseDto(
val effects: LiveWireEffectsDto,
val serverMemo: JSONObject,
)
// Rendering effects of a Livewire response; `html` is the re-rendered fragment.
private class LiveWireEffectsDto(
val html: String,
)
//!IMPORTANT
private val responseTemplate =
"""{"fingerprint":{"id":"%s","name":"frontend.comic-chapter-list","locale":"en","path":"%s","method":"GET","v":"acj"},"serverMemo":{"children":[],"errors":[],"htmlHash":"%s","data":{"comic":[],"page":%d,"paginators":{"page":%d}},"dataMeta":{"models":{"comic":{"class":"App\\Models\\Comic","id":"%s","relations":[],"connection":"pgsql","collectionClass":null}}},"checksum":"%s"},"updates":[{"type":"callMethod","payload":{"id":"%s","method":"gotoPage","params":[%d,"page"]}}]}"""
}

Loading…
Cancel
Save