Merge pull request #967 from NagaYZ/fix-reaper

Fix ReaperScans parser after site structure change
master
devi 2 years ago committed by GitHub
commit 5f771973a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -4,33 +4,27 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import okhttp3.Headers import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request import okhttp3.Request
import okhttp3.RequestBody import org.json.JSONArray
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject import org.json.JSONObject
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull import org.koitharu.kotatsu.parsers.util.json.*
import java.text.DateFormat import java.text.DateFormat
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.* import java.util.*
import kotlin.random.Random
private const val MAX_RETRY_COUNT = 5 private const val MAX_RETRY_COUNT = 5
// Media type attached to the JSON request bodies; the getter re-evaluates the expression on every access.
private val JSON_MEDIA_TYPE get() = "application/json; charset=utf-8".toMediaType()
@MangaSourceParser("REAPERCOMICS", "ReaperComics", "en") @MangaSourceParser("REAPERCOMICS", "ReaperComics", "en")
internal class ReaperComics(context: MangaLoaderContext) : internal class ReaperComics(context: MangaLoaderContext) :
PagedMangaParser(context, MangaParserSource.REAPERCOMICS, pageSize = 32) { PagedMangaParser(context, MangaParserSource.REAPERCOMICS, pageSize = 20) {
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL) override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL, SortOrder.POPULARITY, SortOrder.NEWEST, SortOrder.ALPHABETICAL_DESC)
override val configKeyDomain = ConfigKey.Domain("reaperscans.com") override val configKeyDomain = ConfigKey.Domain("reaperscans.com")
@ -42,12 +36,6 @@ internal class ReaperComics(context: MangaLoaderContext) :
override val headers override val headers
get() = getApiHeaders() get() = getApiHeaders()
// CSS selector for the <dd> whose text getDetails reads as the total chapter count.
private val selectTotalChapter = "dl.mt-2 div:nth-child(5) > dd"
// CSS selector for the <dd> whose text getDetails maps to a MangaState ("ongoing" / "complete").
private val selectState = "dl.mt-2 div:nth-child(4) > dd"
// Filled by searchAllPage; once non-empty, title searches filter this set instead of refetching.
// NOTE(review): plain mutable collection — confirm it is never touched from concurrent coroutines.
private val searchCache = mutableSetOf<Manga>() // Cache search results
// Keyed by manga.url; getDetails returns the stored copy instead of scraping the chapters again.
private val chapterCache = mutableMapOf<String, Manga>() // Cache chapter lists
private fun getApiHeaders(): Headers { private fun getApiHeaders(): Headers {
val userCookie = context.cookieJar.getCookies(domain).find { val userCookie = context.cookieJar.getCookies(domain).find {
it.name == "user" it.name == "user"
@ -58,245 +46,198 @@ internal class ReaperComics(context: MangaLoaderContext) :
} }
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> { override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
if(page > 1) return emptyList()
val url = buildString { val url = buildString {
append("https://") append("https://")
append(domain) append("api.$domain")
append("/query?page=$page&perPage=9999&series_type=Comic")
when (filter) { when (filter) {
is MangaListFilter.Search -> { is MangaListFilter.Search -> {
val searchTitle = filter.query.trim() append("&query_string=")
if (searchCache.isNotEmpty()) { append(filter.query.urlEncoded())
if (page > 1) {
return emptyList()
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
} else {
return searchAllPage(page, searchTitle)
}
} }
is MangaListFilter.Advanced -> { is MangaListFilter.Advanced -> {
append("/") append("&orderBy=")
if (filter.sortOrder == SortOrder.UPDATED) { val order = when (filter.sortOrder) {
append("latest/") SortOrder.UPDATED -> "updated_at"
SortOrder.POPULARITY -> "total_views"
SortOrder.NEWEST -> "created_at"
SortOrder.ALPHABETICAL -> "title"
SortOrder.ALPHABETICAL_DESC -> "title"
else -> "updated_at"
}
append(order)
val sortOrder = if (filter.sortOrder == SortOrder.ALPHABETICAL_DESC) "desc" else "asc"
append("&order=$sortOrder")
filter.states.oneOrThrowIfMany()?.let {
append("&status=")
append(
when (it) {
MangaState.ONGOING -> "Ongoing"
MangaState.FINISHED -> "Completed"
MangaState.ABANDONED -> "Dropped"
MangaState.PAUSED -> "Hiatus"
else -> "All"
},
)
}
if (filter.tags.isNotEmpty()) {
append("&tags_ids=")
append(filter.tags.joinToString(separator = "%") { it.key })
} }
append("comics?page=")
append(page.toString())
} }
null -> { null -> {
append("/latest/comics?page=") append("&orderBy=updated_at")
append(page.toString()) append("&order=asc")
append("&adult=true")
append("&status=All")
} }
} }
} }
return parseMangaList(webClient.httpGet(url).parseHtml()) return parseMangaList(webClient.httpGet(url).parseJson())
} }
/**
 * Walks the paginated `/comics` listing starting at [page], adds every parsed entry to
 * [searchCache], and returns the cached manga whose title contains [searchTitle].
 *
 * Paging stops at the first page that yields no entries, or at the first failure. A failure is
 * logged and whatever has been cached so far is still searched. Coroutine cancellation is not
 * treated as a failure: it is rethrown so the caller's cancellation takes effect.
 *
 * @param page the page to start from
 * @param searchTitle the title fragment to look for (case-insensitive)
 * @return the cached manga whose title contains [searchTitle]
 */
private suspend fun searchAllPage(page: Int, searchTitle: String): List<Manga> {
    val baseUrl = "https://$domain/comics?page="
    var currentPage = page
    while (true) {
        try {
            val entries = parseMangaList(webClient.httpGet(baseUrl + currentPage).parseHtml())
            if (entries.isEmpty()) {
                break
            }
            searchCache.addAll(entries)
            currentPage++
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Must propagate: swallowing it would let a cancelled search keep running and return results.
            throw e
        } catch (e: Exception) {
            println("Error parsing page $currentPage: ${e.message}")
            break
        }
    }
    return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
}
/** private fun parseMangaList(response: JSONObject): List<Manga> {
* Parse the list of manga from the given document return response.getJSONArray("data").mapJSON { it ->
* val id = it.getLong("id")
* @param docs the document to parse val url = "/comic/${it.getString("series_slug")}"
* @return the list of manga val title = it.getString("title")
*/ val thumbnailPath = it.getString("thumbnail")
private fun parseMangaList(docs: Document): List<Manga> {
return docs.select("main div.relative, main li.col-span-1").map {
val a = it.selectFirstOrThrow("a")
val url = a.attrAsAbsoluteUrl("href")
Manga( Manga(
id = generateUid(url), id = id,
url = url, url = url,
title = (it.selectFirst("p a") ?: it.selectLast("a"))?.text().orEmpty(), title = title,
altTitle = null, altTitle = it.getString("alternative_names").takeIf { it.isNotBlank() },
publicUrl = url, publicUrl = url.toAbsoluteUrl(domain),
rating = RATING_UNKNOWN, description = it.getString("description"),
rating = it.getFloatOrDefault("rating", RATING_UNKNOWN) / 5f,
isNsfw = isNsfwSource, isNsfw = isNsfwSource,
coverUrl = it.selectFirstOrThrow("img").src().orEmpty(), coverUrl = "https://media.reaperscans.com/file/4SRBHm//$thumbnailPath",
tags = emptySet(), tags = emptySet(),
state = null, state = when (it.getString("status")) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
"Dropped" -> MangaState.ABANDONED
"Hiatus" -> MangaState.PAUSED
else -> null
},
author = null, author = null,
source = source, source = source,
) )
} }
} }
/** Always reports an empty tag set. */
override suspend fun getAvailableTags(): Set<MangaTag> =
    emptySet()
/**
 * Scrapes the tag list embedded in a `<script>` block of the `/comics` page.
 *
 * Takes the first script whose data contains "tags", parses the argument of its `push(...)`
 * call as a JSON array, reads element 1 as a string and cuts the `tags:[...]` section out of it.
 * That section is split on `},{`, and each fragment contributes one [MangaTag] built from its
 * `"id"` and `"name"` fields. Fragments missing either field are skipped.
 *
 * @return the parsed tags, or an empty set when no script mentions "tags"
 */
override suspend fun getAvailableTags(): Set<MangaTag> {
    val doc = webClient.httpGet("https://$domain/comics").parseHtml()
    val scriptContent = doc.select("script")
        .find { it.data().contains("tags") }
        ?.data()
        ?: return emptySet()

    val pushArgument = scriptContent.substringAfter("push(").substringBeforeLast(")")
    val payload = JSONArray(pushArgument).getString(1)
    val tagsSection = payload.substringAfter("tags:[").substringBeforeLast("]")

    return tagsSection.split("},{").mapNotNullTo(mutableSetOf()) { fragment ->
        // substringAfter returns the whole input when the delimiter is absent; passing "" as the
        // missing-delimiter value makes an absent field come back empty so the guard below can
        // actually reject the fragment instead of emitting a garbage tag.
        val id = fragment.substringAfter("\"id\":", "").substringBefore(",")
        val name = fragment.substringAfter("\"name\":\"", "").substringBefore("\"")
        if (id.isNotEmpty() && name.isNotEmpty()) {
            MangaTag(
                key = id,
                title = name.toTitleCase(sourceLocale),
                source = source,
            )
        } else {
            null
        }
    }
}
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) { override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys) super.onCreateConfig(keys)
keys.add(userAgentKey) keys.add(userAgentKey)
} }
// Format used to parse the "created_at" value of each chapter. The pattern quotes 'Z' as a
// literal character, so the formatter carries no zone information of its own; the zone is pinned
// to UTC (what a trailing "Z" denotes in ISO 8601) so dates are not read in the default time zone.
private val dateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", sourceLocale).apply {
    timeZone = TimeZone.getTimeZone("UTC")
}
// Selector for the "next page" button of the chapter list; getDetails keeps paging while it is present.
private val chapterListNextPageSelector: String = "button[wire:click*=nextPage]"
// Selector for the <li> rows of the chapter list; each match is turned into one MangaChapter.
private val chapterListSelector: String = "div[wire:id] > div > ul[role=list] > li"
override suspend fun getDetails(manga: Manga): Manga { override suspend fun getDetails(manga: Manga): Manga {
val cachedChapters = chapterCache[manga.url] val seriesid = manga.id
if (cachedChapters != null) { val url = "https://api.$domain/chapter/query?page=1&perPage=9999&series_id=$seriesid"
return cachedChapters val response = makeRequest(url)
} val data = response.getJSONArray("data")
return manga.copy(
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() chapters = data.mapJSONIndexed { index, it ->
val simpleDateFormat = SimpleDateFormat("dd/MM/yyyy", sourceLocale) val chapterUrl = "/series/${it.getJSONObject("series").getString("series_slug")}/${it.getString("chapter_slug")}"
var totalChapters = (doc.selectFirst(selectTotalChapter)?.text()?.toFloatOrNull() ?: 0f) - 1f
val chapters = mutableSetOf<MangaChapter>()
var hasNextPage = doc.selectFirst(chapterListNextPageSelector) != null
chapters.addAll(
doc.select(chapterListSelector).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter( MangaChapter(
id = generateUid(chapterUrl), id = it.getLong("id"),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(), name = it.getString("chapter_name"),
number = totalChapters--, number = (data.length() - index).toFloat(),
volume = 0, volume = 0,
url = chapterUrl, url = chapterUrl,
scanlator = null, scanlator = null,
uploadDate = parseChapterDate( uploadDate = parseChapterDate(dateFormat, it.getString("created_at")),
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null, branch = null,
source = source, source = source,
) )
}, }
) )
}
if (!hasNextPage) { private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
return manga.copy( return try {
description = doc.selectFirst("div.p-4 p.prose")?.html(), dateFormat.tryParse(date)
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) { } catch (e: Exception) {
"ongoing" -> MangaState.ONGOING 0L
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
}
val csrfToken = doc.selectFirst("meta[name=csrf-token]")?.attr("content") ?: error("Couldn't find csrf-token")
val livewareData = doc.selectFirst("div[wire:initial-data*=Models\\\\Comic]")?.attr("wire:initial-data")
?.let { JSONObject(it) } ?: error("Couldn't find LiveWireData")
val routeName =
livewareData.getJSONObject("fingerprint").getStringOrNull("name") ?: error("Couldn't find routeName")
val fingerprint = livewareData.getJSONObject("fingerprint")
var serverMemo = livewareData.getJSONObject("serverMemo")
var pageToQuery = 2
// Javascript: (Math.random() + 1).toString(36).substring(8)
val generateId = {
"1.${
Random.nextLong().toString(36)
}".substring(10)
} // Not exactly the same, but results in a 3-5 character string
while (hasNextPage) {
//need to format the payload to the expected response format since org.json.JSONObject are not ordered, and the server seems to care about the order of the keys
val payload = String.format(
responseTemplate,
fingerprint.getString("id"),
fingerprint.getString("path"),
serverMemo.getString("htmlHash"),
pageToQuery - 1,
pageToQuery - 1,
serverMemo.getJSONObject("dataMeta").getJSONObject("models").getJSONObject("comic").getString("id"),
serverMemo.getString("checksum"),
generateId(),
pageToQuery,
).toRequestBody(JSON_MEDIA_TYPE)
val headers = Headers.Builder().add("x-csrf-token", csrfToken).add("x-livewire", "true").build()
val responseData =
makeRequest("https://$domain/livewire/message/$routeName", payload, headers)
// response contains state that we need to preserve
serverMemo = mergeLeft(serverMemo, responseData.serverMemo)
val chaptersHtml = Jsoup.parse(responseData.effects.html, "https://$domain")
chapters.addAll(
chaptersHtml.select(chapterListSelector).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
volume = 0,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null,
source = source,
)
},
)
hasNextPage = chaptersHtml.selectFirst(chapterListNextPageSelector) != null
pageToQuery++
} }
}
val copy = manga.copy( private val pageSelector = "div#content div.container img"
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
chapterCache[manga.url] = copy
return copy
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
val processedUrls = mutableSetOf<String>()
return doc.select(pageSelector).mapNotNull { img ->
val url = img.attr("data-cfsrc").takeIf { it.isNotBlank() }
?: img.attr("src").takeIf { it.isNotBlank() }
?: img.selectFirst("noscript img")?.attr("src")
?: return@mapNotNull null
val relativeUrl = url.toRelativeUrl(domain)
if (relativeUrl !in processedUrls) {
processedUrls.add(relativeUrl)
MangaPage(
id = generateUid(relativeUrl),
url = relativeUrl,
preview = null,
source = source,
)
} else {
null
}
}
} }
private suspend fun makeRequest(url: String, payload: RequestBody, headers: Headers): LiveWireResponseDto { private suspend fun makeRequest(url: String): JSONObject {
var retryCount = 0 var retryCount = 0
val backoffDelay = 2000L // Initial delay (milliseconds) val backoffDelay = 2000L // Initial delay (milliseconds)
val request = Request.Builder().url(url).post(payload).headers(headers).build() val request = Request.Builder().url(url).headers(headers).build()
while (true) { while (true) {
try { try {
val response = context.httpClient.newCall(request).execute().parseJson() val response = context.httpClient.newCall(request).execute().parseJson()
val effectsJson = response.getJSONObject("effects") return response
val serverMemoJson = response.getJSONObject("serverMemo")
val effects = LiveWireEffectsDto(effectsJson.getString("html"))
return LiveWireResponseDto(effects, serverMemoJson)
} catch (e: Exception) { } catch (e: Exception) {
// Log or handle the exception as needed // Log or handle the exception as needed
@ -310,72 +251,5 @@ internal class ReaperComics(context: MangaLoaderContext) :
} }
} }
} }
/**
 * Merges [j2] into [j1] in place and returns [j1].
 *
 * For every key of [j2]:
 * - if [j1]'s value for that key is not a JSONObject (including when the key is absent),
 *   [j2]'s value is stored under the key;
 * - if both values are JSONObjects, they are merged recursively;
 * - if [j1] holds a JSONObject but [j2] does not, [j1]'s value is left untouched.
 */
private fun mergeLeft(j1: JSONObject, j2: JSONObject): JSONObject {
    j2.keys().forEach { key ->
        val current = j1.opt(key)
        val incoming = j2.get(key)
        when {
            current !is JSONObject -> j1.put(key, incoming)
            incoming is JSONObject -> j1.put(key, mergeLeft(current, incoming))
        }
    }
    return j1
}
/**
 * Converts a chapter date string to the Long produced by the matching parser.
 *
 * @param dateFormat format handed to `tryParse` for dates that are not relative
 * @param date raw date text; `null` yields 0
 * @return the result of [parseRelativeDate] when the lowercased text ends with " ago",
 * otherwise the result of `dateFormat.tryParse(date)`
 */
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
    if (date == null) return 0
    val isRelative = date.lowercase().endsWith(" ago")
    return if (isRelative) parseRelativeDate(date) else dateFormat.tryParse(date)
}
/**
 * Turns a relative date such as "3 days ago" into epoch milliseconds.
 *
 * The first run of digits in [date] is the amount. The unit is the first of second, minute,
 * hour, day, week, month, year whose word set reports a match in [date]. The amount is then
 * subtracted from the current time in that unit.
 *
 * @return the computed timestamp, or 0 when no number or no known unit is found
 */
private fun parseRelativeDate(date: String): Long {
    val amount = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0
    val now = Calendar.getInstance()
    val field = when {
        WordSet("second").anyWordIn(date) -> Calendar.SECOND
        WordSet("minute", "minutes").anyWordIn(date) -> Calendar.MINUTE
        WordSet("hour", "hours").anyWordIn(date) -> Calendar.HOUR
        WordSet("day", "days").anyWordIn(date) -> Calendar.DAY_OF_MONTH
        WordSet("week", "weeks").anyWordIn(date) -> Calendar.WEEK_OF_YEAR
        WordSet("month", "months").anyWordIn(date) -> Calendar.MONTH
        WordSet("year").anyWordIn(date) -> Calendar.YEAR
        else -> return 0
    }
    now.add(field, -amount)
    return now.timeInMillis
}
/**
 * Loads the chapter page and returns one [MangaPage] per `img.max-w-full` element.
 *
 * Each page's url is the image source made relative to [domain]; an image without a source
 * aborts parsing via `parseFailed`.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val chapterUrl = chapter.url.toAbsoluteUrl(domain)
    val images = webClient.httpGet(chapterUrl).parseHtml().select("img.max-w-full")
    return images.map { img ->
        val pageUrl = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
        MangaPage(id = generateUid(pageUrl), url = pageUrl, preview = null, source = source)
    }
}
/** The two parts of a response that makeRequest extracts: its "effects" and its "serverMemo" object. */
private class LiveWireResponseDto(val effects: LiveWireEffectsDto, val serverMemo: JSONObject)
/** Holds the "html" string taken from a response's "effects" object. */
private class LiveWireEffectsDto(val html: String)
// IMPORTANT: request body for the chapter list "gotoPage" call, kept as a literal template rather
// than built with org.json.JSONObject, because JSONObject does not preserve key order and the
// server seems to care about it. Do not reorder keys or placeholders.
// getDetails fills the placeholders with String.format, in this order: fingerprint id,
// fingerprint path, htmlHash, page, paginators.page (both pageToQuery - 1), comic model id,
// checksum, a generated update id, and the page number to go to.
private val responseTemplate =
"""{"fingerprint":{"id":"%s","name":"frontend.comic-chapter-list","locale":"en","path":"%s","method":"GET","v":"acj"},"serverMemo":{"children":[],"errors":[],"htmlHash":"%s","data":{"comic":[],"page":%d,"paginators":{"page":%d}},"dataMeta":{"models":{"comic":{"class":"App\\Models\\Comic","id":"%s","relations":[],"connection":"pgsql","collectionClass":null}}},"checksum":"%s"},"updates":[{"type":"callMethod","payload":{"id":"%s","method":"gotoPage","params":[%d,"page"]}}]}"""
} }

Loading…
Cancel
Save