diff --git a/.idea/.gitignore b/.idea/.gitignore
index 26d33521..8f00030d 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,3 +1,5 @@
 # Default ignored files
 /shelf/
 /workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/ReaperComics.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/ReaperComics.kt
index 73b7e760..5fa4a412 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/ReaperComics.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/ReaperComics.kt
@@ -1,41 +1,87 @@
 package org.koitharu.kotatsu.parsers.site.en
 
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.withContext
 import okhttp3.Headers
+import okhttp3.MediaType.Companion.toMediaType
+import okhttp3.Request
+import okhttp3.RequestBody
+import okhttp3.RequestBody.Companion.toRequestBody
+import org.json.JSONObject
+import org.jsoup.Jsoup
 import org.jsoup.nodes.Document
-import org.koitharu.kotatsu.parsers.ErrorMessages
 import org.koitharu.kotatsu.parsers.MangaLoaderContext
 import org.koitharu.kotatsu.parsers.MangaSourceParser
 import org.koitharu.kotatsu.parsers.PagedMangaParser
 import org.koitharu.kotatsu.parsers.config.ConfigKey
 import org.koitharu.kotatsu.parsers.model.*
-import org.koitharu.kotatsu.parsers.network.UserAgents
 import org.koitharu.kotatsu.parsers.util.*
+import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
 import java.text.DateFormat
 import java.text.SimpleDateFormat
-import java.util.*
+import java.util.Calendar
+import java.util.EnumSet
+import kotlin.random.Random
+
+private const val TOO_MANY_REQUESTS = 429
+private const val MAX_RETRY_COUNT = 5
 
 @MangaSourceParser("REAPERCOMICS", "ReaperComics", "en")
 internal class ReaperComics(context: MangaLoaderContext) :
-	PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 30) {
+	PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 32) {
 
 	override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL)
 
 	override val configKeyDomain = ConfigKey.Domain("reaperscans.com")
 
-	override val isSearchSupported = false
+	private val userAgentKey =
+		ConfigKey.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
 
-	override val headers: Headers = Headers.Builder()
-		.add("User-Agent", UserAgents.CHROME_DESKTOP)
-		.build()
+	private val baseHeaders: Headers
+		get() = Headers.Builder().add("User-Agent", config[userAgentKey]).build()
 
-	override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
+	override val headers
+		get() = getApiHeaders()
+
+	private val selectTotalChapter = "dl.mt-2 div:nth-child(5) > dd"
+	private val selectState = "dl.mt-2 div:nth-child(4) > dd"
+
+	private val searchCache = mutableSetOf<Manga>() // Cache search results
+	private val chapterCache = mutableMapOf<String, Manga>() // Cache chapter lists
+
+	private val baseUrl = "https://reaperscans.com"
+
+	/**
+	 * Builds the request headers: the configured User-Agent plus, when the "user" cookie holds
+	 * a readable access_token, an "authorization: bearer <token>" header.
+	 */
+	private fun getApiHeaders(): Headers {
+		val userCookie = context.cookieJar.getCookies(domain).find {
+			it.name == "user"
+		} ?: return baseHeaders
+		// A malformed cookie must not break anonymous requests, so a parse failure means "no token"
+		val jo = runCatching { JSONObject(userCookie.value.urlDecode()) }.getOrNull() ?: return baseHeaders
+		val accessToken = jo.getStringOrNull("access_token") ?: return baseHeaders
+		return baseHeaders.newBuilder().add("authorization", "bearer $accessToken").build()
+	}
+
+	override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
 		val url = buildString {
 			append("https://")
 			append(domain)
 			when (filter) {
 
 				is MangaListFilter.Search -> {
-					throw IllegalArgumentException(ErrorMessages.SEARCH_NOT_SUPPORTED) // TODO
+					val searchTitle = filter.query.trim()
+					if (searchCache.isNotEmpty()) {
+						if (page > 1) {
+							return emptyList()
+						}
+						return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
+					} else {
+						return searchAllPage(page, searchTitle)
+					}
 				}
 
 				is MangaListFilter.Advanced -> {
@@ -53,13 +99,49 @@ internal class ReaperComics(context: MangaLoaderContext) :
 				}
 			}
 		}
-
 		return parseMangaList(webClient.httpGet(url).parseHtml())
 	}
 
-	private fun parseMangaList(docs: Document): List<Manga> {
-
+	/**
+	 * Crawls the catalogue page by page until an empty page or an error, stores every entry in the
+	 * search cache and returns the cached entries whose title contains the query (case-insensitive).
+	 *
+	 * @param page the page to start from
+	 * @param searchTitle the title to search for
+	 * @return the list of manga
+	 */
+	private suspend fun searchAllPage(page: Int, searchTitle: String): List<Manga> {
+		var currentPage = page
+		val url = buildString {
+			append("https://")
+			append(domain)
+			append("/comics?page=")
+		}
+		while (true) {
+			try {
+				val allEntries = parseMangaList(webClient.httpGet(url + currentPage).parseHtml())
+				if (allEntries.isEmpty()) {
+					break
+				}
+				searchCache.addAll(allEntries)
+				currentPage++
+			} catch (e: Exception) {
+				// Cancellation must propagate; any other failure just ends the crawl early
+				if (e is kotlinx.coroutines.CancellationException) throw e
+				println("Error parsing page $currentPage: ${e.message}")
+				break
+			}
+		}
+		return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }.toList()
+	}
 
+	/**
+	 * Parse the list of manga from the given document
+	 *
+	 * @param docs the document to parse
+	 * @return the list of manga
+	 */
+	private fun parseMangaList(docs: Document): List<Manga> {
 		return docs.select("main div.relative, main li.col-span-1").map {
 			val a = it.selectFirstOrThrow("a")
 			val url = a.attrAsAbsoluteUrl("href")
@@ -82,23 +164,38 @@ internal class ReaperComics(context: MangaLoaderContext) :
 
 	override suspend fun getAvailableTags(): Set<MangaTag> = emptySet()
 
+	companion object {
+		private val JSON_MEDIA_TYPE = "application/json; charset=utf-8".toMediaType()
+	}
+
+	private fun chapterListNextPageSelector(): String = "button[wire:click*=nextPage]"
+
+	private fun chapterListSelector() = "div[wire:id] > div > ul[role=list] > li"
+
+	/**
+	 * Loads description, state and the chapter list of the given manga. The first chapter page is
+	 * parsed from the details HTML; while a "next page" button is present, further pages are
+	 * requested through the livewire/message endpoint. Paginated results are kept in the chapter cache.
+	 */
 	override suspend fun getDetails(manga: Manga): Manga {
+		val cachedChapters = chapterCache[manga.url]
+		if (cachedChapters != null) {
+			return cachedChapters
+		}
+
 		val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
 		val simpleDateFormat = SimpleDateFormat("dd/MM/yyyy", sourceLocale)
-		return manga.copy(
-			description = doc.selectFirst("div.p-4 p.prose")?.html(),
-			state = when (doc.selectFirst("dl.mt-2 div:contains(Status) dd")?.text()?.lowercase()) {
-				"ongoing" -> MangaState.ONGOING
-				"complete" -> MangaState.FINISHED
-				else -> null
-			},
-			chapters = doc.select("div.p-2 div.pb-4 ul li").mapChapters(reversed = true) { i, li ->
+		var totalChapters = (doc.selectFirst(selectTotalChapter)?.text()?.toIntOrNull() ?: 0) - 1
+		val chapters = mutableSetOf<MangaChapter>()
+		var hasNextPage = doc.selectFirst(chapterListNextPageSelector()) != null
+		chapters.addAll(
+			doc.select(chapterListSelector()).mapChapters { _, li ->
 				val a = li.selectFirstOrThrow("a")
-				val chapterUrl = a.attrAsAbsoluteUrl("href").toRelativeUrl(domain)
+				val chapterUrl = a.attr("href").toRelativeUrl(domain)
 				MangaChapter(
 					id = generateUid(chapterUrl),
 					name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
-					number = i + 1,
+					number = totalChapters--,
 					url = chapterUrl,
 					scanlator = null,
 					uploadDate = parseChapterDate(
@@ -110,6 +207,149 @@ internal class ReaperComics(context: MangaLoaderContext) :
 				)
 			},
 		)
+
+		if (!hasNextPage) {
+			return manga.copy(
+				description = doc.selectFirst("div.p-4 p.prose")?.html(),
+				state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
+					"ongoing" -> MangaState.ONGOING
+					"complete" -> MangaState.FINISHED
+					else -> null
+				},
+				chapters = chapters.reversed(),
+			)
+		}
+
+		val csrfToken = doc.selectFirst("meta[name=csrf-token]")?.attr("content") ?: error("Couldn't find csrf-token")
+		val livewareData = doc.selectFirst("div[wire:initial-data*=Models\\\\Comic]")?.attr("wire:initial-data")
+			?.let { JSONObject(it) } ?: error("Couldn't find LiveWireData")
+
+		val routeName =
+			livewareData.getJSONObject("fingerprint").getStringOrNull("name") ?: error("Couldn't find routeName")
+
+		val fingerprint = livewareData.getJSONObject("fingerprint")
+		var serverMemo = livewareData.getJSONObject("serverMemo")
+
+		var pageToQuery = 2
+
+		// Javascript: (Math.random() + 1).toString(36).substring(8)
+		val generateId = { ->
+			// 36^3 until 36^5 always renders as 4-5 base-36 characters; the previous
+			// substring(10) on a random Long could throw when the rendered number was short
+			Random.nextLong(46_656L, 60_466_176L).toString(36)
+		} // Not exactly the same, but results in a 4-5 character string
+
+		while (hasNextPage) {
+			//need to format the payload to the expected response format since org.json.JSONObject are not ordered, and the server seems to care about the order of the keys
+			val payload = String.format(
+				// ROOT locale: %d must always produce ASCII digits, whatever the device locale is
+				java.util.Locale.ROOT,
+				responseTemplate,
+				fingerprint.getString("id"),
+				fingerprint.getString("path"),
+				serverMemo.getString("htmlHash"),
+				pageToQuery - 1,
+				pageToQuery - 1,
+				serverMemo.getJSONObject("dataMeta").getJSONObject("models").getJSONObject("comic").getString("id"),
+				serverMemo.getString("checksum"),
+				generateId(),
+				pageToQuery,
+			).toRequestBody(JSON_MEDIA_TYPE)
+
+			val headers = Headers.Builder().add("x-csrf-token", csrfToken).add("x-livewire", "true").build()
+
+			val responseData =
+				makeRequest("$baseUrl/livewire/message/$routeName", payload, headers)
+
+			// response contains state that we need to preserve
+			serverMemo = mergeLeft(serverMemo, responseData.serverMemo)
+			val chaptersHtml = Jsoup.parse(responseData.effects.html, baseUrl)
+			chapters.addAll(
+				chaptersHtml.select(chapterListSelector()).mapChapters { _, li ->
+					val a = li.selectFirstOrThrow("a")
+					val chapterUrl = a.attr("href").toRelativeUrl(domain)
+					MangaChapter(
+						id = generateUid(chapterUrl),
+						name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
+						number = totalChapters--,
+						url = chapterUrl,
+						scanlator = null,
+						uploadDate = parseChapterDate(
+							simpleDateFormat,
+							li.selectFirst("div.truncate div.items-center")?.text(),
+						),
+						branch = null,
+						source = source,
+					)
+				},
+			)
+			hasNextPage = chaptersHtml.selectFirst(chapterListNextPageSelector()) != null
+			pageToQuery++
+		}
+
+		val copy = manga.copy(
+			description = doc.selectFirst("div.p-4 p.prose")?.html(),
+			state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
+				"ongoing" -> MangaState.ONGOING
+				"complete" -> MangaState.FINISHED
+				else -> null
+			},
+			chapters = chapters.reversed(),
+		)
+
+		chapterCache[manga.url] = copy
+		return copy
+
+	}
+
+	/**
+	 * POSTs one Livewire message and returns its rendered html together with the new serverMemo.
+	 * A failed attempt is retried after a pause, at most [MAX_RETRY_COUNT] times.
+	 *
+	 * @throws Exception the last failure once the retries are used up (or on cancellation)
+	 */
+	private suspend fun makeRequest(url: String, payload: RequestBody, headers: Headers): LiveWireResponseDto {
+		var retryCount = 0
+		val backoffDelay = 2000L // Fixed delay between attempts (milliseconds)
+		val request = Request.Builder().url(url).post(payload).headers(headers).build()
+
+		while (true) {
+			try {
+				// execute() blocks its thread, so run it on the IO dispatcher
+				val response = withContext(Dispatchers.IO) {
+					context.httpClient.newCall(request).execute().parseJson()
+				}
+				val effectsJson = response.getJSONObject("effects")
+				val serverMemoJson = response.getJSONObject("serverMemo")
+				val effects = LiveWireEffectsDto(effectsJson.getString("html"))
+				return LiveWireResponseDto(effects, serverMemoJson)
+			} catch (e: Exception) {
+				// A cancelled coroutine must not be retried; other failures get MAX_RETRY_COUNT retries
+				if (e !is kotlinx.coroutines.CancellationException && ++retryCount <= MAX_RETRY_COUNT) {
+					delay(backoffDelay) // suspends without blocking, no dispatcher switch needed
+				} else {
+					throw e
+				}
+			}
+		}
+	}
+
+	/**
+	 * Recursively merges j2 onto j1 in place and returns j1.
+	 * A j2 value overwrites j1's unless j1 already holds a JSONObject under that key: two
+	 * JSONObjects are merged recursively, and a non-object j2 value leaves j1's object untouched.
+	 */
+	private fun mergeLeft(j1: JSONObject, j2: JSONObject): JSONObject {
+		for (key in j2.keys()) {
+			val j1Value = j1.opt(key)
+
+			if (j1Value !is JSONObject) {
+				j1.put(key, j2[key])
+			} else if (j2[key] is JSONObject) {
+				j1.put(key, mergeLeft(j1Value, j2.getJSONObject(key)))
+			}
+		}
+		return j1
 	}
 
 	private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
@@ -148,3 +388,19 @@ internal class ReaperComics(context: MangaLoaderContext) :
 		}
 	}
 }
+
+/** Decoded Livewire response: the rendered effects plus the serverMemo state to merge back. */
+data class LiveWireResponseDto(
+	val effects: LiveWireEffectsDto,
+	val serverMemo: JSONObject,
+)
+
+/** The "effects" part of a Livewire response; html is the markup parsed for the next chapter page. */
+data class LiveWireEffectsDto(
+	val html: String,
+)
+
+// Livewire request body, filled in with String.format. It is a hand-written template because
+// the server seems to care about the key order, which org.json.JSONObject does not preserve.
+// Placeholders: fingerprint id, path, htmlHash, page, page, comic id, checksum, request id, target page.
+private val responseTemplate = """{"fingerprint":{"id":"%s","name":"frontend.comic-chapter-list","locale":"en","path":"%s","method":"GET","v":"acj"},"serverMemo":{"children":[],"errors":[],"htmlHash":"%s","data":{"comic":[],"page":%d,"paginators":{"page":%d}},"dataMeta":{"models":{"comic":{"class":"App\\Models\\Comic","id":"%s","relations":[],"connection":"pgsql","collectionClass":null}}},"checksum":"%s"},"updates":[{"type":"callMethod","payload":{"id":"%s","method":"gotoPage","params":[%d,"page"]}}]}"""