Merge pull request #620 from NagaYZ/feature-search-reaper

Feature search reaper
master
Koitharu 2 years ago committed by GitHub
commit ba8682f79e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

2
.idea/.gitignore vendored

@ -1,3 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# GitHub Copilot persisted chat sessions
/copilot/chatSessions

@ -1,41 +1,81 @@
package org.koitharu.kotatsu.parsers.site.en
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.ErrorMessages
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.network.UserAgents
import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.*
import java.util.Calendar
import java.util.EnumSet
import kotlin.random.Random
private const val TOO_MANY_REQUESTS = 429
private const val MAX_RETRY_COUNT = 5
@MangaSourceParser("REAPERCOMICS", "ReaperComics", "en")
internal class ReaperComics(context: MangaLoaderContext) :
PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 30) {
PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 32) {
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL)
override val configKeyDomain = ConfigKey.Domain("reaperscans.com")
override val isSearchSupported = false
private val userAgentKey =
ConfigKey.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36")
override val headers: Headers = Headers.Builder()
.add("User-Agent", UserAgents.CHROME_DESKTOP)
.build()
private val baseHeaders: Headers
get() = Headers.Builder().add("User-Agent", config[userAgentKey]).build()
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
override val headers
get() = getApiHeaders()
private val selectTotalChapter = "dl.mt-2 div:nth-child(5) > dd"
private val selectState = "dl.mt-2 div:nth-child(4) > dd"
private val searchCache = mutableSetOf<Manga>() // Cache search results
private val chapterCache = mutableMapOf<String, Manga>() // Cache chapter lists
private val baseUrl = "https://reaperscans.com"
private fun getApiHeaders(): Headers {
val userCookie = context.cookieJar.getCookies(domain).find {
it.name == "user"
} ?: return baseHeaders
val jo = JSONObject(userCookie.value.urlDecode())
val accessToken = jo.getStringOrNull("access_token") ?: return baseHeaders
return baseHeaders.newBuilder().add("authorization", "bearer $accessToken").build()
}
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
val url = buildString {
append("https://")
append(domain)
when (filter) {
is MangaListFilter.Search -> {
throw IllegalArgumentException(ErrorMessages.SEARCH_NOT_SUPPORTED) // TODO
val searchTitle = filter.query.trim()
if (searchCache.isNotEmpty()) {
if (page > 1) {
return emptyList()
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
} else {
return searchAllPage(page, searchTitle)
}
}
is MangaListFilter.Advanced -> {
@ -53,13 +93,46 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
}
}
return parseMangaList(webClient.httpGet(url).parseHtml())
}
private fun parseMangaList(docs: Document): List<Manga> {
/**
* Search once all pages and stores them in cache
*
* @param page the page to start from
* @param searchTitle the title to search for
* @return the list of manga
*/
private suspend fun searchAllPage(page: Int, searchTitle: String): List<Manga> {
var currentPage = page
val url = buildString {
append("https://")
append(domain)
append("/comics?page=")
}
while (true) {
try {
val allEntries = parseMangaList(webClient.httpGet(url + currentPage).parseHtml())
if (allEntries.isEmpty()) {
break
}
searchCache.addAll(allEntries)
currentPage++
} catch (e: Exception) {
println("Error parsing page $currentPage: ${e.message}")
break
}
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }.toList()
}
/**
* Parse the list of manga from the given document
*
* @param docs the document to parse
* @return the list of manga
*/
private fun parseMangaList(docs: Document): List<Manga> {
return docs.select("main div.relative, main li.col-span-1").map {
val a = it.selectFirstOrThrow("a")
val url = a.attrAsAbsoluteUrl("href")
@ -82,23 +155,33 @@ internal class ReaperComics(context: MangaLoaderContext) :
override suspend fun getAvailableTags(): Set<MangaTag> = emptySet()
companion object {
private val JSON_MEDIA_TYPE = "application/json; charset=utf-8".toMediaType()
}
private fun chapterListNextPageSelector(): String = "button[wire:click*=nextPage]"
private fun chapterListSelector() = "div[wire:id] > div > ul[role=list] > li"
override suspend fun getDetails(manga: Manga): Manga {
val cachedChapters = chapterCache[manga.url]
if (cachedChapters != null) {
return cachedChapters
}
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val simpleDateFormat = SimpleDateFormat("dd/MM/yyyy", sourceLocale)
return manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst("dl.mt-2 div:contains(Status) dd")?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = doc.select("div.p-2 div.pb-4 ul li").mapChapters(reversed = true) { i, li ->
var totalChapters = (doc.selectFirst(selectTotalChapter)?.text()?.toIntOrNull() ?: 0) - 1
val chapters = mutableSetOf<MangaChapter>()
var hasNextPage = doc.selectFirst(chapterListNextPageSelector()) != null
chapters.addAll(
doc.select(chapterListSelector()).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attrAsAbsoluteUrl("href").toRelativeUrl(domain)
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = i + 1,
number = totalChapters--,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
@ -110,6 +193,141 @@ internal class ReaperComics(context: MangaLoaderContext) :
)
},
)
if (!hasNextPage) {
return manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
}
val csrfToken = doc.selectFirst("meta[name=csrf-token]")?.attr("content") ?: error("Couldn't find csrf-token")
val livewareData = doc.selectFirst("div[wire:initial-data*=Models\\\\Comic]")?.attr("wire:initial-data")
?.let { JSONObject(it) } ?: error("Couldn't find LiveWireData")
val routeName =
livewareData.getJSONObject("fingerprint").getStringOrNull("name") ?: error("Couldn't find routeName")
val fingerprint = livewareData.getJSONObject("fingerprint")
var serverMemo = livewareData.getJSONObject("serverMemo")
var pageToQuery = 2
// Javascript: (Math.random() + 1).toString(36).substring(8)
val generateId = { ->
"1.${
Random.nextLong().toString(36)
}".substring(10)
} // Not exactly the same, but results in a 3-5 character string
while (hasNextPage) {
//need to format the payload to the expected response format since org.json.JSONObject are not ordered, and the server seems to care about the order of the keys
val payload = String.format(
responseTemplate,
fingerprint.getString("id"),
fingerprint.getString("path"),
serverMemo.getString("htmlHash"),
pageToQuery - 1,
pageToQuery - 1,
serverMemo.getJSONObject("dataMeta").getJSONObject("models").getJSONObject("comic").getString("id"),
serverMemo.getString("checksum"),
generateId(),
pageToQuery,
).toRequestBody(JSON_MEDIA_TYPE)
val headers = Headers.Builder().add("x-csrf-token", csrfToken).add("x-livewire", "true").build()
val responseData =
makeRequest("$baseUrl/livewire/message/$routeName", payload, headers)
// response contains state that we need to preserve
serverMemo = mergeLeft(serverMemo, responseData.serverMemo)
val chaptersHtml = Jsoup.parse(responseData.effects.html, baseUrl)
chapters.addAll(
chaptersHtml.select(chapterListSelector()).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null,
source = source,
)
},
)
hasNextPage = chaptersHtml.selectFirst(chapterListNextPageSelector()) != null
pageToQuery++
}
val copy = manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
chapterCache[manga.url] = copy
return copy
}
private suspend fun makeRequest(url: String, payload: RequestBody, headers: Headers): LiveWireResponseDto {
var retryCount = 0
val backoffDelay = 2000L // Initial delay (milliseconds)
val request = Request.Builder().url(url).post(payload).headers(headers).build()
while (true) {
try {
val response = context.httpClient.newCall(request).execute().parseJson()
val effectsJson = response.getJSONObject("effects")
val serverMemoJson = response.getJSONObject("serverMemo")
val effects = LiveWireEffectsDto(effectsJson.getString("html"))
return LiveWireResponseDto(effects, serverMemoJson)
} catch (e: Exception) {
// Log or handle the exception as needed
if (++retryCount <= MAX_RETRY_COUNT) {
withContext(Dispatchers.Default) {
delay(backoffDelay)
}
} else {
throw e
}
}
}
}
/**
* Recursively merges j2 onto j1 in place
* If j1 and j2 both contain keys whose values aren't both jsonObjects, j2's value overwrites j1's
*
*/
private fun mergeLeft(j1: JSONObject, j2: JSONObject): JSONObject {
for (key in j2.keys()) {
val j1Value = j1.opt(key)
if (j1Value !is JSONObject) {
j1.put(key, j2[key])
} else if (j2[key] is JSONObject) {
j1.put(key, mergeLeft(j1Value, j2.getJSONObject(key)))
}
}
return j1
}
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
@ -148,3 +366,15 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
}
}
data class LiveWireResponseDto(
val effects: LiveWireEffectsDto,
val serverMemo: JSONObject,
)
data class LiveWireEffectsDto(
val html: String,
)
//!IMPORTANT
private val responseTemplate = """{"fingerprint":{"id":"%s","name":"frontend.comic-chapter-list","locale":"en","path":"%s","method":"GET","v":"acj"},"serverMemo":{"children":[],"errors":[],"htmlHash":"%s","data":{"comic":[],"page":%d,"paginators":{"page":%d}},"dataMeta":{"models":{"comic":{"class":"App\\Models\\Comic","id":"%s","relations":[],"connection":"pgsql","collectionClass":null}}},"checksum":"%s"},"updates":[{"type":"callMethod","payload":{"id":"%s","method":"gotoPage","params":[%d,"page"]}}]}"""

Loading…
Cancel
Save