reaper changed site structure

master
Naga 2 years ago
parent ad7c953d29
commit 0d3d8232f9

@ -1,5 +1,6 @@
package org.koitharu.kotatsu.parsers.site.en
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
@ -7,17 +8,15 @@ import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONArray
import org.json.JSONObject
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.json.*
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.*
@ -25,13 +24,12 @@ import kotlin.random.Random
private const val TOO_MANY_REQUESTS = 429
private const val MAX_RETRY_COUNT = 5
private val JSON_MEDIA_TYPE get() = "application/json; charset=utf-8".toMediaType()
@MangaSourceParser("REAPERCOMICS", "ReaperComics", "en")
internal class ReaperComics(context: MangaLoaderContext) :
PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 32) {
PagedMangaParser(context, MangaSource.REAPERCOMICS, pageSize = 20) {
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL)
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED, SortOrder.ALPHABETICAL, SortOrder.POPULARITY, SortOrder.NEWEST, SortOrder.ALPHABETICAL_DESC)
override val configKeyDomain = ConfigKey.Domain("reaperscans.com")
@ -45,12 +43,6 @@ internal class ReaperComics(context: MangaLoaderContext) :
override val headers
get() = getApiHeaders()
private val selectTotalChapter = "dl.mt-2 div:nth-child(5) > dd"
private val selectState = "dl.mt-2 div:nth-child(4) > dd"
private val searchCache = mutableSetOf<Manga>() // Cache search results
private val chapterCache = mutableMapOf<String, Manga>() // Cache chapter lists
private fun getApiHeaders(): Headers {
val userCookie = context.cookieJar.getCookies(domain).find {
it.name == "user"
@ -61,243 +53,185 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
if(page > 1) return emptyList()
val url = buildString {
append("https://")
append(domain)
append("api.$domain")
append("/query?page=$page&perPage=9999&series_type=Comic")
when (filter) {
is MangaListFilter.Search -> {
val searchTitle = filter.query.trim()
if (searchCache.isNotEmpty()) {
if (page > 1) {
return emptyList()
}
return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
} else {
return searchAllPage(page, searchTitle)
}
append("&query_string=")
append(filter.query.urlEncoded())
}
is MangaListFilter.Advanced -> {
append("/")
if (filter.sortOrder == SortOrder.UPDATED) {
append("latest/")
append("&orderBy=")
val order = when (filter.sortOrder) {
SortOrder.UPDATED -> "updated_at"
SortOrder.POPULARITY -> "total_views"
SortOrder.NEWEST -> "created_at"
SortOrder.ALPHABETICAL -> "title"
SortOrder.ALPHABETICAL_DESC -> "title"
else -> "updated_at"
}
append(order)
val sortOrder = if (filter.sortOrder == SortOrder.ALPHABETICAL_DESC) "desc" else "asc"
append("&order=$sortOrder")
filter.states.oneOrThrowIfMany()?.let {
append("&status=")
append(
when (it) {
MangaState.ONGOING -> "Ongoing"
MangaState.FINISHED -> "Completed"
MangaState.ABANDONED -> "Dropped"
MangaState.PAUSED -> "Hiatus"
else -> "All"
},
)
}
if (filter.tags.isNotEmpty()) {
append("&tags_ids=")
append(filter.tags.joinToString(separator = "%") { it.key })
}
append("comics?page=")
append(page.toString())
}
null -> {
append("/latest/comics?page=")
append(page.toString())
append("&orderBy=updated_at")
append("&order=asc")
append("&adult=true")
append("&status=All")
}
}
}
return parseMangaList(webClient.httpGet(url).parseHtml())
return parseMangaList(webClient.httpGet(url).parseJson())
}
/**
 * Crawls the paginated comics listing starting at [page], stores every entry found in
 * [searchCache], and returns the cached entries whose title contains [searchTitle].
 *
 * Crawling stops at the first empty page, or at the first page that fails to load or
 * parse. Such a failure is logged and the entries cached so far are still searched
 * (best effort). Coroutine cancellation is not treated as a page failure: it is rethrown
 * so that a cancelled caller actually stops.
 *
 * @param page the page to start from
 * @param searchTitle the title fragment to look for (case-insensitive)
 * @return the cached manga whose title contains [searchTitle]
 */
private suspend fun searchAllPage(page: Int, searchTitle: String): List<Manga> {
    val baseUrl = "https://$domain/comics?page="
    var currentPage = page
    while (true) {
        val entries = try {
            parseMangaList(webClient.httpGet(baseUrl + currentPage).parseHtml())
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Must propagate: swallowing it would keep a cancelled coroutine running.
            throw e
        } catch (e: Exception) {
            println("Error parsing page $currentPage: ${e.message}")
            break
        }
        if (entries.isEmpty()) {
            break
        }
        searchCache.addAll(entries)
        currentPage++
    }
    return searchCache.filter { it.title.contains(searchTitle, ignoreCase = true) }
}
/**
* Parse the list of manga from the given document
*
* @param docs the document to parse
* @return the list of manga
*/
private fun parseMangaList(docs: Document): List<Manga> {
return docs.select("main div.relative, main li.col-span-1").map {
val a = it.selectFirstOrThrow("a")
val url = a.attrAsAbsoluteUrl("href")
private fun parseMangaList(response: JSONObject): List<Manga> {
return response.getJSONArray("data").mapJSON { it ->
val id = it.getLong("id")
val url = "/comic/${it.getString("series_slug")}"
val title = it.getString("title")
val thumbnailPath = it.getString("thumbnail")
Manga(
id = generateUid(url),
id = id,
url = url,
title = (it.selectFirst("p a") ?: it.selectLast("a"))?.text().orEmpty(),
altTitle = null,
publicUrl = url,
rating = RATING_UNKNOWN,
title = title,
altTitle = it.getString("alternative_names").takeIf { it.isNotBlank() },
publicUrl = url.toAbsoluteUrl(domain),
description = it.getString("description"),
rating = it.getFloatOrDefault("rating", RATING_UNKNOWN) / 5f,
isNsfw = isNsfwSource,
coverUrl = it.selectFirstOrThrow("img").src().orEmpty(),
coverUrl = "https://media.reaperscans.com/file/4SRBHm//$thumbnailPath",
tags = emptySet(),
state = null,
state = when (it.getString("status")) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
"Dropped" -> MangaState.ABANDONED
"Hiatus" -> MangaState.PAUSED
else -> null
},
author = null,
source = source,
)
}
}
override suspend fun getAvailableTags(): Set<MangaTag> = emptySet()
/**
 * Loads the tag list embedded in a script of the site's `/comics` page.
 *
 * The first `<script>` whose data contains "tags" is used. The argument of its
 * `push(...)` call is parsed as a JSON array, and the string at index 1 is expected to
 * hold a `tags:[...]` list. For each entry, the `"id"` value becomes the tag key and the
 * `"name"` value (title-cased with [sourceLocale]) becomes the tag title. Entries missing
 * either field are skipped.
 *
 * @return the parsed tags, or an empty set when no script mentions "tags"
 */
override suspend fun getAvailableTags(): Set<MangaTag> {
    val doc = webClient.httpGet("https://$domain/comics").parseHtml()
    val scriptContent = doc.select("script")
        .firstOrNull { it.data().contains("tags") }
        ?.data()
        ?: return emptySet()
    val jsonString = scriptContent.substringAfter("push(").substringBeforeLast(")")
    val payload = JSONArray(jsonString).getString(1)
    val tagsString = payload.substringAfter("tags:[").substringBeforeLast("]")
    return tagsString.split("},{").mapNotNullTo(mutableSetOf()) { tagString ->
        // Pass "" as the missing-delimiter value: by default substringAfter returns the
        // whole input when the marker is absent, which would make the emptiness guard
        // below accept entries that have no "id" or no "name" at all.
        val id = tagString.substringAfter("\"id\":", "").substringBefore(",")
        val name = tagString.substringAfter("\"name\":\"", "").substringBefore("\"")
        if (id.isNotEmpty() && name.isNotEmpty()) {
            MangaTag(
                key = id,
                title = name.toTitleCase(sourceLocale),
                source = source,
            )
        } else {
            null
        }
    }
}
/**
 * Registers this parser's configuration keys: whatever the superclass adds to [keys],
 * followed by [userAgentKey].
 */
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
    super.onCreateConfig(keys)
    keys += userAgentKey
}
private fun chapterListNextPageSelector(): String = "button[wire:click*=nextPage]"
private fun chapterListSelector() = "div[wire:id] > div > ul[role=list] > li"
/**
 * Format used to parse chapter `created_at` timestamps, e.g. `2024-01-31T12:34:56.000Z`.
 *
 * NOTE(review): SimpleDateFormat instances are not thread-safe; confirm this shared
 * instance is never used from several threads at once.
 */
private val dateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", sourceLocale).apply {
    // 'Z' is a quoted literal in the pattern, so it carries no zone information for the
    // parser. Without an explicit zone the value would be read in the device's default
    // time zone instead of UTC, shifting every upload date by the local offset.
    timeZone = TimeZone.getTimeZone("UTC")
}
override suspend fun getDetails(manga: Manga): Manga {
val cachedChapters = chapterCache[manga.url]
if (cachedChapters != null) {
return cachedChapters
val seriesid = manga.id
val url = "https://api.$domain/chapter/query?page=1&perPage=9999&series_id=$seriesid"
val response = makeRequest(url)
val data = response.getJSONArray("data")
val chapters = data.mapJSONIndexed { index, it ->
val chapterUrl = "/series/${it.getJSONObject("series").getString("series_slug")}/${it.getString("chapter_slug")}"
MangaChapter(
id = it.getLong("id"),
name = it.getString("chapter_name"),
number = data.length() - index,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(dateFormat, it.getString("created_at")),
branch = null,
source = source,
)
}
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val simpleDateFormat = SimpleDateFormat("dd/MM/yyyy", sourceLocale)
var totalChapters = (doc.selectFirst(selectTotalChapter)?.text()?.toIntOrNull() ?: 0) - 1
val chapters = mutableSetOf<MangaChapter>()
var hasNextPage = doc.selectFirst(chapterListNextPageSelector()) != null
chapters.addAll(
doc.select(chapterListSelector()).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null,
source = source,
)
},
return manga.copy(
chapters = chapters
)
}
if (!hasNextPage) {
return manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
/**
 * Converts [date] to a timestamp by handing it to `tryParse` on [dateFormat].
 *
 * @param dateFormat the format used for parsing
 * @param date the raw date text, may be null
 * @return the value produced by `tryParse`, or 0 when an [Exception] is thrown meanwhile
 */
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long =
    try {
        dateFormat.tryParse(date)
    } catch (ignored: Exception) {
        0L
    }
val csrfToken = doc.selectFirst("meta[name=csrf-token]")?.attr("content") ?: error("Couldn't find csrf-token")
val livewareData = doc.selectFirst("div[wire:initial-data*=Models\\\\Comic]")?.attr("wire:initial-data")
?.let { JSONObject(it) } ?: error("Couldn't find LiveWireData")
val routeName =
livewareData.getJSONObject("fingerprint").getStringOrNull("name") ?: error("Couldn't find routeName")
val fingerprint = livewareData.getJSONObject("fingerprint")
var serverMemo = livewareData.getJSONObject("serverMemo")
var pageToQuery = 2
// Javascript: (Math.random() + 1).toString(36).substring(8)
val generateId = { ->
"1.${
Random.nextLong().toString(36)
}".substring(10)
} // Not exactly the same, but results in a 3-5 character string
while (hasNextPage) {
// The payload has to be built from a fixed template in the expected format: org.json.JSONObject does not preserve key order, and the server seems to care about the order of the keys.
val payload = String.format(
responseTemplate,
fingerprint.getString("id"),
fingerprint.getString("path"),
serverMemo.getString("htmlHash"),
pageToQuery - 1,
pageToQuery - 1,
serverMemo.getJSONObject("dataMeta").getJSONObject("models").getJSONObject("comic").getString("id"),
serverMemo.getString("checksum"),
generateId(),
pageToQuery,
).toRequestBody(JSON_MEDIA_TYPE)
val headers = Headers.Builder().add("x-csrf-token", csrfToken).add("x-livewire", "true").build()
val responseData =
makeRequest("https://$domain/livewire/message/$routeName", payload, headers)
private val pageSelector = "div#content div.container img"
// response contains state that we need to preserve
serverMemo = mergeLeft(serverMemo, responseData.serverMemo)
val chaptersHtml = Jsoup.parse(responseData.effects.html, "https://$domain")
chapters.addAll(
chaptersHtml.select(chapterListSelector()).mapChapters { _, li ->
val a = li.selectFirstOrThrow("a")
val chapterUrl = a.attr("href").toRelativeUrl(domain)
MangaChapter(
id = generateUid(chapterUrl),
name = li.selectFirst("div.truncate p.truncate")?.text().orEmpty(),
number = totalChapters--,
url = chapterUrl,
scanlator = null,
uploadDate = parseChapterDate(
simpleDateFormat,
li.selectFirst("div.truncate div.items-center")?.text(),
),
branch = null,
source = source,
)
},
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
return doc.select(pageSelector).map { img ->
val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
hasNextPage = chaptersHtml.selectFirst(chapterListNextPageSelector()) != null
pageToQuery++
}
val copy = manga.copy(
description = doc.selectFirst("div.p-4 p.prose")?.html(),
state = when (doc.selectFirst(selectState)?.text()?.lowercase()) {
"ongoing" -> MangaState.ONGOING
"complete" -> MangaState.FINISHED
else -> null
},
chapters = chapters.reversed(),
)
chapterCache[manga.url] = copy
return copy
}
private suspend fun makeRequest(url: String, payload: RequestBody, headers: Headers): LiveWireResponseDto {
private suspend fun makeRequest(url: String): JSONObject {
var retryCount = 0
val backoffDelay = 2000L // Initial delay (milliseconds)
val request = Request.Builder().url(url).post(payload).headers(headers).build()
val request = Request.Builder().url(url).headers(headers).build()
while (true) {
try {
val response = context.httpClient.newCall(request).execute().parseJson()
val effectsJson = response.getJSONObject("effects")
val serverMemoJson = response.getJSONObject("serverMemo")
val effects = LiveWireEffectsDto(effectsJson.getString("html"))
return LiveWireResponseDto(effects, serverMemoJson)
return response
} catch (e: Exception) {
// Log or handle the exception as needed
@ -311,72 +245,5 @@ internal class ReaperComics(context: MangaLoaderContext) :
}
}
}
/**
 * Recursively merges [j2] onto [j1] in place and returns [j1].
 *
 * For every key of [j2]:
 * - if [j1] holds no JSON object under that key (including no value at all), the value
 *   from [j2] is stored in [j1];
 * - if both sides hold JSON objects, the two objects are merged recursively;
 * - if [j1] holds a JSON object but [j2] does not, [j1] is left untouched for that key.
 */
private fun mergeLeft(j1: JSONObject, j2: JSONObject): JSONObject {
    j2.keys().forEach { name ->
        val existing = j1.opt(name)
        val incoming = j2[name]
        if (existing is JSONObject) {
            if (incoming is JSONObject) {
                j1.put(name, mergeLeft(existing, incoming))
            }
        } else {
            j1.put(name, incoming)
        }
    }
    return j1
}
/**
 * Converts a chapter date string to a timestamp.
 *
 * Text that ends with " ago" (compared case-insensitively) is delegated to
 * [parseRelativeDate]; anything else is passed to `tryParse` on [dateFormat].
 *
 * @return the resulting timestamp, or 0 when [date] is null
 */
private fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
    if (date == null) {
        return 0
    }
    val isRelative = date.lowercase().endsWith(" ago")
    return if (isRelative) parseRelativeDate(date) else dateFormat.tryParse(date)
}
/**
 * Converts a relative date to a timestamp by subtracting the first number found in
 * [date] from the current time, in the unit selected by the `WordSet` checks below.
 *
 * @return the computed time in milliseconds, or 0 when [date] contains no usable number
 * or none of the unit checks matches
 */
private fun parseRelativeDate(date: String): Long {
    val amount = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0
    val calendar = Calendar.getInstance()
    // The first matching check wins, in the same order as listed here.
    val unit = when {
        WordSet("second").anyWordIn(date) -> Calendar.SECOND
        WordSet("minute", "minutes").anyWordIn(date) -> Calendar.MINUTE
        WordSet("hour", "hours").anyWordIn(date) -> Calendar.HOUR
        WordSet("day", "days").anyWordIn(date) -> Calendar.DAY_OF_MONTH
        WordSet("week", "weeks").anyWordIn(date) -> Calendar.WEEK_OF_YEAR
        WordSet("month", "months").anyWordIn(date) -> Calendar.MONTH
        WordSet("year").anyWordIn(date) -> Calendar.YEAR
        else -> return 0
    }
    calendar.add(unit, -amount)
    return calendar.timeInMillis
}
/**
 * Downloads the chapter's HTML page and builds one [MangaPage] for each
 * `img.max-w-full` element, in document order.
 *
 * When an image yields no source, `parseFailed` is invoked on that element.
 *
 * @param chapter the chapter whose pages are requested
 * @return the pages of the chapter
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val chapterDocument = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
    val pages = ArrayList<MangaPage>()
    for (image in chapterDocument.select("img.max-w-full")) {
        val pageUrl = image.src()?.toRelativeUrl(domain) ?: image.parseFailed("Image src not found")
        pages += MangaPage(
            id = generateUid(pageUrl),
            url = pageUrl,
            preview = null,
            source = source,
        )
    }
    return pages
}
/**
 * Holder for the two parts of a response that the chapter-list paging uses:
 * the `effects` object and the raw `serverMemo` JSON object.
 */
private class LiveWireResponseDto(
val effects: LiveWireEffectsDto,
val serverMemo: JSONObject,
)
/**
 * Holder for the `html` string read from a response's `effects` object.
 */
private class LiveWireEffectsDto(
val html: String,
)
// IMPORTANT: request-body template that is filled in with String.format.
// It is kept as one literal string so that the key order stays fixed.
// Placeholders, in order: fingerprint id (%s), fingerprint path (%s), htmlHash (%s),
// current page (%d, twice), comic id (%s), checksum (%s), generated call id (%s),
// page to go to (%d).
private val responseTemplate =
"""{"fingerprint":{"id":"%s","name":"frontend.comic-chapter-list","locale":"en","path":"%s","method":"GET","v":"acj"},"serverMemo":{"children":[],"errors":[],"htmlHash":"%s","data":{"comic":[],"page":%d,"paginators":{"page":%d}},"dataMeta":{"models":{"comic":{"class":"App\\Models\\Comic","id":"%s","relations":[],"connection":"pgsql","collectionClass":null}}},"checksum":"%s"},"updates":[{"type":"callMethod","payload":{"id":"%s","method":"gotoPage","params":[%d,"page"]}}]}"""
}

Loading…
Cancel
Save