From 66adc65a96b9427ef9f1a9b70cc568c1fe430a22 Mon Sep 17 00:00:00 2001
From: Koitharu
Date: Mon, 24 Apr 2023 16:48:17 +0300
Subject: [PATCH] [JapScan] New parser

---
Review notes (free-form text, not part of the commit message):

* JapScanParser.getListPage rejects non-empty tag filters. A non-empty query
  is answered from /live-search/ on the first page only (later pages return an
  empty list); otherwise /mangas/<page> is scraped.
* JapScanParser.getPages fetches the script whose src contains "/zjs/",
  collects the quoted 62-character strings that are permutations of
  [0-9A-Za-z] (exactly two are required), and tries both substitution
  directions on the "data-data" attribute until the result starts with "ey".
  That result is base64-decoded, parsed as JSON, and "imagesLink" is read.
* GroupleParser.headers changes from a computed getter to an initialized
  property, so config[userAgentKey] is read when the property is initialized
  instead of on every access.
  NOTE(review): confirm nothing relies on a changed user agent being picked up
  without recreating the parser.
* NOTE(review): the status label "Abondonné" looks like a misspelling of
  "Abandonné"; verify against the text the site actually renders.
* NOTE(review): the search term goes through urlEncoded() before it is handed
  to httpPost(); confirm the form body is not encoded a second time.
* NOTE(review): chapter dates are parsed with Locale.ENGLISH on a French
  source; confirm the month names the site emits.
* Jsoup.kt gains textOrNull()/ownTextOrNull(), which return null instead of an
  empty string. Parse.kt gains Response.parseRaw(), which returns the body as
  a String and closes the response in a finally block.

 .../kotatsu/parsers/site/JapScanParser.kt     | 193 ++++++++++++++++++
 .../parsers/site/grouple/GroupleParser.kt     |   7 +-
 .../koitharu/kotatsu/parsers/util/Jsoup.kt    |   4 +
 .../koitharu/kotatsu/parsers/util/Parse.kt    |   6 +
 4 files changed, 206 insertions(+), 4 deletions(-)
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/JapScanParser.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/JapScanParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/JapScanParser.kt
new file mode 100644
index 00000000..8af433f5
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/JapScanParser.kt
@@ -0,0 +1,193 @@
+package org.koitharu.kotatsu.parsers.site
+
+import okhttp3.Headers
+import org.json.JSONObject
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.PagedMangaParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.exception.ParseException
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
+import org.koitharu.kotatsu.parsers.util.json.mapJSON
+import java.text.SimpleDateFormat
+import java.util.*
+
+@MangaSourceParser("JAPSCAN", "JapScan", "fr")
+internal class JapScanParser(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.JAPSCAN, 30) {
+
+	override val sortOrders: Set<SortOrder> = EnumSet.of(SortOrder.ALPHABETICAL)
+
+	override val configKeyDomain = ConfigKey.Domain("www.japscan.lol", arrayOf("www.japscan.lol", "japscan.ws"))
+
+	override val headers: Headers = Headers.Builder()
+		.add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0")
+		.build()
+
+	override suspend fun getListPage(
+		page: Int,
+		query: String?,
+		tags: Set<MangaTag>?,
+		sortOrder: SortOrder,
+	): List<Manga> {
+		require(tags.isNullOrEmpty()) { "Tags are not supported by this source" }
+		if (!query.isNullOrEmpty()) {
+			return if (page == paginator.firstPage) getListPageSearch(query) else emptyList()
+		}
+		val url = urlBuilder()
+			.addPathSegment("mangas")
+			.addPathSegment(page.toString())
+			.build()
+		val root = webClient.httpGet(url).parseHtml()
+			.requireElementById("main")
+			.selectFirstOrThrow(".flex-wrap")
+		return root.select(".mainTitle")
+			.map { p ->
+				val div = checkNotNull(p.parent())
+				val a = div.selectFirstOrThrow("a")
+				val href = a.attrAsRelativeUrl("href")
+				Manga(
+					id = generateUid(href),
+					title = p.text(),
+					altTitle = null,
+					url = href,
+					publicUrl = href.toAbsoluteUrl(domain),
+					rating = RATING_UNKNOWN,
+					isNsfw = false,
+					coverUrl = div.selectFirstOrThrow("img").attrAsAbsoluteUrl("src"),
+					tags = setOf(),
+					state = null,
+					author = null,
+					source = source,
+				)
+			}
+	}
+
+	override suspend fun getDetails(manga: Manga): Manga {
+		val root = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().requireElementById("main")
+		val dateFormat = SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH)
+		return manga.copy(
+			altTitle = root.tableValue("Nom Original:"),
+			isNsfw = root.tableValue("Âge conseillé :")?.extractIntOrNull().let { it != null && it >= 18 },
+			tags = root.tableValue("Type(s):")?.split(", ")?.mapNotNullToSet {
+				it.toTag()
+			}.orEmpty() + root.tableValue("Genre(s):")?.split(", ")?.mapNotNullToSet {
+				it.toTag()
+			}.orEmpty(),
+			state = when (root.tableValue("Statut:")) {
+				"En Cours" -> MangaState.ONGOING
+				"Terminé", "Abondonné" -> MangaState.FINISHED
+				else -> null
+			},
+			author = root.tableValue("Artiste(s):")?.substringBefore(','),
+			description = root.selectFirst("p.list-group-item-primary")?.html(),
+			chapters = root.requireElementById("chapters_list")
+				.select("div.chapters_list")
+				.mapChapters(reversed = true) { i, div ->
+					val a = div.selectFirst("a") ?: return@mapChapters null
+					val href = a.attrAsRelativeUrl("href")
+					MangaChapter(
+						id = generateUid(href),
+						name = a.text(),
+						number = i,
+						url = href,
+						scanlator = null,
+						uploadDate = dateFormat.tryParse(div.selectFirst("span.float-right")?.text()),
+						branch = null,
+						source = source,
+					)
+				},
+		)
+	}
+
+	override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+		val chapterUrl = chapter.url.toAbsoluteUrl(domain)
+		val doc = webClient.httpGet(chapterUrl).parseHtml()
+		val scriptUrl = doc.getElementsByTag("script").firstNotNullOf { script ->
+			script.attrAsAbsoluteUrlOrNull("src")?.takeIf { it.contains("/zjs/") }
+		}
+		val embeddedData = doc.requireElementById("data").attr("data-data")
+		val script = webClient.httpGet(scriptUrl).parseRaw()
+
+		val sample = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".toList()
+		val keyRegex = Regex("""'([\dA-Z]{62})'""", RegexOption.IGNORE_CASE)
+
+		val keyTables = keyRegex.findAll(script)
+			.mapNotNullTo(ArrayList(2)) { match ->
+				match.groupValues[1].takeIf {
+					it.toList().sorted() == sample
+				}
+			}
+		check(keyTables.size == 2)
+
+		var error: Exception? = null
+		repeat(2) { i ->
+			val key = keyTables[i].zip(keyTables[1 - i]).toMap()
+			try {
+				val unscrambledData = embeddedData.map { key[it] ?: it }.joinToString("")
+				if (unscrambledData.startsWith("ey")) {
+					val array = JSONObject(context.decodeBase64(unscrambledData).toString(Charsets.UTF_8))
+						.getJSONArray("imagesLink")
+					val result = ArrayList<MangaPage>(array.length())
+					repeat(array.length()) { index ->
+						val url = array.getString(index)
+						result += MangaPage(
+							id = generateUid(url),
+							url = url,
+							preview = null,
+							source = source,
+						)
+					}
+					return result
+				}
+			} catch (e: Exception) {
+				error = e
+			}
+		}
+		throw (error ?: ParseException("Cannot decode pages list", chapterUrl))
+	}
+
+	override suspend fun getTags(): Set<MangaTag> {
+		return emptySet() // not supported
+	}
+
+	private suspend fun getListPageSearch(
+		query: String,
+	): List<Manga> {
+		val json = webClient.httpPost(
+			"https://$domain/live-search/",
+			mapOf("search" to query.urlEncoded()),
+		).parseJsonArray()
+		return json.mapJSON { jo ->
+			val url = jo.getString("url")
+			Manga(
+				id = generateUid(url),
+				title = jo.getString("name"),
+				altTitle = jo.getStringOrNull("alternate_names")?.substringBefore(','),
+				url = url,
+				publicUrl = url.toAbsoluteUrl(domain),
+				rating = RATING_UNKNOWN,
+				isNsfw = false,
+				coverUrl = jo.getString("image").toAbsoluteUrl(domain),
+				tags = emptySet(),
+				state = null,
+				author = null,
+				source = source,
+			)
+		}
+	}
+
+	private fun Element.tableValue(label: String): String? {
+		return getElementsMatchingOwnText(label).firstOrNull()?.parent()?.ownTextOrNull()
+	}
+
+	private fun String.extractIntOrNull(): Int? = this.filter(Char::isDigit).toIntOrNull()
+
+	private fun String.toTag() = MangaTag(
+		title = this.toTitleCase(sourceLocale),
+		key = this.replace(' ', '-').lowercase(sourceLocale),
+		source = source,
+	)
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt
index 6b58614f..40a2a0a2 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt
@@ -40,10 +40,9 @@ internal abstract class GroupleParser(
 		"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
 	)
 
-	override val headers: Headers
-		get() = Headers.Builder()
-			.add("User-Agent", config[userAgentKey])
-			.build()
+	override val headers: Headers = Headers.Builder()
+		.add("User-Agent", config[userAgentKey])
+		.build()
 
 	override val sortOrders: Set<SortOrder> = EnumSet.of(
 		SortOrder.UPDATED,
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
index 6e5faeb8..24d1a6bd 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
@@ -115,3 +115,7 @@ fun Element.selectLast(cssQuery: String): Element? {
 fun Element.selectLastOrThrow(cssQuery: String): Element {
 	return selectLast(cssQuery) ?: throw ParseException("Cannot find \"$cssQuery\"", baseUri())
 }
+
+fun Element.textOrNull(): String? = text().takeUnless { it.isEmpty() }
+
+fun Element.ownTextOrNull(): String? = ownText().takeUnless { it.isEmpty() }
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt
index f6c193e1..d1aecc44 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt
@@ -46,6 +46,12 @@ fun Response.parseJsonArray(): JSONArray = try {
 	closeQuietly()
 }
 
+fun Response.parseRaw(): String = try {
+	requireBody().string()
+} finally {
+	closeQuietly()
+}
+
 /**
  * Convert url to relative if it is on [domain]
  * @return an url relative to the [domain] or absolute, if domain is mismatching