From 3d78b9003da5c4e41e06c068fb5f8cfa05183609 Mon Sep 17 00:00:00 2001 From: Zhifan Ye Date: Tue, 27 May 2025 03:48:57 +0800 Subject: [PATCH] [Manhuagui] Add source (#1812) Co-authored-by: Draken <131387159+dragonx943@users.noreply.github.com> --- .github/summary.yaml | 2 +- .../parsers/site/zh/ManhuaguiParser.kt | 448 ++++++++++++++++++ 2 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/ManhuaguiParser.kt diff --git a/.github/summary.yaml b/.github/summary.yaml index edffc3f7d..3d7b08d04 100644 --- a/.github/summary.yaml +++ b/.github/summary.yaml @@ -1 +1 @@ -total: 1230 \ No newline at end of file +total: 1231 diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/ManhuaguiParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/ManhuaguiParser.kt new file mode 100644 index 000000000..4731a3461 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/ManhuaguiParser.kt @@ -0,0 +1,448 @@ +package org.koitharu.kotatsu.parsers.site.zh + +import okhttp3.Headers +import okhttp3.HttpUrl.Companion.toHttpUrl +import org.json.JSONObject +import org.jsoup.Jsoup +import org.jsoup.nodes.Document +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser +import org.koitharu.kotatsu.parsers.exception.ParseException +import org.koitharu.kotatsu.parsers.model.ContentRating +import org.koitharu.kotatsu.parsers.model.Demographic +import org.koitharu.kotatsu.parsers.model.Manga +import org.koitharu.kotatsu.parsers.model.MangaChapter +import org.koitharu.kotatsu.parsers.model.MangaListFilter +import org.koitharu.kotatsu.parsers.model.MangaListFilterCapabilities +import org.koitharu.kotatsu.parsers.model.MangaListFilterOptions +import org.koitharu.kotatsu.parsers.model.MangaPage +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import org.koitharu.kotatsu.parsers.model.MangaState +import org.koitharu.kotatsu.parsers.model.MangaTag +import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN +import org.koitharu.kotatsu.parsers.model.SortOrder +import org.koitharu.kotatsu.parsers.model.YEAR_UNKNOWN +import org.koitharu.kotatsu.parsers.network.UserAgents +import org.koitharu.kotatsu.parsers.util.attrOrThrow +import org.koitharu.kotatsu.parsers.util.generateUid +import org.koitharu.kotatsu.parsers.util.ifNullOrEmpty +import org.koitharu.kotatsu.parsers.util.json.asTypedList +import org.koitharu.kotatsu.parsers.util.mapChapters +import org.koitharu.kotatsu.parsers.util.mapToSet +import org.koitharu.kotatsu.parsers.util.oneOrThrowIfMany +import org.koitharu.kotatsu.parsers.util.parseHtml +import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow +import org.koitharu.kotatsu.parsers.util.selectOrThrow +import org.koitharu.kotatsu.parsers.util.src +import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy +import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl +import org.koitharu.kotatsu.parsers.util.urlEncoded +import java.util.EnumSet +import java.util.Locale + +@MangaSourceParser("MANHUAGUI", "Manhuagui", "zh") +internal class ManhuaguiParser(context: MangaLoaderContext) : + LegacyPagedMangaParser(context, MangaParserSource.MANHUAGUI, pageSize = 42) { + + override val configKeyDomain = ConfigKey.Domain("www.manhuagui.com") + override val userAgentKey = ConfigKey.UserAgent(UserAgents.CHROME_MOBILE) + + val configKeyImgServer = ConfigKey.PreferredImageServer( + presetValues = arrayOf("us", "us2", "us3", "eu", "eu2", "eu3").associateWith { it }, + defaultValue = "us", + ) + + val imgServer = "${config[configKeyImgServer]}.hamreus.com" + + override fun onCreateConfig(keys: MutableCollection>) { + super.onCreateConfig(keys) + keys.add(userAgentKey) + keys.add(configKeyImgServer) + } + + override fun getRequestHeaders(): Headers = super.getRequestHeaders().newBuilder() + .add("Referer", "https://$domain") + .build() + + override val defaultSortOrder: SortOrder + get() = SortOrder.UPDATED + + override val availableSortOrders: Set + get() = EnumSet.of( + SortOrder.NEWEST, // 最新发布 + SortOrder.UPDATED, // 最新更新 + SortOrder.POPULARITY, // 人气最旺 + SortOrder.RATING, // 评分最高 + ) + + override val filterCapabilities: MangaListFilterCapabilities + get() = MangaListFilterCapabilities( + isSearchSupported = true, + isYearSupported = true, + isOriginalLocaleSupported = true, + ) + + private val fetchedTags = suspendLazy(initializer = ::fetchAvailableTags) + + override suspend fun getFilterOptions() = MangaListFilterOptions( + availableLocales = setOf( + Locale.JAPAN, Locale.TRADITIONAL_CHINESE, Locale.ROOT, + Locale.US, Locale.SIMPLIFIED_CHINESE, Locale.KOREA, + ), + availableTags = fetchedTags.get(), + availableDemographics = EnumSet.complementOf(EnumSet.of(Demographic.JOSEI)), + availableStates = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED), + ) + + private val listUrl = "/list" + private val searchUrl = "/s" + private val ratingUrl = "/tools/vote.ashx" + private val sectionChaptersSelector = ".chapter-list" + + private fun Any?.toQueryParam(): String? = when (this) { + + is String -> urlEncoded() // Title + + is Locale -> when (this) { + Locale.JAPAN -> "japan" // 日本 + Locale.TRADITIONAL_CHINESE -> "hongkong" // 港台 + Locale.ROOT -> "other" // 其他 (I do not know if it is sure to use it) + Locale.US -> "europe" // 欧美 + Locale.SIMPLIFIED_CHINESE -> "china" // 内地 + Locale.KOREA -> "korea" // 韩国 + else -> null + } + + is MangaTag -> key + + is Demographic -> when (this) { + Demographic.SHOUJO -> "shaonv" // 少女 + Demographic.SHOUNEN -> "shaonian" // 少年 + Demographic.SEINEN -> "qingnian" // 青年 + Demographic.KODOMO -> "ertong" // 儿童 + Demographic.NONE -> "tongyong" // 通用 + else -> null + } + + is Int -> when { // Year + this >= 2010 && this <= 2025 -> toString() + this >= 2000 && this < 2010 -> "200x" + this >= 1990 && this < 2000 -> "199x" + this >= 1980 && this < 1990 -> "198x" + this == YEAR_UNKNOWN -> null + this < 1980 -> "197x" + else -> null + } + + is MangaState -> when (this) { + MangaState.ONGOING -> "lianzai" // 连载 + MangaState.FINISHED -> "wanjie" // 完结 + else -> null + } + + else -> null + } + + private fun String.addQueryParameters(params: JSONObject): String { + val builder = this.toHttpUrl().newBuilder() + val keys = params.keys() + while (keys.hasNext()) { + val key = keys.next() + val value = params.get(key).toString() + builder.addQueryParameter(key, value) + } + return builder.build().toString() + } + + override suspend fun getListPage( + page: Int, + order: SortOrder, + filter: MangaListFilter, + ): List { + + // Flag of whether there is title query param + var flagHasTitleQuery = false + + val url = buildString { + var queryFull: String? + var orderStr: String? + var pageStr: String? + if (filter.query == null) { + append(listUrl.toAbsoluteUrl(domain)) + queryFull = listOfNotNull( + filter.locale, + filter.tags.oneOrThrowIfMany(), + filter.demographics.oneOrThrowIfMany(), + filter.year.takeUnless { it == YEAR_UNKNOWN }, + filter.states.oneOrThrowIfMany(), + ).joinToString("_") { it.toQueryParam().toString() } + orderStr = when (order) { + SortOrder.UPDATED -> "update" + SortOrder.POPULARITY -> "view" + SortOrder.RATING -> "rate" + else -> "index" + } + pageStr = "/${orderStr}_p${page}.html" + } else { + flagHasTitleQuery = true + append(searchUrl.toAbsoluteUrl(domain)) + queryFull = filter.query + orderStr = when (order) { + SortOrder.POPULARITY -> "_o1" + SortOrder.NEWEST -> "_o2" + SortOrder.RATING -> "_o3" + else -> "" + } + pageStr = "${orderStr}_p${page}.html" + } + + append("/${queryFull}${pageStr}") + } + + val doc = webClient.httpGet(url.toHttpUrl()).parseHtml() + + return doc.select(if (flagHasTitleQuery) "div.book-result > ul > li" else "div.book-list > ul#contList > li") + .map { li -> + val a = li.selectFirstOrThrow("a") + val em = + li.selectFirst(if (flagHasTitleQuery) "div.book-score > p:first-child > strong" else "span.updateon > em") + val href = a.attrOrThrow("href") + val rating = em?.text()?.toFloat()?.div(10) ?: RATING_UNKNOWN + Manga( + id = generateUid(href), + title = a.attr("title"), + altTitles = emptySet(), + url = href, + publicUrl = href.toAbsoluteUrl(domain), + rating = rating, + contentRating = null, + coverUrl = a.selectFirst("img")?.src(), + tags = emptySet(), + state = null, + authors = emptySet(), + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.publicUrl).parseHtml() + + val altTitles = doc.select(".book-title h2").eachText().toSet() + val contentRating: ContentRating = doc.selectFirst("input#__VIEWSTATE").let { + when (it) { + null -> ContentRating.SAFE + else -> ContentRating.ADULT + } + } + val tags = doc.select("ul.detail-list > li:nth-child(2) > span:first-child > a").mapToSet { e -> + MangaTag( + title = e.text(), + key = e.attr("href").removePrefix(listUrl).removeSurrounding("/"), + source = source, + ) + } + val state = doc.selectFirst("li.status > span > span")?.className()?.let { className -> + when (className) { + "red" -> MangaState.ONGOING + "dgreen" -> MangaState.FINISHED + else -> null + } + } + val authors = doc.select("a[href^=\"/author\"]").eachText().toSet() + val description = doc.selectFirst("div.book-intro > #intro-all > p")?.text() + val chapters = parseChapters(doc) + + return manga.copy( + altTitles = altTitles, + contentRating = contentRating, + tags = tags, + state = state, + authors = authors, + description = description, + chapters = chapters, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val chapUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(chapUrl).parseHtml() + val regex = Regex("""^.*\}\('(.*)',(\d*),(\d*),'([\w\+/=]*)'.*${'$'}""", RegexOption.MULTILINE) + val result = regex.find(doc.html()) ?: throw ParseException("Cannot find chapter metadata", chapUrl) + val metadataRaw = decompressLZStringFromBase64(result.groupValues[4]) ?: throw ParseException( + "Cannot decompress chapter metadata", + chapUrl, + ) + val json = unpack(result.groupValues[1], metadataRaw.split("|")) + + val files = json.getJSONArray("files") + val semiFullUrl = json.getString("path").toAbsoluteUrl(imgServer) + val signature = json.getJSONObject("sl") + + return files.asTypedList().map { it -> + val fullUrl = (semiFullUrl + it).addQueryParameters(signature) + MangaPage( + id = generateUid(fullUrl), + url = fullUrl, + preview = fullUrl, + source = source, + ) + } + } + + // private funs + + private suspend fun fetchAvailableTags(): Set { + val doc = webClient.httpGet(listUrl.toAbsoluteUrl(domain)).parseHtml() + val tags = doc.selectOrThrow("div.filter-nav > .filter.genre > ul > li > a").drop(1) + return tags.mapToSet { a -> + val title = a.text() + val key = a.attr("href").removePrefix(listUrl).removeSurrounding("/") + MangaTag( + title = title, + key = key, + source = source, + ) + } + } + + private fun parseChapters(doc: Document, url: String? = null): List { + val (sectionTitles, sectionChapters) = doc.selectFirst("#__VIEWSTATE").let { + if (it != null) { + val viewStateStr = decompressLZStringFromBase64(it.attrOrThrow("value")) + ?: throw ParseException("Cannot decompress __VIEWSTATE", url.ifNullOrEmpty { "" }) + val doc1 = Jsoup.parse(viewStateStr) + Pair( + doc1.select("h4 span"), + doc1.select(sectionChaptersSelector), + ) + } else { + Pair( + doc.select(".chapter h4 span"), + doc.select(sectionChaptersSelector), + ) + } + } + + // Parse chapters from each section + val chapters = sectionTitles + .zip(sectionChapters) + .flatMapIndexed { index, (title, section) -> + val chaps = section.select("ul").flatMap { + it.select("li a").asReversed() + } + chaps.mapChapters { chapIdx, chap -> + MangaChapter( + url = chap.attrOrThrow("href"), + id = generateUid(chap.attrOrThrow("href")), + title = chap.attrOrThrow("title"), + number = (chapIdx + 1).toFloat(), + volume = index + 1, + scanlator = null, + uploadDate = 0, + branch = title.text(), + source = source, + ) + } + } + + return chapters + } + + private fun decompressLZStringFromBase64(input: String): String? { + if (input.isBlank()) return null + + data class Data(var value: Char = '0', var position: Int = 0, var index: Int = 1) + + val keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" + val getNextValue = { it: Int -> keyStr.indexOf(input[it]).toChar() } + val builder = StringBuilder() + val dictionary = mutableListOf("0", "1", "2") + val data = Data(getNextValue(0), 32, 1) + var (bits, numBits, enlargeIn, dictSize) = listOf(0, 3, 4, 4) + var (c, w, entry) = listOf("", "", "") + + fun Int.power() = 1 shl this + fun Int.string() = this.toChar().toString() + + fun doPower(initBits: Int, initPower: Int, initMaxPower: Int, mode: Int = 0) { + bits = initBits + var power = initPower + val maxpower = initMaxPower.power() + while (power != maxpower) { + val resb = data.value.code and data.position + data.position = data.position shr 1 + if (data.position == 0) { + data.position = 32 + data.value = getNextValue(data.index++) + } + bits = bits or ((if (resb > 0) 1 else 0) * power) + power = power shl 1 + } + when (mode) { + 1 -> c = bits.string() + 2 -> dictionary.add(dictSize++.also { enlargeIn-- }, bits.string()) + } + } + + fun checkEnlargeIn() { + if (enlargeIn-- == 0) { + enlargeIn = numBits.power() + numBits++ + } + } + + doPower(bits, 1, 2) + when (bits) { + 0 -> doPower(0, 1, 8, 1) + 1 -> doPower(0, 1, 16, 1) + 2 -> return "" + } + + dictionary.add(3, c) + w = c + builder.append(w) + + while (true) { + if (data.index > input.length) return "" + doPower(0, 1, numBits) + when (bits) { + 0 -> doPower(0, 1, 8, 2).also { checkEnlargeIn() } + 1 -> doPower(0, 1, 16, 2).also { checkEnlargeIn() } + 2 -> return builder.toString() + } + entry = when { + dictionary.size > bits -> dictionary[bits] + bits == dictSize -> w + w[0] + else -> return null + } + builder.append(entry) + dictionary.add(dictSize++, w + entry[0]) + w = entry + checkEnlargeIn() + } + } + + private fun unpack(src: String, syms: List): JSONObject { + fun base62(n: Int) = when { + n < 10 -> n.toString() + n < 36 -> ('a' + (n - 10)).toString() + else -> ('A' + (n - 36)).toString() + } + + fun encode62(num: Int): String = if (num >= 62) encode62(num / 62) + base62(num % 62) else base62(num) + + val working = syms.foldRightIndexed(src) { idx, replacement, acc -> + if (replacement.isNotEmpty()) { + val token = encode62(idx) + Regex("\\b${Regex.escape(token)}\\b").replace(acc, replacement) + } else acc + } + + return JSONObject(Regex("""\((\{.+\})\)""", RegexOption.DOT_MATCHES_ALL) + .find(working)?.groupValues?.get(1) + ?: throw IllegalArgumentException("JSON payload not found after unpacking.")) + } +}