From 4a0e7221b0f53aaa81a5e94f4d2fc3da07a04838 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 13:08:19 +0200 Subject: [PATCH] [HitomiLa] Refactor --- .../parsers/site/all/HitomiLaParser.kt | 476 +++++++++--------- 1 file changed, 233 insertions(+), 243 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 5cfe0c8f..8ce4c24d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -1,10 +1,17 @@ package org.koitharu.kotatsu.parsers.site.all -import kotlinx.coroutines.* -import kotlinx.coroutines.sync.* +import androidx.collection.ArraySet +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock import okhttp3.Headers -import org.json.* -import org.koitharu.kotatsu.parsers.* +import org.json.JSONArray +import org.json.JSONObject +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaParser +import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* @@ -24,82 +31,74 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val ltnBaseUrl get() = "https://${getDomain("ltn")}" - override val availableSortOrders: Set = - EnumSet.of( - SortOrder.NEWEST, - SortOrder.POPULARITY, - ) + override val availableSortOrders: Set = EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) - private val localeMap: Map = - mapOf( - Locale("id") to "indonesian", - Locale("jv") to "javanese", - Locale("ca") to "catalan", - Locale("ceb") to "cebuano", - Locale("cs") to "czech", - Locale("da") to "danish", - Locale("de") to "german", - Locale("et") to "estonian", - Locale.ENGLISH to "english", - Locale("es") to "spanish", - Locale("eo") to "esperanto", - Locale("fr") to "french", - Locale("it") to "italian", - Locale("hi") to "hindi", - Locale("hu") to "hungarian", - Locale("pl") to "polish", - Locale("pt") to "portuguese", - Locale("vi") to "vietnamese", - Locale("tr") to "turkish", - Locale("ru") to "russian", - Locale("uk") to "ukrainian", - Locale("ar") to "arabic", - Locale.KOREAN to "korean", - Locale.CHINESE to "chinese", - Locale.JAPANESE to "japanese", - ) + private val localeMap: Map = mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", + Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) - private fun Locale?.getSiteLang(): String { - return when (this) { - null -> "all" - else -> localeMap[this] ?: "all" - } + private fun Locale?.getSiteLang(): String = when (this) { + null -> "all" + else -> localeMap[this] ?: "all" } - override suspend fun getAvailableLocales(): Set { - return localeMap.keys - } + override suspend fun getAvailableLocales(): Set = localeMap.keys - override suspend fun getAvailableTags(): Set { - return coroutineScope { - ('a'..'z').map { alphabet -> - async { - val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() - - doc.select(".posts > li").mapNotNull { element -> - val num = - element.ownText().let { - Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 - } - - if (num > 100) { - val url = element.selectFirst("a") - val href = - url?.attrAsRelativeUrl("href") - ?: return@mapNotNull null - - MangaTag( - title = url.ownText().toCamelCase(), - key = href.tagUrlToTag(), - source = source, - ) - } else { - null + override suspend fun getAvailableTags(): Set = coroutineScope { + ('a'..'z').map { alphabet -> + async { + val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() + + doc.select(".posts > li").mapNotNull { element -> + val num = + element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 } + + if (num > 100) { + val url = element.selectFirst("a") + val href = + url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null + + MangaTag( + title = url.ownText().toCamelCase(), + key = href.tagUrlToTag(), + source = source, + ) + } else { + null } } - }.awaitAll().flatten().toSet() - } + } + }.awaitAll().flatten().toSet() } private var cachedSearchIds: List = emptyList() @@ -107,42 +106,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo override suspend fun getList( offset: Int, filter: MangaListFilter?, - ): List { - return when (filter) { - is MangaListFilter.Advanced -> { - if (filter.tags.isEmpty()) { - when (filter.sortOrder) { - SortOrder.POPULARITY -> { - getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange()) - } - - else -> { - getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) - } + ): List = when (filter) { + is MangaListFilter.Advanced -> { + if (filter.tags.isEmpty()) { + when (filter.sortOrder) { + SortOrder.POPULARITY -> { + getGalleryIDsFromNozomi( + "popular", + "today", + filter.locale.getSiteLang(), + offset.nextOffsetRange(), + ) } - } else { - if (offset == 0) { - cachedSearchIds = - hitomiSearch( - filter.tags.joinToString(" ") { it.key }, - filter.sortOrder == SortOrder.POPULARITY, - filter.locale.getSiteLang(), - ).toList() + + else -> { + getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) } - cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } - } - - is MangaListFilter.Search -> { + } else { if (offset == 0) { - cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = + hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY, + filter.locale.getSiteLang(), + ).toList() } cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } + } - else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) - }.toMangaList() - } + is MangaListFilter.Search -> { + if (offset == 0) { + cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() + } + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) + } + + else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) + }.toMangaList() private fun Int.nextOffsetRange(): LongRange { val bytes = this * 4L @@ -243,6 +245,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo area = "tag" tag = it } + "language" -> { area = null lang = tag @@ -255,9 +258,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val key = hashTerm(it) val node = getGalleryNodeAtAddress(0) - val data = - bSearch(key, node) - ?: return emptySet() + val data = bSearch(key, node) ?: return emptySet() return getGalleryIDsFromData(data) } @@ -266,7 +267,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - require(length in 0..100000000) { + require(length in 1..100000000) { "Length $length is too long" } @@ -283,10 +284,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val expectedLength = numberOfGalleryIDs * 4 + 4 - if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { - throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long") - } else if (inbuf.size != expectedLength) { - throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + require(numberOfGalleryIDs in 1..10000000) { + "number_of_galleryids $numberOfGalleryIDs is too long" + } + require(inbuf.size == expectedLength) { + "inbuf.byteLength ${inbuf.size} != expected_length $expectedLength" } for (i in 0.until(numberOfGalleryIDs)) @@ -316,7 +318,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return 0 } - private fun locateKey( + fun locateKey( key: UByteArray, node: Node, ): Pair { @@ -331,7 +333,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Pair(false, node.keys.size) } - private fun isLeaf(node: Node): Boolean { + fun isLeaf(node: Node): Boolean { for (subnode in node.subNodeAddresses) if (subnode != 0L) { return false @@ -361,19 +363,17 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo language: String, range: LongRange? = null, ): Set { - val nozomiAddress = - when (area) { - null -> "$ltnBaseUrl/$tag-$language.nozomi" - else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" - } + val nozomiAddress = when (area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() - val arrayBuffer = - ByteBuffer - .wrap(bytes) - .order(ByteOrder.BIG_ENDIAN) + val arrayBuffer = ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) while (arrayBuffer.hasRemaining()) nozomi.add(arrayBuffer.int) @@ -381,10 +381,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val galleriesIndexVersion = - SuspendLazy { - webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() - } + private val galleriesIndexVersion = SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -393,10 +392,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo ) private fun decodeNode(data: ByteArray): Node { - val buffer = - ByteBuffer - .wrap(data) - .order(ByteOrder.BIG_ENDIAN) + val buffer = ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) val uData = data.toUByteArray() @@ -447,11 +445,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo url: String, range: LongRange? = null, ): ByteArray { - val rangeHeaders = - when (range) { - null -> Headers.headersOf() - else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") - } + val rangeHeaders = when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } return webClient.httpGet(url, rangeHeaders).parseBytes() } @@ -464,113 +461,109 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return MessageDigest.getInstance("SHA-256").digest(data) } - private suspend fun Collection.toMangaList(): List { - return coroutineScope { - map { id -> - async { - runCatching { - val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() - - Manga( - id = generateUid(id.toString()), - title = doc.selectFirstOrThrow("h1").text(), - url = id.toString(), - coverUrl = - "https:" + - doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), - publicUrl = - doc.selectFirstOrThrow("h1 > a") - .attrAsRelativeUrl("href") - .toAbsoluteUrl(domain), - author = null, - tags = emptySet(), - isNsfw = true, - rating = RATING_UNKNOWN, - altTitle = null, - state = null, - source = source, - ) - }.getOrNull() - } - }.awaitAll().filterNotNull() - } + private suspend fun Collection.toMangaList(): List = coroutineScope { + map { id -> + async { + runCatching { + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() + + Manga( + id = generateUid(id.toString()), + title = doc.selectFirstOrThrow("h1").text(), + url = id.toString(), + coverUrl = + "https:" + + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = + doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), + author = null, + tags = emptySet(), + isNsfw = true, + rating = RATING_UNKNOWN, + altTitle = null, + state = null, + source = source, + ) + }.getOrNull() + } + }.awaitAll().filterNotNull() } override suspend fun getDetails(manga: Manga): Manga { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return manga.copy( title = json.getString("title"), largeCoverUrl = - json.getJSONArray("files").getJSONObject(0).let { - val hash = it.getString("hash") - val commonId = commonImageId() - val imageId = imageIdFromHash(hash) - val subDomain = 'a' + subdomainOffset(imageId) - - "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" - }, + json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, author = - json.optJSONArray("artists") - ?.mapJSON { it.getString("artist").toCamelCase() } - ?.joinToString(), + json.optJSONArray("artists") + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), tags = - buildSet { - json.optJSONArray("characters") - ?.mapToTags("character") - ?.let(::addAll) - json.optJSONArray("tags") - ?.mapToTags("tag") - ?.let(::addAll) - json.optJSONArray("artists") - ?.mapToTags("artist") - ?.let(::addAll) - json.optJSONArray("parodys") - ?.mapToTags("parody") - ?.let(::addAll) - json.optJSONArray("groups") - ?.mapToTags("group") - ?.let(::addAll) - }, - chapters = - listOf( - MangaChapter( - id = generateUid(manga.url), - url = manga.url, - name = json.getString("title"), - scanlator = json.getString("type").toTitleCase(), - number = 1, - branch = json.getString("language_localname"), - source = source, - uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), - ), + buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), ), + ), ) } - private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) private fun JSONArray.mapToTags(key: String): Set { val tags = ArraySet(length()) mapJSON { MangaTag( title = - it.getString(key).toCamelCase().let { title -> - if (it.getStringOrNull("female")?.toIntOrNull() == 1) { - "$title ♀" - } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { - "$title ♂" - } else { - title - } - }, + it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, key = it.getString("url").tagUrlToTag(), source = source, ).let(tags::add) @@ -595,11 +588,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getRelatedManga(seed: Manga): List { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) // any better way to get List from this json? return json.getJSONArray("related").let { @@ -608,11 +600,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getPages(chapter: MangaChapter): List { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return json.getJSONArray("files").mapJSON { image -> val hash = image.getString("hash") @@ -637,25 +628,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val subdomainOffsetMap = mutableMapOf() private var commonImageId = "" - private suspend fun refreshScript() = - mutex.withLock { - if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { - val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + private suspend fun refreshScript() = mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() - subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() - val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() - subdomainOffsetMap.clear() - Regex("case (\\d+):").findAll(ggScript).forEach { - val case = it.groupValues[1].toInt() - subdomainOffsetMap[case] = o - } + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } - commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] - scriptLastRetrieval = System.currentTimeMillis() - } + scriptLastRetrieval = System.currentTimeMillis() } + } // m <-- gg.js private suspend fun subdomainOffset(imageId: Int): Int {