[HitomiLa] Refactor

pull/428/head
Koitharu 2 years ago
parent 43bdbe5a01
commit 4a0e7221b0
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -1,10 +1,17 @@
package org.koitharu.kotatsu.parsers.site.all package org.koitharu.kotatsu.parsers.site.all
import kotlinx.coroutines.* import androidx.collection.ArraySet
import kotlinx.coroutines.sync.* import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import okhttp3.Headers import okhttp3.Headers
import org.json.* import org.json.JSONArray
import org.koitharu.kotatsu.parsers.* import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
@ -24,82 +31,74 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private val ltnBaseUrl get() = "https://${getDomain("ltn")}" private val ltnBaseUrl get() = "https://${getDomain("ltn")}"
override val availableSortOrders: Set<SortOrder> = override val availableSortOrders: Set<SortOrder> = EnumSet.of(
EnumSet.of( SortOrder.NEWEST,
SortOrder.NEWEST, SortOrder.POPULARITY,
SortOrder.POPULARITY, )
)
private val localeMap: Map<Locale, String> = private val localeMap: Map<Locale, String> = mapOf(
mapOf( Locale("id") to "indonesian",
Locale("id") to "indonesian", Locale("jv") to "javanese",
Locale("jv") to "javanese", Locale("ca") to "catalan",
Locale("ca") to "catalan", Locale("ceb") to "cebuano",
Locale("ceb") to "cebuano", Locale("cs") to "czech",
Locale("cs") to "czech", Locale("da") to "danish",
Locale("da") to "danish", Locale("de") to "german",
Locale("de") to "german", Locale("et") to "estonian",
Locale("et") to "estonian", Locale.ENGLISH to "english",
Locale.ENGLISH to "english", Locale("es") to "spanish",
Locale("es") to "spanish", Locale("eo") to "esperanto",
Locale("eo") to "esperanto", Locale("fr") to "french",
Locale("fr") to "french", Locale("it") to "italian",
Locale("it") to "italian", Locale("hi") to "hindi",
Locale("hi") to "hindi", Locale("hu") to "hungarian",
Locale("hu") to "hungarian", Locale("pl") to "polish",
Locale("pl") to "polish", Locale("pt") to "portuguese",
Locale("pt") to "portuguese", Locale("vi") to "vietnamese",
Locale("vi") to "vietnamese", Locale("tr") to "turkish",
Locale("tr") to "turkish", Locale("ru") to "russian",
Locale("ru") to "russian", Locale("uk") to "ukrainian",
Locale("uk") to "ukrainian", Locale("ar") to "arabic",
Locale("ar") to "arabic", Locale.KOREAN to "korean",
Locale.KOREAN to "korean", Locale.CHINESE to "chinese",
Locale.CHINESE to "chinese", Locale.JAPANESE to "japanese",
Locale.JAPANESE to "japanese", )
)
private fun Locale?.getSiteLang(): String { private fun Locale?.getSiteLang(): String = when (this) {
return when (this) { null -> "all"
null -> "all" else -> localeMap[this] ?: "all"
else -> localeMap[this] ?: "all"
}
} }
override suspend fun getAvailableLocales(): Set<Locale> { override suspend fun getAvailableLocales(): Set<Locale> = localeMap.keys
return localeMap.keys
}
override suspend fun getAvailableTags(): Set<MangaTag> { override suspend fun getAvailableTags(): Set<MangaTag> = coroutineScope {
return coroutineScope { ('a'..'z').map { alphabet ->
('a'..'z').map { alphabet -> async {
async { val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml()
val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml()
doc.select(".posts > li").mapNotNull { element ->
doc.select(".posts > li").mapNotNull { element -> val num =
val num = element.ownText().let {
element.ownText().let { Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0
Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0
}
if (num > 100) {
val url = element.selectFirst("a")
val href =
url?.attrAsRelativeUrl("href")
?: return@mapNotNull null
MangaTag(
title = url.ownText().toCamelCase(),
key = href.tagUrlToTag(),
source = source,
)
} else {
null
} }
if (num > 100) {
val url = element.selectFirst("a")
val href =
url?.attrAsRelativeUrl("href")
?: return@mapNotNull null
MangaTag(
title = url.ownText().toCamelCase(),
key = href.tagUrlToTag(),
source = source,
)
} else {
null
} }
} }
}.awaitAll().flatten().toSet() }
} }.awaitAll().flatten().toSet()
} }
private var cachedSearchIds: List<Int> = emptyList() private var cachedSearchIds: List<Int> = emptyList()
@ -107,42 +106,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
override suspend fun getList( override suspend fun getList(
offset: Int, offset: Int,
filter: MangaListFilter?, filter: MangaListFilter?,
): List<Manga> { ): List<Manga> = when (filter) {
return when (filter) { is MangaListFilter.Advanced -> {
is MangaListFilter.Advanced -> { if (filter.tags.isEmpty()) {
if (filter.tags.isEmpty()) { when (filter.sortOrder) {
when (filter.sortOrder) { SortOrder.POPULARITY -> {
SortOrder.POPULARITY -> { getGalleryIDsFromNozomi(
getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange()) "popular",
} "today",
filter.locale.getSiteLang(),
else -> { offset.nextOffsetRange(),
getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) )
}
} }
} else {
if (offset == 0) { else -> {
cachedSearchIds = getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange())
hitomiSearch(
filter.tags.joinToString(" ") { it.key },
filter.sortOrder == SortOrder.POPULARITY,
filter.locale.getSiteLang(),
).toList()
} }
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
} }
} } else {
is MangaListFilter.Search -> {
if (offset == 0) { if (offset == 0) {
cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() cachedSearchIds =
hitomiSearch(
filter.tags.joinToString(" ") { it.key },
filter.sortOrder == SortOrder.POPULARITY,
filter.locale.getSiteLang(),
).toList()
} }
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
} }
}
else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) is MangaListFilter.Search -> {
}.toMangaList() if (offset == 0) {
} cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList()
}
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
}
else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange())
}.toMangaList()
private fun Int.nextOffsetRange(): LongRange { private fun Int.nextOffsetRange(): LongRange {
val bytes = this * 4L val bytes = this * 4L
@ -243,6 +245,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
area = "tag" area = "tag"
tag = it tag = it
} }
"language" -> { "language" -> {
area = null area = null
lang = tag lang = tag
@ -255,9 +258,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
val key = hashTerm(it) val key = hashTerm(it)
val node = getGalleryNodeAtAddress(0) val node = getGalleryNodeAtAddress(0)
val data = val data = bSearch(key, node) ?: return emptySet()
bSearch(key, node)
?: return emptySet()
return getGalleryIDsFromData(data) return getGalleryIDsFromData(data)
} }
@ -266,7 +267,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private suspend fun getGalleryIDsFromData(data: Pair<Long, Int>): Set<Int> { private suspend fun getGalleryIDsFromData(data: Pair<Long, Int>): Set<Int> {
val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data"
val (offset, length) = data val (offset, length) = data
require(length in 0..100000000) { require(length in 1..100000000) {
"Length $length is too long" "Length $length is too long"
} }
@ -283,10 +284,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
val expectedLength = numberOfGalleryIDs * 4 + 4 val expectedLength = numberOfGalleryIDs * 4 + 4
if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { require(numberOfGalleryIDs in 1..10000000) {
throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long") "number_of_galleryids $numberOfGalleryIDs is too long"
} else if (inbuf.size != expectedLength) { }
throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") require(inbuf.size == expectedLength) {
"inbuf.byteLength ${inbuf.size} != expected_length $expectedLength"
} }
for (i in 0.until(numberOfGalleryIDs)) for (i in 0.until(numberOfGalleryIDs))
@ -316,7 +318,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return 0 return 0
} }
private fun locateKey( fun locateKey(
key: UByteArray, key: UByteArray,
node: Node, node: Node,
): Pair<Boolean, Int> { ): Pair<Boolean, Int> {
@ -331,7 +333,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return Pair(false, node.keys.size) return Pair(false, node.keys.size)
} }
private fun isLeaf(node: Node): Boolean { fun isLeaf(node: Node): Boolean {
for (subnode in node.subNodeAddresses) for (subnode in node.subNodeAddresses)
if (subnode != 0L) { if (subnode != 0L) {
return false return false
@ -361,19 +363,17 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
language: String, language: String,
range: LongRange? = null, range: LongRange? = null,
): Set<Int> { ): Set<Int> {
val nozomiAddress = val nozomiAddress = when (area) {
when (area) { null -> "$ltnBaseUrl/$tag-$language.nozomi"
null -> "$ltnBaseUrl/$tag-$language.nozomi" else -> "$ltnBaseUrl/$area/$tag-$language.nozomi"
else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" }
}
val bytes = getRangedResponse(nozomiAddress, range) val bytes = getRangedResponse(nozomiAddress, range)
val nozomi = mutableSetOf<Int>() val nozomi = mutableSetOf<Int>()
val arrayBuffer = val arrayBuffer = ByteBuffer
ByteBuffer .wrap(bytes)
.wrap(bytes) .order(ByteOrder.BIG_ENDIAN)
.order(ByteOrder.BIG_ENDIAN)
while (arrayBuffer.hasRemaining()) while (arrayBuffer.hasRemaining())
nozomi.add(arrayBuffer.int) nozomi.add(arrayBuffer.int)
@ -381,10 +381,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return nozomi return nozomi
} }
private val galleriesIndexVersion = private val galleriesIndexVersion = SuspendLazy {
SuspendLazy { webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw()
webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() }
}
private data class Node( private data class Node(
val keys: List<UByteArray>, val keys: List<UByteArray>,
@ -393,10 +392,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
) )
private fun decodeNode(data: ByteArray): Node { private fun decodeNode(data: ByteArray): Node {
val buffer = val buffer = ByteBuffer
ByteBuffer .wrap(data)
.wrap(data) .order(ByteOrder.BIG_ENDIAN)
.order(ByteOrder.BIG_ENDIAN)
val uData = data.toUByteArray() val uData = data.toUByteArray()
@ -447,11 +445,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
url: String, url: String,
range: LongRange? = null, range: LongRange? = null,
): ByteArray { ): ByteArray {
val rangeHeaders = val rangeHeaders = when (range) {
when (range) { null -> Headers.headersOf()
null -> Headers.headersOf() else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}")
else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") }
}
return webClient.httpGet(url, rangeHeaders).parseBytes() return webClient.httpGet(url, rangeHeaders).parseBytes()
} }
@ -464,113 +461,109 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return MessageDigest.getInstance("SHA-256").digest(data) return MessageDigest.getInstance("SHA-256").digest(data)
} }
private suspend fun Collection<Int>.toMangaList(): List<Manga> { private suspend fun Collection<Int>.toMangaList(): List<Manga> = coroutineScope {
return coroutineScope { map { id ->
map { id -> async {
async { runCatching {
runCatching { val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml()
val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml()
Manga(
Manga( id = generateUid(id.toString()),
id = generateUid(id.toString()), title = doc.selectFirstOrThrow("h1").text(),
title = doc.selectFirstOrThrow("h1").text(), url = id.toString(),
url = id.toString(), coverUrl =
coverUrl = "https:" +
"https:" + doc.selectFirstOrThrow("picture > source")
doc.selectFirstOrThrow("picture > source") .attr("data-srcset")
.attr("data-srcset") .substringBefore(" "),
.substringBefore(" "), publicUrl =
publicUrl = doc.selectFirstOrThrow("h1 > a")
doc.selectFirstOrThrow("h1 > a") .attrAsRelativeUrl("href")
.attrAsRelativeUrl("href") .toAbsoluteUrl(domain),
.toAbsoluteUrl(domain), author = null,
author = null, tags = emptySet(),
tags = emptySet(), isNsfw = true,
isNsfw = true, rating = RATING_UNKNOWN,
rating = RATING_UNKNOWN, altTitle = null,
altTitle = null, state = null,
state = null, source = source,
source = source, )
) }.getOrNull()
}.getOrNull() }
} }.awaitAll().filterNotNull()
}.awaitAll().filterNotNull()
}
} }
override suspend fun getDetails(manga: Manga): Manga { override suspend fun getDetails(manga: Manga): Manga {
val json = val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js")
webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") .parseRaw()
.parseRaw() .substringAfter("var galleryinfo = ")
.substringAfter("var galleryinfo = ") .let(::JSONObject)
.let(::JSONObject)
return manga.copy( return manga.copy(
title = json.getString("title"), title = json.getString("title"),
largeCoverUrl = largeCoverUrl =
json.getJSONArray("files").getJSONObject(0).let { json.getJSONArray("files").getJSONObject(0).let {
val hash = it.getString("hash") val hash = it.getString("hash")
val commonId = commonImageId() val commonId = commonImageId()
val imageId = imageIdFromHash(hash) val imageId = imageIdFromHash(hash)
val subDomain = 'a' + subdomainOffset(imageId) val subDomain = 'a' + subdomainOffset(imageId)
"https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp"
}, },
author = author =
json.optJSONArray("artists") json.optJSONArray("artists")
?.mapJSON { it.getString("artist").toCamelCase() } ?.mapJSON { it.getString("artist").toCamelCase() }
?.joinToString(), ?.joinToString(),
publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain),
tags = tags =
buildSet { buildSet {
json.optJSONArray("characters") json.optJSONArray("characters")
?.mapToTags("character") ?.mapToTags("character")
?.let(::addAll) ?.let(::addAll)
json.optJSONArray("tags") json.optJSONArray("tags")
?.mapToTags("tag") ?.mapToTags("tag")
?.let(::addAll) ?.let(::addAll)
json.optJSONArray("artists") json.optJSONArray("artists")
?.mapToTags("artist") ?.mapToTags("artist")
?.let(::addAll) ?.let(::addAll)
json.optJSONArray("parodys") json.optJSONArray("parodys")
?.mapToTags("parody") ?.mapToTags("parody")
?.let(::addAll) ?.let(::addAll)
json.optJSONArray("groups") json.optJSONArray("groups")
?.mapToTags("group") ?.mapToTags("group")
?.let(::addAll) ?.let(::addAll)
}, },
chapters = chapters = listOf(
listOf( MangaChapter(
MangaChapter( id = generateUid(manga.url),
id = generateUid(manga.url), url = manga.url,
url = manga.url, name = json.getString("title"),
name = json.getString("title"), scanlator = json.getString("type").toTitleCase(),
scanlator = json.getString("type").toTitleCase(), number = 1,
number = 1, branch = json.getString("language_localname"),
branch = json.getString("language_localname"), source = source,
source = source, uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")),
uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")),
),
), ),
),
) )
} }
private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH)
private fun JSONArray.mapToTags(key: String): Set<MangaTag> { private fun JSONArray.mapToTags(key: String): Set<MangaTag> {
val tags = ArraySet<MangaTag>(length()) val tags = ArraySet<MangaTag>(length())
mapJSON { mapJSON {
MangaTag( MangaTag(
title = title =
it.getString(key).toCamelCase().let { title -> it.getString(key).toCamelCase().let { title ->
if (it.getStringOrNull("female")?.toIntOrNull() == 1) { if (it.getStringOrNull("female")?.toIntOrNull() == 1) {
"$title ♀" "$title ♀"
} else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) {
"$title ♂" "$title ♂"
} else { } else {
title title
} }
}, },
key = it.getString("url").tagUrlToTag(), key = it.getString("url").tagUrlToTag(),
source = source, source = source,
).let(tags::add) ).let(tags::add)
@ -595,11 +588,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
} }
override suspend fun getRelatedManga(seed: Manga): List<Manga> { override suspend fun getRelatedManga(seed: Manga): List<Manga> {
val json = val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js")
webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") .parseRaw()
.parseRaw() .substringAfter("var galleryinfo = ")
.substringAfter("var galleryinfo = ") .let(::JSONObject)
.let(::JSONObject)
// any better way to get List<Int> from this json? // any better way to get List<Int> from this json?
return json.getJSONArray("related").let { return json.getJSONArray("related").let {
@ -608,11 +600,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
} }
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val json = val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js")
webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") .parseRaw()
.parseRaw() .substringAfter("var galleryinfo = ")
.substringAfter("var galleryinfo = ") .let(::JSONObject)
.let(::JSONObject)
return json.getJSONArray("files").mapJSON { image -> return json.getJSONArray("files").mapJSON { image ->
val hash = image.getString("hash") val hash = image.getString("hash")
@ -637,25 +628,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private val subdomainOffsetMap = mutableMapOf<Int, Int>() private val subdomainOffsetMap = mutableMapOf<Int, Int>()
private var commonImageId = "" private var commonImageId = ""
private suspend fun refreshScript() = private suspend fun refreshScript() = mutex.withLock {
mutex.withLock { if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) {
if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw()
val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw()
subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt()
val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt()
subdomainOffsetMap.clear() subdomainOffsetMap.clear()
Regex("case (\\d+):").findAll(ggScript).forEach { Regex("case (\\d+):").findAll(ggScript).forEach {
val case = it.groupValues[1].toInt() val case = it.groupValues[1].toInt()
subdomainOffsetMap[case] = o subdomainOffsetMap[case] = o
} }
commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1]
scriptLastRetrieval = System.currentTimeMillis() scriptLastRetrieval = System.currentTimeMillis()
}
} }
}
// m <-- gg.js // m <-- gg.js
private suspend fun subdomainOffset(imageId: Int): Int { private suspend fun subdomainOffset(imageId: Int): Int {

Loading…
Cancel
Save