[HitomiLa] Refactor

Koitharu 2 years ago
parent 43bdbe5a01
commit 4a0e7221b0
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -1,10 +1,17 @@
package org.koitharu.kotatsu.parsers.site.all
import kotlinx.coroutines.*
import kotlinx.coroutines.sync.*
import androidx.collection.ArraySet
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import okhttp3.Headers
import org.json.*
import org.koitharu.kotatsu.parsers.*
import org.json.JSONArray
import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
@ -24,82 +31,74 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private val ltnBaseUrl get() = "https://${getDomain("ltn")}"
override val availableSortOrders: Set<SortOrder> =
EnumSet.of(
SortOrder.NEWEST,
SortOrder.POPULARITY,
)
override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.NEWEST,
SortOrder.POPULARITY,
)
private val localeMap: Map<Locale, String> =
mapOf(
Locale("id") to "indonesian",
Locale("jv") to "javanese",
Locale("ca") to "catalan",
Locale("ceb") to "cebuano",
Locale("cs") to "czech",
Locale("da") to "danish",
Locale("de") to "german",
Locale("et") to "estonian",
Locale.ENGLISH to "english",
Locale("es") to "spanish",
Locale("eo") to "esperanto",
Locale("fr") to "french",
Locale("it") to "italian",
Locale("hi") to "hindi",
Locale("hu") to "hungarian",
Locale("pl") to "polish",
Locale("pt") to "portuguese",
Locale("vi") to "vietnamese",
Locale("tr") to "turkish",
Locale("ru") to "russian",
Locale("uk") to "ukrainian",
Locale("ar") to "arabic",
Locale.KOREAN to "korean",
Locale.CHINESE to "chinese",
Locale.JAPANESE to "japanese",
)
private val localeMap: Map<Locale, String> = mapOf(
Locale("id") to "indonesian",
Locale("jv") to "javanese",
Locale("ca") to "catalan",
Locale("ceb") to "cebuano",
Locale("cs") to "czech",
Locale("da") to "danish",
Locale("de") to "german",
Locale("et") to "estonian",
Locale.ENGLISH to "english",
Locale("es") to "spanish",
Locale("eo") to "esperanto",
Locale("fr") to "french",
Locale("it") to "italian",
Locale("hi") to "hindi",
Locale("hu") to "hungarian",
Locale("pl") to "polish",
Locale("pt") to "portuguese",
Locale("vi") to "vietnamese",
Locale("tr") to "turkish",
Locale("ru") to "russian",
Locale("uk") to "ukrainian",
Locale("ar") to "arabic",
Locale.KOREAN to "korean",
Locale.CHINESE to "chinese",
Locale.JAPANESE to "japanese",
)
private fun Locale?.getSiteLang(): String {
return when (this) {
null -> "all"
else -> localeMap[this] ?: "all"
}
private fun Locale?.getSiteLang(): String = when (this) {
null -> "all"
else -> localeMap[this] ?: "all"
}
override suspend fun getAvailableLocales(): Set<Locale> {
return localeMap.keys
}
override suspend fun getAvailableLocales(): Set<Locale> = localeMap.keys
override suspend fun getAvailableTags(): Set<MangaTag> {
return coroutineScope {
('a'..'z').map { alphabet ->
async {
val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml()
doc.select(".posts > li").mapNotNull { element ->
val num =
element.ownText().let {
Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0
}
if (num > 100) {
val url = element.selectFirst("a")
val href =
url?.attrAsRelativeUrl("href")
?: return@mapNotNull null
MangaTag(
title = url.ownText().toCamelCase(),
key = href.tagUrlToTag(),
source = source,
)
} else {
null
override suspend fun getAvailableTags(): Set<MangaTag> = coroutineScope {
('a'..'z').map { alphabet ->
async {
val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml()
doc.select(".posts > li").mapNotNull { element ->
val num =
element.ownText().let {
Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0
}
if (num > 100) {
val url = element.selectFirst("a")
val href =
url?.attrAsRelativeUrl("href")
?: return@mapNotNull null
MangaTag(
title = url.ownText().toCamelCase(),
key = href.tagUrlToTag(),
source = source,
)
} else {
null
}
}
}.awaitAll().flatten().toSet()
}
}
}.awaitAll().flatten().toSet()
}
private var cachedSearchIds: List<Int> = emptyList()
@ -107,42 +106,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
override suspend fun getList(
offset: Int,
filter: MangaListFilter?,
): List<Manga> {
return when (filter) {
is MangaListFilter.Advanced -> {
if (filter.tags.isEmpty()) {
when (filter.sortOrder) {
SortOrder.POPULARITY -> {
getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange())
}
else -> {
getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange())
}
): List<Manga> = when (filter) {
is MangaListFilter.Advanced -> {
if (filter.tags.isEmpty()) {
when (filter.sortOrder) {
SortOrder.POPULARITY -> {
getGalleryIDsFromNozomi(
"popular",
"today",
filter.locale.getSiteLang(),
offset.nextOffsetRange(),
)
}
} else {
if (offset == 0) {
cachedSearchIds =
hitomiSearch(
filter.tags.joinToString(" ") { it.key },
filter.sortOrder == SortOrder.POPULARITY,
filter.locale.getSiteLang(),
).toList()
else -> {
getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange())
}
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
}
}
is MangaListFilter.Search -> {
} else {
if (offset == 0) {
cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList()
cachedSearchIds =
hitomiSearch(
filter.tags.joinToString(" ") { it.key },
filter.sortOrder == SortOrder.POPULARITY,
filter.locale.getSiteLang(),
).toList()
}
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
}
}
else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange())
}.toMangaList()
}
is MangaListFilter.Search -> {
if (offset == 0) {
cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList()
}
cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size))
}
else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange())
}.toMangaList()
private fun Int.nextOffsetRange(): LongRange {
val bytes = this * 4L
@ -243,6 +245,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
area = "tag"
tag = it
}
"language" -> {
area = null
lang = tag
@ -255,9 +258,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
val key = hashTerm(it)
val node = getGalleryNodeAtAddress(0)
val data =
bSearch(key, node)
?: return emptySet()
val data = bSearch(key, node) ?: return emptySet()
return getGalleryIDsFromData(data)
}
@ -266,7 +267,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private suspend fun getGalleryIDsFromData(data: Pair<Long, Int>): Set<Int> {
val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data"
val (offset, length) = data
require(length in 0..100000000) {
require(length in 1..100000000) {
"Length $length is too long"
}
@ -283,10 +284,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
val expectedLength = numberOfGalleryIDs * 4 + 4
if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) {
throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long")
} else if (inbuf.size != expectedLength) {
throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength")
require(numberOfGalleryIDs in 1..10000000) {
"number_of_galleryids $numberOfGalleryIDs is too long"
}
require(inbuf.size == expectedLength) {
"inbuf.byteLength ${inbuf.size} != expected_length $expectedLength"
}
for (i in 0.until(numberOfGalleryIDs))
@ -316,7 +318,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return 0
}
private fun locateKey(
fun locateKey(
key: UByteArray,
node: Node,
): Pair<Boolean, Int> {
@ -331,7 +333,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return Pair(false, node.keys.size)
}
private fun isLeaf(node: Node): Boolean {
fun isLeaf(node: Node): Boolean {
for (subnode in node.subNodeAddresses)
if (subnode != 0L) {
return false
@ -361,19 +363,17 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
language: String,
range: LongRange? = null,
): Set<Int> {
val nozomiAddress =
when (area) {
null -> "$ltnBaseUrl/$tag-$language.nozomi"
else -> "$ltnBaseUrl/$area/$tag-$language.nozomi"
}
val nozomiAddress = when (area) {
null -> "$ltnBaseUrl/$tag-$language.nozomi"
else -> "$ltnBaseUrl/$area/$tag-$language.nozomi"
}
val bytes = getRangedResponse(nozomiAddress, range)
val nozomi = mutableSetOf<Int>()
val arrayBuffer =
ByteBuffer
.wrap(bytes)
.order(ByteOrder.BIG_ENDIAN)
val arrayBuffer = ByteBuffer
.wrap(bytes)
.order(ByteOrder.BIG_ENDIAN)
while (arrayBuffer.hasRemaining())
nozomi.add(arrayBuffer.int)
@ -381,10 +381,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return nozomi
}
private val galleriesIndexVersion =
SuspendLazy {
webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw()
}
private val galleriesIndexVersion = SuspendLazy {
webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw()
}
private data class Node(
val keys: List<UByteArray>,
@ -393,10 +392,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
)
private fun decodeNode(data: ByteArray): Node {
val buffer =
ByteBuffer
.wrap(data)
.order(ByteOrder.BIG_ENDIAN)
val buffer = ByteBuffer
.wrap(data)
.order(ByteOrder.BIG_ENDIAN)
val uData = data.toUByteArray()
@ -447,11 +445,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
url: String,
range: LongRange? = null,
): ByteArray {
val rangeHeaders =
when (range) {
null -> Headers.headersOf()
else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}")
}
val rangeHeaders = when (range) {
null -> Headers.headersOf()
else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}")
}
return webClient.httpGet(url, rangeHeaders).parseBytes()
}
@ -464,113 +461,109 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
return MessageDigest.getInstance("SHA-256").digest(data)
}
private suspend fun Collection<Int>.toMangaList(): List<Manga> {
return coroutineScope {
map { id ->
async {
runCatching {
val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml()
Manga(
id = generateUid(id.toString()),
title = doc.selectFirstOrThrow("h1").text(),
url = id.toString(),
coverUrl =
"https:" +
doc.selectFirstOrThrow("picture > source")
.attr("data-srcset")
.substringBefore(" "),
publicUrl =
doc.selectFirstOrThrow("h1 > a")
.attrAsRelativeUrl("href")
.toAbsoluteUrl(domain),
author = null,
tags = emptySet(),
isNsfw = true,
rating = RATING_UNKNOWN,
altTitle = null,
state = null,
source = source,
)
}.getOrNull()
}
}.awaitAll().filterNotNull()
}
private suspend fun Collection<Int>.toMangaList(): List<Manga> = coroutineScope {
map { id ->
async {
runCatching {
val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml()
Manga(
id = generateUid(id.toString()),
title = doc.selectFirstOrThrow("h1").text(),
url = id.toString(),
coverUrl =
"https:" +
doc.selectFirstOrThrow("picture > source")
.attr("data-srcset")
.substringBefore(" "),
publicUrl =
doc.selectFirstOrThrow("h1 > a")
.attrAsRelativeUrl("href")
.toAbsoluteUrl(domain),
author = null,
tags = emptySet(),
isNsfw = true,
rating = RATING_UNKNOWN,
altTitle = null,
state = null,
source = source,
)
}.getOrNull()
}
}.awaitAll().filterNotNull()
}
override suspend fun getDetails(manga: Manga): Manga {
val json =
webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
return manga.copy(
title = json.getString("title"),
largeCoverUrl =
json.getJSONArray("files").getJSONObject(0).let {
val hash = it.getString("hash")
val commonId = commonImageId()
val imageId = imageIdFromHash(hash)
val subDomain = 'a' + subdomainOffset(imageId)
"https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp"
},
json.getJSONArray("files").getJSONObject(0).let {
val hash = it.getString("hash")
val commonId = commonImageId()
val imageId = imageIdFromHash(hash)
val subDomain = 'a' + subdomainOffset(imageId)
"https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp"
},
author =
json.optJSONArray("artists")
?.mapJSON { it.getString("artist").toCamelCase() }
?.joinToString(),
json.optJSONArray("artists")
?.mapJSON { it.getString("artist").toCamelCase() }
?.joinToString(),
publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain),
tags =
buildSet {
json.optJSONArray("characters")
?.mapToTags("character")
?.let(::addAll)
json.optJSONArray("tags")
?.mapToTags("tag")
?.let(::addAll)
json.optJSONArray("artists")
?.mapToTags("artist")
?.let(::addAll)
json.optJSONArray("parodys")
?.mapToTags("parody")
?.let(::addAll)
json.optJSONArray("groups")
?.mapToTags("group")
?.let(::addAll)
},
chapters =
listOf(
MangaChapter(
id = generateUid(manga.url),
url = manga.url,
name = json.getString("title"),
scanlator = json.getString("type").toTitleCase(),
number = 1,
branch = json.getString("language_localname"),
source = source,
uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")),
),
buildSet {
json.optJSONArray("characters")
?.mapToTags("character")
?.let(::addAll)
json.optJSONArray("tags")
?.mapToTags("tag")
?.let(::addAll)
json.optJSONArray("artists")
?.mapToTags("artist")
?.let(::addAll)
json.optJSONArray("parodys")
?.mapToTags("parody")
?.let(::addAll)
json.optJSONArray("groups")
?.mapToTags("group")
?.let(::addAll)
},
chapters = listOf(
MangaChapter(
id = generateUid(manga.url),
url = manga.url,
name = json.getString("title"),
scanlator = json.getString("type").toTitleCase(),
number = 1,
branch = json.getString("language_localname"),
source = source,
uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")),
),
),
)
}
private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH)
private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH)
private fun JSONArray.mapToTags(key: String): Set<MangaTag> {
val tags = ArraySet<MangaTag>(length())
mapJSON {
MangaTag(
title =
it.getString(key).toCamelCase().let { title ->
if (it.getStringOrNull("female")?.toIntOrNull() == 1) {
"$title"
} else if (it.getStringOrNull("male")?.toIntOrNull() == 1) {
"$title"
} else {
title
}
},
it.getString(key).toCamelCase().let { title ->
if (it.getStringOrNull("female")?.toIntOrNull() == 1) {
"$title"
} else if (it.getStringOrNull("male")?.toIntOrNull() == 1) {
"$title"
} else {
title
}
},
key = it.getString("url").tagUrlToTag(),
source = source,
).let(tags::add)
@ -595,11 +588,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
}
override suspend fun getRelatedManga(seed: Manga): List<Manga> {
val json =
webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
// any better way to get List<Int> from this json?
return json.getJSONArray("related").let {
@ -608,11 +600,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val json =
webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js")
.parseRaw()
.substringAfter("var galleryinfo = ")
.let(::JSONObject)
return json.getJSONArray("files").mapJSON { image ->
val hash = image.getString("hash")
@ -637,25 +628,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo
private val subdomainOffsetMap = mutableMapOf<Int, Int>()
private var commonImageId = ""
private suspend fun refreshScript() =
mutex.withLock {
if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) {
val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw()
private suspend fun refreshScript() = mutex.withLock {
if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) {
val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw()
subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt()
val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt()
subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt()
val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt()
subdomainOffsetMap.clear()
Regex("case (\\d+):").findAll(ggScript).forEach {
val case = it.groupValues[1].toInt()
subdomainOffsetMap[case] = o
}
subdomainOffsetMap.clear()
Regex("case (\\d+):").findAll(ggScript).forEach {
val case = it.groupValues[1].toInt()
subdomainOffsetMap[case] = o
}
commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1]
commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1]
scriptLastRetrieval = System.currentTimeMillis()
}
scriptLastRetrieval = System.currentTimeMillis()
}
}
// m <-- gg.js
private suspend fun subdomainOffset(imageId: Int): Int {

Loading…
Cancel
Save