From d8c32047d04c92f5a37cd6ef2a1475b78489da0c Mon Sep 17 00:00:00 2001 From: devi Date: Fri, 22 Dec 2023 17:23:52 +0100 Subject: [PATCH 01/14] Fix MangaATrend Add LerManga Remove ReaperScansPt.kt --- .../parsers/site/heancms/pt/ReaperScansPt.kt | 13 -- .../site/mangareader/ar/Mangaatrend.kt | 10 -- .../kotatsu/parsers/site/pt/LerManga.kt | 144 ++++++++++++++++++ .../parsers/site/zeistmanga/ar/Mangaatrend.kt | 12 ++ 4 files changed, 156 insertions(+), 23 deletions(-) delete mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/heancms/pt/ReaperScansPt.kt delete mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/ar/Mangaatrend.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LerManga.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ar/Mangaatrend.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/heancms/pt/ReaperScansPt.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/heancms/pt/ReaperScansPt.kt deleted file mode 100644 index 7fa8461d..00000000 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/heancms/pt/ReaperScansPt.kt +++ /dev/null @@ -1,13 +0,0 @@ -package org.koitharu.kotatsu.parsers.site.heancms.pt - -import org.koitharu.kotatsu.parsers.MangaLoaderContext -import org.koitharu.kotatsu.parsers.MangaSourceParser -import org.koitharu.kotatsu.parsers.config.ConfigKey -import org.koitharu.kotatsu.parsers.model.MangaSource -import org.koitharu.kotatsu.parsers.site.heancms.HeanCms - -@MangaSourceParser("REAPERSCANSPT", "ReaperScans.net", "pt") -internal class ReaperScansPt(context: MangaLoaderContext) : - HeanCms(context, MangaSource.REAPERSCANSPT, "reaperscans.net") { - override val configKeyDomain = ConfigKey.Domain("reaperscans.net", "reaperbr.online") -} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/ar/Mangaatrend.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/ar/Mangaatrend.kt deleted file mode 
100644 index e36d349c..00000000 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/ar/Mangaatrend.kt +++ /dev/null @@ -1,10 +0,0 @@ -package org.koitharu.kotatsu.parsers.site.mangareader.ar - -import org.koitharu.kotatsu.parsers.MangaLoaderContext -import org.koitharu.kotatsu.parsers.MangaSourceParser -import org.koitharu.kotatsu.parsers.model.MangaSource -import org.koitharu.kotatsu.parsers.site.mangareader.MangaReaderParser - -@MangaSourceParser("MANGAATREND", "Manga A Trend", "ar") -internal class Mangaatrend(context: MangaLoaderContext) : - MangaReaderParser(context, MangaSource.MANGAATREND, "mangaatrend.net", pageSize = 40, searchPageSize = 20) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LerManga.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LerManga.kt new file mode 100644 index 00000000..477f8cf7 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LerManga.kt @@ -0,0 +1,144 @@ +package org.koitharu.kotatsu.parsers.site.pt + +import org.koitharu.kotatsu.parsers.ErrorMessages +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.PagedMangaParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import java.text.SimpleDateFormat +import java.util.* + +@MangaSourceParser("LERMANGA", "LerManga", "pt") +class LerManga(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.LERMANGA, 24) { + + override val availableSortOrders: Set = + EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL, SortOrder.RATING) + + override val configKeyDomain = ConfigKey.Domain("lermanga.org") + + override val isMultipleTagsSupported = false + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List { + + val url = buildString { + append("https://") + append(domain) + 
append("/mangas") + + if (page > 1) { + append("/page/") + append(page.toString()) + } + + when (filter) { + + is MangaListFilter.Search -> { + throw IllegalArgumentException(ErrorMessages.SEARCH_NOT_SUPPORTED) + } + + is MangaListFilter.Advanced -> { + if (filter.tags.isNotEmpty()) { + filter.tags.oneOrThrowIfMany()?.let { + append("/genero/") + append(it.key) + } + } + + append("/?orderby=") + append( + when (filter.sortOrder) { + SortOrder.UPDATED -> "modified&order=desc" + SortOrder.POPULARITY -> "views&order=desc" + SortOrder.ALPHABETICAL -> "title&order=asc" + SortOrder.RATING -> "rating&order=desc" + else -> "modified&order=desc" + }, + ) + } + + null -> append("/?orderby=modified&order=desc") + } + } + val doc = webClient.httpGet(url).parseHtml() + return doc.select(".tab-content .flw-item").map { div -> + val a = div.selectFirstOrThrow("a.film-poster-ahref") + val href = a.attrAsAbsoluteUrl("href") + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + title = div.selectLastOrThrow("h3.film-name").text(), + coverUrl = div.selectFirst("img.film-poster-img")?.src().orEmpty(), + altTitle = null, + rating = div.selectFirst(".item__rating")?.ownText()?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN, + tags = emptySet(), + description = null, + state = null, + author = null, + isNsfw = div.selectFirst(".tick-itemadult") != null, + source = source, + ) + } + } + + override suspend fun getAvailableTags(): Set { + val doc = webClient.httpGet("https://$domain").parseHtml().requireElementById("menu-header") + return doc.select("#menu-item:contains(GÊNERO) ul li a").mapNotNullToSet { a -> + MangaTag( + key = a.attr("href").removeSuffix("/").substringAfterLast("/"), + title = a.text(), + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val dateFormat = SimpleDateFormat("dd-MM-yyyy", sourceLocale) + return manga.copy( + description = 
doc.selectFirstOrThrow("div.boxAnimeSobreLast p").html(), + tags = doc.selectFirst("ul.genre-list")?.select("li a")?.mapNotNullToSet { a -> + MangaTag( + key = a.attr("href").removeSuffix("/").substringAfterLast("/"), + title = a.text(), + source = source, + ) + }.orEmpty(), + isNsfw = doc.select("ul.genre-list li").text().contains("Adulto"), + chapters = doc.select("div.manga-chapters div.single-chapter").mapChapters(reversed = true) { i, div -> + val a = div.selectFirstOrThrow("a") + val href = a.attrAsAbsoluteUrl("href") + MangaChapter( + id = generateUid(href), + name = a.text(), + number = i + 1, + url = href, + scanlator = null, + uploadDate = dateFormat.tryParse(div.selectFirstOrThrow("small small").text()), + branch = null, + source = source, + ) + }, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val fullUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(fullUrl).parseHtml() + val script = doc.selectFirstOrThrow(".heading-header + script").attr("src") + val data = Base64.getDecoder().decode(script.replace("data:text/javascript;base64,", "")).decodeToString() + val images = + data.substringAfter("var imagens_cap=[").substringBeforeLast("]").replace("\\", "").replace("\"", "") + .split(",") + return images.map { img -> + MangaPage( + id = generateUid(img), + url = img, + preview = null, + source = source, + ) + } + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ar/Mangaatrend.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ar/Mangaatrend.kt new file mode 100644 index 00000000..e1f0b591 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ar/Mangaatrend.kt @@ -0,0 +1,12 @@ +package org.koitharu.kotatsu.parsers.site.zeistmanga.ar + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import 
org.koitharu.kotatsu.parsers.site.zeistmanga.ZeistMangaParser + +@MangaSourceParser("MANGAATREND", "MangaATrend", "ar") +internal class Mangaatrend(context: MangaLoaderContext) : + ZeistMangaParser(context, MangaSource.MANGAATREND, "mangaatrend.net") { + override val selectPage = "#seoneurons-target img" +} From 3f3ab5a7be7ead74962da969c2e4d217021211c7 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 19:14:51 +0500 Subject: [PATCH 02/14] Hitomi.la --- .../parsers/site/all/HitomiLaParser.kt | 596 ++++++++++++++++++ .../koitharu/kotatsu/parsers/util/Parse.kt | 6 + 2 files changed, 602 insertions(+) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt new file mode 100644 index 00000000..caf525bc --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -0,0 +1,596 @@ +package org.koitharu.kotatsu.parsers.site.all + +import kotlinx.coroutines.* +import kotlinx.coroutines.sync.* +import okhttp3.Headers +import org.json.* +import org.koitharu.kotatsu.parsers.* +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import org.koitharu.kotatsu.parsers.util.json.getStringOrNull +import org.koitharu.kotatsu.parsers.util.json.mapJSON +import java.nio.ByteBuffer +import java.nio.ByteOrder +import java.security.MessageDigest +import java.text.SimpleDateFormat +import java.util.* +import kotlin.math.min + +@OptIn(ExperimentalUnsignedTypes::class) +@MangaSourceParser("HITOMILA", "Hitomi.La", type = ContentType.HENTAI) +class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSource.HITOMILA) { + + override val configKeyDomain = ConfigKey.Domain("hitomi.la") + + private val 
ltnBaseUrl get() = "https://${getDomain("ltn")}" + + override val availableSortOrders: Set = EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) + + override suspend fun getAvailableTags(): Set { + return coroutineScope { + ('a'..'z').map { alphabet -> + async { + val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() + + doc.select(".posts > li").mapNotNull { element -> + val num = element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 + } + + if (num > 100) { + val url = element.selectFirst("a") + val href = url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null + + MangaTag( + title = url.ownText().toCamelCase(), + key = href.tagUrlToTag(), + source = source, + ) + } else { + null + } + } + } + }.awaitAll().flatten().toSet() + } + } + + private var cachedSearchIds: List = emptyList() + + override suspend fun getList(offset: Int, filter: MangaListFilter?): List { + return when (filter) { + is MangaListFilter.Advanced -> { + if (filter.tags.isEmpty()) { + when (filter.sortOrder) { + SortOrder.POPULARITY -> { + getGalleryIDsFromNozomi("popular", "today", "all", offset.nextOffsetRange()) + } + + else -> { + getGalleryIDsFromNozomi(null, "index", "all", offset.nextOffsetRange()) + } + } + } else { + if (offset == 0) { + cachedSearchIds = hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY + ).toList() + } + cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + } + } + + is MangaListFilter.Search -> { + if (offset == 0) { + cachedSearchIds = hitomiSearch( + filter.query, + filter.sortOrder == SortOrder.POPULARITY + ).toList() + } + cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + } + + else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) + }.toMangaList() + } + + private fun Int.nextOffsetRange(): LongRange { + val bytes = this*4L + return bytes.until(bytes+100L) + } + 
+ private suspend fun hitomiSearch(query: String, sortByPopularity: Boolean = false) : Set = coroutineScope { + val terms = query + .trim() + .replace(Regex("""^\?"""), "") + .lowercase() + .split(Regex("\\s+")) + .map { + it.replace('_', ' ') + } + + val positiveTerms = LinkedList() + val negativeTerms = LinkedList() + + for (term in terms) { + if (term.startsWith("-")) + negativeTerms.push(term.removePrefix("-")) + else if (term.isNotBlank()) + positiveTerms.push(term) + } + + val positiveResults = positiveTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } + + val negativeResults = negativeTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } + + val results = when { + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + else -> emptySet() + }.toMutableSet() + + fun filterPositive(newResults: Set) { + when { + results.isEmpty() -> results.addAll(newResults) + else -> results.retainAll(newResults) + } + } + + fun filterNegative(newResults: Set) { + results.removeAll(newResults) + } + + //positive results + positiveResults.forEach { + filterPositive(it.await()) + } + + //negative results + negativeResults.forEach { + filterNegative(it.await()) + } + + results + } + + //search.js + private suspend fun getGalleryIDsForQuery(query: String) : Set { + query.replace("_", " ").let { + if (it.indexOf(':') > -1) { + val sides = it.split(":") + val ns = sides[0] + var tag = sides[1] + + var area : String? 
= ns + var language = "all" + when (ns) { + "female", "male" -> { + area = "tag" + tag = it + } + "language" -> { + area = null + language = tag + tag = "index" + } + } + + return getGalleryIDsFromNozomi(area, tag, language) + } + + val key = hashTerm(it) + val field = "galleries" + + val node = getNodeAtAddress(field, 0) + + val data = bSearch(field, key, node) + + if (data != null) + return getGalleryIDsFromData(data) + + return emptySet() + } + } + + private suspend fun getGalleryIDsFromData(data: Pair) : Set { + val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" + val (offset, length) = data + if (length > 100000000 || length <= 0) + throw Exception("length $length is too long") + + val inbuf = getURLAtRange(url, offset.until(offset+length)) + + val galleryIDs = mutableSetOf() + + val buffer = ByteBuffer + .wrap(inbuf) + .order(ByteOrder.BIG_ENDIAN) + + val numberOfGalleryIDs = buffer.int + + val expectedLength = numberOfGalleryIDs*4+4 + + if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) + throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") + else if (inbuf.size != expectedLength) + throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + + for (i in 0.until(numberOfGalleryIDs)) + galleryIDs.add(buffer.int) + + return galleryIDs + } + + private suspend fun bSearch(field: String, key: UByteArray, node: Node) : Pair? 
{ + fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { + val top = min(dv1.size, dv2.size) + + for (i in 0.until(top)) { + if (dv1[i] < dv2[i]) + return -1 + else if (dv1[i] > dv2[i]) + return 1 + } + + return 0 + } + + fun locateKey(key: UByteArray, node: Node) : Pair { + for (i in node.keys.indices) { + val cmpResult = compareArrayBuffers(key, node.keys[i]) + + if (cmpResult <= 0) + return Pair(cmpResult==0, i) + } + + return Pair(false, node.keys.size) + } + + fun isLeaf(node: Node) : Boolean { + for (subnode in node.subNodeAddresses) + if (subnode != 0L) + return false + + return true + } + + if (node.keys.isEmpty()) + return null + + val (there, where) = locateKey(key, node) + if (there) + return node.datas[where] + else if (isLeaf(node)) + return null + + val nextNode = getNodeAtAddress(field, node.subNodeAddresses[where]) + + return bSearch(field, key, nextNode) + } + + private suspend fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String, range: LongRange? 
= null) : Set { + val nozomiAddress = when(area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } + + val bytes = getURLAtRange(nozomiAddress, range) + val nozomi = mutableSetOf() + + val arrayBuffer = ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) + + while (arrayBuffer.hasRemaining()) + nozomi.add(arrayBuffer.int) + + return nozomi + } + + private val tagIndexVersion = SuspendLazy { getIndexVersion("tagindex") } + private val galleriesIndexVersion = SuspendLazy { getIndexVersion("galleriesindex") } + + private suspend fun getIndexVersion(name: String) = + webClient.httpGet("$ltnBaseUrl/$name/version?_=${System.currentTimeMillis()}").parseRaw() + + private data class Node( + val keys: List, + val datas: List>, + val subNodeAddresses: List, + ) + + private fun decodeNode(data: ByteArray) : Node { + val buffer = ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) + + val uData = data.toUByteArray() + + val numberOfKeys = buffer.int + val keys = ArrayList() + + for (i in 0.until(numberOfKeys)) { + val keySize = buffer.int + + if (keySize == 0 || keySize > 32) + throw Exception("fatal: !keySize || keySize > 32") + + keys.add(uData.sliceArray(buffer.position().until(buffer.position()+keySize))) + buffer.position(buffer.position()+keySize) + } + + val numberOfDatas = buffer.int + val datas = ArrayList>() + + for (i in 0.until(numberOfDatas)) { + val offset = buffer.long + val length = buffer.int + + datas.add(Pair(offset, length)) + } + + val numberOfSubNodeAddresses = 16 + 1 + val subNodeAddresses = ArrayList() + + for (i in 0.until(numberOfSubNodeAddresses)) { + val subNodeAddress = buffer.long + subNodeAddresses.add(subNodeAddress) + } + + return Node(keys, datas, subNodeAddresses) + } + + private suspend fun getNodeAtAddress(field: String, address: Long) : Node { + val url = + when(field) { + "galleries" -> "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" + "languages" -> 
"$ltnBaseUrl/galleriesindex/languages.${galleriesIndexVersion.get()}.index" + "nozomiurl" -> "$ltnBaseUrl/galleriesindex/nozomiurl.${galleriesIndexVersion.get()}.index" + else -> "$ltnBaseUrl/tagindex/$field.${tagIndexVersion.get()}.index" + } + + val nodedata = getURLAtRange(url, address.until(address + 464)) + + return decodeNode(nodedata) + } + + private suspend fun getURLAtRange(url: String, range: LongRange? = null) : ByteArray { + val rangeHeaders = when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } + + return webClient.httpGet(url, rangeHeaders).parseBytes() + } + + private fun hashTerm(term: String) : UByteArray { + return sha256(term.toByteArray()).copyOfRange(0, 4).toUByteArray() + } + + private fun sha256(data: ByteArray) : ByteArray { + return MessageDigest.getInstance("SHA-256").digest(data) + } + + private suspend fun Collection.toMangaList(): List { + return coroutineScope { + map { id -> + async { + runCatching { + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() + + Manga( + id = generateUid(id.toString()), + title = doc.selectFirstOrThrow("h1").text(), + url = id.toString(), + coverUrl = "https:" + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), + author = null, + tags = emptySet(), + isNsfw = true, + rating = RATING_UNKNOWN, + altTitle = null, + state = null, + source = source, + ) + }.getOrNull() + } + }.awaitAll().filterNotNull() + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + return manga.copy( + title = json.getString("title"), + largeCoverUrl = json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val 
commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, + author = json.optJSONArray("artists") + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), + publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), + tags = buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), + ) + ) + ) + } + + companion object { + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) + } + + private fun JSONArray.mapToTags(key: String): Set { + val tags = mutableSetOf() + mapJSON { + MangaTag( + title = it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, + key = it.getString("url").tagUrlToTag(), + source = source + ).let(tags::add) + } + return tags + } + + private fun String.tagUrlToTag(): String { + val urlContent = this.split("/") + val ns = urlContent[1] + val tag = urlContent[2] + .substringBeforeLast("-") + .urlDecode() + .replace(" ", "_") + + return if (tag.split(":")[0] in listOf("female", "male")) { + tag + } else { + "$ns:$tag" + } + } + + override suspend fun 
getRelatedManga(seed: Manga): List { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + // any better way to get List from this json? + return json.getJSONArray("related").let { + 0.until(it.length()).map { i -> it.getInt(i) } + }.toMangaList() + } + + override suspend fun getPages(chapter: MangaChapter): List { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + return json.getJSONArray("files").mapJSON { image -> + val hash = image.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + MangaPage( + id= generateUid(hash), + url = "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp", + preview = "https://${getDomain("${subDomain}tn")}/webpsmalltn/${thumbPathFromHash(hash)}/$hash.webp", + source = source + ) + } + } + + /// ---> + + private var scriptLastRetrieval: Long? = null + private val mutex = Mutex() + private var subdomainOffsetDefault = 0 + private val subdomainOffsetMap = mutableMapOf() + private var commonImageId = "" + + private suspend fun refreshScript() = mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! 
+ 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } + + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + + scriptLastRetrieval = System.currentTimeMillis() + } + } + + // m <-- gg.js + private suspend fun subdomainOffset(imageId: Int): Int { + refreshScript() + return subdomainOffsetMap[imageId] ?: subdomainOffsetDefault + } + + // b <-- gg.js + private suspend fun commonImageId(): String { + refreshScript() + return commonImageId + } + + // s <-- gg.js + private fun imageIdFromHash(hash: String): Int { + val match = Regex("(..)(.)$").find(hash) + return match!!.groupValues.let { it[2]+it[1] }.toInt(16) + } + + // real_full_path_from_hash <-- common.js + private fun thumbPathFromHash(hash: String): String { + return hash.replace(Regex("""^.*(..)(.)$"""), "$2/$1") + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt index 342b71cf..5ebd8807 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt @@ -52,6 +52,12 @@ fun Response.parseRaw(): String = try { closeQuietly() } +fun Response.parseBytes(): ByteArray = try { + requireBody().bytes() +} finally { + closeQuietly() +} + /** * Convert url to relative if it is on [domain] * @return an url relative to the [domain] or absolute, if domain is mismatching From a40a8d329abd0a8c609fbd84b8e48b1e47102309 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 19:55:21 +0500 
Subject: [PATCH 03/14] simplify search a bit --- .../parsers/site/all/HitomiLaParser.kt | 46 +++++++------------ 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index caf525bc..2c16566d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -197,16 +197,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } val key = hashTerm(it) - val field = "galleries" + val node = getGalleryNodeAtAddress(0) + val data = bSearch(key, node) + ?: return emptySet() - val node = getNodeAtAddress(field, 0) - - val data = bSearch(field, key, node) - - if (data != null) - return getGalleryIDsFromData(data) - - return emptySet() + return getGalleryIDsFromData(data) } } @@ -216,7 +211,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (length > 100000000 || length <= 0) throw Exception("length $length is too long") - val inbuf = getURLAtRange(url, offset.until(offset+length)) + val inbuf = getRangedResponse(url, offset.until(offset+length)) val galleryIDs = mutableSetOf() @@ -239,7 +234,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return galleryIDs } - private suspend fun bSearch(field: String, key: UByteArray, node: Node) : Pair? { + private suspend fun bSearch(key: UByteArray, node: Node) : Pair? 
{ fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { val top = min(dv1.size, dv2.size) @@ -281,9 +276,8 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo else if (isLeaf(node)) return null - val nextNode = getNodeAtAddress(field, node.subNodeAddresses[where]) - - return bSearch(field, key, nextNode) + val nextNode = getGalleryNodeAtAddress(node.subNodeAddresses[where]) + return bSearch(key, nextNode) } private suspend fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String, range: LongRange? = null) : Set { @@ -292,7 +286,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" } - val bytes = getURLAtRange(nozomiAddress, range) + val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() val arrayBuffer = ByteBuffer @@ -305,11 +299,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val tagIndexVersion = SuspendLazy { getIndexVersion("tagindex") } - private val galleriesIndexVersion = SuspendLazy { getIndexVersion("galleriesindex") } - - private suspend fun getIndexVersion(name: String) = - webClient.httpGet("$ltnBaseUrl/$name/version?_=${System.currentTimeMillis()}").parseRaw() + private val galleriesIndexVersion = SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -358,21 +350,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Node(keys, datas, subNodeAddresses) } - private suspend fun getNodeAtAddress(field: String, address: Long) : Node { - val url = - when(field) { - "galleries" -> "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" - "languages" -> "$ltnBaseUrl/galleriesindex/languages.${galleriesIndexVersion.get()}.index" - "nozomiurl" -> 
"$ltnBaseUrl/galleriesindex/nozomiurl.${galleriesIndexVersion.get()}.index" - else -> "$ltnBaseUrl/tagindex/$field.${tagIndexVersion.get()}.index" - } + private suspend fun getGalleryNodeAtAddress(address: Long) : Node { + val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" - val nodedata = getURLAtRange(url, address.until(address + 464)) + val nodedata = getRangedResponse(url, address.until(address + 464)) return decodeNode(nodedata) } - private suspend fun getURLAtRange(url: String, range: LongRange? = null) : ByteArray { + private suspend fun getRangedResponse(url: String, range: LongRange? = null) : ByteArray { val rangeHeaders = when (range) { null -> Headers.headersOf() else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") From b61c5e8f12319ac2bfbd9cd0222a35d0cd4d5eb9 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 20:29:16 +0500 Subject: [PATCH 04/14] hitomi: locales --- .../parsers/site/all/HitomiLaParser.kt | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 2c16566d..5ecee0ce 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -30,6 +30,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo SortOrder.POPULARITY, ) + private val localeMap: Map = mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + 
Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", + Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) + + private fun Locale?.getSiteLang(): String { + return when (this) { + null -> "all" + else -> localeMap[this] ?: "all" + } + } + + override suspend fun getAvailableLocales(): Set { + return localeMap.keys + } + override suspend fun getAvailableTags(): Set { return coroutineScope { ('a'..'z').map { alphabet -> @@ -68,19 +107,25 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (filter.tags.isEmpty()) { when (filter.sortOrder) { SortOrder.POPULARITY -> { - getGalleryIDsFromNozomi("popular", "today", "all", offset.nextOffsetRange()) + getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange()) } else -> { - getGalleryIDsFromNozomi(null, "index", "all", offset.nextOffsetRange()) + getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) } } } else { if (offset == 0) { - cachedSearchIds = hitomiSearch( - filter.tags.joinToString(" ") { it.key }, - filter.sortOrder == SortOrder.POPULARITY - ).toList() + val query = filter.tags.joinToString(" ") { it.key }.let { + val lang = filter.locale.getSiteLang() + if (lang != "all") { + "$it language:$lang" + } else { + it + } + } + + cachedSearchIds = hitomiSearch(query,filter.sortOrder == SortOrder.POPULARITY).toList() } cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) } @@ -88,10 +133,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo is MangaListFilter.Search -> { if (offset == 0) { - cachedSearchIds = hitomiSearch( - filter.query, - filter.sortOrder == SortOrder.POPULARITY - 
).toList() + cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() } cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) } From 495c9fad33ade61157b551280c064e0ece8e7080 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 22:23:43 +0500 Subject: [PATCH 05/14] hitomi: formatting --- .../parsers/site/all/HitomiLaParser.kt | 549 ++++++++++-------- 1 file changed, 305 insertions(+), 244 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 5ecee0ce..af141efd 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -20,43 +20,44 @@ import kotlin.math.min @OptIn(ExperimentalUnsignedTypes::class) @MangaSourceParser("HITOMILA", "Hitomi.La", type = ContentType.HENTAI) class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSource.HITOMILA) { - override val configKeyDomain = ConfigKey.Domain("hitomi.la") private val ltnBaseUrl get() = "https://${getDomain("ltn")}" - override val availableSortOrders: Set = EnumSet.of( - SortOrder.NEWEST, - SortOrder.POPULARITY, - ) + override val availableSortOrders: Set = + EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) - private val localeMap: Map = mapOf( - Locale("id") to "indonesian", - Locale("jv") to "javanese", - Locale("ca") to "catalan", - Locale("ceb") to "cebuano", - Locale("cs") to "czech", - Locale("da") to "danish", - Locale("de") to "german", - Locale("et") to "estonian", - Locale.ENGLISH to "english", - Locale("es") to "spanish", - Locale("eo") to "esperanto", - Locale("fr") to "french", - Locale("it") to "italian", - Locale("hi") to "hindi", - Locale("hu") to "hungarian", - Locale("pl") to "polish", - Locale("pt") to "portuguese", - 
Locale("vi") to "vietnamese", - Locale("tr") to "turkish", - Locale("ru") to "russian", - Locale("uk") to "ukrainian", - Locale("ar") to "arabic", - Locale.KOREAN to "korean", - Locale.CHINESE to "chinese", - Locale.JAPANESE to "japanese", - ) + private val localeMap: Map = + mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", + Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) private fun Locale?.getSiteLang(): String { return when (this) { @@ -76,14 +77,16 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() doc.select(".posts > li").mapNotNull { element -> - val num = element.ownText().let { - Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 - } + val num = + element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 + } if (num > 100) { val url = element.selectFirst("a") - val href = url?.attrAsRelativeUrl("href") - ?: return@mapNotNull null + val href = + url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null MangaTag( title = url.ownText().toCamelCase(), @@ -101,7 +104,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private var cachedSearchIds: List = emptyList() - override suspend fun getList(offset: Int, 
filter: MangaListFilter?): List { + override suspend fun getList( + offset: Int, + filter: MangaListFilter?, + ): List { return when (filter) { is MangaListFilter.Advanced -> { if (filter.tags.isEmpty()) { @@ -116,18 +122,19 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } } else { if (offset == 0) { - val query = filter.tags.joinToString(" ") { it.key }.let { - val lang = filter.locale.getSiteLang() - if (lang != "all") { - "$it language:$lang" - } else { - it + val query = + filter.tags.joinToString(" ") { it.key }.let { + val lang = filter.locale.getSiteLang() + if (lang != "all") { + "$it language:$lang" + } else { + it + } } - } - cachedSearchIds = hitomiSearch(query,filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = hitomiSearch(query, filter.sortOrder == SortOrder.POPULARITY).toList() } - cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } } @@ -135,7 +142,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (offset == 0) { cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() } - cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) @@ -143,85 +150,94 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } private fun Int.nextOffsetRange(): LongRange { - val bytes = this*4L - return bytes.until(bytes+100L) + val bytes = this * 4L + return bytes.until(bytes + 100L) } - private suspend fun hitomiSearch(query: String, sortByPopularity: Boolean = false) : Set = coroutineScope { - val terms = query - .trim() - .replace(Regex("""^\?"""), "") - .lowercase() - .split(Regex("\\s+")) - .map { - it.replace('_', ' ') + private suspend fun 
hitomiSearch( + query: String, + sortByPopularity: Boolean = false, + ): Set = + coroutineScope { + val terms = + query + .trim() + .replace(Regex("""^\?"""), "") + .lowercase() + .split(Regex("\\s+")) + .map { + it.replace('_', ' ') + } + + val positiveTerms = LinkedList() + val negativeTerms = LinkedList() + + for (term in terms) { + if (term.startsWith("-")) { + negativeTerms.push(term.removePrefix("-")) + } else if (term.isNotBlank()) { + positiveTerms.push(term) + } } - val positiveTerms = LinkedList() - val negativeTerms = LinkedList() + val positiveResults = + positiveTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } - for (term in terms) { - if (term.startsWith("-")) - negativeTerms.push(term.removePrefix("-")) - else if (term.isNotBlank()) - positiveTerms.push(term) - } + val negativeResults = + negativeTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } - val positiveResults = positiveTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it) - }.getOrDefault(emptySet()) + val results = + when { + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + else -> emptySet() + }.toMutableSet() + + fun filterPositive(newResults: Set) { + when { + results.isEmpty() -> results.addAll(newResults) + else -> results.retainAll(newResults) + } } - } - val negativeResults = negativeTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it) - }.getOrDefault(emptySet()) + fun filterNegative(newResults: Set) { + results.removeAll(newResults) } - } - - val results = when { - sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") - positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") - else -> emptySet() - }.toMutableSet() - fun filterPositive(newResults: Set) { - when { - results.isEmpty() -> results.addAll(newResults) - else -> 
results.retainAll(newResults) + // positive results + positiveResults.forEach { + filterPositive(it.await()) } - } - - fun filterNegative(newResults: Set) { - results.removeAll(newResults) - } - //positive results - positiveResults.forEach { - filterPositive(it.await()) - } + // negative results + negativeResults.forEach { + filterNegative(it.await()) + } - //negative results - negativeResults.forEach { - filterNegative(it.await()) + results } - results - } - - //search.js - private suspend fun getGalleryIDsForQuery(query: String) : Set { + // search.js + private suspend fun getGalleryIDsForQuery(query: String): Set { query.replace("_", " ").let { if (it.indexOf(':') > -1) { val sides = it.split(":") val ns = sides[0] var tag = sides[1] - var area : String? = ns + var area: String? = ns var language = "all" when (ns) { "female", "male" -> { @@ -240,35 +256,39 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val key = hashTerm(it) val node = getGalleryNodeAtAddress(0) - val data = bSearch(key, node) - ?: return emptySet() + val data = + bSearch(key, node) + ?: return emptySet() return getGalleryIDsFromData(data) } } - private suspend fun getGalleryIDsFromData(data: Pair) : Set { + private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - if (length > 100000000 || length <= 0) + if (length > 100000000 || length <= 0) { throw Exception("length $length is too long") + } - val inbuf = getRangedResponse(url, offset.until(offset+length)) + val inbuf = getRangedResponse(url, offset.until(offset + length)) val galleryIDs = mutableSetOf() - val buffer = ByteBuffer - .wrap(inbuf) - .order(ByteOrder.BIG_ENDIAN) + val buffer = + ByteBuffer + .wrap(inbuf) + .order(ByteOrder.BIG_ENDIAN) val numberOfGalleryIDs = buffer.int - val expectedLength = numberOfGalleryIDs*4+4 + val expectedLength = numberOfGalleryIDs * 4 + 4 - if 
(numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) + if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") - else if (inbuf.size != expectedLength) + } else if (inbuf.size != expectedLength) { throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + } for (i in 0.until(numberOfGalleryIDs)) galleryIDs.add(buffer.int) @@ -276,64 +296,85 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return galleryIDs } - private suspend fun bSearch(key: UByteArray, node: Node) : Pair? { - fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { + private suspend fun bSearch( + key: UByteArray, + node: Node, + ): Pair? { + fun compareArrayBuffers( + dv1: UByteArray, + dv2: UByteArray, + ): Int { val top = min(dv1.size, dv2.size) for (i in 0.until(top)) { - if (dv1[i] < dv2[i]) + if (dv1[i] < dv2[i]) { return -1 - else if (dv1[i] > dv2[i]) + } else if (dv1[i] > dv2[i]) { return 1 + } } return 0 } - fun locateKey(key: UByteArray, node: Node) : Pair { + fun locateKey( + key: UByteArray, + node: Node, + ): Pair { for (i in node.keys.indices) { val cmpResult = compareArrayBuffers(key, node.keys[i]) - if (cmpResult <= 0) - return Pair(cmpResult==0, i) + if (cmpResult <= 0) { + return Pair(cmpResult == 0, i) + } } return Pair(false, node.keys.size) } - fun isLeaf(node: Node) : Boolean { + fun isLeaf(node: Node): Boolean { for (subnode in node.subNodeAddresses) - if (subnode != 0L) + if (subnode != 0L) { return false + } return true } - if (node.keys.isEmpty()) + if (node.keys.isEmpty()) { return null + } val (there, where) = locateKey(key, node) - if (there) + if (there) { return node.datas[where] - else if (isLeaf(node)) + } else if (isLeaf(node)) { return null + } val nextNode = getGalleryNodeAtAddress(node.subNodeAddresses[where]) return bSearch(key, nextNode) } - private suspend fun getGalleryIDsFromNozomi(area: String?, 
tag: String, language: String, range: LongRange? = null) : Set { - val nozomiAddress = when(area) { - null -> "$ltnBaseUrl/$tag-$language.nozomi" - else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" - } + private suspend fun getGalleryIDsFromNozomi( + area: String?, + tag: String, + language: String, + range: LongRange? = null, + ): Set { + val nozomiAddress = + when (area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() - val arrayBuffer = ByteBuffer - .wrap(bytes) - .order(ByteOrder.BIG_ENDIAN) + val arrayBuffer = + ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) while (arrayBuffer.hasRemaining()) nozomi.add(arrayBuffer.int) @@ -341,9 +382,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val galleriesIndexVersion = SuspendLazy { - webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() - } + private val galleriesIndexVersion = + SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -351,10 +393,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val subNodeAddresses: List, ) - private fun decodeNode(data: ByteArray) : Node { - val buffer = ByteBuffer - .wrap(data) - .order(ByteOrder.BIG_ENDIAN) + private fun decodeNode(data: ByteArray): Node { + val buffer = + ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) val uData = data.toUByteArray() @@ -364,11 +407,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo for (i in 0.until(numberOfKeys)) { val keySize = buffer.int - if (keySize == 0 || keySize > 32) + if (keySize == 0 || keySize > 32) { throw Exception("fatal: !keySize || keySize > 32") + } - 
keys.add(uData.sliceArray(buffer.position().until(buffer.position()+keySize))) - buffer.position(buffer.position()+keySize) + keys.add(uData.sliceArray(buffer.position().until(buffer.position() + keySize))) + buffer.position(buffer.position() + keySize) } val numberOfDatas = buffer.int @@ -392,7 +436,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Node(keys, datas, subNodeAddresses) } - private suspend fun getGalleryNodeAtAddress(address: Long) : Node { + private suspend fun getGalleryNodeAtAddress(address: Long): Node { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" val nodedata = getRangedResponse(url, address.until(address + 464)) @@ -400,20 +444,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return decodeNode(nodedata) } - private suspend fun getRangedResponse(url: String, range: LongRange? = null) : ByteArray { - val rangeHeaders = when (range) { - null -> Headers.headersOf() - else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") - } + private suspend fun getRangedResponse( + url: String, + range: LongRange? 
= null, + ): ByteArray { + val rangeHeaders = + when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } return webClient.httpGet(url, rangeHeaders).parseBytes() } - private fun hashTerm(term: String) : UByteArray { + private fun hashTerm(term: String): UByteArray { return sha256(term.toByteArray()).copyOfRange(0, 4).toUByteArray() } - private fun sha256(data: ByteArray) : ByteArray { + private fun sha256(data: ByteArray): ByteArray { return MessageDigest.getInstance("SHA-256").digest(data) } @@ -428,12 +476,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo id = generateUid(id.toString()), title = doc.selectFirstOrThrow("h1").text(), url = id.toString(), - coverUrl = "https:" + doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), - publicUrl = doc.selectFirstOrThrow("h1 > a") - .attrAsRelativeUrl("href") - .toAbsoluteUrl(domain), + coverUrl = + "https:" + + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = + doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), author = null, tags = emptySet(), isNsfw = true, @@ -449,54 +500,59 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getDetails(manga: Manga): Manga { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return manga.copy( title = json.getString("title"), - largeCoverUrl = json.getJSONArray("files").getJSONObject(0).let { - val hash = it.getString("hash") - val commonId = commonImageId() - val imageId = imageIdFromHash(hash) - val subDomain = 'a' + subdomainOffset(imageId) - - 
"https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" - }, - author = json.optJSONArray("artists") - ?.mapJSON { it.getString("artist").toCamelCase() } - ?.joinToString(), - publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), - tags = buildSet { - json.optJSONArray("characters") - ?.mapToTags("character") - ?.let(::addAll) - json.optJSONArray("tags") - ?.mapToTags("tag") - ?.let(::addAll) + largeCoverUrl = + json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, + author = json.optJSONArray("artists") - ?.mapToTags("artist") - ?.let(::addAll) - json.optJSONArray("parodys") - ?.mapToTags("parody") - ?.let(::addAll) - json.optJSONArray("groups") - ?.mapToTags("group") - ?.let(::addAll) - }, - chapters = listOf( - MangaChapter( - id = generateUid(manga.url), - url = manga.url, - name = json.getString("title"), - scanlator = json.getString("type").toTitleCase(), - number = 1, - branch = json.getString("language_localname"), - source = source, - uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), - ) - ) + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), + publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), + tags = + buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = + listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = 
json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), + ), + ), ) } @@ -508,17 +564,18 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val tags = mutableSetOf() mapJSON { MangaTag( - title = it.getString(key).toCamelCase().let { title -> - if (it.getStringOrNull("female")?.toIntOrNull() == 1) { - "$title ♀" - } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { - "$title ♂" - } else { - title - } - }, + title = + it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, key = it.getString("url").tagUrlToTag(), - source = source + source = source, ).let(tags::add) } return tags @@ -527,10 +584,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private fun String.tagUrlToTag(): String { val urlContent = this.split("/") val ns = urlContent[1] - val tag = urlContent[2] - .substringBeforeLast("-") - .urlDecode() - .replace(" ", "_") + val tag = + urlContent[2] + .substringBeforeLast("-") + .urlDecode() + .replace(" ", "_") return if (tag.split(":")[0] in listOf("female", "male")) { tag @@ -540,10 +598,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getRelatedManga(seed: Manga): List { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) // any better way to get List from this json? 
return json.getJSONArray("related").let { @@ -552,10 +611,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getPages(chapter: MangaChapter): List { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return json.getJSONArray("files").mapJSON { image -> val hash = image.getString("hash") @@ -564,15 +624,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val subDomain = 'a' + subdomainOffset(imageId) MangaPage( - id= generateUid(hash), + id = generateUid(hash), url = "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp", preview = "https://${getDomain("${subDomain}tn")}/webpsmalltn/${thumbPathFromHash(hash)}/$hash.webp", - source = source + source = source, ) } } - /// ---> + // / ---> private var scriptLastRetrieval: Long? = null private val mutex = Mutex() @@ -580,24 +640,25 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val subdomainOffsetMap = mutableMapOf() private var commonImageId = "" - private suspend fun refreshScript() = mutex.withLock { - if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { - val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + private suspend fun refreshScript() = + mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! 
+ 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() - subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() - val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() - subdomainOffsetMap.clear() - Regex("case (\\d+):").findAll(ggScript).forEach { - val case = it.groupValues[1].toInt() - subdomainOffsetMap[case] = o - } + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } - commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] - scriptLastRetrieval = System.currentTimeMillis() + scriptLastRetrieval = System.currentTimeMillis() + } } - } // m <-- gg.js private suspend fun subdomainOffset(imageId: Int): Int { @@ -614,7 +675,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo // s <-- gg.js private fun imageIdFromHash(hash: String): Int { val match = Regex("(..)(.)$").find(hash) - return match!!.groupValues.let { it[2]+it[1] }.toInt(16) + return match!!.groupValues.let { it[2] + it[1] }.toInt(16) } // real_full_path_from_hash <-- common.js From 039075086e295a6a8f1dee0fb747671e55a07c74 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 22:48:13 +0500 Subject: [PATCH 06/14] hitomi: better locale in tag search --- .../parsers/site/all/HitomiLaParser.kt | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 
af141efd..21027f8b 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -122,17 +122,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } } else { if (offset == 0) { - val query = - filter.tags.joinToString(" ") { it.key }.let { - val lang = filter.locale.getSiteLang() - if (lang != "all") { - "$it language:$lang" - } else { - it - } - } - - cachedSearchIds = hitomiSearch(query, filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = + hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY, + filter.locale.getSiteLang(), + ).toList() } cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } @@ -157,6 +152,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun hitomiSearch( query: String, sortByPopularity: Boolean = false, + language: String = "all", ): Set = coroutineScope { val terms = @@ -184,7 +180,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo positiveTerms.map { async { runCatching { - getGalleryIDsForQuery(it) + getGalleryIDsForQuery(it, language) }.getOrDefault(emptySet()) } } @@ -193,15 +189,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo negativeTerms.map { async { runCatching { - getGalleryIDsForQuery(it) + getGalleryIDsForQuery(it, language) }.getOrDefault(emptySet()) } } val results = when { - sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") - positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", language) + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", language) else -> emptySet() }.toMutableSet() @@ -230,7 +226,10 @@ class HitomiLaParser(context: MangaLoaderContext) : 
MangaParser(context, MangaSo } // search.js - private suspend fun getGalleryIDsForQuery(query: String): Set { + private suspend fun getGalleryIDsForQuery( + query: String, + language: String = "all", + ): Set { query.replace("_", " ").let { if (it.indexOf(':') > -1) { val sides = it.split(":") @@ -238,7 +237,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo var tag = sides[1] var area: String? = ns - var language = "all" + var lang = language when (ns) { "female", "male" -> { area = "tag" @@ -246,12 +245,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } "language" -> { area = null - language = tag + lang = tag tag = "index" } } - return getGalleryIDsFromNozomi(area, tag, language) + return getGalleryIDsFromNozomi(area, tag, lang) } val key = hashTerm(it) From a5219ceb6c8e228db99748e97a8634b1ba55e6d1 Mon Sep 17 00:00:00 2001 From: devi Date: Sat, 23 Dec 2023 18:59:48 +0100 Subject: [PATCH 07/14] New Template GattsuParser Add Baozimh, MuitoHentai, TopReadManhwa, HentaiSeason, HentaiTokyo, MundoHentaiOficial, UniversoHentai --- .../parsers/site/gattsu/GattsuParser.kt | 150 ++++++++++++++ .../parsers/site/gattsu/pt/HentaiSeason.kt | 11 + .../parsers/site/gattsu/pt/HentaiTokyo.kt | 13 ++ .../site/gattsu/pt/MundoHentaiOficial.kt | 48 +++++ .../parsers/site/gattsu/pt/UniversoHentai.kt | 50 +++++ .../parsers/site/madara/en/TopReadManhwa.kt | 12 ++ .../kotatsu/parsers/site/pt/MuitoHentai.kt | 126 +++++++++++ .../kotatsu/parsers/site/zh/Baozimh.kt | 195 ++++++++++++++++++ 8 files changed, 605 insertions(+) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/GattsuParser.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt create mode 100644 
src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/TopReadManhwa.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/MuitoHentai.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/Baozimh.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/GattsuParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/GattsuParser.kt new file mode 100644 index 00000000..80bb6a6b --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/GattsuParser.kt @@ -0,0 +1,150 @@ +package org.koitharu.kotatsu.parsers.site.gattsu + +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.PagedMangaParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import java.util.* + +internal abstract class GattsuParser( + context: MangaLoaderContext, + source: MangaSource, + domain: String, + pageSize: Int = 20, +) : PagedMangaParser(context, source, pageSize) { + + override val availableSortOrders: Set = EnumSet.of(SortOrder.UPDATED) + + override val configKeyDomain = ConfigKey.Domain(domain) + + override val isMultipleTagsSupported = false + + protected open val tagPrefix = "tag" + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List { + + val url = buildString { + append("https://") + append(domain) + when (filter) { + + is MangaListFilter.Search -> { + append("/page/") + append(page.toString()) + append("/?s=") + append(filter.query.urlEncoded()) + } + + is MangaListFilter.Advanced -> { + + filter.tags.oneOrThrowIfMany()?.let { + append("/$tagPrefix/") + append(it.key) + } + + append("/page/") + append(page.toString()) + + } + + null -> { + append("/page/") + 
append(page.toString()) + } + } + } + return parseMangaList(webClient.httpGet(url).parseHtml()) + } + + protected open fun parseMangaList(doc: Document): List { + return doc.select("div.lista ul li, div.videos div.video").mapNotNull { li -> + val a = li.selectFirstOrThrow("a") + val href = a.attrAsAbsoluteUrl("href") + if (!href.contains(domain)) { + //Some sources include ads in manga lists + return@mapNotNull null + } + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + title = li.selectLastOrThrow(".thumb-titulo, .video-titulo").text(), + coverUrl = li.selectFirst("img")?.src().orEmpty(), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + description = null, + state = null, + author = null, + isNsfw = isNsfwSource, + source = source, + ) + } + } + + protected open val tagUrl = "generos" + + override suspend fun getAvailableTags(): Set { + val doc = webClient.httpGet("https://$domain/$tagUrl/").parseHtml() + return doc.selectLastOrThrow(".meio-conteudo p, div.lista-tags ul").parseTags() + } + + protected open fun Element.parseTags() = select("a").mapToSet { + val key = it.attr("href").removeSuffix("/").substringAfterLast("/") + val name = it.selectFirst(".tag-titulo")?.text() ?: key + MangaTag( + key = key, + title = name, + source = source, + ) + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val urlChapter = doc.selectFirstOrThrow("ul.post-fotos li a, ul.paginaPostBotoes a").attrAsAbsoluteUrl("href") + return manga.copy( + description = doc.selectFirst("div.post-texto")?.html(), + tags = doc.selectFirst(".post-itens li:contains(Tags), .paginaPostInfo li:contains(Categorias)") + ?.parseTags().orEmpty(), + author = doc.selectFirst(".post-itens li:contains(Autor) a, .paginaPostInfo li:contains(Artista) a") + ?.text(), + chapters = listOf( + MangaChapter( + id = manga.id, + name = manga.title, + number = 1, + url = urlChapter, + scanlator = 
null, + uploadDate = 0, + branch = null, + source = source, + ), + ), + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() + val totalPages = + doc.selectLastOrThrow("div.galeria-paginacao span").text().substringAfterLast("- ").substringBeforeLast(')') + .toInt() + val rawUrl = chapter.url.substringBeforeLast("=") + return (1..totalPages).map { + val url = "$rawUrl=$it" + MangaPage( + id = generateUid(url), + url = url, + preview = null, + source = source, + ) + } + } + + override suspend fun getPageUrl(page: MangaPage): String { + val doc = webClient.httpGet(page.url.toAbsoluteUrl(domain)).parseHtml() + return doc.selectFirstOrThrow("div.galeria-foto img").src() ?: doc.parseFailed("Image src not found") + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt new file mode 100644 index 00000000..c58f23e5 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt @@ -0,0 +1,11 @@ +package org.koitharu.kotatsu.parsers.site.gattsu.pt + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser + +@MangaSourceParser("HENTAISEASON", "HentaiSeason", type = ContentType.HENTAI) +internal class HentaiSeason(context: MangaLoaderContext) : + GattsuParser(context, MangaSource.HENTAISEASON, "hentaiseason.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt new file mode 100644 index 00000000..614f272e --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt 
@@ -0,0 +1,13 @@ +package org.koitharu.kotatsu.parsers.site.gattsu.pt + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser + +@MangaSourceParser("HENTAITOKYO", "HentaiTokyo", type = ContentType.HENTAI) +internal class HentaiTokyo(context: MangaLoaderContext) : + GattsuParser(context, MangaSource.HENTAITOKYO, "hentaitokyo.net") { + override val tagUrl = "tags" +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt new file mode 100644 index 00000000..bdb95dce --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt @@ -0,0 +1,48 @@ +package org.koitharu.kotatsu.parsers.site.gattsu.pt + +import org.jsoup.nodes.Document +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.Manga +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN +import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser +import org.koitharu.kotatsu.parsers.util.attrAsAbsoluteUrl +import org.koitharu.kotatsu.parsers.util.domain +import org.koitharu.kotatsu.parsers.util.generateUid +import org.koitharu.kotatsu.parsers.util.selectLastOrThrow +import org.koitharu.kotatsu.parsers.util.src + +@MangaSourceParser("MUNDOHENTAIOFICIAL", "MundoHentaiOficial", type = ContentType.HENTAI) +internal class MundoHentaiOficial(context: MangaLoaderContext) : + GattsuParser(context, MangaSource.MUNDOHENTAIOFICIAL, "mundohentaioficial.com") { + + override val tagUrl = "tags" + + override fun 
parseMangaList(doc: Document): List { + return doc.select("div.lista ul li, div.videos div.video").mapNotNull { li -> + val a = li.selectLastOrThrow("a") + val href = a.attrAsAbsoluteUrl("href") + if (!href.contains(domain)) { + //Some sources include ads in manga lists + return@mapNotNull null + } + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + title = li.selectLastOrThrow(".thumb-titulo, .video-titulo").text(), + coverUrl = li.selectFirst("img")?.src().orEmpty(), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + description = null, + state = null, + author = null, + isNsfw = isNsfwSource, + source = source, + ) + } + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt new file mode 100644 index 00000000..1133b0d8 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt @@ -0,0 +1,50 @@ +package org.koitharu.kotatsu.parsers.site.gattsu.pt + +import org.jsoup.nodes.Element +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaChapter +import org.koitharu.kotatsu.parsers.model.MangaPage +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.model.MangaTag +import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser +import org.koitharu.kotatsu.parsers.util.* + +@MangaSourceParser("UNIVERSOHENTAI", "UniversoHentai", type = ContentType.HENTAI) +internal class UniversoHentai(context: MangaLoaderContext) : + GattsuParser(context, MangaSource.UNIVERSOHENTAI, "universohentai.com") { + + override val tagPrefix = "category" + + override suspend fun getAvailableTags(): Set { + val doc = webClient.httpGet("https://$domain/tags/").parseHtml() + return 
doc.requireElementById("menu-topo").parseTags() + } + + override fun Element.parseTags() = select("a").mapNotNullToSet { + if (!it.attr("href").contains("/category/")) return@mapNotNullToSet null + val key = it.attr("href").removeSuffix("/").substringAfterLast("/") + MangaTag( + key = key, + title = it.text(), + source = source, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val images = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml().requireElementById("galeria") + .select(".galeria-foto img") + return images.map { img -> + val urlImages = img.src() ?: img.parseFailed("Image src not found") + MangaPage( + id = generateUid(urlImages), + url = urlImages, + preview = null, + source = source, + ) + } + } + + override suspend fun getPageUrl(page: MangaPage): String = page.url.toAbsoluteUrl(domain) +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/TopReadManhwa.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/TopReadManhwa.kt new file mode 100644 index 00000000..febb9201 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/TopReadManhwa.kt @@ -0,0 +1,12 @@ +package org.koitharu.kotatsu.parsers.site.madara.en + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.madara.MadaraParser + +@MangaSourceParser("TOPREADMANHWA", "TopReadManhwa", "en") +internal class TopReadManhwa(context: MangaLoaderContext) : + MadaraParser(context, MangaSource.TOPREADMANHWA, "topreadmanhwa.com") { + override val datePattern = "MM/dd/yyyy" +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/MuitoHentai.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/MuitoHentai.kt new file mode 100644 index 00000000..4fde9118 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/MuitoHentai.kt @@ -0,0 
+1,126 @@ +package org.koitharu.kotatsu.parsers.site.pt + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.PagedMangaParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import java.util.* + +@MangaSourceParser("MUITOHENTAI", "MuitoHentai", "pt", ContentType.HENTAI) +class MuitoHentai(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.MUITOHENTAI, 24) { + + override val availableSortOrders: Set = EnumSet.of(SortOrder.POPULARITY) + + override val configKeyDomain = ConfigKey.Domain("www.muitohentai.com") + + override val isMultipleTagsSupported = false + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List { + + val url = buildString { + append("https://") + append(domain) + when (filter) { + + is MangaListFilter.Search -> { + if (page > 1) return emptyList() + append("/buscar-manga/?q=") + append(filter.query.urlEncoded()) + } + + is MangaListFilter.Advanced -> { + append("/mangas") + + filter.tags.oneOrThrowIfMany()?.let { + append("/genero/") + append(it.key) + } + + append('/') + append(page.toString()) + append('/') + } + + null -> { + append("/mangas/") + append(page.toString()) + append('/') + } + } + } + val doc = webClient.httpGet(url).parseHtml() + return doc.requireElementById("archive-content").select("article").map { div -> + val a = div.selectFirstOrThrow("a") + val href = a.attrAsAbsoluteUrl("href") + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + title = div.selectLastOrThrow("h3").text(), + coverUrl = div.selectFirst("img")?.src().orEmpty(), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + description = null, + state = null, + author = null, + isNsfw = isNsfwSource, + source = source, + ) + } + } + + override suspend fun getAvailableTags(): Set { + val doc = 
webClient.httpGet("https://$domain/generos-dos-mangas/").parseHtml() + return doc.select("div.content a.profileSideBar").mapNotNullToSet { a -> + MangaTag( + key = a.attr("href").removeSuffix("/").substringAfterLast("/"), + title = a.text(), + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + return manga.copy( + description = doc.selectFirstOrThrow(".backgroundpost:contains(Sinopse)").html(), + tags = doc.select("a.genero_btn").mapNotNullToSet { a -> + MangaTag( + key = a.attr("href").removeSuffix("/").substringAfterLast("/"), + title = a.text(), + source = source, + ) + }, + chapters = doc.select(".backgroundpost h3 a").mapChapters(reversed = true) { i, a -> + val href = a.attrAsAbsoluteUrl("href") + MangaChapter( + id = generateUid(href), + name = a.text(), + number = i + 1, + url = href, + scanlator = null, + uploadDate = 0, + branch = null, + source = source, + ) + }, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() + val data = doc.selectFirstOrThrow("script:containsData(var arr = [)").data() + val images = data.substringAfter("[").substringBefore("];").replace("\"", "").split(",") + return images.map { img -> + MangaPage( + id = generateUid(img), + url = img, + preview = null, + source = source, + ) + } + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/Baozimh.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/Baozimh.kt new file mode 100644 index 00000000..52b46ed7 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zh/Baozimh.kt @@ -0,0 +1,195 @@ +package org.koitharu.kotatsu.parsers.site.zh + +import androidx.collection.ArrayMap +import org.json.JSONArray +import org.jsoup.nodes.Document +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser 
+import org.koitharu.kotatsu.parsers.PagedMangaParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import org.koitharu.kotatsu.parsers.util.json.mapJSON +import java.util.* + +@MangaSourceParser("BAOZIMH", "Baozimh", "zh") +internal class Baozimh(context: MangaLoaderContext) : + PagedMangaParser(context, MangaSource.BAOZIMH, pageSize = 36) { + + override val availableSortOrders: Set = EnumSet.of(SortOrder.POPULARITY) + + override val availableStates: Set = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED) + + override val configKeyDomain = ConfigKey.Domain("www.baozimh.com") + + override val isMultipleTagsSupported = false + + private val tagsMap = SuspendLazy(::parseTags) + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List { + + when (filter) { + is MangaListFilter.Search -> { + if (page > 1) return emptyList() + val url = buildString { + append("https://") + append(domain) + append("/search?q=") + append(filter.query.urlEncoded()) + } + return parseMangaListSearch(webClient.httpGet(url).parseHtml()) + } + + is MangaListFilter.Advanced -> { + val url = buildString { + append("https://") + append(domain) + append("/api/bzmhq/amp_comic_list?filter=*&region=all") + + if (filter.tags.isNotEmpty()) { + filter.tags.oneOrThrowIfMany()?.let { + append("&type=") + append(it.key) + } + } else { + append("&type=all") + } + + if (filter.states.isNotEmpty()) { + filter.states.oneOrThrowIfMany()?.let { + append("&state=") + append( + when (it) { + MangaState.ONGOING -> "serial" + MangaState.FINISHED -> "pub" + else -> "all" + }, + ) + } + } else { + append("&state=all") + } + + append("&limit=36&page=") + append(page.toString()) + } + + return parseMangaList(webClient.httpGet(url).parseJson().getJSONArray("items")) + } + + null -> { + val url = buildString { + append("https://") + append(domain) +
append("/api/bzmhq/amp_comic_list?filter=*&region=all&type=all&state=all&limit=36&page=") + append(page.toString()) + } + return parseMangaList(webClient.httpGet(url).parseJson().getJSONArray("items")) + } + } + } + + private fun parseMangaList(json: JSONArray): List { + return json.mapJSON { j -> + val href = "https://$domain/comic/" + j.getString("comic_id") + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + coverUrl = "https://static-tw${domain.removePrefix("www")}/cover/" + j.getString("topic_img"), + title = j.getString("name"), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + author = j.getString("author"), + state = null, + source = source, + isNsfw = isNsfwSource, + ) + } + } + + private fun parseMangaListSearch(doc: Document): List { + return doc.select("div.comics-card").map { div -> + val href = div.selectFirstOrThrow("a").attrAsRelativeUrl("href") + Manga( + id = generateUid(href), + url = href, + publicUrl = href, + coverUrl = div.selectFirst("amp-img")?.src().orEmpty(), + title = div.selectFirstOrThrow(".comics-card__title h3").text(), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + author = null, + state = null, + source = source, + isNsfw = isNsfwSource, + ) + } + } + + override suspend fun getAvailableTags(): Set { + return tagsMap.get().values.toSet() + } + + private suspend fun parseTags(): Map { + val tagElements = webClient.httpGet("https://$domain/classify").parseHtml() + .select("div.nav")[3].select("a.item:not(.active)") + val tagMap = ArrayMap(tagElements.size) + for (el in tagElements) { + val name = el.text() + if (name.isEmpty()) continue + tagMap[name] = MangaTag( + key = el.attr("href").substringAfter("type=").substringBefore("&"), + title = name, + source = source, + ) + } + return tagMap + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val state = doc.selectFirstOrThrow(".tag-list
span.tag").text() + val tagMap = tagsMap.get() + val selectTag = doc.select(".tag-list span.tag").drop(1) + val tags = selectTag.mapNotNullToSet { tagMap[it.text()] } + return manga.copy( + description = doc.selectFirst(".comics-detail__desc")?.text().orEmpty(), + state = when (state) { + "連載中" -> MangaState.ONGOING + "已完結" -> MangaState.FINISHED + else -> null + }, + tags = tags, + chapters = doc.requireElementById("chapter-items").select("div.comics-chapters a") + .mapChapters(reversed = true) { i, a -> + val url = a.attrAsRelativeUrl("href").toAbsoluteUrl(domain) + MangaChapter( + id = generateUid(url), + name = a.selectFirstOrThrow("span").text(), + number = i + 1, + url = url, + scanlator = null, + uploadDate = 0, + branch = null, + source = source, + ) + }, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml().requireElementById("__nuxt") + return doc.select("button.pure-button").map { btn -> + val urlPage = btn.attr("on").substringAfter(": '").substringBefore("?t=") + MangaPage( + id = generateUid(urlPage), + url = urlPage, + preview = null, + source = source, + ) + } + } +} From edc07b5dbd64526edd6cac194cbc23e3a85243dc Mon Sep 17 00:00:00 2001 From: devi Date: Sun, 24 Dec 2023 12:00:06 +0100 Subject: [PATCH 08/14] New Template GuyaParser Add DankeFursLesen, GuyaCubari, Hachirumi, MahouShoujobu, Olaoe Add local on sources GattsuParser --- .../parsers/site/gattsu/pt/HentaiSeason.kt | 2 +- .../parsers/site/gattsu/pt/HentaiTokyo.kt | 2 +- .../site/gattsu/pt/MundoHentaiOficial.kt | 2 +- .../parsers/site/gattsu/pt/UniversoHentai.kt | 2 +- .../kotatsu/parsers/site/guya/GuyaParser.kt | 131 ++++++++++++++++++ .../parsers/site/guya/all/MahouShoujobu.kt | 10 ++ .../kotatsu/parsers/site/guya/en/Danke.kt | 10 ++ .../parsers/site/guya/en/GuyaCubari.kt | 10 ++ .../kotatsu/parsers/site/guya/en/Hachirumi.kt | 11 ++ .../kotatsu/parsers/site/madara/ar/Olaoe.kt | 14 ++ 10 files 
changed, 190 insertions(+), 4 deletions(-) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/GuyaParser.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/all/MahouShoujobu.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Danke.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/GuyaCubari.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Hachirumi.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ar/Olaoe.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt index c58f23e5..5589ea89 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiSeason.kt @@ -6,6 +6,6 @@ import org.koitharu.kotatsu.parsers.model.ContentType import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser -@MangaSourceParser("HENTAISEASON", "HentaiSeason", type = ContentType.HENTAI) +@MangaSourceParser("HENTAISEASON", "HentaiSeason", "pt", ContentType.HENTAI) internal class HentaiSeason(context: MangaLoaderContext) : GattsuParser(context, MangaSource.HENTAISEASON, "hentaiseason.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt index 614f272e..febe8c2f 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/HentaiTokyo.kt @@ -6,7 +6,7 @@ import org.koitharu.kotatsu.parsers.model.ContentType import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser -@MangaSourceParser("HENTAITOKYO", 
"HentaiTokyo", type = ContentType.HENTAI) +@MangaSourceParser("HENTAITOKYO", "HentaiTokyo", "pt", ContentType.HENTAI) internal class HentaiTokyo(context: MangaLoaderContext) : GattsuParser(context, MangaSource.HENTAITOKYO, "hentaitokyo.net") { override val tagUrl = "tags" diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt index bdb95dce..e187ec7a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/MundoHentaiOficial.kt @@ -14,7 +14,7 @@ import org.koitharu.kotatsu.parsers.util.generateUid import org.koitharu.kotatsu.parsers.util.selectLastOrThrow import org.koitharu.kotatsu.parsers.util.src -@MangaSourceParser("MUNDOHENTAIOFICIAL", "MundoHentaiOficial", type = ContentType.HENTAI) +@MangaSourceParser("MUNDOHENTAIOFICIAL", "MundoHentaiOficial", "pt", ContentType.HENTAI) internal class MundoHentaiOficial(context: MangaLoaderContext) : GattsuParser(context, MangaSource.MUNDOHENTAIOFICIAL, "mundohentaioficial.com") { diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt index 1133b0d8..f728442d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/gattsu/pt/UniversoHentai.kt @@ -11,7 +11,7 @@ import org.koitharu.kotatsu.parsers.model.MangaTag import org.koitharu.kotatsu.parsers.site.gattsu.GattsuParser import org.koitharu.kotatsu.parsers.util.* -@MangaSourceParser("UNIVERSOHENTAI", "UniversoHentai", type = ContentType.HENTAI) +@MangaSourceParser("UNIVERSOHENTAI", "UniversoHentai", "pt", ContentType.HENTAI) internal class UniversoHentai(context: MangaLoaderContext) : GattsuParser(context, MangaSource.UNIVERSOHENTAI, 
"universohentai.com") { diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/GuyaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/GuyaParser.kt new file mode 100644 index 00000000..66f59eef --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/GuyaParser.kt @@ -0,0 +1,131 @@ +package org.koitharu.kotatsu.parsers.site.guya + +import org.json.JSONObject +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.PagedMangaParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import java.util.* + +internal abstract class GuyaParser( + context: MangaLoaderContext, + source: MangaSource, + domain: String, + pageSize: Int = 0, +) : PagedMangaParser(context, source, pageSize) { + + override val availableSortOrders: Set = EnumSet.of(SortOrder.ALPHABETICAL) + + override val configKeyDomain = ConfigKey.Domain(domain) + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List { + if (page > 1) return emptyList() + val url = buildString { + append("https://") + append(domain) + append("/api/get_all_series/") + } + when (filter) { + + is MangaListFilter.Search -> { + return parseMangaList(webClient.httpGet(url).parseJson(), filter.query) + } + + is MangaListFilter.Advanced -> {} + + null -> {} + } + + return parseMangaList(webClient.httpGet(url).parseJson(), "") + } + + protected open fun parseMangaList(json: JSONObject, query: String): List { + val manga = ArrayList(json.length()) + val keys: Iterator = json.keys() + while (keys.hasNext()) { + val key = keys.next() + if (json.get(key) is JSONObject) { + if (query.isNotEmpty()) { + if (key.lowercase().contains(query.lowercase())) manga.add(addManga(json.getJSONObject(key), key)) + } else manga.add(addManga(json.getJSONObject(key), key)) + } + } + return manga + } + + private fun addManga(j: JSONObject, name: String): Manga { + 
val url = "https://$domain/read/manga/" + j.getString("slug") + val apiUrl = "https://$domain/api/series/" + j.getString("slug") + return Manga( + id = generateUid(apiUrl), + url = apiUrl, + publicUrl = url, + title = name, + coverUrl = j.getString("cover").toAbsoluteUrl(domain), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + description = j.getString("description"), + state = null, + author = j.getString("author"), + isNsfw = isNsfwSource, + source = source, + ) + } + + override suspend fun getAvailableTags(): Set = emptySet() + + override suspend fun getDetails(manga: Manga): Manga { + val json = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseJson().getJSONObject("chapters") + val slug = manga.url.removeSuffix('/').substringAfterLast('/') + val keys: Iterator = json.keys() + val chapters = ArrayList() + var i = 0 + while (keys.hasNext()) { + val key = keys.next() + ++i + val chapter = json.getJSONObject(key) + val url = "https://$domain/api/series/$slug/$key" + chapters.add( + MangaChapter( + id = generateUid(url), + name = chapter.getString("title"), + number = i, + url = url, + scanlator = null, + uploadDate = 0, + branch = null, + source = source, + ), + ) + } + return manga.copy(chapters = chapters) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val key = chapter.url.substringAfterLast('/') + val url = chapter.url.substringBeforeLast('/') + val slug = url.substringAfterLast('/') + val chapterPages = webClient.httpGet(url.toAbsoluteUrl(domain)).parseJson() + .getJSONObject("chapters").getJSONObject(key) + val images = chapterPages.getJSONObject("groups") + val folder = chapterPages.getString("folder") + val keysPages: Iterator = images.keys() + val firstKey = keysPages.next() + val jsonPages = images.getJSONArray(firstKey) + val pages = ArrayList(jsonPages.length()) + for (i in 0 until jsonPages.length()) { + val urlPage = "https://$domain/media/manga/$slug/chapters/$folder/$firstKey/" + 
jsonPages.getString(i) + pages.add( + MangaPage( + id = generateUid(urlPage), + url = urlPage, + preview = null, + source = source, + ), + ) + } + return pages + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/all/MahouShoujobu.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/all/MahouShoujobu.kt new file mode 100644 index 00000000..bfbdf762 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/all/MahouShoujobu.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.guya.all + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.guya.GuyaParser + +@MangaSourceParser("MAHOUSHOUJOBU", "MahouShoujobu") +internal class MahouShoujobu(context: MangaLoaderContext) : + GuyaParser(context, MangaSource.MAHOUSHOUJOBU, "mahoushoujobu.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Danke.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Danke.kt new file mode 100644 index 00000000..80fe6fda --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Danke.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.guya.en + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.guya.GuyaParser + +@MangaSourceParser("DANKE", "DankeFursLesen", "en") +internal class Danke(context: MangaLoaderContext) : + GuyaParser(context, MangaSource.DANKE, "danke.moe") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/GuyaCubari.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/GuyaCubari.kt new file mode 100644 index 00000000..6dff7ee6 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/GuyaCubari.kt @@ 
-0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.guya.en + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.guya.GuyaParser + +@MangaSourceParser("GUYACUBARI", "GuyaCubari", "en") +internal class GuyaCubari(context: MangaLoaderContext) : + GuyaParser(context, MangaSource.GUYACUBARI, "guya.cubari.moe") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Hachirumi.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Hachirumi.kt new file mode 100644 index 00000000..42625374 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/guya/en/Hachirumi.kt @@ -0,0 +1,11 @@ +package org.koitharu.kotatsu.parsers.site.guya.en + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.guya.GuyaParser + +@MangaSourceParser("HACHIRUMI", "Hachirumi", "en", ContentType.HENTAI) +internal class Hachirumi(context: MangaLoaderContext) : + GuyaParser(context, MangaSource.HACHIRUMI, "hachirumi.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ar/Olaoe.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ar/Olaoe.kt new file mode 100644 index 00000000..f8ddc69e --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ar/Olaoe.kt @@ -0,0 +1,14 @@ +package org.koitharu.kotatsu.parsers.site.madara.ar + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.madara.MadaraParser + +@MangaSourceParser("OLAOE", "Olaoe", "ar") +internal class Olaoe(context: MangaLoaderContext) : + 
MadaraParser(context, MangaSource.OLAOE, "olaoe.cyou") { + override val datePattern = "dd-MM-yyyy" + override val tagPrefix = "/شوجو" + override val listUrl = "works/" +} From d5a6c95ffb882c5175fc381be0f2e4034979bf70 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 12:40:31 +0200 Subject: [PATCH 09/14] [MangaInUa] Fix chapters loading --- .../koitharu/kotatsu/parsers/site/uk/MangaInUaParser.kt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/uk/MangaInUaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/uk/MangaInUaParser.kt index f169fe26..3b1fc2c6 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/uk/MangaInUaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/uk/MangaInUaParser.kt @@ -87,16 +87,17 @@ class MangaInUaParser(context: MangaLoaderContext) : PagedMangaParser( userHashRegex.find(script.html())?.groupValues?.getOrNull(1) } val dateFormat = SimpleDateFormat("dd.MM.yyyy", Locale.US) - val chapterNodes = webClient.httpPost( + val chaptersDoc = webClient.httpPost( "https://$domain/engine/ajax/controller.php?mod=load_chapters", mapOf( "action" to "show", "news_id" to linkToComics.attrOrThrow("data-news_id"), "news_category" to linkToComics.attrOrThrow("data-news_category"), "this_link" to "", - "user_hash" to userHash, + "user_hashs" to userHash, ), - ).parseHtml().select(".ltcitems") + ).parseHtml() + val chapterNodes = chaptersDoc.select(".ltcitems") var prevChapterName: String? 
= null var i = 0 return manga.copy( From 3d10456a87abdaaaf674ba92b848f2ae0d83e33f Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 12:50:56 +0200 Subject: [PATCH 10/14] Apply suggestions from code review --- .../kotatsu/parsers/site/all/HitomiLaParser.kt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 21027f8b..5cfe0c8f 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -266,8 +266,8 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - if (length > 100000000 || length <= 0) { - throw Exception("length $length is too long") + require(length in 0..100000000) { + "Length $length is too long" } val inbuf = getRangedResponse(url, offset.until(offset + length)) @@ -284,9 +284,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val expectedLength = numberOfGalleryIDs * 4 + 4 if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { - throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") + throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long") } else if (inbuf.size != expectedLength) { - throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") } for (i in 0.until(numberOfGalleryIDs)) @@ -316,7 +316,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return 0 } - fun locateKey( + private fun locateKey( key: 
UByteArray, node: Node, ): Pair { @@ -331,7 +331,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Pair(false, node.keys.size) } - fun isLeaf(node: Node): Boolean { + private fun isLeaf(node: Node): Boolean { for (subnode in node.subNodeAddresses) if (subnode != 0L) { return false @@ -555,12 +555,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo ) } - companion object { - private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) - } + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) private fun JSONArray.mapToTags(key: String): Set { - val tags = mutableSetOf() + val tags = ArraySet(length()) mapJSON { MangaTag( title = From 4a0e7221b0f53aaa81a5e94f4d2fc3da07a04838 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 13:08:19 +0200 Subject: [PATCH 11/14] [HitomiLa] Refactor --- .../parsers/site/all/HitomiLaParser.kt | 476 +++++++++--------- 1 file changed, 233 insertions(+), 243 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 5cfe0c8f..8ce4c24d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -1,10 +1,17 @@ package org.koitharu.kotatsu.parsers.site.all -import kotlinx.coroutines.* -import kotlinx.coroutines.sync.* +import androidx.collection.ArraySet +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock import okhttp3.Headers -import org.json.* -import org.koitharu.kotatsu.parsers.* +import org.json.JSONArray +import org.json.JSONObject +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaParser +import 
org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* @@ -24,82 +31,74 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val ltnBaseUrl get() = "https://${getDomain("ltn")}" - override val availableSortOrders: Set = - EnumSet.of( - SortOrder.NEWEST, - SortOrder.POPULARITY, - ) + override val availableSortOrders: Set = EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) - private val localeMap: Map = - mapOf( - Locale("id") to "indonesian", - Locale("jv") to "javanese", - Locale("ca") to "catalan", - Locale("ceb") to "cebuano", - Locale("cs") to "czech", - Locale("da") to "danish", - Locale("de") to "german", - Locale("et") to "estonian", - Locale.ENGLISH to "english", - Locale("es") to "spanish", - Locale("eo") to "esperanto", - Locale("fr") to "french", - Locale("it") to "italian", - Locale("hi") to "hindi", - Locale("hu") to "hungarian", - Locale("pl") to "polish", - Locale("pt") to "portuguese", - Locale("vi") to "vietnamese", - Locale("tr") to "turkish", - Locale("ru") to "russian", - Locale("uk") to "ukrainian", - Locale("ar") to "arabic", - Locale.KOREAN to "korean", - Locale.CHINESE to "chinese", - Locale.JAPANESE to "japanese", - ) + private val localeMap: Map = mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", 
+ Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) - private fun Locale?.getSiteLang(): String { - return when (this) { - null -> "all" - else -> localeMap[this] ?: "all" - } + private fun Locale?.getSiteLang(): String = when (this) { + null -> "all" + else -> localeMap[this] ?: "all" } - override suspend fun getAvailableLocales(): Set { - return localeMap.keys - } + override suspend fun getAvailableLocales(): Set = localeMap.keys - override suspend fun getAvailableTags(): Set { - return coroutineScope { - ('a'..'z').map { alphabet -> - async { - val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() - - doc.select(".posts > li").mapNotNull { element -> - val num = - element.ownText().let { - Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 - } - - if (num > 100) { - val url = element.selectFirst("a") - val href = - url?.attrAsRelativeUrl("href") - ?: return@mapNotNull null - - MangaTag( - title = url.ownText().toCamelCase(), - key = href.tagUrlToTag(), - source = source, - ) - } else { - null + override suspend fun getAvailableTags(): Set = coroutineScope { + ('a'..'z').map { alphabet -> + async { + val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() + + doc.select(".posts > li").mapNotNull { element -> + val num = + element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 } + + if (num > 100) { + val url = element.selectFirst("a") + val href = + url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null + + MangaTag( + title = url.ownText().toCamelCase(), + key = href.tagUrlToTag(), + source = source, + ) + } else { + null } } - }.awaitAll().flatten().toSet() - } + } + }.awaitAll().flatten().toSet() } private var cachedSearchIds: List = emptyList() @@ -107,42 +106,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo override suspend fun 
getList( offset: Int, filter: MangaListFilter?, - ): List { - return when (filter) { - is MangaListFilter.Advanced -> { - if (filter.tags.isEmpty()) { - when (filter.sortOrder) { - SortOrder.POPULARITY -> { - getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange()) - } - - else -> { - getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) - } + ): List = when (filter) { + is MangaListFilter.Advanced -> { + if (filter.tags.isEmpty()) { + when (filter.sortOrder) { + SortOrder.POPULARITY -> { + getGalleryIDsFromNozomi( + "popular", + "today", + filter.locale.getSiteLang(), + offset.nextOffsetRange(), + ) } - } else { - if (offset == 0) { - cachedSearchIds = - hitomiSearch( - filter.tags.joinToString(" ") { it.key }, - filter.sortOrder == SortOrder.POPULARITY, - filter.locale.getSiteLang(), - ).toList() + + else -> { + getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) } - cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } - } - - is MangaListFilter.Search -> { + } else { if (offset == 0) { - cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = + hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY, + filter.locale.getSiteLang(), + ).toList() } cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } + } - else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) - }.toMangaList() - } + is MangaListFilter.Search -> { + if (offset == 0) { + cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() + } + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) + } + + else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) + }.toMangaList() private fun Int.nextOffsetRange(): LongRange { val bytes = 
this * 4L @@ -243,6 +245,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo area = "tag" tag = it } + "language" -> { area = null lang = tag @@ -255,9 +258,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val key = hashTerm(it) val node = getGalleryNodeAtAddress(0) - val data = - bSearch(key, node) - ?: return emptySet() + val data = bSearch(key, node) ?: return emptySet() return getGalleryIDsFromData(data) } @@ -266,7 +267,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - require(length in 0..100000000) { + require(length in 1..100000000) { "Length $length is too long" } @@ -283,10 +284,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val expectedLength = numberOfGalleryIDs * 4 + 4 - if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { - throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long") - } else if (inbuf.size != expectedLength) { - throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + require(numberOfGalleryIDs in 1..10000000) { + "number_of_galleryids $numberOfGalleryIDs is too long" + } + require(inbuf.size == expectedLength) { + "inbuf.byteLength ${inbuf.size} != expected_length $expectedLength" } for (i in 0.until(numberOfGalleryIDs)) @@ -316,7 +318,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return 0 } - private fun locateKey( + fun locateKey( key: UByteArray, node: Node, ): Pair { @@ -331,7 +333,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Pair(false, node.keys.size) } - private fun isLeaf(node: Node): Boolean { + fun isLeaf(node: Node): Boolean { for 
(subnode in node.subNodeAddresses) if (subnode != 0L) { return false @@ -361,19 +363,17 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo language: String, range: LongRange? = null, ): Set { - val nozomiAddress = - when (area) { - null -> "$ltnBaseUrl/$tag-$language.nozomi" - else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" - } + val nozomiAddress = when (area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() - val arrayBuffer = - ByteBuffer - .wrap(bytes) - .order(ByteOrder.BIG_ENDIAN) + val arrayBuffer = ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) while (arrayBuffer.hasRemaining()) nozomi.add(arrayBuffer.int) @@ -381,10 +381,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val galleriesIndexVersion = - SuspendLazy { - webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() - } + private val galleriesIndexVersion = SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -393,10 +392,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo ) private fun decodeNode(data: ByteArray): Node { - val buffer = - ByteBuffer - .wrap(data) - .order(ByteOrder.BIG_ENDIAN) + val buffer = ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) val uData = data.toUByteArray() @@ -447,11 +445,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo url: String, range: LongRange? 
= null, ): ByteArray { - val rangeHeaders = - when (range) { - null -> Headers.headersOf() - else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") - } + val rangeHeaders = when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } return webClient.httpGet(url, rangeHeaders).parseBytes() } @@ -464,113 +461,109 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return MessageDigest.getInstance("SHA-256").digest(data) } - private suspend fun Collection.toMangaList(): List { - return coroutineScope { - map { id -> - async { - runCatching { - val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() - - Manga( - id = generateUid(id.toString()), - title = doc.selectFirstOrThrow("h1").text(), - url = id.toString(), - coverUrl = - "https:" + - doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), - publicUrl = - doc.selectFirstOrThrow("h1 > a") - .attrAsRelativeUrl("href") - .toAbsoluteUrl(domain), - author = null, - tags = emptySet(), - isNsfw = true, - rating = RATING_UNKNOWN, - altTitle = null, - state = null, - source = source, - ) - }.getOrNull() - } - }.awaitAll().filterNotNull() - } + private suspend fun Collection.toMangaList(): List = coroutineScope { + map { id -> + async { + runCatching { + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() + + Manga( + id = generateUid(id.toString()), + title = doc.selectFirstOrThrow("h1").text(), + url = id.toString(), + coverUrl = + "https:" + + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = + doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), + author = null, + tags = emptySet(), + isNsfw = true, + rating = RATING_UNKNOWN, + altTitle = null, + state = null, + source = source, + ) + }.getOrNull() + } + }.awaitAll().filterNotNull() } 
override suspend fun getDetails(manga: Manga): Manga { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return manga.copy( title = json.getString("title"), largeCoverUrl = - json.getJSONArray("files").getJSONObject(0).let { - val hash = it.getString("hash") - val commonId = commonImageId() - val imageId = imageIdFromHash(hash) - val subDomain = 'a' + subdomainOffset(imageId) - - "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" - }, + json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, author = - json.optJSONArray("artists") - ?.mapJSON { it.getString("artist").toCamelCase() } - ?.joinToString(), + json.optJSONArray("artists") + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), tags = - buildSet { - json.optJSONArray("characters") - ?.mapToTags("character") - ?.let(::addAll) - json.optJSONArray("tags") - ?.mapToTags("tag") - ?.let(::addAll) - json.optJSONArray("artists") - ?.mapToTags("artist") - ?.let(::addAll) - json.optJSONArray("parodys") - ?.mapToTags("parody") - ?.let(::addAll) - json.optJSONArray("groups") - ?.mapToTags("group") - ?.let(::addAll) - }, - chapters = - listOf( - MangaChapter( - id = generateUid(manga.url), - url = manga.url, - name = json.getString("title"), - scanlator = json.getString("type").toTitleCase(), - number = 1, - branch = json.getString("language_localname"), - source = source, - uploadDate = 
dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), - ), + buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), ), + ), ) } - private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) private fun JSONArray.mapToTags(key: String): Set { val tags = ArraySet(length()) mapJSON { MangaTag( title = - it.getString(key).toCamelCase().let { title -> - if (it.getStringOrNull("female")?.toIntOrNull() == 1) { - "$title ♀" - } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { - "$title ♂" - } else { - title - } - }, + it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, key = it.getString("url").tagUrlToTag(), source = source, ).let(tags::add) @@ -595,11 +588,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getRelatedManga(seed: Manga): List { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + 
.parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) // any better way to get List from this json? return json.getJSONArray("related").let { @@ -608,11 +600,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getPages(chapter: MangaChapter): List { - val json = - webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return json.getJSONArray("files").mapJSON { image -> val hash = image.getString("hash") @@ -637,25 +628,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val subdomainOffsetMap = mutableMapOf() private var commonImageId = "" - private suspend fun refreshScript() = - mutex.withLock { - if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { - val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + private suspend fun refreshScript() = mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! 
+ 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() - subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() - val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() - subdomainOffsetMap.clear() - Regex("case (\\d+):").findAll(ggScript).forEach { - val case = it.groupValues[1].toInt() - subdomainOffsetMap[case] = o - } + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } - commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] - scriptLastRetrieval = System.currentTimeMillis() - } + scriptLastRetrieval = System.currentTimeMillis() } + } // m <-- gg.js private suspend fun subdomainOffset(imageId: Int): Int { From d4b252fd71b2ff6368b686ba12ab6d63b9da83ad Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Mon, 25 Dec 2023 19:28:41 +0500 Subject: [PATCH 12/14] HitomiLa: fix thumbnails I dismissed this in their scripts as the thumbnails worked fine but now they broke --- .../parsers/site/all/HitomiLaParser.kt | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 8ce4c24d..27c2ecb2 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -9,6 +9,7 @@ import kotlinx.coroutines.sync.withLock import okhttp3.Headers import org.json.JSONArray import 
org.json.JSONObject +import org.jsoup.Jsoup import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaSourceParser @@ -465,7 +466,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo map { id -> async { runCatching { - val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html") + .parseRaw().let { rewriteTnPaths(it) } + .let(Jsoup::parse) Manga( id = generateUid(id.toString()), @@ -473,9 +476,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo url = id.toString(), coverUrl = "https:" + - doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), publicUrl = doc.selectFirstOrThrow("h1 > a") .attrAsRelativeUrl("href") @@ -669,4 +672,31 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private fun thumbPathFromHash(hash: String): String { return hash.replace(Regex("""^.*(..)(.)$"""), "$2/$1") } + + private suspend fun subdomainFromURL(url: String, base: String? 
= null) : String { + var retval = "b" + + if (!base.isNullOrBlank()) + retval = base + + val regex = Regex("""/[0-9a-f]{61}([0-9a-f]{2})([0-9a-f])""") + val hashMatch = regex.find(url) ?: return "a" + val imageId = hashMatch.groupValues.let { it[2]+it[1] }.toIntOrNull(16) + + if (imageId != null) { + retval = ('a'+ subdomainOffset(imageId)).toString() + retval + } + + return retval + } + + // rewrite_tn_paths <-- common.js + private suspend fun rewriteTnPaths(html: String): String { + val tnRegex = Regex("""//tn\.hitomi\.la/[^/]+/[0-9a-f]/[0-9a-f]{2}/[0-9a-f]{64}""") + val url = tnRegex.find(html)?.value ?: return html + val newSubdomain = subdomainFromURL(url, "tn") + val newUrl = url.replace(Regex("""//..?\.hitomi\.la/"""), "//${getDomain(newSubdomain)}/") + + return html.replace(tnRegex, newUrl) + } } From 859b07e45418df7a8ad3c1fe2cbe82281ebda983 Mon Sep 17 00:00:00 2001 From: devi Date: Mon, 25 Dec 2023 17:02:06 +0100 Subject: [PATCH 13/14] Add Ler999, ShadowCeviri --- .../parsers/site/zeistmanga/ZeistMangaParser.kt | 6 ++++++ .../kotatsu/parsers/site/zeistmanga/pt/Ler999.kt | 10 ++++++++++ .../parsers/site/zeistmanga/tr/ShadowCeviri.kt | 15 +++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/pt/Ler999.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/tr/ShadowCeviri.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ZeistMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ZeistMangaParser.kt index 82a65774..7fd208ae 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ZeistMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/ZeistMangaParser.kt @@ -207,9 +207,15 @@ internal abstract class ZeistMangaParser( else -> null } } + val author = doc.selectFirst("div.y6x11p:contains(الكاتب) .dt") + ?: doc.selectFirst("div.y6x11p:contains(Author) .dt") + ?: 
doc.selectFirst("div.y6x11p:contains(Autor) .dt") + ?: doc.selectFirst("div.y6x11p:contains(Yazar) .dt") + val desc = doc.getElementById("synopsis") ?: doc.getElementById("Sinopse") ?: doc.getElementById("sinopas") val chaptersDeferred = async { loadChapters(manga.url, doc) } manga.copy( + author = author?.text(), tags = doc.select(selectTags).mapNotNullToSet { a -> MangaTag( key = a.attr("href").substringAfterLast("label/").substringBefore("?"), diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/pt/Ler999.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/pt/Ler999.kt new file mode 100644 index 00000000..5993c064 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/pt/Ler999.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.zeistmanga.pt + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.zeistmanga.ZeistMangaParser + +@MangaSourceParser("LER999", "Ler999", "pt") +internal class Ler999(context: MangaLoaderContext) : + ZeistMangaParser(context, MangaSource.LER999, "ler999.blogspot.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/tr/ShadowCeviri.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/tr/ShadowCeviri.kt new file mode 100644 index 00000000..1aa5412a --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/zeistmanga/tr/ShadowCeviri.kt @@ -0,0 +1,15 @@ +package org.koitharu.kotatsu.parsers.site.zeistmanga.tr + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.site.zeistmanga.ZeistMangaParser + +@MangaSourceParser("SHADOWCEVIRI", "ShadowCeviri", "tr", 
ContentType.COMICS) +internal class ShadowCeviri(context: MangaLoaderContext) : + ZeistMangaParser(context, MangaSource.SHADOWCEVIRI, "shadowceviri.blogspot.com") { + override val sateOngoing: String = "Devam Ediyor" + override val sateFinished: String = "Tamamlandı" + override val sateAbandoned: String = "Güncel" +} From 904e0719ebac71981ef14be34d5fb52bf456ffa8 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 19:48:33 +0200 Subject: [PATCH 14/14] [HitomiLa] Fix tag names --- .../parsers/site/all/HitomiLaParser.kt | 84 ++++++++++--------- .../kotatsu/parsers/MangaParserTest.kt | 27 +++--- 2 files changed, 58 insertions(+), 53 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 27c2ecb2..27ac2306 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -90,7 +90,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo ?: return@mapNotNull null MangaTag( - title = url.ownText().toCamelCase(), + title = url.ownText().toTagTitle(), key = href.tagUrlToTag(), source = source, ) @@ -158,15 +158,14 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo language: String = "all", ): Set = coroutineScope { - val terms = - query - .trim() - .replace(Regex("""^\?"""), "") - .lowercase() - .split(Regex("\\s+")) - .map { - it.replace('_', ' ') - } + val terms = query + .trim() + .replace(Regex("""^\?"""), "") + .lowercase() + .split(Regex("\\s+")) + .map { + it.replace('_', ' ') + } val positiveTerms = LinkedList() val negativeTerms = LinkedList() @@ -179,30 +178,27 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } } - val positiveResults = - positiveTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it, language) - 
}.getOrDefault(emptySet()) - } + val positiveResults = positiveTerms.map { + async { + runCatchingCancellable { + getGalleryIDsForQuery(it, language) + }.getOrDefault(emptySet()) } + } - val negativeResults = - negativeTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it, language) - }.getOrDefault(emptySet()) - } + val negativeResults = negativeTerms.map { + async { + runCatchingCancellable { + getGalleryIDsForQuery(it, language) + }.getOrDefault(emptySet()) } + } - val results = - when { - sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", language) - positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", language) - else -> emptySet() - }.toMutableSet() + val results = when { + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", language) + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", language) + else -> emptySet() + }.toMutableSet() fun filterPositive(newResults: Set) { when { @@ -465,10 +461,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun Collection.toMangaList(): List = coroutineScope { map { id -> async { - runCatching { - val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html") - .parseRaw().let { rewriteTnPaths(it) } - .let(Jsoup::parse) + runCatchingCancellable { + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").let { + val baseUri = it.request.url.toString() + val html = it.parseRaw() + Jsoup.parse(rewriteTnPaths(html), baseUri) + } Manga( id = generateUid(id.toString()), @@ -476,9 +474,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo url = id.toString(), coverUrl = "https:" + - doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), publicUrl = doc.selectFirstOrThrow("h1 > a") .attrAsRelativeUrl("href") @@ -673,7 +671,7 @@ class 
HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return hash.replace(Regex("""^.*(..)(.)$"""), "$2/$1") } - private suspend fun subdomainFromURL(url: String, base: String? = null) : String { + private suspend fun subdomainFromURL(url: String, base: String? = null): String { var retval = "b" if (!base.isNullOrBlank()) @@ -681,10 +679,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val regex = Regex("""/[0-9a-f]{61}([0-9a-f]{2})([0-9a-f])""") val hashMatch = regex.find(url) ?: return "a" - val imageId = hashMatch.groupValues.let { it[2]+it[1] }.toIntOrNull(16) + val imageId = hashMatch.groupValues.let { it[2] + it[1] }.toIntOrNull(16) if (imageId != null) { - retval = ('a'+ subdomainOffset(imageId)).toString() + retval + retval = ('a' + subdomainOffset(imageId)).toString() + retval } return retval @@ -699,4 +697,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return html.replace(tnRegex, newUrl) } + + private fun String.toTagTitle(): String { + return toCamelCase() + .replace("♂", "(male)") + .replace("♀", "(female)") + } } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index b3cf9130..b086e735 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -3,7 +3,6 @@ package org.koitharu.kotatsu.parsers import kotlinx.coroutines.test.runTest import okhttp3.HttpUrl import org.junit.jupiter.api.Disabled -import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.params.ParameterizedTest import org.koitharu.kotatsu.parsers.model.Manga import org.koitharu.kotatsu.parsers.model.MangaListFilter @@ -16,16 +15,18 @@ import org.koitharu.kotatsu.test_util.isDistinct import org.koitharu.kotatsu.test_util.isDistinctBy import org.koitharu.kotatsu.test_util.isUrlAbsolute import 
org.koitharu.kotatsu.test_util.maxDuplicates +import kotlin.time.Duration.Companion.minutes -@ExtendWith(AuthCheckExtension::class) +//@ExtendWith(AuthCheckExtension::class) internal class MangaParserTest { private val context = MangaLoaderContextMock + private val timeout = 2.minutes @ParameterizedTest(name = "{index}|list|{0}") @MangaSources - fun list(source: MangaSource) = runTest { + fun list(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) checkMangaList(list, "list") @@ -34,7 +35,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|pagination|{0}") @MangaSources - fun pagination(source: MangaSource) = runTest { + fun pagination(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val page1 = parser.getList(0, filter = null) val page2 = parser.getList(page1.size, filter = null) @@ -54,7 +55,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|search|{0}") @MangaSources - fun search(source: MangaSource) = runTest { + fun search(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val subject = parser.getList( offset = 0, @@ -78,7 +79,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|tags|{0}") @MangaSources - fun tags(source: MangaSource) = runTest { + fun tags(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val tags = parser.getAvailableTags() assert(tags.isNotEmpty()) { "No tags found" } @@ -98,7 +99,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|tags_multiple|{0}") @MangaSources - fun tagsMultiple(source: MangaSource) = runTest { + fun tagsMultiple(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val tags = 
parser.getAvailableTags().shuffled().take(2).toSet() @@ -117,7 +118,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|locale|{0}") @MangaSources - fun locale(source: MangaSource) = runTest { + fun locale(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val locales = parser.getAvailableLocales() if (locales.isEmpty()) { @@ -137,7 +138,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|details|{0}") @MangaSources - fun details(source: MangaSource) = runTest { + fun details(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) val manga = list[3] @@ -166,7 +167,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|pages|{0}") @MangaSources - fun pages(source: MangaSource) = runTest { + fun pages(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val list = parser.getList(0, sortOrder = SortOrder.UPDATED, tags = null) val manga = list.first() @@ -190,7 +191,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|favicon|{0}") @MangaSources - fun favicon(source: MangaSource) = runTest { + fun favicon(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val favicons = parser.getFavicons() val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") @@ -206,7 +207,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|domain|{0}") @MangaSources - fun domain(source: MangaSource) = runTest { + fun domain(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) val defaultDomain = parser.domain val url = HttpUrl.Builder().host(defaultDomain).scheme("https").toString() @@ -222,7 +223,7 @@ internal class MangaParserTest { @ParameterizedTest(name = "{index}|authorization|{0}") 
@MangaSources @Disabled - fun authorization(source: MangaSource) = runTest { + fun authorization(source: MangaSource) = runTest(timeout = timeout) { val parser = context.newParserInstance(source) if (parser is MangaParserAuthProvider) { val username = parser.getUsername()