From a78afba15e8a35711f681804f816d92e16ebac5c Mon Sep 17 00:00:00 2001 From: Draken <131387159+dragonx943@users.noreply.github.com> Date: Sun, 15 Jun 2025 00:00:17 +0700 Subject: [PATCH] [KuroNeko + TruyenHentai18] Fixes (#1879) Solved #1683 #1604 --- .../kotatsu/parsers/site/vi/KuroNeko.kt | 134 +++---- .../kotatsu/parsers/site/vi/TruyenHentai18.kt | 361 ++++++++++-------- 2 files changed, 260 insertions(+), 235 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/KuroNeko.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/KuroNeko.kt index ac077040..9894a635 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/KuroNeko.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/KuroNeko.kt @@ -29,6 +29,9 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co override val filterCapabilities: MangaListFilterCapabilities get() = MangaListFilterCapabilities( isSearchSupported = true, + isMultipleTagsSupported = true, + isTagsExclusionSupported = true, + isSearchWithFiltersSupported = true, ) override suspend fun getFilterOptions() = MangaListFilterOptions( @@ -41,81 +44,48 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co append("https://") append(domain) - when { - - !filter.query.isNullOrEmpty() -> { - append("/tim-kiem") - append("?filter[name]=") - append(filter.query.urlEncoded()) - - if (page > 1) { - append("&page=") - append(page) - } - - append("&sort=") - append( - when (order) { - SortOrder.POPULARITY -> "-views" - SortOrder.UPDATED -> "-updated_at" - SortOrder.NEWEST -> "-created_at" - SortOrder.ALPHABETICAL -> "name" - SortOrder.ALPHABETICAL_DESC -> "-name" - else -> "-updated_at" - }, - ) - } - - filter.tags.isNotEmpty() -> { - val tag = filter.tags.first() - append("/the-loai/") - append(tag.key) - - append("?page=") - append(page) - } - - else -> { - append("/danh-sach") - append("?sort=") - append( - when (order) { - SortOrder.POPULARITY -> "-views" - SortOrder.UPDATED -> "-updated_at" - SortOrder.NEWEST -> "-created_at" - SortOrder.ALPHABETICAL -> "name" - SortOrder.ALPHABETICAL_DESC -> "-name" - else -> "-updated_at" - }, - ) - append("&page=") - append(page) - } + append("/tim-kiem") + append("?sort=") + append( + when (order) { + SortOrder.POPULARITY -> "-views" + SortOrder.UPDATED -> "-updated_at" + SortOrder.NEWEST -> "-created_at" + SortOrder.ALPHABETICAL -> "name" + SortOrder.ALPHABETICAL_DESC -> "-name" + else -> "-updated_at" + }, + ) + + if (!filter.query.isNullOrEmpty()) { + append("&keyword=") + append(filter.query.urlEncoded()) } - if (filter.query.isNullOrEmpty()) { - append("&sort=") - when (order) { - SortOrder.POPULARITY -> append("-views") - SortOrder.UPDATED -> append("-updated_at") - SortOrder.NEWEST -> append("-created_at") - SortOrder.ALPHABETICAL -> append("name") - SortOrder.ALPHABETICAL_DESC -> append("-name") - else -> append("-updated_at") - } + if (page > 1) { + append("&page=") + append(page) } - if (filter.states.isNotEmpty()) { - append("&filter[status]=") - filter.states.forEach { - append( - when (it) { - MangaState.ONGOING -> "2," - MangaState.FINISHED -> "1," - else -> "1,2" - }, - ) - } + append("&filter[status]=") + filter.states.forEach { + append( + when (it) { + MangaState.ONGOING -> "2," + MangaState.FINISHED -> "1," + else -> "2,1" + }, + ) + } + + if (filter.tags.isNotEmpty()) { + append("&filter[accept_genres]=") + filter.tags.joinTo(this, separator = ",") { it.key } + } + + if (filter.tagsExclude.isNotEmpty()) { + append("&filter[reject_genres]=") + filter.tagsExclude.joinTo(this, separator = ",") { it.key } } } @@ -200,6 +170,17 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co } } + private suspend fun availableTags(): Set { + val doc = webClient.httpGet("https://$domain").parseHtml() + return doc.select("ul.grid.grid-cols-2 a").mapIndexed { index, a -> + MangaTag( + key = (index + 1).toString(), + title = a.text(), + source = source, + ) + }.toSet() + } + private fun parseDateTime(dateStr: String): Long = runCatching { val parts = dateStr.split(' ') val dateParts = parts[0].split('-') @@ -216,15 +197,4 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co ) calendar.timeInMillis }.getOrDefault(0L) - - private suspend fun availableTags(): Set { - val doc = webClient.httpGet("https://$domain").parseHtml() - return doc.select("ul.grid.grid-cols-2 a").mapToSet { a -> - MangaTag( - key = a.attr("href").removeSuffix('/').substringAfterLast('/'), - title = a.text(), - source = source, - ) - } - } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/TruyenHentai18.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/TruyenHentai18.kt index 2eb9450a..bc4ce8b4 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/TruyenHentai18.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/TruyenHentai18.kt @@ -1,22 +1,28 @@ package org.koitharu.kotatsu.parsers.site.vi +import org.json.JSONArray +import org.json.JSONObject import org.jsoup.nodes.Document +import org.jsoup.Jsoup import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* +import org.koitharu.kotatsu.parsers.util.json.* import java.text.SimpleDateFormat import java.util.* -import org.koitharu.kotatsu.parsers.Broken -@Broken("Need to remake parser") @MangaSourceParser("TRUYENHENTAI18", "TruyenHentai18", "vi", ContentType.HENTAI) -internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) { +internal class TruyenHentai18(context: MangaLoaderContext): + LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) { override val configKeyDomain = ConfigKey.Domain("truyenhentai18.app") + private val apiSuffix = "api.th18.app" + private val cdnSuffix = "vi-api.th18.app" + override fun onCreateConfig(keys: MutableCollection>) { super.onCreateConfig(keys) keys.add(userAgentKey) @@ -24,14 +30,13 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar override val availableSortOrders: Set = EnumSet.of( SortOrder.UPDATED, - SortOrder.POPULARITY, - SortOrder.RATING, + SortOrder.NEWEST, + SortOrder.NEWEST_ASC, ) override val filterCapabilities: MangaListFilterCapabilities get() = MangaListFilterCapabilities( isSearchSupported = true, - isSearchWithFiltersSupported = false, ) override suspend fun getFilterOptions() = MangaListFilterOptions( @@ -40,199 +45,249 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { val url = when { - !filter.query.isNullOrEmpty() -> { + filter.tags.isNotEmpty() -> { buildString { append(domain) + append("/vi/the-loai/") + append(filter.tags.first().key) append("/page/") append(page) - append("?s=") - append(filter.query.urlEncoded()) - } - } - !filter.author.isNullOrEmpty() -> { - buildString { - append(domain) - append("/artist/") - append(filter.author.urlEncoded()) } } + else -> { buildString { - append(domain) - if (filter.tags.isNotEmpty()) { - append("/category/") - append(filter.tags.first().key) - } else { - append( - when (order) { - SortOrder.UPDATED -> "/moi-cap-nhat" - SortOrder.POPULARITY -> "/xem-nhieu-nhat" - SortOrder.RATING -> "/truyen-de-xuat" - else -> "/moi-cap-nhat" - } - ) - } - if (page > 1) { - append("/page/") - append(page) + append(apiSuffix + "/posts") + append("?language=vi") + + append("&order=") + append( + when (order) { + SortOrder.UPDATED -> "latest" + SortOrder.NEWEST -> "newest" + SortOrder.NEWEST_ASC -> "oldest" + else -> "latest" // default + } + ) + + append("&limit=24") + append("&page=") + append(page) + + if (!filter.query.isNullOrEmpty()) { + append("&query=${filter.query}") } } } } - val doc = webClient.httpGet("https://$url").parseHtml() + val fullUrl = "https://" + url return when { - !filter.query.isNullOrEmpty() -> parseSearchList(doc) - !filter.author.isNullOrEmpty() -> parseSearchList(doc) - else -> parseMangaList(doc) + filter.tags.isNotEmpty() -> parseNextList(webClient.httpGet(fullUrl).parseHtml()) + else -> { + val doc = webClient.httpGet(fullUrl).parseJson() + parseJSONList(doc) + } } } - private fun parseMangaList(doc: Document): List { - return doc.select("a.item-cover.ms-3.me-3").mapNotNull { element -> - val href = element.attrAsRelativeUrl("href") ?: return@mapNotNull null - val img = element.selectFirst("img") ?: return@mapNotNull null - val coverUrl = img.attr("data-src").orEmpty() - val title = img.attr("alt").orEmpty() - + private fun parseJSONList(json: JSONObject): List { + return json.getJSONArray("data").mapJSON { mangaItem -> Manga( - id = generateUid(href), - title = title, - altTitles = emptySet(), - url = href, - publicUrl = href.toAbsoluteUrl(domain), + id = mangaItem.getLong("id"), + title = mangaItem.getString("title"), + altTitles = setOfNotNull( + mangaItem.optString("official_name").takeIf { !it.isNullOrBlank() } + ), + url = mangaItem.getString("slug"), + publicUrl = mangaItem.getString("slug").toAbsoluteUrl(domain), rating = RATING_UNKNOWN, contentRating = ContentRating.ADULT, - coverUrl = coverUrl, - tags = emptySet(), - state = null, - authors = emptySet(), + coverUrl = "https://$cdnSuffix/uploads/${mangaItem.getString("thumbnail")}", + tags = mangaItem.optJSONArray("genres")?.mapJSON { genreItem -> + MangaTag( + key = genreItem.getString("slug"), + title = genreItem.getString("name"), + source = source + ) + }?.toSet() ?: emptySet(), + state = when (mangaItem.optString("post_status")) { + "completed" -> MangaState.FINISHED + else -> MangaState.ONGOING + }, + authors = mangaItem.optJSONArray("authors")?.mapJSON { authorItem -> + authorItem.optString("name") + }?.filterNotNull()?.toSet() ?: emptySet(), source = source, + description = mangaItem.optString("content").orEmpty(), ) } } - private fun parseSearchList(doc: Document): List { - return doc.select("div.card.mb-3.small-item").mapNotNull { element -> - val href = element.selectFirst("a")?.attrAsRelativeUrl("href") ?: return@mapNotNull null - val img = element.selectFirst("img") ?: return@mapNotNull null - val coverUrl = img.attr("data-src").orEmpty() - val title = img.attr("alt").orEmpty() + private fun parseNextList(doc: Document): List { // need to clean code, very slow response + val script = doc.select("script").firstOrNull { it.data().contains("response") } + ?: throw Exception("Không tìm thấy script chứa dữ liệu manga") + + val scriptContent = script.data() + val cleanedScript = scriptContent + .replace("self.__next_f.push([1,", "") + .replace("\"5:", "") + .replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":\"$1a\"}}],", "") + .replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":", "") + .replace("\\\\\",", ",") + .replace("\\\"", "\"") + .replace("\\\\", "\\") + .replace("\\n", "") + .replace("\\t", "") + .replace("\\r", "") + val responseStart = cleanedScript.indexOf("{\"response\":") + if (responseStart == -1) throw Exception("Không tìm thấy object 'response' trong script") + + var bracketCount = 0 + var i = responseStart + var jsonStr = "" + + while (i < cleanedScript.length) { + val c = cleanedScript[i] + when (c) { + '{' -> bracketCount++ + '}' -> bracketCount-- + } + jsonStr += c + if (bracketCount == 0 && jsonStr.isNotEmpty()) break + i++ + } + + val responseObj = org.json.JSONObject(jsonStr) + val dataArray = responseObj.getJSONObject("response").optJSONArray("data") + ?: throw Exception("Không tìm thấy trường 'data' trong object 'response'") + + return (0 until dataArray.length()).map { idx -> + val item = dataArray.getJSONObject(idx) + val genres = item.optJSONArray("genres")?.let { genresArray -> + (0 until genresArray.length()).mapNotNull { gIdx -> + val genreItem = genresArray.optJSONObject(gIdx) ?: return@mapNotNull null + MangaTag( + key = genreItem.optString("slug"), + title = genreItem.optString("name"), + source = source + ) + }.toSet() + } ?: emptySet() + + val authors = item.optJSONArray("authors")?.let { authorsArray -> + (0 until authorsArray.length()).mapNotNull { aIdx -> + authorsArray.optJSONObject(aIdx)?.optString("name") + }.toSet() + } ?: emptySet() + Manga( - id = generateUid(href), - title = title, - altTitles = emptySet(), - url = href, - publicUrl = href.toAbsoluteUrl(domain), + id = item.getLong("id"), + title = item.getString("title"), + altTitles = setOfNotNull( + item.optString("official_name").takeIf { it.isNotBlank() } + ), + url = item.getString("slug"), + publicUrl = item.getString("slug").toAbsoluteUrl(domain), rating = RATING_UNKNOWN, contentRating = ContentRating.ADULT, - coverUrl = coverUrl, - tags = emptySet(), - state = null, - authors = emptySet(), + coverUrl = "https://$cdnSuffix/uploads/${item.getString("thumbnail")}", + tags = genres, + state = when (item.optString("post_status")) { + "completed" -> MangaState.FINISHED + else -> MangaState.ONGOING + }, + authors = authors, source = source, + description = item.optString("content").orEmpty() ) } } override suspend fun getDetails(manga: Manga): Manga { - val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - val rating = doc.selectFirst("div.kksr-stars")?.attr("data-rating")?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN - val description = doc.selectFirst("div.mt-3.desc-text")?.text() - - val author = doc.select("div.attr-item").firstOrNull { - it.selectFirst("b")?.text() == "Tác giả:" - }?.selectFirst("a")?.text() - - val tags = doc.select("ul.post-categories li a").mapNotNull { element -> - val name = element.text() - val key = element.attr("href").substringAfter("/category/") - MangaTag( - key = key, - title = name, - source = source, - ) - }.toSet() - - val chapters = doc.select("div.p-2.d-flex.flex-column.flex-md-row.item").reversed() - .mapChapters(reversed = false) { i, e -> - val name = e.selectFirst("b")?.text() ?: "" - val href = e.selectFirst("a")?.attrAsRelativeUrl("href") ?: "" - val dateText = e.selectFirst("i.ps-3")?.text() - MangaChapter( - id = generateUid(href), - title = name, - url = href, - number = i + 1f, - volume = 0, - uploadDate = parseChapterDate(dateText), - scanlator = null, - branch = null, - source = source, - ) - } - + val fullUrl = "https://$domain/vi/" + manga.url + ".html" + val doc = webClient.httpGet(fullUrl).parseHtml() return manga.copy( - rating = rating, - authors = setOfNotNull(author), - description = description, - chapters = chapters, - tags = tags, - contentRating = ContentRating.ADULT, - ) - } - - override suspend fun getPages(chapter: MangaChapter): List { - val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() - return doc.select("div#viewer p img").mapNotNull { img -> // Need debug - val url = img.attr("src") ?: return@mapNotNull null - MangaPage( - id = generateUid(url), - url = url, - preview = null, - source = source, + chapters = doc.select("div.grid.grid-cols-1.md\\:grid-cols-2.gap-4 a.block") + .mapChapters(reversed = false) { i, e -> + val name = e.selectFirst("span.truncate")?.text() ?: e.attr("title") ?: "" + val href = e.selectFirst("a")?.attrAsRelativeUrl("href") ?: "" + val dateText = e.selectFirst("div.text-xs.text-gray-500")?.text() + MangaChapter( + id = generateUid(href), + title = name, + url = href, + number = i + 1f, + volume = 0, + uploadDate = parseChapterDate(dateText), + scanlator = null, + branch = null, + source = source, + ) + } ) - } } - private fun parseChapterDate(dateText: String?): Long { - if (dateText == null) return 0 - - val relativeTimePattern = Regex("(\\d+)\\s*(ngày|tuần|tháng|năm) trước") - val absoluteTimePattern = Regex("(\\d{2}-\\d{2}-\\d{4})") + override suspend fun getPages(chapter: MangaChapter): List { + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() + val scriptContent = doc.select("script") + .firstOrNull { it.data().startsWith("self.__next_f.push([1,\"\\u003cp\\u003e\\u003c") } + ?.data() + + if (scriptContent != null) { + val regex = Regex("""self\.__next_f\.push\(\[1,\"(.*)\"\]\)""") + val htmlEncoded = regex.find(scriptContent)?.groupValues?.getOrNull(1) + if (!htmlEncoded.isNullOrEmpty()) { + val html = try { + JSONArray("[\"$htmlEncoded\"]").getString(0) + } catch (e: Exception) { + htmlEncoded + .replace("\\u003c", "<") + .replace("\\u003e", ">") + .replace("\\\"", "\"") + .replace("\\/", "/") + } + + val imageUrls = Jsoup.parse(html).select("img").mapNotNull { it.attr("src") } + if (imageUrls.isNotEmpty()) { + return imageUrls.map { url -> + MangaPage( + id = generateUid(url), + url = url, + preview = null, + source = source, + ) + } + } else return emptyList() + } + } + return emptyList() + } + private fun parseChapterDate(date: String?): Long { + if (date == null) return 0 return when { - dateText.contains("ngày trước") -> { - val match = relativeTimePattern.find(dateText) - val days = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0 - System.currentTimeMillis() - days * 86400 * 1000 - } + date.contains("giây trước") -> System.currentTimeMillis() - date.removeSuffix(" giây trước").toLong() * 1000 + date.contains("phút trước") -> System.currentTimeMillis() - date.removeSuffix(" phút trước") + .toLong() * 60 * 1000 - dateText.contains("tuần trước") -> { - val match = relativeTimePattern.find(dateText) - val weeks = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0 - System.currentTimeMillis() - weeks * 7 * 86400 * 1000 - } + date.contains("giờ trước") -> System.currentTimeMillis() - date.removeSuffix(" giờ trước") + .toLong() * 60 * 60 * 1000 - dateText.contains("tháng trước") -> { - val match = relativeTimePattern.find(dateText) - val months = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0 - System.currentTimeMillis() - months * 30 * 86400 * 1000 - } + date.contains("ngày trước") -> System.currentTimeMillis() - date.removeSuffix(" ngày trước") + .toLong() * 24 * 60 * 60 * 1000 - dateText.contains("năm trước") -> { - val match = relativeTimePattern.find(dateText) - val years = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0 - System.currentTimeMillis() - years * 365 * 86400 * 1000 - } + date.contains("tuần trước") -> System.currentTimeMillis() - date.removeSuffix(" tuần trước") + .toLong() * 7 * 24 * 60 * 60 * 1000 - absoluteTimePattern.matches(dateText) -> { - val formatter = SimpleDateFormat("dd-MM-yyyy", Locale.getDefault()) - formatter.tryParse(dateText) - } + date.contains("tháng trước") -> System.currentTimeMillis() - date.removeSuffix(" tháng trước") + .toLong() * 30 * 24 * 60 * 60 * 1000 + + date.contains("năm trước") -> System.currentTimeMillis() - date.removeSuffix(" năm trước") + .toLong() * 365 * 24 * 60 * 60 * 1000 - else -> 0L + else -> SimpleDateFormat("dd/MM/yyyy", Locale.US).parse(date)?.time ?: 0L } } }