[KuroNeko + TruyenHentai18] Fixes (#1879)

Solved #1683 #1604
master
Draken 11 months ago committed by GitHub
parent 0e946cf84c
commit a78afba15e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -29,6 +29,9 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
/**
 * Filter features this parser reports as supported: free-text search,
 * selecting several tags at once, excluding tags, and combining a search
 * query with the other filters.
 */
override val filterCapabilities: MangaListFilterCapabilities
    get() {
        val capabilities = MangaListFilterCapabilities(
            isSearchSupported = true,
            isMultipleTagsSupported = true,
            isTagsExclusionSupported = true,
            isSearchWithFiltersSupported = true,
        )
        return capabilities
    }
override suspend fun getFilterOptions() = MangaListFilterOptions(
@ -41,19 +44,8 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
append("https://")
append(domain)
when {
!filter.query.isNullOrEmpty() -> {
append("/tim-kiem")
append("?filter[name]=")
append(filter.query.urlEncoded())
if (page > 1) {
append("&page=")
append(page)
}
append("&sort=")
append("?sort=")
append(
when (order) {
SortOrder.POPULARITY -> "-views"
@ -64,58 +56,36 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
else -> "-updated_at"
},
)
}
filter.tags.isNotEmpty() -> {
val tag = filter.tags.first()
append("/the-loai/")
append(tag.key)
append("?page=")
append(page)
if (!filter.query.isNullOrEmpty()) {
append("&keyword=")
append(filter.query.urlEncoded())
}
else -> {
append("/danh-sach")
append("?sort=")
append(
when (order) {
SortOrder.POPULARITY -> "-views"
SortOrder.UPDATED -> "-updated_at"
SortOrder.NEWEST -> "-created_at"
SortOrder.ALPHABETICAL -> "name"
SortOrder.ALPHABETICAL_DESC -> "-name"
else -> "-updated_at"
},
)
if (page > 1) {
append("&page=")
append(page)
}
}
if (filter.query.isNullOrEmpty()) {
append("&sort=")
when (order) {
SortOrder.POPULARITY -> append("-views")
SortOrder.UPDATED -> append("-updated_at")
SortOrder.NEWEST -> append("-created_at")
SortOrder.ALPHABETICAL -> append("name")
SortOrder.ALPHABETICAL_DESC -> append("-name")
else -> append("-updated_at")
}
}
if (filter.states.isNotEmpty()) {
append("&filter[status]=")
filter.states.forEach {
append(
when (it) {
MangaState.ONGOING -> "2,"
MangaState.FINISHED -> "1,"
else -> "1,2"
else -> "2,1"
},
)
}
if (filter.tags.isNotEmpty()) {
append("&filter[accept_genres]=")
filter.tags.joinTo(this, separator = ",") { it.key }
}
if (filter.tagsExclude.isNotEmpty()) {
append("&filter[reject_genres]=")
filter.tagsExclude.joinTo(this, separator = ",") { it.key }
}
}
@ -200,6 +170,17 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
}
}
/**
 * Loads the site's root page and builds one [MangaTag] per anchor matched by
 * the `ul.grid.grid-cols-2 a` selector.
 *
 * The tag key is the anchor's 1-based position in selection order and the
 * title is the anchor's text.
 * NOTE(review): presumably the position equals the numeric genre id the site
 * expects in its genre filter parameters — confirm against the live site.
 *
 * @return the set of tags found on the page (empty when nothing matches).
 */
private suspend fun availableTags(): Set<MangaTag> {
    val homePage = webClient.httpGet("https://$domain").parseHtml()
    val genreLinks = homePage.select("ul.grid.grid-cols-2 a")
    return genreLinks.withIndex().map { (position, link) ->
        MangaTag(
            key = (position + 1).toString(),
            title = link.text(),
            source = source,
        )
    }.toSet()
}
private fun parseDateTime(dateStr: String): Long = runCatching {
val parts = dateStr.split(' ')
val dateParts = parts[0].split('-')
@ -216,15 +197,4 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
)
calendar.timeInMillis
}.getOrDefault(0L)
/**
 * Loads the site's root page and builds one [MangaTag] per anchor matched by
 * the `ul.grid.grid-cols-2 a` selector.
 *
 * The tag key is the last path segment of the anchor's `href` (a trailing
 * slash is dropped first) and the title is the anchor's text.
 *
 * @return the set of tags found on the page.
 */
private suspend fun availableTags(): Set<MangaTag> {
    val homePage = webClient.httpGet("https://$domain").parseHtml()
    val genreLinks = homePage.select("ul.grid.grid-cols-2 a")
    return genreLinks.mapToSet { link ->
        val slug = link.attr("href").removeSuffix('/').substringAfterLast('/')
        MangaTag(
            key = slug,
            title = link.text(),
            source = source,
        )
    }
}
}

@ -1,22 +1,28 @@
package org.koitharu.kotatsu.parsers.site.vi
import org.json.JSONArray
import org.json.JSONObject
import org.jsoup.nodes.Document
import org.jsoup.Jsoup
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.*
import java.text.SimpleDateFormat
import java.util.*
import org.koitharu.kotatsu.parsers.Broken
@Broken("Need to remake parser")
@MangaSourceParser("TRUYENHENTAI18", "TruyenHentai18", "vi", ContentType.HENTAI)
internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) {
internal class TruyenHentai18(context: MangaLoaderContext):
LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) {
override val configKeyDomain = ConfigKey.Domain("truyenhentai18.app")
private val apiSuffix = "api.th18.app"
private val cdnSuffix = "vi-api.th18.app"
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys)
keys.add(userAgentKey)
@ -24,14 +30,13 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
SortOrder.POPULARITY,
SortOrder.RATING,
SortOrder.NEWEST,
SortOrder.NEWEST_ASC,
)
/**
 * Filter features this parser reports: free-text search is supported, but a
 * search query cannot be combined with other filters.
 */
override val filterCapabilities: MangaListFilterCapabilities
    get() {
        val capabilities = MangaListFilterCapabilities(
            isSearchSupported = true,
            isSearchWithFiltersSupported = false,
        )
        return capabilities
    }
override suspend fun getFilterOptions() = MangaListFilterOptions(
@ -40,126 +45,175 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val url = when {
!filter.query.isNullOrEmpty() -> {
filter.tags.isNotEmpty() -> {
buildString {
append(domain)
append("/vi/the-loai/")
append(filter.tags.first().key)
append("/page/")
append(page)
append("?s=")
append(filter.query.urlEncoded())
}
}
!filter.author.isNullOrEmpty() -> {
buildString {
append(domain)
append("/artist/")
append(filter.author.urlEncoded())
}
}
else -> {
buildString {
append(domain)
if (filter.tags.isNotEmpty()) {
append("/category/")
append(filter.tags.first().key)
} else {
append(apiSuffix + "/posts")
append("?language=vi")
append("&order=")
append(
when (order) {
SortOrder.UPDATED -> "/moi-cap-nhat"
SortOrder.POPULARITY -> "/xem-nhieu-nhat"
SortOrder.RATING -> "/truyen-de-xuat"
else -> "/moi-cap-nhat"
SortOrder.UPDATED -> "latest"
SortOrder.NEWEST -> "newest"
SortOrder.NEWEST_ASC -> "oldest"
else -> "latest" // default
}
)
}
if (page > 1) {
append("/page/")
append("&limit=24")
append("&page=")
append(page)
if (!filter.query.isNullOrEmpty()) {
append("&query=${filter.query}")
}
}
}
}
val doc = webClient.httpGet("https://$url").parseHtml()
val fullUrl = "https://" + url
return when {
!filter.query.isNullOrEmpty() -> parseSearchList(doc)
!filter.author.isNullOrEmpty() -> parseSearchList(doc)
else -> parseMangaList(doc)
filter.tags.isNotEmpty() -> parseNextList(webClient.httpGet(fullUrl).parseHtml())
else -> {
val doc = webClient.httpGet(fullUrl).parseJson()
parseJSONList(doc)
}
}
}
private fun parseMangaList(doc: Document): List<Manga> {
return doc.select("a.item-cover.ms-3.me-3").mapNotNull { element ->
val href = element.attrAsRelativeUrl("href") ?: return@mapNotNull null
val img = element.selectFirst("img") ?: return@mapNotNull null
val coverUrl = img.attr("data-src").orEmpty()
val title = img.attr("alt").orEmpty()
private fun parseJSONList(json: JSONObject): List<Manga> {
return json.getJSONArray("data").mapJSON { mangaItem ->
Manga(
id = generateUid(href),
title = title,
altTitles = emptySet(),
url = href,
publicUrl = href.toAbsoluteUrl(domain),
id = mangaItem.getLong("id"),
title = mangaItem.getString("title"),
altTitles = setOfNotNull(
mangaItem.optString("official_name").takeIf { !it.isNullOrBlank() }
),
url = mangaItem.getString("slug"),
publicUrl = mangaItem.getString("slug").toAbsoluteUrl(domain),
rating = RATING_UNKNOWN,
contentRating = ContentRating.ADULT,
coverUrl = coverUrl,
tags = emptySet(),
state = null,
authors = emptySet(),
coverUrl = "https://$cdnSuffix/uploads/${mangaItem.getString("thumbnail")}",
tags = mangaItem.optJSONArray("genres")?.mapJSON { genreItem ->
MangaTag(
key = genreItem.getString("slug"),
title = genreItem.getString("name"),
source = source
)
}?.toSet() ?: emptySet(),
state = when (mangaItem.optString("post_status")) {
"completed" -> MangaState.FINISHED
else -> MangaState.ONGOING
},
authors = mangaItem.optJSONArray("authors")?.mapJSON { authorItem ->
authorItem.optString("name")
}?.filterNotNull()?.toSet() ?: emptySet(),
source = source,
description = mangaItem.optString("content").orEmpty(),
)
}
}
private fun parseSearchList(doc: Document): List<Manga> {
return doc.select("div.card.mb-3.small-item").mapNotNull { element ->
val href = element.selectFirst("a")?.attrAsRelativeUrl("href") ?: return@mapNotNull null
val img = element.selectFirst("img") ?: return@mapNotNull null
val coverUrl = img.attr("data-src").orEmpty()
val title = img.attr("alt").orEmpty()
private fun parseNextList(doc: Document): List<Manga> { // need to clean code, very slow response
val script = doc.select("script").firstOrNull { it.data().contains("response") }
?: throw Exception("Không tìm thấy script chứa dữ liệu manga")
val scriptContent = script.data()
val cleanedScript = scriptContent
.replace("self.__next_f.push([1,", "")
.replace("\"5:", "")
.replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":\"$1a\"}}],", "")
.replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":", "")
.replace("\\\\\",", ",")
.replace("\\\"", "\"")
.replace("\\\\", "\\")
.replace("\\n", "")
.replace("\\t", "")
.replace("\\r", "")
val responseStart = cleanedScript.indexOf("{\"response\":")
if (responseStart == -1) throw Exception("Không tìm thấy object 'response' trong script")
var bracketCount = 0
var i = responseStart
var jsonStr = ""
while (i < cleanedScript.length) {
val c = cleanedScript[i]
when (c) {
'{' -> bracketCount++
'}' -> bracketCount--
}
jsonStr += c
if (bracketCount == 0 && jsonStr.isNotEmpty()) break
i++
}
val responseObj = org.json.JSONObject(jsonStr)
val dataArray = responseObj.getJSONObject("response").optJSONArray("data")
?: throw Exception("Không tìm thấy trường 'data' trong object 'response'")
return (0 until dataArray.length()).map { idx ->
val item = dataArray.getJSONObject(idx)
val genres = item.optJSONArray("genres")?.let { genresArray ->
(0 until genresArray.length()).mapNotNull { gIdx ->
val genreItem = genresArray.optJSONObject(gIdx) ?: return@mapNotNull null
MangaTag(
key = genreItem.optString("slug"),
title = genreItem.optString("name"),
source = source
)
}.toSet()
} ?: emptySet()
val authors = item.optJSONArray("authors")?.let { authorsArray ->
(0 until authorsArray.length()).mapNotNull { aIdx ->
authorsArray.optJSONObject(aIdx)?.optString("name")
}.toSet()
} ?: emptySet()
Manga(
id = generateUid(href),
title = title,
altTitles = emptySet(),
url = href,
publicUrl = href.toAbsoluteUrl(domain),
id = item.getLong("id"),
title = item.getString("title"),
altTitles = setOfNotNull(
item.optString("official_name").takeIf { it.isNotBlank() }
),
url = item.getString("slug"),
publicUrl = item.getString("slug").toAbsoluteUrl(domain),
rating = RATING_UNKNOWN,
contentRating = ContentRating.ADULT,
coverUrl = coverUrl,
tags = emptySet(),
state = null,
authors = emptySet(),
coverUrl = "https://$cdnSuffix/uploads/${item.getString("thumbnail")}",
tags = genres,
state = when (item.optString("post_status")) {
"completed" -> MangaState.FINISHED
else -> MangaState.ONGOING
},
authors = authors,
source = source,
description = item.optString("content").orEmpty()
)
}
}
override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val rating = doc.selectFirst("div.kksr-stars")?.attr("data-rating")?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN
val description = doc.selectFirst("div.mt-3.desc-text")?.text()
val author = doc.select("div.attr-item").firstOrNull {
it.selectFirst("b")?.text() == "Tác giả:"
}?.selectFirst("a")?.text()
val tags = doc.select("ul.post-categories li a").mapNotNull { element ->
val name = element.text()
val key = element.attr("href").substringAfter("/category/")
MangaTag(
key = key,
title = name,
source = source,
)
}.toSet()
val chapters = doc.select("div.p-2.d-flex.flex-column.flex-md-row.item").reversed()
val fullUrl = "https://$domain/vi/" + manga.url + ".html"
val doc = webClient.httpGet(fullUrl).parseHtml()
return manga.copy(
chapters = doc.select("div.grid.grid-cols-1.md\\:grid-cols-2.gap-4 a.block")
.mapChapters(reversed = false) { i, e ->
val name = e.selectFirst("b")?.text() ?: ""
val name = e.selectFirst("span.truncate")?.text() ?: e.attr("title") ?: ""
val href = e.selectFirst("a")?.attrAsRelativeUrl("href") ?: ""
val dateText = e.selectFirst("i.ps-3")?.text()
val dateText = e.selectFirst("div.text-xs.text-gray-500")?.text()
MangaChapter(
id = generateUid(href),
title = name,
@ -172,21 +226,32 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
source = source,
)
}
return manga.copy(
rating = rating,
authors = setOfNotNull(author),
description = description,
chapters = chapters,
tags = tags,
contentRating = ContentRating.ADULT,
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
return doc.select("div#viewer p img").mapNotNull { img -> // Need debug
val url = img.attr("src") ?: return@mapNotNull null
val scriptContent = doc.select("script")
.firstOrNull { it.data().startsWith("self.__next_f.push([1,\"\\u003cp\\u003e\\u003c") }
?.data()
if (scriptContent != null) {
val regex = Regex("""self\.__next_f\.push\(\[1,\"(.*)\"\]\)""")
val htmlEncoded = regex.find(scriptContent)?.groupValues?.getOrNull(1)
if (!htmlEncoded.isNullOrEmpty()) {
val html = try {
JSONArray("[\"$htmlEncoded\"]").getString(0)
} catch (e: Exception) {
htmlEncoded
.replace("\\u003c", "<")
.replace("\\u003e", ">")
.replace("\\\"", "\"")
.replace("\\/", "/")
}
val imageUrls = Jsoup.parse(html).select("img").mapNotNull { it.attr("src") }
if (imageUrls.isNotEmpty()) {
return imageUrls.map { url ->
MangaPage(
id = generateUid(url),
url = url,
@ -194,45 +259,35 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
source = source,
)
}
} else return emptyList()
}
}
return emptyList()
}
private fun parseChapterDate(dateText: String?): Long {
if (dateText == null) return 0
val relativeTimePattern = Regex("(\\d+)\\s*(ngày|tuần|tháng|năm) trước")
val absoluteTimePattern = Regex("(\\d{2}-\\d{2}-\\d{4})")
private fun parseChapterDate(date: String?): Long {
if (date == null) return 0
return when {
dateText.contains("ngày trước") -> {
val match = relativeTimePattern.find(dateText)
val days = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - days * 86400 * 1000
}
date.contains("giây trước") -> System.currentTimeMillis() - date.removeSuffix(" giây trước").toLong() * 1000
date.contains("phút trước") -> System.currentTimeMillis() - date.removeSuffix(" phút trước")
.toLong() * 60 * 1000
dateText.contains("tuần trước") -> {
val match = relativeTimePattern.find(dateText)
val weeks = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - weeks * 7 * 86400 * 1000
}
date.contains("giờ trước") -> System.currentTimeMillis() - date.removeSuffix(" giờ trước")
.toLong() * 60 * 60 * 1000
dateText.contains("tháng trước") -> {
val match = relativeTimePattern.find(dateText)
val months = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - months * 30 * 86400 * 1000
}
date.contains("ngày trước") -> System.currentTimeMillis() - date.removeSuffix(" ngày trước")
.toLong() * 24 * 60 * 60 * 1000
dateText.contains("năm trước") -> {
val match = relativeTimePattern.find(dateText)
val years = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - years * 365 * 86400 * 1000
}
date.contains("tuần trước") -> System.currentTimeMillis() - date.removeSuffix(" tuần trước")
.toLong() * 7 * 24 * 60 * 60 * 1000
absoluteTimePattern.matches(dateText) -> {
val formatter = SimpleDateFormat("dd-MM-yyyy", Locale.getDefault())
formatter.tryParse(dateText)
}
date.contains("tháng trước") -> System.currentTimeMillis() - date.removeSuffix(" tháng trước")
.toLong() * 30 * 24 * 60 * 60 * 1000
date.contains("năm trước") -> System.currentTimeMillis() - date.removeSuffix(" năm trước")
.toLong() * 365 * 24 * 60 * 60 * 1000
else -> 0L
else -> SimpleDateFormat("dd/MM/yyyy", Locale.US).parse(date)?.time ?: 0L
}
}
}

Loading…
Cancel
Save