[KuroNeko + TruyenHentai18] Fixes (#1879)

Solved #1683 #1604
master
Draken 11 months ago committed by GitHub
parent 0e946cf84c
commit a78afba15e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -29,6 +29,9 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isSearchSupported = true, isSearchSupported = true,
isMultipleTagsSupported = true,
isTagsExclusionSupported = true,
isSearchWithFiltersSupported = true,
) )
override suspend fun getFilterOptions() = MangaListFilterOptions( override suspend fun getFilterOptions() = MangaListFilterOptions(
@ -41,81 +44,48 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
append("https://") append("https://")
append(domain) append(domain)
when { append("/tim-kiem")
append("?sort=")
!filter.query.isNullOrEmpty() -> { append(
append("/tim-kiem") when (order) {
append("?filter[name]=") SortOrder.POPULARITY -> "-views"
append(filter.query.urlEncoded()) SortOrder.UPDATED -> "-updated_at"
SortOrder.NEWEST -> "-created_at"
if (page > 1) { SortOrder.ALPHABETICAL -> "name"
append("&page=") SortOrder.ALPHABETICAL_DESC -> "-name"
append(page) else -> "-updated_at"
} },
)
append("&sort=")
append( if (!filter.query.isNullOrEmpty()) {
when (order) { append("&keyword=")
SortOrder.POPULARITY -> "-views" append(filter.query.urlEncoded())
SortOrder.UPDATED -> "-updated_at"
SortOrder.NEWEST -> "-created_at"
SortOrder.ALPHABETICAL -> "name"
SortOrder.ALPHABETICAL_DESC -> "-name"
else -> "-updated_at"
},
)
}
filter.tags.isNotEmpty() -> {
val tag = filter.tags.first()
append("/the-loai/")
append(tag.key)
append("?page=")
append(page)
}
else -> {
append("/danh-sach")
append("?sort=")
append(
when (order) {
SortOrder.POPULARITY -> "-views"
SortOrder.UPDATED -> "-updated_at"
SortOrder.NEWEST -> "-created_at"
SortOrder.ALPHABETICAL -> "name"
SortOrder.ALPHABETICAL_DESC -> "-name"
else -> "-updated_at"
},
)
append("&page=")
append(page)
}
} }
if (filter.query.isNullOrEmpty()) { if (page > 1) {
append("&sort=") append("&page=")
when (order) { append(page)
SortOrder.POPULARITY -> append("-views")
SortOrder.UPDATED -> append("-updated_at")
SortOrder.NEWEST -> append("-created_at")
SortOrder.ALPHABETICAL -> append("name")
SortOrder.ALPHABETICAL_DESC -> append("-name")
else -> append("-updated_at")
}
} }
if (filter.states.isNotEmpty()) { append("&filter[status]=")
append("&filter[status]=") filter.states.forEach {
filter.states.forEach { append(
append( when (it) {
when (it) { MangaState.ONGOING -> "2,"
MangaState.ONGOING -> "2," MangaState.FINISHED -> "1,"
MangaState.FINISHED -> "1," else -> "2,1"
else -> "1,2" },
}, )
) }
}
if (filter.tags.isNotEmpty()) {
append("&filter[accept_genres]=")
filter.tags.joinTo(this, separator = ",") { it.key }
}
if (filter.tagsExclude.isNotEmpty()) {
append("&filter[reject_genres]=")
filter.tagsExclude.joinTo(this, separator = ",") { it.key }
} }
} }
@ -200,6 +170,17 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
} }
} }
/**
 * Loads the genre list from the site's home page.
 *
 * Every anchor matched by `ul.grid.grid-cols-2 a` becomes one [MangaTag]:
 * the title is the anchor text and the key is the anchor's 1-based position
 * in match order, rendered as a string.
 *
 * NOTE(review): presumably the site's numeric genre ids follow the same order
 * as the links on the home page — confirm against the site.
 */
private suspend fun availableTags(): Set<MangaTag> {
	val homePage = webClient.httpGet("https://$domain").parseHtml()
	val genreLinks = homePage.select("ul.grid.grid-cols-2 a")
	val collected = LinkedHashSet<MangaTag>(genreLinks.size)
	for ((position, link) in genreLinks.withIndex()) {
		// Keys are 1-based, hence the +1 on the zero-based position.
		collected += MangaTag(
			key = (position + 1).toString(),
			title = link.text(),
			source = source,
		)
	}
	return collected
}
private fun parseDateTime(dateStr: String): Long = runCatching { private fun parseDateTime(dateStr: String): Long = runCatching {
val parts = dateStr.split(' ') val parts = dateStr.split(' ')
val dateParts = parts[0].split('-') val dateParts = parts[0].split('-')
@ -216,15 +197,4 @@ internal class KuroNeko(context: MangaLoaderContext) : LegacyPagedMangaParser(co
) )
calendar.timeInMillis calendar.timeInMillis
}.getOrDefault(0L) }.getOrDefault(0L)
/**
 * Loads the genre list from the site's home page.
 *
 * Every anchor matched by `ul.grid.grid-cols-2 a` becomes one [MangaTag]:
 * the title is the anchor text and the key is the last path segment of the
 * anchor's `href`, taken after any trailing `/` has been removed.
 */
private suspend fun availableTags(): Set<MangaTag> {
	val homePage = webClient.httpGet("https://$domain").parseHtml()
	val genreLinks = homePage.select("ul.grid.grid-cols-2 a")
	return genreLinks.mapToSet { link ->
		// Strip the trailing slash first so the slug is not an empty string.
		val slug = link.attr("href").removeSuffix('/').substringAfterLast('/')
		MangaTag(key = slug, title = link.text(), source = source)
	}
}
} }

@ -1,22 +1,28 @@
package org.koitharu.kotatsu.parsers.site.vi package org.koitharu.kotatsu.parsers.site.vi
import org.json.JSONArray
import org.json.JSONObject
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.Jsoup
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.*
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.* import java.util.*
import org.koitharu.kotatsu.parsers.Broken
@Broken("Need to remake parser")
@MangaSourceParser("TRUYENHENTAI18", "TruyenHentai18", "vi", ContentType.HENTAI) @MangaSourceParser("TRUYENHENTAI18", "TruyenHentai18", "vi", ContentType.HENTAI)
internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) { internal class TruyenHentai18(context: MangaLoaderContext):
LegacyPagedMangaParser(context, MangaParserSource.TRUYENHENTAI18, 18) {
override val configKeyDomain = ConfigKey.Domain("truyenhentai18.app") override val configKeyDomain = ConfigKey.Domain("truyenhentai18.app")
private val apiSuffix = "api.th18.app"
private val cdnSuffix = "vi-api.th18.app"
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) { override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys) super.onCreateConfig(keys)
keys.add(userAgentKey) keys.add(userAgentKey)
@ -24,14 +30,13 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
override val availableSortOrders: Set<SortOrder> = EnumSet.of( override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED, SortOrder.UPDATED,
SortOrder.POPULARITY, SortOrder.NEWEST,
SortOrder.RATING, SortOrder.NEWEST_ASC,
) )
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isSearchSupported = true, isSearchSupported = true,
isSearchWithFiltersSupported = false,
) )
override suspend fun getFilterOptions() = MangaListFilterOptions( override suspend fun getFilterOptions() = MangaListFilterOptions(
@ -40,199 +45,249 @@ internal class TruyenHentai18(context: MangaLoaderContext) : LegacyPagedMangaPar
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val url = when { val url = when {
!filter.query.isNullOrEmpty() -> { filter.tags.isNotEmpty() -> {
buildString { buildString {
append(domain) append(domain)
append("/vi/the-loai/")
append(filter.tags.first().key)
append("/page/") append("/page/")
append(page) append(page)
append("?s=")
append(filter.query.urlEncoded())
}
}
!filter.author.isNullOrEmpty() -> {
buildString {
append(domain)
append("/artist/")
append(filter.author.urlEncoded())
} }
} }
else -> { else -> {
buildString { buildString {
append(domain) append(apiSuffix + "/posts")
if (filter.tags.isNotEmpty()) { append("?language=vi")
append("/category/")
append(filter.tags.first().key) append("&order=")
} else { append(
append( when (order) {
when (order) { SortOrder.UPDATED -> "latest"
SortOrder.UPDATED -> "/moi-cap-nhat" SortOrder.NEWEST -> "newest"
SortOrder.POPULARITY -> "/xem-nhieu-nhat" SortOrder.NEWEST_ASC -> "oldest"
SortOrder.RATING -> "/truyen-de-xuat" else -> "latest" // default
else -> "/moi-cap-nhat" }
} )
)
} append("&limit=24")
if (page > 1) { append("&page=")
append("/page/") append(page)
append(page)
if (!filter.query.isNullOrEmpty()) {
append("&query=${filter.query}")
} }
} }
} }
} }
val doc = webClient.httpGet("https://$url").parseHtml() val fullUrl = "https://" + url
return when { return when {
!filter.query.isNullOrEmpty() -> parseSearchList(doc) filter.tags.isNotEmpty() -> parseNextList(webClient.httpGet(fullUrl).parseHtml())
!filter.author.isNullOrEmpty() -> parseSearchList(doc) else -> {
else -> parseMangaList(doc) val doc = webClient.httpGet(fullUrl).parseJson()
parseJSONList(doc)
}
} }
} }
private fun parseMangaList(doc: Document): List<Manga> { private fun parseJSONList(json: JSONObject): List<Manga> {
return doc.select("a.item-cover.ms-3.me-3").mapNotNull { element -> return json.getJSONArray("data").mapJSON { mangaItem ->
val href = element.attrAsRelativeUrl("href") ?: return@mapNotNull null
val img = element.selectFirst("img") ?: return@mapNotNull null
val coverUrl = img.attr("data-src").orEmpty()
val title = img.attr("alt").orEmpty()
Manga( Manga(
id = generateUid(href), id = mangaItem.getLong("id"),
title = title, title = mangaItem.getString("title"),
altTitles = emptySet(), altTitles = setOfNotNull(
url = href, mangaItem.optString("official_name").takeIf { !it.isNullOrBlank() }
publicUrl = href.toAbsoluteUrl(domain), ),
url = mangaItem.getString("slug"),
publicUrl = mangaItem.getString("slug").toAbsoluteUrl(domain),
rating = RATING_UNKNOWN, rating = RATING_UNKNOWN,
contentRating = ContentRating.ADULT, contentRating = ContentRating.ADULT,
coverUrl = coverUrl, coverUrl = "https://$cdnSuffix/uploads/${mangaItem.getString("thumbnail")}",
tags = emptySet(), tags = mangaItem.optJSONArray("genres")?.mapJSON { genreItem ->
state = null, MangaTag(
authors = emptySet(), key = genreItem.getString("slug"),
title = genreItem.getString("name"),
source = source
)
}?.toSet() ?: emptySet(),
state = when (mangaItem.optString("post_status")) {
"completed" -> MangaState.FINISHED
else -> MangaState.ONGOING
},
authors = mangaItem.optJSONArray("authors")?.mapJSON { authorItem ->
authorItem.optString("name")
}?.filterNotNull()?.toSet() ?: emptySet(),
source = source, source = source,
description = mangaItem.optString("content").orEmpty(),
) )
} }
} }
private fun parseSearchList(doc: Document): List<Manga> { private fun parseNextList(doc: Document): List<Manga> { // need to clean code, very slow response
return doc.select("div.card.mb-3.small-item").mapNotNull { element -> val script = doc.select("script").firstOrNull { it.data().contains("response") }
val href = element.selectFirst("a")?.attrAsRelativeUrl("href") ?: return@mapNotNull null ?: throw Exception("Không tìm thấy script chứa dữ liệu manga")
val img = element.selectFirst("img") ?: return@mapNotNull null
val coverUrl = img.attr("data-src").orEmpty() val scriptContent = script.data()
val title = img.attr("alt").orEmpty() val cleanedScript = scriptContent
.replace("self.__next_f.push([1,", "")
.replace("\"5:", "")
.replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":\"$1a\"}}],", "")
.replace("[[\"$\",\"script\",null,{\"type\":\"application/ld+json\",\"dangerouslySetInnerHTML\":{\"__html\":", "")
.replace("\\\\\",", ",")
.replace("\\\"", "\"")
.replace("\\\\", "\\")
.replace("\\n", "")
.replace("\\t", "")
.replace("\\r", "")
val responseStart = cleanedScript.indexOf("{\"response\":")
if (responseStart == -1) throw Exception("Không tìm thấy object 'response' trong script")
var bracketCount = 0
var i = responseStart
var jsonStr = ""
while (i < cleanedScript.length) {
val c = cleanedScript[i]
when (c) {
'{' -> bracketCount++
'}' -> bracketCount--
}
jsonStr += c
if (bracketCount == 0 && jsonStr.isNotEmpty()) break
i++
}
val responseObj = org.json.JSONObject(jsonStr)
val dataArray = responseObj.getJSONObject("response").optJSONArray("data")
?: throw Exception("Không tìm thấy trường 'data' trong object 'response'")
return (0 until dataArray.length()).map { idx ->
val item = dataArray.getJSONObject(idx)
val genres = item.optJSONArray("genres")?.let { genresArray ->
(0 until genresArray.length()).mapNotNull { gIdx ->
val genreItem = genresArray.optJSONObject(gIdx) ?: return@mapNotNull null
MangaTag(
key = genreItem.optString("slug"),
title = genreItem.optString("name"),
source = source
)
}.toSet()
} ?: emptySet()
val authors = item.optJSONArray("authors")?.let { authorsArray ->
(0 until authorsArray.length()).mapNotNull { aIdx ->
authorsArray.optJSONObject(aIdx)?.optString("name")
}.toSet()
} ?: emptySet()
Manga( Manga(
id = generateUid(href), id = item.getLong("id"),
title = title, title = item.getString("title"),
altTitles = emptySet(), altTitles = setOfNotNull(
url = href, item.optString("official_name").takeIf { it.isNotBlank() }
publicUrl = href.toAbsoluteUrl(domain), ),
url = item.getString("slug"),
publicUrl = item.getString("slug").toAbsoluteUrl(domain),
rating = RATING_UNKNOWN, rating = RATING_UNKNOWN,
contentRating = ContentRating.ADULT, contentRating = ContentRating.ADULT,
coverUrl = coverUrl, coverUrl = "https://$cdnSuffix/uploads/${item.getString("thumbnail")}",
tags = emptySet(), tags = genres,
state = null, state = when (item.optString("post_status")) {
authors = emptySet(), "completed" -> MangaState.FINISHED
else -> MangaState.ONGOING
},
authors = authors,
source = source, source = source,
description = item.optString("content").orEmpty()
) )
} }
} }
override suspend fun getDetails(manga: Manga): Manga { override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() val fullUrl = "https://$domain/vi/" + manga.url + ".html"
val rating = doc.selectFirst("div.kksr-stars")?.attr("data-rating")?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN val doc = webClient.httpGet(fullUrl).parseHtml()
val description = doc.selectFirst("div.mt-3.desc-text")?.text()
val author = doc.select("div.attr-item").firstOrNull {
it.selectFirst("b")?.text() == "Tác giả:"
}?.selectFirst("a")?.text()
val tags = doc.select("ul.post-categories li a").mapNotNull { element ->
val name = element.text()
val key = element.attr("href").substringAfter("/category/")
MangaTag(
key = key,
title = name,
source = source,
)
}.toSet()
val chapters = doc.select("div.p-2.d-flex.flex-column.flex-md-row.item").reversed()
.mapChapters(reversed = false) { i, e ->
val name = e.selectFirst("b")?.text() ?: ""
val href = e.selectFirst("a")?.attrAsRelativeUrl("href") ?: ""
val dateText = e.selectFirst("i.ps-3")?.text()
MangaChapter(
id = generateUid(href),
title = name,
url = href,
number = i + 1f,
volume = 0,
uploadDate = parseChapterDate(dateText),
scanlator = null,
branch = null,
source = source,
)
}
return manga.copy( return manga.copy(
rating = rating, chapters = doc.select("div.grid.grid-cols-1.md\\:grid-cols-2.gap-4 a.block")
authors = setOfNotNull(author), .mapChapters(reversed = false) { i, e ->
description = description, val name = e.selectFirst("span.truncate")?.text() ?: e.attr("title") ?: ""
chapters = chapters, val href = e.selectFirst("a")?.attrAsRelativeUrl("href") ?: ""
tags = tags, val dateText = e.selectFirst("div.text-xs.text-gray-500")?.text()
contentRating = ContentRating.ADULT, MangaChapter(
) id = generateUid(href),
} title = name,
url = href,
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { number = i + 1f,
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() volume = 0,
return doc.select("div#viewer p img").mapNotNull { img -> // Need debug uploadDate = parseChapterDate(dateText),
val url = img.attr("src") ?: return@mapNotNull null scanlator = null,
MangaPage( branch = null,
id = generateUid(url), source = source,
url = url, )
preview = null, }
source = source,
) )
}
} }
private fun parseChapterDate(dateText: String?): Long { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
if (dateText == null) return 0 val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
val scriptContent = doc.select("script")
val relativeTimePattern = Regex("(\\d+)\\s*(ngày|tuần|tháng|năm) trước") .firstOrNull { it.data().startsWith("self.__next_f.push([1,\"\\u003cp\\u003e\\u003c") }
val absoluteTimePattern = Regex("(\\d{2}-\\d{2}-\\d{4})") ?.data()
if (scriptContent != null) {
val regex = Regex("""self\.__next_f\.push\(\[1,\"(.*)\"\]\)""")
val htmlEncoded = regex.find(scriptContent)?.groupValues?.getOrNull(1)
if (!htmlEncoded.isNullOrEmpty()) {
val html = try {
JSONArray("[\"$htmlEncoded\"]").getString(0)
} catch (e: Exception) {
htmlEncoded
.replace("\\u003c", "<")
.replace("\\u003e", ">")
.replace("\\\"", "\"")
.replace("\\/", "/")
}
val imageUrls = Jsoup.parse(html).select("img").mapNotNull { it.attr("src") }
if (imageUrls.isNotEmpty()) {
return imageUrls.map { url ->
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
} else return emptyList()
}
}
return emptyList()
}
private fun parseChapterDate(date: String?): Long {
if (date == null) return 0
return when { return when {
dateText.contains("ngày trước") -> { date.contains("giây trước") -> System.currentTimeMillis() - date.removeSuffix(" giây trước").toLong() * 1000
val match = relativeTimePattern.find(dateText) date.contains("phút trước") -> System.currentTimeMillis() - date.removeSuffix(" phút trước")
val days = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0 .toLong() * 60 * 1000
System.currentTimeMillis() - days * 86400 * 1000
}
dateText.contains("tuần trước") -> { date.contains("giờ trước") -> System.currentTimeMillis() - date.removeSuffix(" giờ trước")
val match = relativeTimePattern.find(dateText) .toLong() * 60 * 60 * 1000
val weeks = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - weeks * 7 * 86400 * 1000
}
dateText.contains("tháng trước") -> { date.contains("ngày trước") -> System.currentTimeMillis() - date.removeSuffix(" ngày trước")
val match = relativeTimePattern.find(dateText) .toLong() * 24 * 60 * 60 * 1000
val months = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - months * 30 * 86400 * 1000
}
dateText.contains("năm trước") -> { date.contains("tuần trước") -> System.currentTimeMillis() - date.removeSuffix(" tuần trước")
val match = relativeTimePattern.find(dateText) .toLong() * 7 * 24 * 60 * 60 * 1000
val years = match?.groups?.get(1)?.value?.toIntOrNull() ?: 0
System.currentTimeMillis() - years * 365 * 86400 * 1000
}
absoluteTimePattern.matches(dateText) -> { date.contains("tháng trước") -> System.currentTimeMillis() - date.removeSuffix(" tháng trước")
val formatter = SimpleDateFormat("dd-MM-yyyy", Locale.getDefault()) .toLong() * 30 * 24 * 60 * 60 * 1000
formatter.tryParse(dateText)
} date.contains("năm trước") -> System.currentTimeMillis() - date.removeSuffix(" năm trước")
.toLong() * 365 * 24 * 60 * 60 * 1000
else -> 0L else -> SimpleDateFormat("dd/MM/yyyy", Locale.US).parse(date)?.time ?: 0L
} }
} }
} }

Loading…
Cancel
Save