[Nhentai World + MeHentaiVN] Fixes (#1769)

* [HentaiRead] Fix lines

* [MeHentaiVN] Soft re-write due to source changed (#1604)

* [NHentaiWorld] Fixes fetchTags (due to source changed) and pass MangaParserTest:link (#1604)

* [MeHentaiVN] Small changes

* [MeHentaiVN] Update domain

---------

Solve task #1604

Co-authored-by: Draken <131387159+dragonx943@users.noreply.github.com>
master
Nam Huynh 12 months ago committed by GitHub
parent a65cbf4981
commit c9d32a804c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -227,7 +227,7 @@ internal class HentaiRead(context: MangaLoaderContext) :
override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val dateFormat = SimpleDateFormat("MMMM d, yyyy h:mm a", Locale.ENGLISH)
val title = buildString {
val mangaTitle = doc.selectFirst(selectTitle)?.text()?.cleanupTitle()
val parody = doc.selectFirst(selectParody)?.nextElementSibling()?.select("span:first-child")?.text()
@ -255,7 +255,8 @@ internal class HentaiRead(context: MangaLoaderContext) :
}
}
val uploadedDateString = doc.selectFirst(selectUploadedDate)?.nextElementSibling()?.text()
val dateFormat = SimpleDateFormat("MMMM d, yyyy h:mm a", Locale.ENGLISH)
val uploadDateString = doc.selectFirst(selectUploadedDate)?.nextElementSibling()?.text()
var tags = manga.tags
if (tags.count() == 0) {
@ -281,7 +282,7 @@ internal class HentaiRead(context: MangaLoaderContext) :
volume = 0,
url = manga.url,
scanlator = null,
uploadDate = dateFormat.tryParse(uploadedDateString),
uploadDate = dateFormat.tryParse(uploadDateString),
branch = "English",
source = source,
)

@ -1,8 +1,8 @@
package org.koitharu.kotatsu.parsers.site.vi
import okhttp3.Headers
import okio.ByteString.Companion.encode
import org.json.JSONArray
import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
@ -105,70 +105,71 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
}
override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val root = doc.selectFirst("div.flex-1.bg-neutral-900") ?: return manga
val chapterDateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT).apply {
timeZone = TimeZone.getTimeZone("GMT+7")
}
val tags = root.select("div.flex.flex-wrap.gap-2 button").mapNotNullToSet { button ->
val tagName = button.text().toTitleCase(sourceLocale)
val tagUrl = button.parent()?.attrOrNull("href")?.substringAfterLast('/')
if (tagUrl != null) {
MangaTag(title = tagName, key = tagUrl, source = source)
} else {
null
}
}
val state = when {
root.selectFirst("a[href*='status=completed']") != null -> MangaState.FINISHED
root.selectFirst("a[href*='status=progress']") != null -> MangaState.ONGOING
else -> null
}
val description = root.selectFirst("div#introduction-wrap p.font-light")?.html()?.nullIfEmpty()
val altTitles = description?.split("\n")?.mapNotNullToSet { line ->
when {
line.startsWith("Tên tiếng anh:", ignoreCase = true) ->
line.substringAfter(':').substringBefore("Tên gốc:").trim()
line.startsWith("Tên gốc:", ignoreCase = true) ->
line.substringAfter(':').trim().substringBefore(' ')
else -> null
}
}
val scriptTag = doc.select("script").firstOrNull { script ->
val data = script.data()
data.contains("data") && data.contains("chapterListEn")
}?.data()
val chapters = parseChapterList(scriptTag, manga, chapterDateFormat)
return manga.copy(
tags = tags,
state = state,
description = description,
altTitles = altTitles.orEmpty(),
chapters = chapters.reversed(),
)
}
private suspend fun parseChapterList(scriptTag: String?, manga: Manga, chapterDateFormat: SimpleDateFormat): List<MangaChapter> {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val root = doc.selectFirst("div.flex-1.bg-neutral-900") ?: return manga
val chapterDateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT).apply {
timeZone = TimeZone.getTimeZone("GMT+7")
}
val tags = root.select("div.flex.flex-wrap.gap-2 button").mapNotNullToSet { button ->
val tagName = button.text().toTitleCase(sourceLocale)
val tagUrl = button.parent()?.attrOrNull("href")?.substringAfterLast('/')
if (tagUrl != null) {
MangaTag(title = tagName, key = tagUrl, source = source)
} else {
null
}
}
val state = when {
root.selectFirst("a[href*='status=completed']") != null -> MangaState.FINISHED
root.selectFirst("a[href*='status=progress']") != null -> MangaState.ONGOING
else -> null
}
val description = root.selectFirst("div#introduction-wrap p.font-light")?.html()?.nullIfEmpty()
val altTitles = description?.split("\n")?.mapNotNullToSet { line ->
when {
line.startsWith("Tên tiếng anh:", ignoreCase = true) ->
line.substringAfter(':').substringBefore("Tên gốc:").trim()
line.startsWith("Tên gốc:", ignoreCase = true) ->
line.substringAfter(':').trim().substringBefore(' ')
else -> null
}
}
val scriptTag = doc.select("script").firstOrNull { script ->
val data = script.data()
data.contains("data") && data.contains("chapterListEn")
}?.data()
val chapters = parseChapterList(scriptTag, manga, chapterDateFormat)
return manga.copy(
title = doc.selectFirst("h1")!!.text(),
tags = tags,
state = state,
description = description,
altTitles = altTitles.orEmpty(),
chapters = chapters.reversed(),
)
}
private fun parseChapterList(scriptTag: String?, manga: Manga, chapterDateFormat: SimpleDateFormat): List<MangaChapter> {
val idManga = manga.url.substringAfter("detail/").toIntOrNull() ?: return emptyList()
val chapters = ArrayList<MangaChapter>()
if (scriptTag.isNullOrEmpty()) return chapters
val cleanedScript = scriptTag.replace("\\", "")
val cutScript = "null,{\"data\""
val needScript = cleanedScript.indexOf(cutScript)
if (needScript == -1) return chapters
val finalScript = cleanedScript.substring(needScript)
val vnPrefix = "null,{\"data\":"
val vnStart = finalScript.indexOf(vnPrefix)
if (vnStart == -1) return chapters
@ -176,13 +177,13 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
val vnEnd = finalScript.indexOf(beforeEn, vnStart)
if (vnEnd == -1) return chapters
val vnChapterStr = finalScript.substring(vnStart + vnPrefix.length, vnEnd)
val vnArray = try {
JSONArray(vnChapterStr)
} catch (e: Exception) {
JSONArray()
}
for (i in 0 until vnArray.length()) {
val chapter = vnArray.getJSONObject(i)
val name = chapter.optString("name", null) ?: continue
@ -212,13 +213,13 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
val enEnd = finalScript.indexOf(beforeId, enStart)
if (enEnd == -1) return chapters
val enChapterStr = finalScript.substring(enStart + enPrefix.length, enEnd)
val enArray = try {
JSONArray(enChapterStr)
} catch (e: Exception) {
JSONArray()
}
for (i in 0 until enArray.length()) {
val chapter = enArray.getJSONObject(i)
val name = chapter.optString("name", null) ?: continue
@ -239,7 +240,7 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
)
)
}
return chapters
}
@ -247,11 +248,11 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
val url = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(url).parseHtml()
val root = doc.select("img.m-auto.read-image.w-auto.h-auto.md\\:min-h-\\[800px\\].min-h-\\[300px\\]")
if (root.isEmpty()) { // for Debug #1604
throw ParseException("Root not found!", url)
}
return root.map { img ->
val imgUrl = img.requireSrc()
MangaPage(
@ -264,40 +265,30 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
}
private suspend fun fetchTags(): Set<MangaTag> {
val doc = webClient.httpGet(
urlBuilder()
.addPathSegment("genre")
.addPathSegment("all")
.build(),
).parseHtml()
val scriptTag = doc.select("script").firstOrNull { script ->
val data = script.data()
data.contains("buildId") && data.contains("options")
}?.data() ?: return emptySet()
val cleanedScript = scriptTag.replace("\\", "")
val optionsPrefix = "\"options\":"
val optionsStart = cleanedScript.indexOf(optionsPrefix)
val doc = webClient.httpGet("https://$domain").parseHtml()
val scriptSrc = doc.select("script")[7].src()!!
val docJS = webClient.httpGet(scriptSrc).parseRaw()
val optionsStart = docJS.indexOf("genres:[{")
if (optionsStart == -1) return emptySet()
val optionsEnd = cleanedScript.indexOf("\"zombie\"}]", optionsStart) + "\"zombie\"}]".length
val optionsEnd = docJS.indexOf("}]", optionsStart)
if (optionsEnd == -1) return emptySet()
val optionsStr = cleanedScript.substring(optionsStart + optionsPrefix.length, optionsEnd)
val optionsArray = try {
JSONArray(optionsStr)
} catch (e: Exception) {
return emptySet()
}
val optionsStr = docJS.substring(optionsStart + 7, optionsEnd + 2)
val optionsArray = JSONArray(
optionsStr
.replace(Regex(",description:\\s*\"[^\"]*\"(,?)"), "")
.replace(Regex("(\\w+):"), "\"$1\":")
)
return buildSet {
for (i in 0 until optionsArray.length()) {
// {"label":"Ahegao","href":"/genre/ahegao"}
val option = optionsArray.getJSONObject(i)
val title = option.getStringOrNull("label")?.toTitleCase(sourceLocale) ?: continue
val key = option.getStringOrNull("value") ?: continue
val title = option.getStringOrNull("label")!!.toTitleCase(sourceLocale)
val key = option.getStringOrNull("href")!!.split("/")[2]
if (title.isNotEmpty() && key.isNotEmpty()) {
if (title != "Tất cả" || key != "all") { // remove "All" tags, default list = all
add(MangaTag(title = title, key = key, source = source))
@ -306,4 +297,4 @@ internal class NhentaiWorld(context: MangaLoaderContext) :
}
}
}
}
}

@ -13,25 +13,30 @@ import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
import org.koitharu.kotatsu.parsers.exception.NotFoundException
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import java.lang.NullPointerException
import java.net.URL
import java.util.*
@MangaSourceParser("MEHENTAIVN", "MeHentaiVN", "vi", ContentType.HENTAI)
internal class MeHentaiVN(context: MangaLoaderContext) :
WpComicsParser(context, MangaParserSource.MEHENTAIVN, "www.mehentaivn.xyz", 44) {
override val configKeyDomain: ConfigKey.Domain = ConfigKey.Domain("www.mehentaivn.xyz", "www.hentaivnx.autos")
override val userAgentKey = ConfigKey.UserAgent(UserAgents.CHROME_DESKTOP)
/**
 * Lets the superclass populate [keys] first, then appends [userAgentKey] to the same collection.
 */
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
    super.onCreateConfig(keys)
    keys += userAgentKey
}
override val configKeyDomain: ConfigKey.Domain = ConfigKey.Domain(
"www.mehentaivn.xyz",
"www.hentaivnx.autos",
"www.hentaivnx.com"
)
override fun getRequestHeaders() = super.getRequestHeaders().newBuilder()
.add("referer", "no-referrer")
.add("referer", "https://$domain/")
.build()
/**
 * The superclass's filter capabilities, copied with multiple-tag selection
 * and tag exclusion switched on.
 */
override val filterCapabilities: MangaListFilterCapabilities
    get() {
        val inherited = super.filterCapabilities
        return inherited.copy(
            isTagsExclusionSupported = true,
            isMultipleTagsSupported = true,
        )
    }
override suspend fun getFilterOptions() = MangaListFilterOptions(
availableTags = fetchTags(),
availableStates = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED),
@ -40,6 +45,7 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val response =
when {
// url template: https://www.mehentaivn.xyz/tim-truyen?keyword=${query}
!filter.query.isNullOrEmpty() -> {
val url = buildString {
append("https://")
@ -47,11 +53,12 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
append(listUrl)
append("?keyword=")
append(filter.query.urlEncoded())
append("&page=")
append(page.toString())
if (page > 1) {
append("&page=$page")
}
}
val result = runCatchingCancellable { webClient.httpGet(url) }
val result = runCatchingCancellable { webClient.httpGet(url) } // execute
val exception = result.exceptionOrNull()
if (exception is NotFoundException) {
return emptyList()
@ -59,42 +66,54 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
result.getOrThrow()
}
// url template: https://www.mehentaivn.xyz/tim-truyen-nang-cao?{query}
// Query structure:
// genres=19775801,1& /* tags to include */
// notgenres=19776383,19777327& /* tags to exclude */
// minchapter=0& /* chapter count; leave at 0 to get everything */
// sort=15& /* sort order */
// contain= /* not supported */
else -> {
val url = buildString {
append("https://")
append(domain)
append(listUrl)
if (filter.tags.isNotEmpty()) {
append('/')
filter.tags.oneOrThrowIfMany()?.let {
append(it.key)
}
}
append("?sort=")
append(
val queries = mutableListOf<String>()
// tags
queries.add("genres=${filter.tags.joinToString (",") { it.key }}")
// tags exclude
queries.add("notgenres=${filter.tagsExclude.joinToString (",") { it.key }}")
if (filter.tags.isNotEmpty() or filter.tagsExclude.isNotEmpty()) {
// This means our query is not empty!
val url = buildString {
append("http://$domain/tim-truyen-nang-cao?")
append(queries.joinToString("&"))
// order
when (order) {
SortOrder.UPDATED -> 0
SortOrder.POPULARITY -> 10
SortOrder.NEWEST -> 15
SortOrder.RATING -> 20
SortOrder.NEWEST -> append("&sort=15") // Truyện mới
SortOrder.POPULARITY -> append("&sort=10") // Top all
SortOrder.UPDATED -> append("&sort=0") // Truyện mới
SortOrder.RATING -> append("&sort=20") // Theo dõi
else -> throw IllegalArgumentException("Sort order ${order.name} not supported")
},
)
filter.states.oneOrThrowIfMany()?.let {
append("&status=")
append(
when (it) {
MangaState.ONGOING -> "1"
MangaState.FINISHED -> "2"
else -> "-1"
},
)
}
if (page > 1) {
append("&page=$page")
}
}
append("&page=")
append(page.toString())
}
webClient.httpGet(url)
webClient.httpGet(url) // execute
} else {
val url = buildString {
append("https://$domain/")
if (page > 1) {
append("?page=$page")
}
}
webClient.httpGet(url)
}
}
}
@ -102,11 +121,11 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
return parseSearchList(response.parseHtml(), tagMap)
}
private suspend fun parseSearchList(doc: Document, tagMap: ArrayMap<String, MangaTag>): List<Manga> {
private fun parseSearchList(doc: Document, tagMap: ArrayMap<String, MangaTag>): List<Manga> {
return doc.select("div.items div.item").mapNotNull { item ->
val tooltipElement = item.selectFirst("div.box_tootip")
val absUrl = item.selectFirst("div.image > a")?.attrAsAbsoluteUrlOrNull("href") ?: return@mapNotNull null
val slug = absUrl.substringAfterLast('/')
val url = absUrl.toRelativeUrl(domain)
val mangaState =
when (tooltipElement?.selectFirst("div.message_main > p:contains(Tình trạng)")?.ownText()) {
in ongoing -> MangaState.ONGOING
@ -117,13 +136,12 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
tooltipElement?.selectFirst("div.message_main > p:contains(Thể loại)")?.ownText().orEmpty()
val mangaTags = tagsElement.split(',').mapNotNullToSet { tagMap[it.trim()] }
val author = tooltipElement?.selectFirst("div.message_main > p:contains(Tác giả)")?.ownText()
val coverUrl = item.selectFirst("div.image a img")?.requireSrc()
val largeCoverUrl = null
val coverUrl = checkImgUrl(item.selectFirst("div.image a img")?.requireSrc())
Manga(
id = generateUid(slug),
id = generateUid(url),
title = item.selectFirst("div.box_tootip div.title, h3 a")?.text().orEmpty(),
altTitles = emptySet(),
url = absUrl.toRelativeUrl(domain),
url = url,
publicUrl = absUrl,
rating = RATING_UNKNOWN,
contentRating = null,
@ -157,7 +175,8 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
val author = doc.body().selectFirst(selectAut)?.textOrNull()
manga.copy(
description = doc.selectFirst(selectDesc)?.html(),
title = doc.select("h1.title-detail").text(),
description = "", // no more description for manga on this source
altTitles = setOfNotNull(doc.selectFirst("h2.other-name")?.textOrNull()),
authors = setOfNotNull(author),
state = doc.selectFirst(selectState)?.let {
@ -175,24 +194,9 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml()
val imageUrls = doc.select("div.page-chapter").flatMap { div ->
div.select("img").mapNotNull { img ->
val src = img.attr("src").takeIf { it.isNotEmpty() }
val dataSrc = img.attr("data-src").takeIf { it.isNotEmpty() }
val imageUrl = src ?: dataSrc
if (imageUrl != null && checkMangaImgs(imageUrl)) {
imageUrl
} else {
null
}
}
}
return imageUrls.map { url ->
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
return doc.select(".page-chapter img").map {
val url = checkImgUrl(it.requireSrc())
MangaPage(
id = generateUid(url),
url = url,
@ -202,14 +206,14 @@ internal class MeHentaiVN(context: MangaLoaderContext) :
}
}
private suspend fun checkMangaImgs(url: String): Boolean {
return try {
val response = webClient.httpHead(url)
val contentType = response.header("Content-Type") ?: ""
contentType.startsWith("image/")
} catch (e: Exception) {
false
}
/**
 * Normalizes an image URL scraped from the page.
 *
 * When the URL's host contains `duckduckgo.com` (presumably an image-proxy
 * link that carries the real address after `?u=` — confirm against the live
 * site), the text following the first `?u=` marker is returned. Every other
 * URL is returned unchanged.
 *
 * Unlike a plain `split("?u=")[1]`, this never throws when the marker is
 * missing, and a value that [URL] cannot parse (relative or protocol-less
 * `src`) is passed through instead of aborting the whole parse.
 *
 * @param url the scraped image URL; may be null or empty
 * @return an empty string for null/empty input, the part after `?u=` for
 * matching proxy URLs, otherwise [url] itself
 */
private fun checkImgUrl(url: String?): String {
    if (url.isNullOrEmpty()) return ""
    val host = try {
        URL(url).host.orEmpty()
    } catch (e: java.net.MalformedURLException) {
        // Not an absolute URL, so it cannot be a proxy link; keep it untouched.
        return url
    }
    // Need updating frequently
    if (!host.contains("duckduckgo.com")) return url
    // Falls back to the full URL when the "?u=" marker is absent.
    return url.substringAfter("?u=", missingDelimiterValue = url)
}
private suspend fun fetchTags(): Set<MangaTag> {

Loading…
Cancel
Save