diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/HentaiRead.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/HentaiRead.kt index 30d794b9..bd524ee5 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/HentaiRead.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/HentaiRead.kt @@ -227,7 +227,7 @@ internal class HentaiRead(context: MangaLoaderContext) : override suspend fun getDetails(manga: Manga): Manga { val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - val dateFormat = SimpleDateFormat("MMMM d, yyyy h:mm a", Locale.ENGLISH) + val title = buildString { val mangaTitle = doc.selectFirst(selectTitle)?.text()?.cleanupTitle() val parody = doc.selectFirst(selectParody)?.nextElementSibling()?.select("span:first-child")?.text() @@ -255,7 +255,8 @@ internal class HentaiRead(context: MangaLoaderContext) : } } - val uploadedDateString = doc.selectFirst(selectUploadedDate)?.nextElementSibling()?.text() + val dateFormat = SimpleDateFormat("MMMM d, yyyy h:mm a", Locale.ENGLISH) + val uploadDateString = doc.selectFirst(selectUploadedDate)?.nextElementSibling()?.text() var tags = manga.tags if (tags.count() == 0) { @@ -281,7 +282,7 @@ internal class HentaiRead(context: MangaLoaderContext) : volume = 0, url = manga.url, scanlator = null, - uploadDate = dateFormat.tryParse(uploadedDateString), + uploadDate = dateFormat.tryParse(uploadDateString), branch = "English", source = source, ) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/NhentaiWorld.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/NhentaiWorld.kt index 6aa226fd..b4e5c698 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/NhentaiWorld.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/NhentaiWorld.kt @@ -1,8 +1,8 @@ package org.koitharu.kotatsu.parsers.site.vi import okhttp3.Headers +import okio.ByteString.Companion.encode import org.json.JSONArray -import org.json.JSONObject import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey @@ -105,70 +105,71 @@ internal class NhentaiWorld(context: MangaLoaderContext) : } override suspend fun getDetails(manga: Manga): Manga { - val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - val root = doc.selectFirst("div.flex-1.bg-neutral-900") ?: return manga - val chapterDateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT).apply { - timeZone = TimeZone.getTimeZone("GMT+7") - } - - val tags = root.select("div.flex.flex-wrap.gap-2 button").mapNotNullToSet { button -> - val tagName = button.text().toTitleCase(sourceLocale) - val tagUrl = button.parent()?.attrOrNull("href")?.substringAfterLast('/') - if (tagUrl != null) { - MangaTag(title = tagName, key = tagUrl, source = source) - } else { - null - } - } - - val state = when { - root.selectFirst("a[href*='status=completed']") != null -> MangaState.FINISHED - root.selectFirst("a[href*='status=progress']") != null -> MangaState.ONGOING - else -> null - } - - val description = root.selectFirst("div#introduction-wrap p.font-light")?.html()?.nullIfEmpty() - - val altTitles = description?.split("\n")?.mapNotNullToSet { line -> - when { - line.startsWith("Tên tiếng anh:", ignoreCase = true) -> - line.substringAfter(':').substringBefore("Tên gốc:").trim() - - line.startsWith("Tên gốc:", ignoreCase = true) -> - line.substringAfter(':').trim().substringBefore(' ') - - else -> null - } - } - - val scriptTag = doc.select("script").firstOrNull { script -> - val data = script.data() - data.contains("data") && data.contains("chapterListEn") - }?.data() - val chapters = parseChapterList(scriptTag, manga, chapterDateFormat) - - return manga.copy( - tags = tags, - state = state, - description = description, - altTitles = altTitles.orEmpty(), - chapters = chapters.reversed(), - ) - } - - private suspend fun parseChapterList(scriptTag: String?, manga: Manga, chapterDateFormat: SimpleDateFormat): List { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val root = doc.selectFirst("div.flex-1.bg-neutral-900") ?: return manga + val chapterDateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT).apply { + timeZone = TimeZone.getTimeZone("GMT+7") + } + + val tags = root.select("div.flex.flex-wrap.gap-2 button").mapNotNullToSet { button -> + val tagName = button.text().toTitleCase(sourceLocale) + val tagUrl = button.parent()?.attrOrNull("href")?.substringAfterLast('/') + if (tagUrl != null) { + MangaTag(title = tagName, key = tagUrl, source = source) + } else { + null + } + } + + val state = when { + root.selectFirst("a[href*='status=completed']") != null -> MangaState.FINISHED + root.selectFirst("a[href*='status=progress']") != null -> MangaState.ONGOING + else -> null + } + + val description = root.selectFirst("div#introduction-wrap p.font-light")?.html()?.nullIfEmpty() + + val altTitles = description?.split("\n")?.mapNotNullToSet { line -> + when { + line.startsWith("Tên tiếng anh:", ignoreCase = true) -> + line.substringAfter(':').substringBefore("Tên gốc:").trim() + + line.startsWith("Tên gốc:", ignoreCase = true) -> + line.substringAfter(':').trim().substringBefore(' ') + + else -> null + } + } + + val scriptTag = doc.select("script").firstOrNull { script -> + val data = script.data() + data.contains("data") && data.contains("chapterListEn") + }?.data() + val chapters = parseChapterList(scriptTag, manga, chapterDateFormat) + + return manga.copy( + title = doc.selectFirst("h1")!!.text(), + tags = tags, + state = state, + description = description, + altTitles = altTitles.orEmpty(), + chapters = chapters.reversed(), + ) + } + + private fun parseChapterList(scriptTag: String?, manga: Manga, chapterDateFormat: SimpleDateFormat): List { val idManga = manga.url.substringAfter("detail/").toIntOrNull() ?: return emptyList() - + val chapters = ArrayList() if (scriptTag.isNullOrEmpty()) return chapters val cleanedScript = scriptTag.replace("\\", "") - + val cutScript = "null,{\"data\"" val needScript = cleanedScript.indexOf(cutScript) if (needScript == -1) return chapters val finalScript = cleanedScript.substring(needScript) - + val vnPrefix = "null,{\"data\":" val vnStart = finalScript.indexOf(vnPrefix) if (vnStart == -1) return chapters @@ -176,13 +177,13 @@ internal class NhentaiWorld(context: MangaLoaderContext) : val vnEnd = finalScript.indexOf(beforeEn, vnStart) if (vnEnd == -1) return chapters val vnChapterStr = finalScript.substring(vnStart + vnPrefix.length, vnEnd) - + val vnArray = try { JSONArray(vnChapterStr) } catch (e: Exception) { JSONArray() } - + for (i in 0 until vnArray.length()) { val chapter = vnArray.getJSONObject(i) val name = chapter.optString("name", null) ?: continue @@ -212,13 +213,13 @@ internal class NhentaiWorld(context: MangaLoaderContext) : val enEnd = finalScript.indexOf(beforeId, enStart) if (enEnd == -1) return chapters val enChapterStr = finalScript.substring(enStart + enPrefix.length, enEnd) - + val enArray = try { JSONArray(enChapterStr) } catch (e: Exception) { JSONArray() } - + for (i in 0 until enArray.length()) { val chapter = enArray.getJSONObject(i) val name = chapter.optString("name", null) ?: continue @@ -239,7 +240,7 @@ internal class NhentaiWorld(context: MangaLoaderContext) : ) ) } - + return chapters } @@ -247,11 +248,11 @@ internal class NhentaiWorld(context: MangaLoaderContext) : val url = chapter.url.toAbsoluteUrl(domain) val doc = webClient.httpGet(url).parseHtml() val root = doc.select("img.m-auto.read-image.w-auto.h-auto.md\\:min-h-\\[800px\\].min-h-\\[300px\\]") - + if (root.isEmpty()) { // for Debug #1604 throw ParseException("Root not found!", url) } - + return root.map { img -> val imgUrl = img.requireSrc() MangaPage( @@ -264,40 +265,30 @@ internal class NhentaiWorld(context: MangaLoaderContext) : } private suspend fun fetchTags(): Set { - val doc = webClient.httpGet( - urlBuilder() - .addPathSegment("genre") - .addPathSegment("all") - .build(), - ).parseHtml() - - val scriptTag = doc.select("script").firstOrNull { script -> - val data = script.data() - data.contains("buildId") && data.contains("options") - }?.data() ?: return emptySet() - - val cleanedScript = scriptTag.replace("\\", "") - - val optionsPrefix = "\"options\":" - val optionsStart = cleanedScript.indexOf(optionsPrefix) + val doc = webClient.httpGet("https://$domain").parseHtml() + val scriptSrc = doc.select("script")[7].src()!! + val docJS = webClient.httpGet(scriptSrc).parseRaw() + + val optionsStart = docJS.indexOf("genres:[{") if (optionsStart == -1) return emptySet() - - val optionsEnd = cleanedScript.indexOf("\"zombie\"}]", optionsStart) + "\"zombie\"}]".length + + val optionsEnd = docJS.indexOf("}]", optionsStart) if (optionsEnd == -1) return emptySet() - - val optionsStr = cleanedScript.substring(optionsStart + optionsPrefix.length, optionsEnd) - - val optionsArray = try { - JSONArray(optionsStr) - } catch (e: Exception) { - return emptySet() - } - + + val optionsStr = docJS.substring(optionsStart + 7, optionsEnd + 2) + + val optionsArray = JSONArray( + optionsStr + .replace(Regex(",description:\\s*\"[^\"]*\"(,?)"), "") + .replace(Regex("(\\w+):"), "\"$1\":") + ) + return buildSet { for (i in 0 until optionsArray.length()) { + // {"label":"Ahegao","href":"/genre/ahegao"} val option = optionsArray.getJSONObject(i) - val title = option.getStringOrNull("label")?.toTitleCase(sourceLocale) ?: continue - val key = option.getStringOrNull("value") ?: continue + val title = option.getStringOrNull("label")!!.toTitleCase(sourceLocale) + val key = option.getStringOrNull("href")!!.split("/")[2] if (title.isNotEmpty() && key.isNotEmpty()) { if (title != "Tất cả" || key != "all") { // remove "All" tags, default list = all add(MangaTag(title = title, key = key, source = source)) @@ -306,4 +297,4 @@ internal class NhentaiWorld(context: MangaLoaderContext) : } } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/MeHentaiVN.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/MeHentaiVN.kt index c204a7f3..3cf0d501 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/MeHentaiVN.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/MeHentaiVN.kt @@ -13,25 +13,30 @@ import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser import org.koitharu.kotatsu.parsers.exception.NotFoundException import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* +import java.lang.NullPointerException +import java.net.URL import java.util.* @MangaSourceParser("MEHENTAIVN", "MeHentaiVN", "vi", ContentType.HENTAI) internal class MeHentaiVN(context: MangaLoaderContext) : WpComicsParser(context, MangaParserSource.MEHENTAIVN, "www.mehentaivn.xyz", 44) { - override val configKeyDomain: ConfigKey.Domain = ConfigKey.Domain("www.mehentaivn.xyz", "www.hentaivnx.autos") - - override val userAgentKey = ConfigKey.UserAgent(UserAgents.CHROME_DESKTOP) - - override fun onCreateConfig(keys: MutableCollection>) { - super.onCreateConfig(keys) - keys.add(userAgentKey) - } + override val configKeyDomain: ConfigKey.Domain = ConfigKey.Domain( + "www.mehentaivn.xyz", + "www.hentaivnx.autos", + "www.hentaivnx.com" + ) override fun getRequestHeaders() = super.getRequestHeaders().newBuilder() - .add("referer", "no-referrer") + .add("referer", "https://$domain/") .build() + override val filterCapabilities: MangaListFilterCapabilities + get() = super.filterCapabilities.copy( + isMultipleTagsSupported = true, + isTagsExclusionSupported = true + ) + override suspend fun getFilterOptions() = MangaListFilterOptions( availableTags = fetchTags(), availableStates = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED), @@ -40,6 +45,7 @@ internal class MeHentaiVN(context: MangaLoaderContext) : override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { val response = when { + // url template: https://www.mehentaivn.xyz/tim-truyen?keyword=${query} !filter.query.isNullOrEmpty() -> { val url = buildString { append("https://") @@ -47,11 +53,12 @@ internal class MeHentaiVN(context: MangaLoaderContext) : append(listUrl) append("?keyword=") append(filter.query.urlEncoded()) - append("&page=") - append(page.toString()) + if (page > 1) { + append("&page=$page") + } } - val result = runCatchingCancellable { webClient.httpGet(url) } + val result = runCatchingCancellable { webClient.httpGet(url) } // execute val exception = result.exceptionOrNull() if (exception is NotFoundException) { return emptyList() @@ -59,42 +66,54 @@ internal class MeHentaiVN(context: MangaLoaderContext) : result.getOrThrow() } + // url tempalte: https://www.mehentaivn.xyz/tim-truyen-nang-cao?{query} + // Query Structure: + // genres=19775801,1& /* tags include */ + // notgenres=19776383,19777327& /* tags exclude */ + // minchapter=0& /* chapter count. Leaves 0 to get everything */ + // sort=15& /* Sort order */ + // contain= /* Not supported */ else -> { - val url = buildString { - append("https://") - append(domain) - append(listUrl) - if (filter.tags.isNotEmpty()) { - append('/') - filter.tags.oneOrThrowIfMany()?.let { - append(it.key) - } - } - append("?sort=") - append( + val queries = mutableListOf() + + // tags + queries.add("genres=${filter.tags.joinToString (",") { it.key }}") + + // tags exclude + queries.add("notgenres=${filter.tagsExclude.joinToString (",") { it.key }}") + + if (filter.tags.isNotEmpty() or filter.tagsExclude.isNotEmpty()) { + // This means our query is not empty! + val url = buildString { + append("http://$domain/tim-truyen-nang-cao?") + append(queries.joinToString("&")) + + // order when (order) { - SortOrder.UPDATED -> 0 - SortOrder.POPULARITY -> 10 - SortOrder.NEWEST -> 15 - SortOrder.RATING -> 20 + SortOrder.NEWEST -> append("&sort=15") // Truyện mới + SortOrder.POPULARITY -> append("&sort=10") // Top all + SortOrder.UPDATED -> append("&sort=0") // Truyện mới + SortOrder.RATING -> append("&sort=20") // Theo dõi else -> throw IllegalArgumentException("Sort order ${order.name} not supported") - }, - ) - filter.states.oneOrThrowIfMany()?.let { - append("&status=") - append( - when (it) { - MangaState.ONGOING -> "1" - MangaState.FINISHED -> "2" - else -> "-1" - }, - ) + } + + if (page > 1) { + append("&page=$page") + } } - append("&page=") - append(page.toString()) - } - webClient.httpGet(url) + webClient.httpGet(url) // execute + + } else { + val url = buildString { + append("https://$domain/") + if (page > 1) { + append("?page=$page") + } + } + + webClient.httpGet(url) + } } } @@ -102,11 +121,11 @@ internal class MeHentaiVN(context: MangaLoaderContext) : return parseSearchList(response.parseHtml(), tagMap) } - private suspend fun parseSearchList(doc: Document, tagMap: ArrayMap): List { + private fun parseSearchList(doc: Document, tagMap: ArrayMap): List { return doc.select("div.items div.item").mapNotNull { item -> val tooltipElement = item.selectFirst("div.box_tootip") val absUrl = item.selectFirst("div.image > a")?.attrAsAbsoluteUrlOrNull("href") ?: return@mapNotNull null - val slug = absUrl.substringAfterLast('/') + val url = absUrl.toRelativeUrl(domain) val mangaState = when (tooltipElement?.selectFirst("div.message_main > p:contains(Tình trạng)")?.ownText()) { in ongoing -> MangaState.ONGOING @@ -117,13 +136,12 @@ internal class MeHentaiVN(context: MangaLoaderContext) : tooltipElement?.selectFirst("div.message_main > p:contains(Thể loại)")?.ownText().orEmpty() val mangaTags = tagsElement.split(',').mapNotNullToSet { tagMap[it.trim()] } val author = tooltipElement?.selectFirst("div.message_main > p:contains(Tác giả)")?.ownText() - val coverUrl = item.selectFirst("div.image a img")?.requireSrc() - val largeCoverUrl = null + val coverUrl = checkImgUrl(item.selectFirst("div.image a img")?.requireSrc()) Manga( - id = generateUid(slug), + id = generateUid(url), title = item.selectFirst("div.box_tootip div.title, h3 a")?.text().orEmpty(), altTitles = emptySet(), - url = absUrl.toRelativeUrl(domain), + url = url, publicUrl = absUrl, rating = RATING_UNKNOWN, contentRating = null, @@ -157,7 +175,8 @@ internal class MeHentaiVN(context: MangaLoaderContext) : val author = doc.body().selectFirst(selectAut)?.textOrNull() manga.copy( - description = doc.selectFirst(selectDesc)?.html(), + title = doc.select("h1.title-detail").text(), + description = "", // no more description for manga on this source altTitles = setOfNotNull(doc.selectFirst("h2.other-name")?.textOrNull()), authors = setOfNotNull(author), state = doc.selectFirst(selectState)?.let { @@ -175,24 +194,9 @@ internal class MeHentaiVN(context: MangaLoaderContext) : } override suspend fun getPages(chapter: MangaChapter): List { - val fullUrl = chapter.url.toAbsoluteUrl(domain) - val doc = webClient.httpGet(fullUrl).parseHtml() - - val imageUrls = doc.select("div.page-chapter").flatMap { div -> - div.select("img").mapNotNull { img -> - val src = img.attr("src").takeIf { it.isNotEmpty() } - val dataSrc = img.attr("data-src").takeIf { it.isNotEmpty() } - val imageUrl = src ?: dataSrc - - if (imageUrl != null && checkMangaImgs(imageUrl)) { - imageUrl - } else { - null - } - } - } - - return imageUrls.map { url -> + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() + return doc.select(".page-chapter img").map { + val url = checkImgUrl(it.requireSrc()) MangaPage( id = generateUid(url), url = url, @@ -202,14 +206,14 @@ internal class MeHentaiVN(context: MangaLoaderContext) : } } - private suspend fun checkMangaImgs(url: String): Boolean { - return try { - val response = webClient.httpHead(url) - val contentType = response.header("Content-Type") ?: "" - contentType.startsWith("image/") - } catch (e: Exception) { - false - } + private fun checkImgUrl (url: String?) : String { + if (url.isNullOrEmpty()) return "" + val urlImage = URL(url) + + // Need updating frequently + if (urlImage.host.contains("duckduckgo.com")) return url.split("?u=")[1] + + return url } private suspend fun fetchTags(): Set {