From e874837efb1f805de4454cbb03553c41cd689d8a Mon Sep 17 00:00:00 2001 From: Koitharu Date: Sun, 13 Apr 2025 09:43:31 +0300 Subject: [PATCH] Improve some parsers --- build.gradle | 10 +- .../parsers/core/LegacyPagedMangaParser.kt | 5 + .../kotatsu/parsers/core/PagedMangaParser.kt | 5 + .../kotatsu/parsers/model/MangaChapter.kt | 2 +- .../kotatsu/parsers/site/en/BatCave.kt | 256 ++++---- .../kotatsu/parsers/site/en/Hentalk.kt | 564 +++++++++--------- .../kotatsu/parsers/site/vi/MimiHentai.kt | 288 ++++----- .../kotatsu/parsers/MangaParserTest.kt | 2 +- 8 files changed, 577 insertions(+), 555 deletions(-) diff --git a/build.gradle b/build.gradle index 50a82aa8b..58161ae05 100644 --- a/build.gradle +++ b/build.gradle @@ -58,19 +58,19 @@ afterEvaluate { } dependencies { - implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.1' + implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2' implementation 'com.squareup.okhttp3:okhttp:4.12.0' - implementation 'com.squareup.okio:okio:3.10.2' - api 'org.jsoup:jsoup:1.18.3' + implementation 'com.squareup.okio:okio:3.11.0' + api 'org.jsoup:jsoup:1.19.1' implementation 'org.json:json:20240303' - implementation 'androidx.collection:collection:1.4.5' + implementation 'androidx.collection:collection:1.5.0' ksp project(':kotatsu-parsers-ksp') testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.1' testImplementation 'org.junit.jupiter:junit-jupiter-engine:5.10.1' testImplementation 'org.junit.jupiter:junit-jupiter-params:5.10.1' - testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.10.1' + testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.10.2' testImplementation 'io.webfolder:quickjs:1.1.0' } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/core/LegacyPagedMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/core/LegacyPagedMangaParser.kt index 57c51d39f..b572be8a7 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/core/LegacyPagedMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/core/LegacyPagedMangaParser.kt @@ -38,6 +38,11 @@ public abstract class LegacyPagedMangaParser( public abstract suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List + protected fun setFirstPage(firstPage: Int, firstPageForSearch: Int = firstPage) { + paginator.firstPage = firstPage + searchPaginator.firstPage = firstPageForSearch + } + private suspend fun getList( paginator: Paginator, offset: Int, diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/core/PagedMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/core/PagedMangaParser.kt index 87c994b09..805004a2a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/core/PagedMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/core/PagedMangaParser.kt @@ -43,6 +43,11 @@ public abstract class PagedMangaParser( public abstract suspend fun getListPage(query: MangaSearchQuery, page: Int): List + protected fun setFirstPage(firstPage: Int, firstPageForSearch: Int = firstPage) { + paginator.firstPage = firstPage + searchPaginator.firstPage = firstPageForSearch + } + private suspend fun searchManga( paginator: Paginator, query: MangaSearchQuery, diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/model/MangaChapter.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/MangaChapter.kt index dacf377b8..8790b8dae 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/model/MangaChapter.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/MangaChapter.kt @@ -47,7 +47,7 @@ public data class MangaChapter( get() = title.ifNullOrEmpty { buildString { if (volume > 0) append("Vol ").append(volume).append(' ') - if (number > 0) append("Chapter ").append(number) else append("Unnamed") + if (number > 0) append("Chapter ").append(number.formatSimple()) else append("Unnamed") } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/BatCave.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/BatCave.kt index 5d47258a2..73135ec2c 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/BatCave.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/BatCave.kt @@ -1,7 +1,7 @@ package org.koitharu.kotatsu.parsers.site.en -import org.jsoup.nodes.Document -import org.jsoup.nodes.Element +import org.json.JSONObject +import org.koitharu.kotatsu.parsers.Broken import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey @@ -9,19 +9,18 @@ import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* +import org.koitharu.kotatsu.parsers.util.json.getFloatOrDefault +import org.koitharu.kotatsu.parsers.util.json.getStringOrNull import org.koitharu.kotatsu.parsers.util.suspendlazy.getOrNull import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy import java.text.SimpleDateFormat import java.util.* -import org.json.JSONObject -import org.koitharu.kotatsu.parsers.Broken -@Broken("Need fix tags in getDetails") @MangaSourceParser("BATCAVE", "BatCave", "en") internal class BatCave(context: MangaLoaderContext) : LegacyPagedMangaParser(context, MangaParserSource.BATCAVE, 20) { - override val configKeyDomain = ConfigKey.Domain("batcave.biz") + override val configKeyDomain = ConfigKey.Domain("batcave.biz") private val availableTags = suspendLazy(initializer = ::fetchTags) @@ -30,123 +29,135 @@ internal class BatCave(context: MangaLoaderContext) : keys.add(userAgentKey) } - override val availableSortOrders: Set = EnumSet.of(SortOrder.UPDATED) + override val availableSortOrders: Set = EnumSet.of(SortOrder.UPDATED) - override val filterCapabilities: MangaListFilterCapabilities + override val filterCapabilities: MangaListFilterCapabilities get() = MangaListFilterCapabilities( isSearchSupported = true, - isMultipleTagsSupported = true, - isSearchWithFiltersSupported = false, - isYearRangeSupported = true + isMultipleTagsSupported = true, + isYearRangeSupported = true, ) - override suspend fun getFilterOptions() = MangaListFilterOptions( - availableTags = availableTags.get() + override suspend fun getFilterOptions() = MangaListFilterOptions( + availableTags = availableTags.get(), ) - override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { - val urlBuilder = StringBuilder() - when { - !filter.query.isNullOrEmpty() -> { - urlBuilder.append("/search/") - urlBuilder.append(filter.query.urlEncoded()) - if (page > 1) urlBuilder.append("/page/$page/") - } - else -> { - urlBuilder.append("/ComicList") - if (filter.yearFrom != YEAR_UNKNOWN) { - urlBuilder.append("/y[from]=${filter.yearFrom}") - } - if (filter.yearTo != YEAR_UNKNOWN) { - urlBuilder.append("/y[to]=${filter.yearTo}") - } - if (filter.tags.isNotEmpty()) { - urlBuilder.append("/g=") - urlBuilder.append(filter.tags.joinToString(",") { it.key }) - } - urlBuilder.append("/sort") - if (page > 1) { urlBuilder.append("/page/$page/") } - } - } - - val fullUrl = urlBuilder.toString().toAbsoluteUrl(domain) - val doc = webClient.httpGet(fullUrl).parseHtml() - return doc.select("div.readed.d-flex.short").map { item -> - val a = item.selectFirst("a.readed__img.img-fit-cover.anim") - ?: throw ParseException("Link element not found!", fullUrl) - val img = item.selectFirst("img[data-src]") - val titleElement = item.selectFirst("h2.readed__title a") - Manga( - id = generateUid(a.attr("href")), - url = a.attr("href"), - publicUrl = a.attr("href"), - title = titleElement.text(), - altTitles = emptySet(), - authors = emptySet(), - description = null, - tags = emptySet(), - rating = RATING_UNKNOWN, - state = null, - coverUrl = img.attr("data-src")?.toAbsoluteUrl(domain), - contentRating = if (isNsfwSource) ContentRating.ADULT else null, - source = source, - ) - } - } - - override suspend fun getDetails(manga: Manga): Manga { - val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - - val dateFormat = SimpleDateFormat("dd.MM.yyyy", Locale.US) - - val scriptData = doc.selectFirst("script:containsData(__DATA__)")?.data() - ?.substringAfter("window.__DATA__ = ") - ?.substringBefore(";") - ?: throw ParseException("Script data not found", manga.url) - - val jsonData = JSONObject(scriptData) - val newsId = jsonData.getInt("news_id") - val chaptersJson = jsonData.getJSONArray("chapters") - - val chapters = (0 until chaptersJson.length()).map { i -> - val chapter = chaptersJson.getJSONObject(i) - val chapterId = chapter.getInt("id") - - MangaChapter( - id = generateUid("/reader/$newsId/$chapterId"), - url = "/reader/$newsId/$chapterId", - number = chapter.getInt("posi").toFloat(), - title = chapter.getString("title"), - uploadDate = runCatching { - dateFormat.parse(chapter.getString("date"))?.time - }.getOrNull() ?: 0L, - source = source, - scanlator = null, - branch = null, - volume = 0, - ) - } - - val author = doc.selectFirst("li:contains(Publisher:)")?.text()?.substringAfter("Publisher:")?.trim() - val state = when (doc.selectFirst("li:contains(Release type:)")?.text()?.substringAfter("Release type:")?.trim()) { + override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { + val urlBuilder = StringBuilder() + when { + !filter.query.isNullOrEmpty() -> { + urlBuilder.append("/search/") + urlBuilder.append(filter.query.urlEncoded()) + if (page > 1) urlBuilder.append("/page/$page/") + } + + else -> { + urlBuilder.append("/ComicList") + if (filter.yearFrom != YEAR_UNKNOWN) { + urlBuilder.append("/y[from]=${filter.yearFrom}") + } + if (filter.yearTo != YEAR_UNKNOWN) { + urlBuilder.append("/y[to]=${filter.yearTo}") + } + if (filter.tags.isNotEmpty()) { + urlBuilder.append("/g=") + urlBuilder.append(filter.tags.joinToString(",") { it.key }) + } + urlBuilder.append("/sort") + if (page > 1) { + urlBuilder.append("/page/$page/") + } + } + } + + val fullUrl = urlBuilder.toString().toAbsoluteUrl(domain) + val doc = webClient.httpGet(fullUrl).parseHtml() + return doc.select("div.readed.d-flex.short").map { item -> + val a = item.selectFirstOrThrow("a.readed__img.img-fit-cover.anim") + val titleElement = item.selectFirstOrThrow("h2.readed__title a") + val img = item.selectFirst("img[data-src]") + val href = a.attrAsRelativeUrl("href") + Manga( + id = generateUid(href), + url = href, + publicUrl = a.attr("href"), + title = titleElement.text(), + altTitles = emptySet(), + authors = emptySet(), + description = null, + tags = emptySet(), + rating = RATING_UNKNOWN, + state = null, + coverUrl = img?.attrAsAbsoluteUrlOrNull("data-src"), + contentRating = if (isNsfwSource) ContentRating.ADULT else null, + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + + val dateFormat = SimpleDateFormat("dd.MM.yyyy", Locale.US) + + val scriptData = doc.selectFirst("script:containsData(__DATA__)")?.data() + ?.substringAfter("window.__DATA__ = ") + ?.substringBefore(";") + ?: doc.parseFailed("Script data not found") + + val jsonData = JSONObject(scriptData) + val newsId = jsonData.getLong("news_id") + val chaptersJson = jsonData.getJSONArray("chapters") + + val chapters = List(chaptersJson.length()) { i -> + val chapter = chaptersJson.getJSONObject(i) + val chapterId = chapter.getLong("id") + + MangaChapter( + id = generateUid("$newsId/$chapterId"), + url = "/reader/$newsId/$chapterId", + number = chapter.getFloatOrDefault("posi", 0f), + title = chapter.getStringOrNull("title"), + uploadDate = dateFormat.tryParse(chapter.getStringOrNull("date")), + source = source, + scanlator = null, + branch = null, + volume = 0, + ) + } + + val author = doc.selectFirst("li:contains(Publisher:)") + ?.textOrNull() + ?.substringAfter("Publisher:") + ?.trim() + ?.nullIfEmpty() + val state = when ( + doc.selectFirst("li:contains(Release type:)")?.text()?.substringAfter("Release type:")?.trim() + ) { "Ongoing" -> MangaState.ONGOING else -> MangaState.FINISHED } - val allTags = availableTags.get() - val tags = doc.select("div.page__tags.d-flex a").mapNotNullToSet { a -> - val tagName = a.text() - allTags.find { it.title.equals(tagName, ignoreCase = true) } - } - - return manga.copy( - authors = setOfNotNull(author), - state = state, - chapters = chapters, - description = doc.select("div.page__text.full-text.clearfix").text(), - tags = tags - ) - } + val tagLinks = doc.getElementsByAttributeValueContaining("href", "/genres/") + val tags = if (tagLinks.isNotEmpty()) { + availableTags.getOrNull()?.let { allTags -> + tagLinks.mapNotNullToSet { a -> + val tagName = a.text() + allTags.find { it.title.equals(tagName, ignoreCase = true) } + } + } + } else { + null + } + + return manga.copy( + authors = setOfNotNull(author), + state = state, + chapters = chapters, + description = doc.select("div.page__text.full-text.clearfix").textOrNull(), + tags = tags ?: manga.tags, + ) + } override suspend fun getPages(chapter: MangaChapter): List { val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() @@ -165,30 +176,29 @@ internal class BatCave(context: MangaLoaderContext) : id = generateUid(imageUrl), url = imageUrl, preview = null, - source = source + source = source, ) } } private suspend fun fetchTags(): Set { val doc = webClient.httpGet("https://$domain/comix/").parseHtml() - val scriptData = doc.selectFirst("script:containsData(__XFILTER__)")?.data() - ?: throw ParseException("Script data not found", "$domain/genres") - + val scriptData = doc.selectFirstOrThrow("script:containsData(__XFILTER__)").data() + val genresJson = scriptData .substringAfter("\"g\":{") .substringBefore("}}}") + "}" - + val genresObj = JSONObject("{$genresJson}") val valuesArray = genresObj.getJSONArray("values") - - return (0 until valuesArray.length()).map { i -> + + return Set(valuesArray.length()) { i -> val genre = valuesArray.getJSONObject(i) MangaTag( key = genre.getInt("id").toString(), - title = genre.getString("value"), - source = source + title = genre.getString("value").toTitleCase(sourceLocale), + source = source, ) - }.toSet() + } } -} \ No newline at end of file +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/Hentalk.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/Hentalk.kt index d057ce10a..e711e370d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/Hentalk.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/Hentalk.kt @@ -1,17 +1,18 @@ package org.koitharu.kotatsu.parsers.site.en -import org.json.JSONArray +import androidx.collection.ArraySet +import androidx.collection.MutableIntList +import androidx.collection.MutableIntObjectMap import org.json.JSONObject import org.jsoup.HttpStatusException import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser -import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.exception.ParseException -import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.network.UserAgents import org.koitharu.kotatsu.parsers.util.* -import org.koitharu.kotatsu.parsers.util.json.* import java.net.HttpURLConnection import java.text.SimpleDateFormat import java.util.* @@ -22,11 +23,11 @@ private const val SERVER_DATA = "" @MangaSourceParser("HENTALK", "Hentalk", "en", type = ContentType.HENTAI) internal class Hentalk(context: MangaLoaderContext) : LegacyPagedMangaParser(context, MangaParserSource.HENTALK, 24) { - - override val configKeyDomain = ConfigKey.Domain("hentalk.pw") - override val userAgentKey = ConfigKey.UserAgent(UserAgents.KOTATSU) - private val preferredServerKey = ConfigKey.PreferredImageServer( + override val configKeyDomain = ConfigKey.Domain("hentalk.pw") + override val userAgentKey = ConfigKey.UserAgent(UserAgents.KOTATSU) + + private val preferredServerKey = ConfigKey.PreferredImageServer( presetValues = mapOf( SERVER_DATA to "Original quality", SERVER_DATA_SAVER to "Compressed quality", @@ -40,292 +41,293 @@ internal class Hentalk(context: MangaLoaderContext) : keys.add(preferredServerKey) } - override val availableSortOrders: Set = EnumSet.of( + override val availableSortOrders: Set = EnumSet.of( SortOrder.UPDATED, SortOrder.NEWEST, - SortOrder.NEWEST_ASC, - SortOrder.ALPHABETICAL, - SortOrder.ALPHABETICAL_DESC, + SortOrder.NEWEST_ASC, + SortOrder.ALPHABETICAL, + SortOrder.ALPHABETICAL_DESC, ) - override val filterCapabilities: MangaListFilterCapabilities + override val filterCapabilities: MangaListFilterCapabilities get() = MangaListFilterCapabilities( isSearchSupported = true, isMultipleTagsSupported = true, isSearchWithFiltersSupported = true, - isAuthorSearchSupported = true + isAuthorSearchSupported = true, ) - override suspend fun getFilterOptions(): MangaListFilterOptions { - return MangaListFilterOptions( availableTags = emptySet() ) // not found any URLs for it + override suspend fun getFilterOptions() = MangaListFilterOptions() // not found any URLs for it + + override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { + val url = buildString { + append("https://") + append(domain) + append("/__data.json?x-sveltekit-trailing-slash=1&x-sveltekit-invalidated=001") + + when { + !filter.query.isNullOrEmpty() || filter.tags.isNotEmpty() || !filter.author.isNullOrEmpty() -> { + append("&q=") + + if (!filter.author.isNullOrEmpty()) { + append("artist:\"${space2plus(filter.author)}\"") + append('+') + } + + if (filter.tags.isNotEmpty()) { + filter.tags.forEach { tag -> + append("tag:\"${space2plus(tag.key)}\"") + append('+') + } + } + + if (!filter.query.isNullOrEmpty()) { + append(space2plus(filter.query)) + } else { + append('+') + } + } + } + + when (order) { + SortOrder.UPDATED -> append("&sort=released_at") + SortOrder.NEWEST_ASC -> append("&sort=created_at&order=asc") + SortOrder.NEWEST -> append("&sort=created_at&order=desc") + SortOrder.ALPHABETICAL -> append("&sort=title&order=asc") + SortOrder.ALPHABETICAL_DESC -> append("&sort=title&order=desc") + else -> {} + } + + if (page > 1) { + append("&page=") + append(page) + } + } + + val json = try { + webClient.httpGet(url).parseJson() + } catch (e: HttpStatusException) { + if (e.statusCode == HttpURLConnection.HTTP_INTERNAL_ERROR) { + return emptyList() + } else { + throw ParseException("Can't get data from source!", url) + } + } + + val dataArray = json.getJSONArray("nodes") + .optJSONObject(2) + ?.optJSONArray("data") + ?: return emptyList() + + val dataValues = MutableIntObjectMap(dataArray.length()) + for (i in 0 until dataArray.length()) { + dataValues[i] = dataArray.get(i) + } + + val archiveH = MutableIntList(dataArray.length()) + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("id") && item.has("hash") && + item.has("title") && item.has("thumbnail") && item.has("tags") + ) { + archiveH.add(i) + } + } + + val mangaList = ArrayList() + archiveH.forEach { tempIndex -> + val temp = dataArray.getJSONObject(tempIndex) + val idRef = temp.getInt("id") + val hashRef = temp.getInt("hash") + val titleRef = temp.getInt("title") + val thumbnailRef = temp.getInt("thumbnail") + val tagsRef = temp.getInt("tags") + + val mangaId = dataArray.getLong(idRef) + + val key = dataArray.getString(hashRef) + val title = dataArray.getString(titleRef) + val idThumbnail = dataArray.getInt(thumbnailRef) + + val tagsList = dataArray.optJSONArray(tagsRef) + val tags = ArraySet() + var author: String? = null + + if (tagsList != null) { + var i = 0 + while (i < tagsList.length()) { + val tagRefIndex = tagsList.getInt(i) + + if (dataValues.containsKey(tagRefIndex) && + dataValues[tagRefIndex] is JSONObject && + (dataValues[tagRefIndex] as JSONObject).has("namespace") + ) { + + val nsObj = dataValues[tagRefIndex] as JSONObject + val nsIndex = nsObj.getInt("namespace") + val nameIndex = nsObj.getInt("name") + + val nsValue = if (dataValues.containsKey(nsIndex)) dataValues[nsIndex].toString() else null + val nameValue = + if (dataValues.containsKey(nameIndex)) dataValues[nameIndex].toString() else null + + if (nsValue == "artist") { + author = nameValue?.nullIfEmpty() + } else if (nsValue == "tag" && nameValue != null) { + tags.add( + MangaTag( + key = nameValue, + title = nameValue, + source = source, + ), + ) + } + } + i++ + } + } + + mangaList.add( + Manga( + id = generateUid(mangaId), + url = "/g/$mangaId/__data.json?x-sveltekit-invalidated=001", + publicUrl = "https://$domain/g/$mangaId", + title = title, + altTitles = emptySet(), + coverUrl = "https://$domain/image/$key/$idThumbnail?type=cover", + largeCoverUrl = null, + authors = setOfNotNull(author), + tags = tags, + state = null, + description = null, + contentRating = ContentRating.ADULT, + source = source, + rating = RATING_UNKNOWN, + ), + ) + } + + return mangaList } - override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { - val url = buildString { - append("https://") - append(domain) - append("/__data.json?x-sveltekit-trailing-slash=1&x-sveltekit-invalidated=001") - - when { - !filter.query.isNullOrEmpty() || filter.tags.isNotEmpty() || !filter.author.isNullOrEmpty() -> { - append("&q=") - - if (!filter.author.isNullOrEmpty()) { - append("artist:\"${space2plus(filter.author)}\"") - append("+") - } - - if (filter.tags.isNotEmpty()) { - filter.tags.forEach { tag -> - append("tag:\"${space2plus(tag.key)}\"") - append("+") - } - } - - if (!filter.query.isNullOrEmpty()) { - append(space2plus(filter.query)) - } else { - append("+") - } - } - } - - when (order) { - SortOrder.UPDATED -> append("&sort=released_at") - SortOrder.NEWEST_ASC -> append("&sort=created_at&order=asc") - SortOrder.NEWEST -> append("&sort=created_at&order=desc") - SortOrder.ALPHABETICAL -> append("&sort=title&order=asc") - SortOrder.ALPHABETICAL_DESC -> append("&sort=title&order=desc") - else -> {} - } - - if (page > 1) { - append("&page=") - append(page) - } - } - - val json = try { - webClient.httpGet(url).parseJson() - } catch (e: HttpStatusException) { - if (e.statusCode == HttpURLConnection.HTTP_INTERNAL_ERROR) { - return emptyList() - } else { - throw ParseException("Can't get data from source!", url) - } - } - - val mangaList = mutableListOf() - val dataValues = mutableMapOf() - - val dataArray = json.getJSONArray("nodes") - .optJSONObject(2) - ?.optJSONArray("data") - ?: return emptyList() - - for (i in 0 until dataArray.length()) { - dataValues[i] = dataArray.get(i) - } - - val archiveH = mutableListOf() - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("id") && item.has("hash") && - item.has("title") && item.has("thumbnail") && item.has("tags")) { - archiveH.add(i) - } - } - - for (tempIndex in archiveH) { - val temp = dataArray.getJSONObject(tempIndex) - val idRef = temp.getInt("id") - val hashRef = temp.getInt("hash") - val titleRef = temp.getInt("title") - val thumbnailRef = temp.getInt("thumbnail") - val tagsRef = temp.getInt("tags") - - val mangaId = dataArray.getLong(idRef) - - val key = dataArray.getString(hashRef) - val title = dataArray.getString(titleRef) - val idThumbnail = dataArray.getInt(thumbnailRef) - - val tagsList = dataArray.optJSONArray(tagsRef) - val tags = mutableSetOf() - var author: String? = null - - if (tagsList != null) { - var i = 0 - while (i < tagsList.length()) { - val tagRefIndex = tagsList.getInt(i) - - if (dataValues.containsKey(tagRefIndex) && - dataValues[tagRefIndex] is JSONObject && - (dataValues[tagRefIndex] as JSONObject).has("namespace")) { - - val nsObj = dataValues[tagRefIndex] as JSONObject - val nsIndex = nsObj.getInt("namespace") - val nameIndex = nsObj.getInt("name") - - val nsValue = if (dataValues.containsKey(nsIndex)) dataValues[nsIndex].toString() else "" - val nameValue = if (dataValues.containsKey(nameIndex)) dataValues[nameIndex].toString() else "" - - if (nsValue == "artist") { - author = nameValue - } else if (nsValue == "tag") { - tags.add(MangaTag( - key = nameValue, - title = nameValue, - source = source - )) - } - } - i++ - } - } - - mangaList.add(Manga( - id = generateUid(mangaId), - url = "/g/$mangaId/__data.json?x-sveltekit-invalidated=001", - publicUrl = "https://$domain/g/$mangaId", - title = title, - altTitles = emptySet(), - coverUrl = "https://$domain/image/$key/$idThumbnail?type=cover", - largeCoverUrl = null, - authors = setOfNotNull(author), - tags = tags, - state = null, - description = null, - contentRating = ContentRating.ADULT, - source = source, - rating = RATING_UNKNOWN, - )) - } - - return mangaList - } - - override suspend fun getDetails(manga: Manga): Manga { - val json = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseJson() - val mangaId = manga.url.substringAfter("/g/").substringBefore("/") - - val dataArray = json.getJSONArray("nodes") - .optJSONObject(2) - ?.optJSONArray("data") - ?: return manga.copy() - - var createdAt = "" - - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("createdAt")) { - val addedAt = item.getInt("createdAt") - if (dataArray.length() > addedAt) { - createdAt = dataArray.optString(addedAt, "") - break - } - } - } - - val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - val parseTime = dateFormat.tryParse(createdAt) - val chapter = MangaChapter( - id = generateUid("/g/$mangaId/read/1"), - url = "/g/$mangaId/read/1/__data.json?x-sveltekit-invalidated=011", - title = "Oneshot", // for all, just has 1 chapter - number = 0f, - uploadDate = parseTime, - volume = 0, - branch = null, - scanlator = null, - source = source, - ) - - return manga.copy( - chapters = listOf(chapter) - ) - } + override suspend fun getDetails(manga: Manga): Manga { + val json = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseJson() + val mangaId = manga.url.substringAfter("/g/").substringBefore('/') + + val dataArray = json.getJSONArray("nodes") + .optJSONObject(2) + ?.optJSONArray("data") + ?: return manga + + var createdAt = "" + + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("createdAt")) { + val addedAt = item.getInt("createdAt") + if (dataArray.length() > addedAt) { + createdAt = dataArray.optString(addedAt, "") + break + } + } + } + + val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val parseTime = dateFormat.tryParse(createdAt) + val chapter = MangaChapter( + id = generateUid("/g/$mangaId/read/1"), + url = "/g/$mangaId/read/1/__data.json?x-sveltekit-invalidated=011", + title = "Oneshot", // for all, just has 1 chapter + number = 0f, + uploadDate = parseTime, + volume = 0, + branch = null, + scanlator = null, + source = source, + ) + + return manga.copy( + chapters = listOf(chapter), + ) + } override suspend fun getPages(chapter: MangaChapter): List { - val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson() - val dataArray = json.getJSONArray("nodes") - .optJSONObject(2) - ?.optJSONArray("data") - ?: return emptyList() - - var compressID = "" - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("hash")) { - if (i < 20) { - val hashValue = dataArray.getString(item.getInt("hash")) - if (hashValue.length == 8) { - compressID = hashValue - break - } - } - } - } - - var hashID = "" - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("hash") && item.has("id")) { - val hashIndex = item.getInt("hash") - hashID = dataArray.getString(hashIndex) - break - } - } - - if (hashID.isEmpty()) { - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("gallery")) { - val galleryIndex = item.getInt("gallery") - val galleryTemplate = dataArray.optJSONObject(galleryIndex) - if (galleryTemplate != null && galleryTemplate.has("hash")) { - val hashIndex = galleryTemplate.getInt("hash") - hashID = dataArray.getString(hashIndex) - break - } - } - } - } - - val imgList = mutableListOf() - for (i in 0 until dataArray.length()) { - val item = dataArray.opt(i) - if (item is JSONObject && item.has("filename")) { - val filenameIndex = item.getInt("filename") - if (dataArray.length() > filenameIndex) { - val filename = dataArray.optString(filenameIndex, "") - if (filename.isNotEmpty()) { - imgList.add(filename) - } - } - } - } - - val server = config[preferredServerKey] ?: SERVER_DATA - return imgList.map { imgEx -> - val baseUrl = "https://$domain/image/$hashID/$imgEx" - val imageUrl = when (server) { - SERVER_DATA -> baseUrl - SERVER_DATA_SAVER -> baseUrl + SERVER_DATA_SAVER + compressID - else -> baseUrl - } - - MangaPage( - id = generateUid(imageUrl), - url = imageUrl, - preview = null, - source = source, - ) - } - } - - private fun space2plus(input: String): String { - return input.replace(" ", "+") - } + val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson() + val dataArray = json.getJSONArray("nodes") + .optJSONObject(2) + ?.optJSONArray("data") + ?: return emptyList() + + var compressID = "" + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("hash")) { + if (i < 20) { + val hashValue = dataArray.getString(item.getInt("hash")) + if (hashValue.length == 8) { + compressID = hashValue + break + } + } + } + } + + var hashID = "" + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("hash") && item.has("id")) { + val hashIndex = item.getInt("hash") + hashID = dataArray.getString(hashIndex) + break + } + } + + if (hashID.isEmpty()) { + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("gallery")) { + val galleryIndex = item.getInt("gallery") + val galleryTemplate = dataArray.optJSONObject(galleryIndex) + if (galleryTemplate != null && galleryTemplate.has("hash")) { + val hashIndex = galleryTemplate.getInt("hash") + hashID = dataArray.getString(hashIndex) + break + } + } + } + } + + val imgList = ArrayList(dataArray.length()) + for (i in 0 until dataArray.length()) { + val item = dataArray.opt(i) + if (item is JSONObject && item.has("filename")) { + val filenameIndex = item.getInt("filename") + if (dataArray.length() > filenameIndex) { + val filename = dataArray.optString(filenameIndex, "") + if (filename.isNotEmpty()) { + imgList.add(filename) + } + } + } + } + + val server = config[preferredServerKey] ?: SERVER_DATA + return imgList.map { imgEx -> + val baseUrl = "https://$domain/image/$hashID/$imgEx" + val imageUrl = when (server) { + SERVER_DATA -> baseUrl + SERVER_DATA_SAVER -> baseUrl + SERVER_DATA_SAVER + compressID + else -> baseUrl + } + + MangaPage( + id = generateUid(imageUrl), + url = imageUrl, + preview = null, + source = source, + ) + } + } + private fun space2plus(input: String): String = input.replace(' ', '+') } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/MimiHentai.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/MimiHentai.kt index b61f8c6c5..984407b0f 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/MimiHentai.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/MimiHentai.kt @@ -1,160 +1,160 @@ package org.koitharu.kotatsu.parsers.site.vi import org.json.JSONArray -import org.json.JSONObject -import kotlinx.coroutines.async -import kotlinx.coroutines.coroutineScope import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.model.* -import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.json.* -import java.text.SimpleDateFormat import java.util.* @MangaSourceParser("MIMIHENTAI", "MimiHentai", "vi", type = ContentType.HENTAI) internal class MimiHentai(context: MangaLoaderContext) : - LegacyPagedMangaParser(context, MangaParserSource.MIMIHENTAI, 18) { - - private val apiSuffix = "api/v1/manga" - override val configKeyDomain = ConfigKey.Domain("mimihentai.com") - - override val availableSortOrders: Set = EnumSet.of(SortOrder.UPDATED) - - override suspend fun getFilterOptions() = MangaListFilterOptions(availableTags = fetchTags()) - override val filterCapabilities: MangaListFilterCapabilities - get() = MangaListFilterCapabilities( - isSearchSupported = true, - isSearchWithFiltersSupported = true, - isMultipleTagsSupported = true, - isAuthorSearchSupported = true - ) - - override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { - val url = buildString { - append("https://") - append(domain) - append("/$apiSuffix/advance-search?page=") - append(page - 1) // first page is 0, not 1 - append("&max=18") // page size, avoid rate limit - when { - !filter.query.isNullOrEmpty() -> { - append("&name=") - append(filter.query.urlEncoded()) - } - - !filter.author.isNullOrEmpty() -> { - append("&author=") - append(filter.author.urlEncoded()) - } - - filter.tags.isNotEmpty() -> { - append("&genre=") - append(filter.tags.joinToString(",") { it.key }) - } - } - } - - val json = webClient.httpGet(url).parseJson() - val data = json.getJSONArray("data") - return parseMangaList(data) - } - - private suspend fun parseMangaList(data: JSONArray): List { - return data.mapJSON { jo -> - val id = jo.getLong("id") - val title = jo.getString("title") - val description = jo.getString("description") - val authors = jo.getJSONArray("authors").asTypedList().mapToSet { it } - val differentNames = jo.getJSONArray("differentNames").asTypedList().mapToSet { it } - val state = when(description) { - "Đang Tiến Hành" -> MangaState.ONGOING - "Hoàn Thành" -> MangaState.FINISHED - else -> null - } - - Manga( - id = generateUid(id), - title = title, - altTitles = differentNames, - url = "/$apiSuffix/info/$id", - publicUrl = "https://$domain/g/$id", - rating = RATING_UNKNOWN, - contentRating = ContentRating.ADULT, - coverUrl = jo.getString("coverUrl"), - tags = emptySet(), - state = state, - authors = authors, - source = source, - ) - } - } - - override suspend fun getDetails(manga: Manga): Manga = coroutineScope { - val url = "https://" + domain + manga.url - val json = webClient.httpGet(url).parseJson() - - val relationInfo = json.getJSONObject("relationInfo") - val tags = relationInfo.getJSONArray("genres").mapJSON { jo -> - MangaTag( - title = jo.getString("name"), - key = jo.getLong("id").toString(), - source = source, - ) - }.toSet() - - val basicInfo = json.getJSONObject("basicInfo") - val id = basicInfo.getLong("id") - val description = basicInfo.optString("fdescription").takeUnless { it.isNullOrEmpty() } - val uploaderName = json.getString("uploaderName") - val urlChaps = "https://$domain/$apiSuffix/gallery/$id" - val parseUrlChaps = async { JSONArray(webClient.httpGet(urlChaps).parseHtml().text()) } - val chapters = parseUrlChaps.await().mapJSON { jo -> - MangaChapter( - id = generateUid(jo.getLong("id")), - title = jo.getString("title"), - number = jo.getInt("number").toFloat(), - url = "/$apiSuffix/chapter?id=${jo.getLong("id")}", - uploadDate = 0L, - source = source, - scanlator = uploaderName, - branch = null, - volume = 0 - ) - } - - manga.copy( - tags = tags, - description = description, - chapters = chapters - ) - } + LegacyPagedMangaParser(context, MangaParserSource.MIMIHENTAI, 18) { + + private val apiSuffix = "api/v1/manga" + override val configKeyDomain = ConfigKey.Domain("mimihentai.com") + + override val availableSortOrders: Set = EnumSet.of(SortOrder.UPDATED) + + override val filterCapabilities: MangaListFilterCapabilities + get() = MangaListFilterCapabilities( + isSearchSupported = true, + isSearchWithFiltersSupported = true, + isMultipleTagsSupported = true, + isAuthorSearchSupported = true, + ) + + init { + setFirstPage(0) + } + + override suspend fun getFilterOptions() = MangaListFilterOptions(availableTags = fetchTags()) + + override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { + val url = buildString { + append("https://") + append(domain) + append("/$apiSuffix/advance-search?page=") + append(page) + append("&max=18") // page size, avoid rate limit + when { + !filter.query.isNullOrEmpty() -> { + append("&name=") + append(filter.query.urlEncoded()) + } + + !filter.author.isNullOrEmpty() -> { + append("&author=") + append(filter.author.urlEncoded()) + } + + filter.tags.isNotEmpty() -> { + append("&genre=") + append(filter.tags.joinToString(",") { it.key }) + } + } + } + + val json = webClient.httpGet(url).parseJson() + val data = json.getJSONArray("data") + return parseMangaList(data) + } + + private fun parseMangaList(data: JSONArray): List { + return data.mapJSON { jo -> + val id = jo.getLong("id") + val title = jo.getString("title") + val description = jo.getStringOrNull("description") + val authors = jo.getJSONArray("authors").asTypedList().toSet() + val differentNames = jo.getJSONArray("differentNames").asTypedList().toSet() + val state = when (description) { + "Đang Tiến Hành" -> MangaState.ONGOING + "Hoàn Thành" -> MangaState.FINISHED + else -> null + } + + Manga( + id = generateUid(id), + title = title, + altTitles = differentNames, + url = "/$apiSuffix/info/$id", + publicUrl = "https://$domain/g/$id", + rating = RATING_UNKNOWN, + contentRating = ContentRating.ADULT, + coverUrl = jo.getString("coverUrl"), + tags = emptySet(), + state = state, + authors = authors, + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val url = manga.url.toAbsoluteUrl(domain) + val json = webClient.httpGet(url).parseJson() + + val relationInfo = json.getJSONObject("relationInfo") + val tags = relationInfo.getJSONArray("genres").mapJSONToSet { jo -> + MangaTag( + title = jo.getString("name").toTitleCase(sourceLocale), + key = jo.getLong("id").toString(), + source = source, + ) + } + + val basicInfo = json.getJSONObject("basicInfo") + val id = basicInfo.getLong("id") + val description = basicInfo.getStringOrNull("fdescription") + val uploaderName = json.getStringOrNull("uploaderName") + val urlChaps = "https://$domain/$apiSuffix/gallery/$id" + val parsedChapters = webClient.httpGet(urlChaps).parseJsonArray() + val chapters = parsedChapters.mapJSON { jo -> + MangaChapter( + id = generateUid(jo.getLong("id")), + title = jo.getStringOrNull("title"), + number = jo.getFloatOrDefault("number", 0f), + url = "/$apiSuffix/chapter?id=${jo.getLong("id")}", + uploadDate = 0L, + source = source, + scanlator = uploaderName, + branch = null, + volume = 0, + ) + } + + return manga.copy( + tags = tags, + description = description, + chapters = chapters, + ) + } override suspend fun getPages(chapter: MangaChapter): List { - val json = webClient.httpGet("https://$domain${chapter.url}").parseJson() - val imageUrls = json.getJSONArray("pages").asTypedList() - return imageUrls.map { url -> - MangaPage( - id = generateUid(url), - url = url, - preview = null, - source = source, - ) - } - } - - private suspend fun fetchTags(): Set { - val url = "https://$domain/$apiSuffix/genres" - val response = JSONArray(webClient.httpGet(url).parseHtml().text()) - return response.mapJSON { jo -> - MangaTag( - title = jo.getString("name"), - key = jo.getLong("id").toString(), - source = source, - ) - }.toSet() - } -} \ No newline at end of file + val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson() + val imageUrls = json.getJSONArray("pages").asTypedList() + return imageUrls.map { url -> + MangaPage( + id = generateUid(url), + url = url, + preview = null, + source = source, + ) + } + } + + private suspend fun fetchTags(): Set { + val url = "https://$domain/$apiSuffix/genres" + val response = webClient.httpGet(url).parseJsonArray() + return response.mapJSONToSet { jo -> + MangaTag( + title = jo.getString("name").toTitleCase(sourceLocale), + key = jo.getLong("id").toString(), + source = source, + ) + } + } +} diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index 4e80719fb..2e619725a 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -154,7 +154,7 @@ internal class MangaParserTest { val parser = context.newParserInstance(source) val list = parser.getList(MangaSearchQuery.EMPTY) - val manga = list[0] + val manga = list.random() parser.getDetails(manga).apply { assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }