Improve some parsers

master
Koitharu 1 year ago
parent c294f5bb61
commit e874837efb
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -58,19 +58,19 @@ afterEvaluate {
} }
dependencies { dependencies {
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.1' implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2'
implementation 'com.squareup.okhttp3:okhttp:4.12.0' implementation 'com.squareup.okhttp3:okhttp:4.12.0'
implementation 'com.squareup.okio:okio:3.10.2' implementation 'com.squareup.okio:okio:3.11.0'
api 'org.jsoup:jsoup:1.18.3' api 'org.jsoup:jsoup:1.19.1'
implementation 'org.json:json:20240303' implementation 'org.json:json:20240303'
implementation 'androidx.collection:collection:1.4.5' implementation 'androidx.collection:collection:1.5.0'
ksp project(':kotatsu-parsers-ksp') ksp project(':kotatsu-parsers-ksp')
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.1' testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.1'
testImplementation 'org.junit.jupiter:junit-jupiter-engine:5.10.1' testImplementation 'org.junit.jupiter:junit-jupiter-engine:5.10.1'
testImplementation 'org.junit.jupiter:junit-jupiter-params:5.10.1' testImplementation 'org.junit.jupiter:junit-jupiter-params:5.10.1'
testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.10.1' testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.10.2'
testImplementation 'io.webfolder:quickjs:1.1.0' testImplementation 'io.webfolder:quickjs:1.1.0'
} }

@ -38,6 +38,11 @@ public abstract class LegacyPagedMangaParser(
public abstract suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> public abstract suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga>
/**
 * Sets the first page index on both paginators of this parser.
 *
 * @param firstPage value assigned to `paginator.firstPage`
 * @param firstPageForSearch value assigned to `searchPaginator.firstPage`; defaults to [firstPage]
 */
protected fun setFirstPage(firstPage: Int, firstPageForSearch: Int = firstPage) {
paginator.firstPage = firstPage
searchPaginator.firstPage = firstPageForSearch
}
private suspend fun getList( private suspend fun getList(
paginator: Paginator, paginator: Paginator,
offset: Int, offset: Int,

@ -43,6 +43,11 @@ public abstract class PagedMangaParser(
public abstract suspend fun getListPage(query: MangaSearchQuery, page: Int): List<Manga> public abstract suspend fun getListPage(query: MangaSearchQuery, page: Int): List<Manga>
/**
 * Sets the first page index on both paginators of this parser.
 *
 * @param firstPage value assigned to `paginator.firstPage`
 * @param firstPageForSearch value assigned to `searchPaginator.firstPage`; defaults to [firstPage]
 */
protected fun setFirstPage(firstPage: Int, firstPageForSearch: Int = firstPage) {
paginator.firstPage = firstPage
searchPaginator.firstPage = firstPageForSearch
}
private suspend fun searchManga( private suspend fun searchManga(
paginator: Paginator, paginator: Paginator,
query: MangaSearchQuery, query: MangaSearchQuery,

@ -47,7 +47,7 @@ public data class MangaChapter(
get() = title.ifNullOrEmpty { get() = title.ifNullOrEmpty {
buildString { buildString {
if (volume > 0) append("Vol ").append(volume).append(' ') if (volume > 0) append("Vol ").append(volume).append(' ')
if (number > 0) append("Chapter ").append(number) else append("Unnamed") if (number > 0) append("Chapter ").append(number.formatSimple()) else append("Unnamed")
} }
} }

@ -1,7 +1,7 @@
package org.koitharu.kotatsu.parsers.site.en package org.koitharu.kotatsu.parsers.site.en
import org.jsoup.nodes.Document import org.json.JSONObject
import org.jsoup.nodes.Element import org.koitharu.kotatsu.parsers.Broken
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
@ -9,19 +9,18 @@ import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser
import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getFloatOrDefault
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.suspendlazy.getOrNull import org.koitharu.kotatsu.parsers.util.suspendlazy.getOrNull
import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.* import java.util.*
import org.json.JSONObject
import org.koitharu.kotatsu.parsers.Broken
@Broken("Need fix tags in getDetails")
@MangaSourceParser("BATCAVE", "BatCave", "en") @MangaSourceParser("BATCAVE", "BatCave", "en")
internal class BatCave(context: MangaLoaderContext) : internal class BatCave(context: MangaLoaderContext) :
LegacyPagedMangaParser(context, MangaParserSource.BATCAVE, 20) { LegacyPagedMangaParser(context, MangaParserSource.BATCAVE, 20) {
override val configKeyDomain = ConfigKey.Domain("batcave.biz") override val configKeyDomain = ConfigKey.Domain("batcave.biz")
private val availableTags = suspendLazy(initializer = ::fetchTags) private val availableTags = suspendLazy(initializer = ::fetchTags)
@ -30,123 +29,135 @@ internal class BatCave(context: MangaLoaderContext) :
keys.add(userAgentKey) keys.add(userAgentKey)
} }
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED) override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED)
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isSearchSupported = true, isSearchSupported = true,
isMultipleTagsSupported = true, isMultipleTagsSupported = true,
isSearchWithFiltersSupported = false, isYearRangeSupported = true,
isYearRangeSupported = true
) )
override suspend fun getFilterOptions() = MangaListFilterOptions( override suspend fun getFilterOptions() = MangaListFilterOptions(
availableTags = availableTags.get() availableTags = availableTags.get(),
) )
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val urlBuilder = StringBuilder() val urlBuilder = StringBuilder()
when { when {
!filter.query.isNullOrEmpty() -> { !filter.query.isNullOrEmpty() -> {
urlBuilder.append("/search/") urlBuilder.append("/search/")
urlBuilder.append(filter.query.urlEncoded()) urlBuilder.append(filter.query.urlEncoded())
if (page > 1) urlBuilder.append("/page/$page/") if (page > 1) urlBuilder.append("/page/$page/")
} }
else -> {
urlBuilder.append("/ComicList") else -> {
if (filter.yearFrom != YEAR_UNKNOWN) { urlBuilder.append("/ComicList")
urlBuilder.append("/y[from]=${filter.yearFrom}") if (filter.yearFrom != YEAR_UNKNOWN) {
} urlBuilder.append("/y[from]=${filter.yearFrom}")
if (filter.yearTo != YEAR_UNKNOWN) { }
urlBuilder.append("/y[to]=${filter.yearTo}") if (filter.yearTo != YEAR_UNKNOWN) {
} urlBuilder.append("/y[to]=${filter.yearTo}")
if (filter.tags.isNotEmpty()) { }
urlBuilder.append("/g=") if (filter.tags.isNotEmpty()) {
urlBuilder.append(filter.tags.joinToString(",") { it.key }) urlBuilder.append("/g=")
} urlBuilder.append(filter.tags.joinToString(",") { it.key })
urlBuilder.append("/sort") }
if (page > 1) { urlBuilder.append("/page/$page/") } urlBuilder.append("/sort")
} if (page > 1) {
} urlBuilder.append("/page/$page/")
}
val fullUrl = urlBuilder.toString().toAbsoluteUrl(domain) }
val doc = webClient.httpGet(fullUrl).parseHtml() }
return doc.select("div.readed.d-flex.short").map { item ->
val a = item.selectFirst("a.readed__img.img-fit-cover.anim") val fullUrl = urlBuilder.toString().toAbsoluteUrl(domain)
?: throw ParseException("Link element not found!", fullUrl) val doc = webClient.httpGet(fullUrl).parseHtml()
val img = item.selectFirst("img[data-src]") return doc.select("div.readed.d-flex.short").map { item ->
val titleElement = item.selectFirst("h2.readed__title a") val a = item.selectFirstOrThrow("a.readed__img.img-fit-cover.anim")
Manga( val titleElement = item.selectFirstOrThrow("h2.readed__title a")
id = generateUid(a.attr("href")), val img = item.selectFirst("img[data-src]")
url = a.attr("href"), val href = a.attrAsRelativeUrl("href")
publicUrl = a.attr("href"), Manga(
title = titleElement.text(), id = generateUid(href),
altTitles = emptySet(), url = href,
authors = emptySet(), publicUrl = a.attr("href"),
description = null, title = titleElement.text(),
tags = emptySet(), altTitles = emptySet(),
rating = RATING_UNKNOWN, authors = emptySet(),
state = null, description = null,
coverUrl = img.attr("data-src")?.toAbsoluteUrl(domain), tags = emptySet(),
contentRating = if (isNsfwSource) ContentRating.ADULT else null, rating = RATING_UNKNOWN,
source = source, state = null,
) coverUrl = img?.attrAsAbsoluteUrlOrNull("data-src"),
} contentRating = if (isNsfwSource) ContentRating.ADULT else null,
} source = source,
)
override suspend fun getDetails(manga: Manga): Manga { }
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() }
val dateFormat = SimpleDateFormat("dd.MM.yyyy", Locale.US) override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val scriptData = doc.selectFirst("script:containsData(__DATA__)")?.data()
?.substringAfter("window.__DATA__ = ") val dateFormat = SimpleDateFormat("dd.MM.yyyy", Locale.US)
?.substringBefore(";")
?: throw ParseException("Script data not found", manga.url) val scriptData = doc.selectFirst("script:containsData(__DATA__)")?.data()
?.substringAfter("window.__DATA__ = ")
val jsonData = JSONObject(scriptData) ?.substringBefore(";")
val newsId = jsonData.getInt("news_id") ?: doc.parseFailed("Script data not found")
val chaptersJson = jsonData.getJSONArray("chapters")
val jsonData = JSONObject(scriptData)
val chapters = (0 until chaptersJson.length()).map { i -> val newsId = jsonData.getLong("news_id")
val chapter = chaptersJson.getJSONObject(i) val chaptersJson = jsonData.getJSONArray("chapters")
val chapterId = chapter.getInt("id")
val chapters = List(chaptersJson.length()) { i ->
MangaChapter( val chapter = chaptersJson.getJSONObject(i)
id = generateUid("/reader/$newsId/$chapterId"), val chapterId = chapter.getLong("id")
url = "/reader/$newsId/$chapterId",
number = chapter.getInt("posi").toFloat(), MangaChapter(
title = chapter.getString("title"), id = generateUid("$newsId/$chapterId"),
uploadDate = runCatching { url = "/reader/$newsId/$chapterId",
dateFormat.parse(chapter.getString("date"))?.time number = chapter.getFloatOrDefault("posi", 0f),
}.getOrNull() ?: 0L, title = chapter.getStringOrNull("title"),
source = source, uploadDate = dateFormat.tryParse(chapter.getStringOrNull("date")),
scanlator = null, source = source,
branch = null, scanlator = null,
volume = 0, branch = null,
) volume = 0,
} )
}
val author = doc.selectFirst("li:contains(Publisher:)")?.text()?.substringAfter("Publisher:")?.trim()
val state = when (doc.selectFirst("li:contains(Release type:)")?.text()?.substringAfter("Release type:")?.trim()) { val author = doc.selectFirst("li:contains(Publisher:)")
?.textOrNull()
?.substringAfter("Publisher:")
?.trim()
?.nullIfEmpty()
val state = when (
doc.selectFirst("li:contains(Release type:)")?.text()?.substringAfter("Release type:")?.trim()
) {
"Ongoing" -> MangaState.ONGOING "Ongoing" -> MangaState.ONGOING
else -> MangaState.FINISHED else -> MangaState.FINISHED
} }
val allTags = availableTags.get() val tagLinks = doc.getElementsByAttributeValueContaining("href", "/genres/")
val tags = doc.select("div.page__tags.d-flex a").mapNotNullToSet { a -> val tags = if (tagLinks.isNotEmpty()) {
val tagName = a.text() availableTags.getOrNull()?.let { allTags ->
allTags.find { it.title.equals(tagName, ignoreCase = true) } tagLinks.mapNotNullToSet { a ->
} val tagName = a.text()
allTags.find { it.title.equals(tagName, ignoreCase = true) }
return manga.copy( }
authors = setOfNotNull(author), }
state = state, } else {
chapters = chapters, null
description = doc.select("div.page__text.full-text.clearfix").text(), }
tags = tags
) return manga.copy(
} authors = setOfNotNull(author),
state = state,
chapters = chapters,
description = doc.select("div.page__text.full-text.clearfix").textOrNull(),
tags = tags ?: manga.tags,
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
@ -165,30 +176,29 @@ internal class BatCave(context: MangaLoaderContext) :
id = generateUid(imageUrl), id = generateUid(imageUrl),
url = imageUrl, url = imageUrl,
preview = null, preview = null,
source = source source = source,
) )
} }
} }
private suspend fun fetchTags(): Set<MangaTag> { private suspend fun fetchTags(): Set<MangaTag> {
val doc = webClient.httpGet("https://$domain/comix/").parseHtml() val doc = webClient.httpGet("https://$domain/comix/").parseHtml()
val scriptData = doc.selectFirst("script:containsData(__XFILTER__)")?.data() val scriptData = doc.selectFirstOrThrow("script:containsData(__XFILTER__)").data()
?: throw ParseException("Script data not found", "$domain/genres")
val genresJson = scriptData val genresJson = scriptData
.substringAfter("\"g\":{") .substringAfter("\"g\":{")
.substringBefore("}}}") + "}" .substringBefore("}}}") + "}"
val genresObj = JSONObject("{$genresJson}") val genresObj = JSONObject("{$genresJson}")
val valuesArray = genresObj.getJSONArray("values") val valuesArray = genresObj.getJSONArray("values")
return (0 until valuesArray.length()).map { i -> return Set(valuesArray.length()) { i ->
val genre = valuesArray.getJSONObject(i) val genre = valuesArray.getJSONObject(i)
MangaTag( MangaTag(
key = genre.getInt("id").toString(), key = genre.getInt("id").toString(),
title = genre.getString("value"), title = genre.getString("value").toTitleCase(sourceLocale),
source = source source = source,
) )
}.toSet() }
} }
} }

@ -1,17 +1,18 @@
package org.koitharu.kotatsu.parsers.site.en package org.koitharu.kotatsu.parsers.site.en
import org.json.JSONArray import androidx.collection.ArraySet
import androidx.collection.MutableIntList
import androidx.collection.MutableIntObjectMap
import org.json.JSONObject import org.json.JSONObject
import org.jsoup.HttpStatusException import org.jsoup.HttpStatusException
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser
import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.network.UserAgents import org.koitharu.kotatsu.parsers.network.UserAgents
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.*
import java.net.HttpURLConnection import java.net.HttpURLConnection
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.* import java.util.*
@ -22,11 +23,11 @@ private const val SERVER_DATA = ""
@MangaSourceParser("HENTALK", "Hentalk", "en", type = ContentType.HENTAI) @MangaSourceParser("HENTALK", "Hentalk", "en", type = ContentType.HENTAI)
internal class Hentalk(context: MangaLoaderContext) : internal class Hentalk(context: MangaLoaderContext) :
LegacyPagedMangaParser(context, MangaParserSource.HENTALK, 24) { LegacyPagedMangaParser(context, MangaParserSource.HENTALK, 24) {
override val configKeyDomain = ConfigKey.Domain("hentalk.pw")
override val userAgentKey = ConfigKey.UserAgent(UserAgents.KOTATSU)
private val preferredServerKey = ConfigKey.PreferredImageServer( override val configKeyDomain = ConfigKey.Domain("hentalk.pw")
override val userAgentKey = ConfigKey.UserAgent(UserAgents.KOTATSU)
private val preferredServerKey = ConfigKey.PreferredImageServer(
presetValues = mapOf( presetValues = mapOf(
SERVER_DATA to "Original quality", SERVER_DATA to "Original quality",
SERVER_DATA_SAVER to "Compressed quality", SERVER_DATA_SAVER to "Compressed quality",
@ -40,292 +41,293 @@ internal class Hentalk(context: MangaLoaderContext) :
keys.add(preferredServerKey) keys.add(preferredServerKey)
} }
override val availableSortOrders: Set<SortOrder> = EnumSet.of( override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED, SortOrder.UPDATED,
SortOrder.NEWEST, SortOrder.NEWEST,
SortOrder.NEWEST_ASC, SortOrder.NEWEST_ASC,
SortOrder.ALPHABETICAL, SortOrder.ALPHABETICAL,
SortOrder.ALPHABETICAL_DESC, SortOrder.ALPHABETICAL_DESC,
) )
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isSearchSupported = true, isSearchSupported = true,
isMultipleTagsSupported = true, isMultipleTagsSupported = true,
isSearchWithFiltersSupported = true, isSearchWithFiltersSupported = true,
isAuthorSearchSupported = true isAuthorSearchSupported = true,
) )
override suspend fun getFilterOptions(): MangaListFilterOptions { override suspend fun getFilterOptions() = MangaListFilterOptions() // not found any URLs for it
return MangaListFilterOptions( availableTags = emptySet() ) // not found any URLs for it
/**
 * Loads one page of the catalogue from the site's `__data.json` endpoint.
 *
 * The response keeps its values in a flat `data` array: objects in it hold integer
 * indices that point at other slots of the same array rather than the values themselves.
 *
 * @param page page number; `&page=` is appended only when it is greater than 1
 * @param order sort order, mapped to the `sort` / `order` query parameters
 * @param filter query, tags and author used to build the `q` parameter
 * @return the entries found on the page, or an empty list when the server answers with
 * HTTP 500 or the expected `data` array is missing
 * @throws ParseException when the request fails with any other HTTP status; the original
 * [HttpStatusException] is attached as the cause
 */
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
    val url = buildString {
        append("https://")
        append(domain)
        append("/__data.json?x-sveltekit-trailing-slash=1&x-sveltekit-invalidated=001")

        val query = filter.query
        val artist = filter.author
        if (!query.isNullOrEmpty() || filter.tags.isNotEmpty() || !artist.isNullOrEmpty()) {
            append("&q=")
            if (!artist.isNullOrEmpty()) {
                append("artist:\"${space2plus(artist)}\"")
                append('+')
            }
            filter.tags.forEach { tag ->
                append("tag:\"${space2plus(tag.key)}\"")
                append('+')
            }
            if (!query.isNullOrEmpty()) {
                append(space2plus(query))
            } else {
                append('+')
            }
        }

        when (order) {
            SortOrder.UPDATED -> append("&sort=released_at")
            SortOrder.NEWEST_ASC -> append("&sort=created_at&order=asc")
            SortOrder.NEWEST -> append("&sort=created_at&order=desc")
            SortOrder.ALPHABETICAL -> append("&sort=title&order=asc")
            SortOrder.ALPHABETICAL_DESC -> append("&sort=title&order=desc")
            else -> {}
        }

        if (page > 1) {
            append("&page=")
            append(page)
        }
    }

    val json = try {
        webClient.httpGet(url).parseJson()
    } catch (e: HttpStatusException) {
        if (e.statusCode == HttpURLConnection.HTTP_INTERNAL_ERROR) {
            return emptyList()
        } else {
            // Keep the original exception so the status code and stack are not lost.
            throw ParseException("Can't get data from source!", url, e)
        }
    }

    val dataArray = json.getJSONArray("nodes")
        .optJSONObject(2)
        ?.optJSONArray("data")
        ?: return emptyList()

    // An entry describing a gallery is an object that carries all of these references.
    val requiredKeys = listOf("id", "hash", "title", "thumbnail", "tags")
    val mangaList = ArrayList<Manga>()
    for (index in 0 until dataArray.length()) {
        val entry = dataArray.opt(index) as? JSONObject ?: continue
        if (!requiredKeys.all { entry.has(it) }) continue

        val idRef = entry.getInt("id")
        val hashRef = entry.getInt("hash")
        val titleRef = entry.getInt("title")
        val thumbnailRef = entry.getInt("thumbnail")
        val tagsRef = entry.getInt("tags")

        val mangaId = dataArray.getLong(idRef)
        val key = dataArray.getString(hashRef)
        val title = dataArray.getString(titleRef)
        val idThumbnail = dataArray.getInt(thumbnailRef)
        val tagRefs = dataArray.optJSONArray(tagsRef)

        val tags = ArraySet<MangaTag>()
        var author: String? = null
        if (tagRefs != null) {
            for (i in 0 until tagRefs.length()) {
                // opt() yields null for an out-of-range reference, so no separate bounds check is needed.
                val tagObj = (dataArray.opt(tagRefs.getInt(i)) as? JSONObject)
                    ?.takeIf { it.has("namespace") }
                    ?: continue
                val namespace = dataArray.opt(tagObj.getInt("namespace"))?.toString()
                val name = dataArray.opt(tagObj.getInt("name"))?.toString()
                when {
                    namespace == "artist" -> author = name?.nullIfEmpty()
                    namespace == "tag" && name != null -> tags.add(
                        MangaTag(
                            key = name,
                            title = name,
                            source = source,
                        ),
                    )
                }
            }
        }

        mangaList.add(
            Manga(
                id = generateUid(mangaId),
                url = "/g/$mangaId/__data.json?x-sveltekit-invalidated=001",
                publicUrl = "https://$domain/g/$mangaId",
                title = title,
                altTitles = emptySet(),
                coverUrl = "https://$domain/image/$key/$idThumbnail?type=cover",
                largeCoverUrl = null,
                authors = setOfNotNull(author),
                tags = tags,
                state = null,
                description = null,
                contentRating = ContentRating.ADULT,
                source = source,
                rating = RATING_UNKNOWN,
            ),
        )
    }
    return mangaList
}
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { override suspend fun getDetails(manga: Manga): Manga {
val url = buildString { val json = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseJson()
append("https://") val mangaId = manga.url.substringAfter("/g/").substringBefore('/')
append(domain)
append("/__data.json?x-sveltekit-trailing-slash=1&x-sveltekit-invalidated=001") val dataArray = json.getJSONArray("nodes")
.optJSONObject(2)
when { ?.optJSONArray("data")
!filter.query.isNullOrEmpty() || filter.tags.isNotEmpty() || !filter.author.isNullOrEmpty() -> { ?: return manga
append("&q=")
var createdAt = ""
if (!filter.author.isNullOrEmpty()) {
append("artist:\"${space2plus(filter.author)}\"") for (i in 0 until dataArray.length()) {
append("+") val item = dataArray.opt(i)
} if (item is JSONObject && item.has("createdAt")) {
val addedAt = item.getInt("createdAt")
if (filter.tags.isNotEmpty()) { if (dataArray.length() > addedAt) {
filter.tags.forEach { tag -> createdAt = dataArray.optString(addedAt, "")
append("tag:\"${space2plus(tag.key)}\"") break
append("+") }
} }
} }
if (!filter.query.isNullOrEmpty()) { val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
append(space2plus(filter.query)) val parseTime = dateFormat.tryParse(createdAt)
} else { val chapter = MangaChapter(
append("+") id = generateUid("/g/$mangaId/read/1"),
} url = "/g/$mangaId/read/1/__data.json?x-sveltekit-invalidated=011",
} title = "Oneshot", // for all, just has 1 chapter
} number = 0f,
uploadDate = parseTime,
when (order) { volume = 0,
SortOrder.UPDATED -> append("&sort=released_at") branch = null,
SortOrder.NEWEST_ASC -> append("&sort=created_at&order=asc") scanlator = null,
SortOrder.NEWEST -> append("&sort=created_at&order=desc") source = source,
SortOrder.ALPHABETICAL -> append("&sort=title&order=asc") )
SortOrder.ALPHABETICAL_DESC -> append("&sort=title&order=desc")
else -> {} return manga.copy(
} chapters = listOf(chapter),
)
if (page > 1) { }
append("&page=")
append(page)
}
}
val json = try {
webClient.httpGet(url).parseJson()
} catch (e: HttpStatusException) {
if (e.statusCode == HttpURLConnection.HTTP_INTERNAL_ERROR) {
return emptyList()
} else {
throw ParseException("Can't get data from source!", url)
}
}
val mangaList = mutableListOf<Manga>()
val dataValues = mutableMapOf<Int, Any>()
val dataArray = json.getJSONArray("nodes")
.optJSONObject(2)
?.optJSONArray("data")
?: return emptyList()
for (i in 0 until dataArray.length()) {
dataValues[i] = dataArray.get(i)
}
val archiveH = mutableListOf<Int>()
for (i in 0 until dataArray.length()) {
val item = dataArray.opt(i)
if (item is JSONObject && item.has("id") && item.has("hash") &&
item.has("title") && item.has("thumbnail") && item.has("tags")) {
archiveH.add(i)
}
}
for (tempIndex in archiveH) {
val temp = dataArray.getJSONObject(tempIndex)
val idRef = temp.getInt("id")
val hashRef = temp.getInt("hash")
val titleRef = temp.getInt("title")
val thumbnailRef = temp.getInt("thumbnail")
val tagsRef = temp.getInt("tags")
val mangaId = dataArray.getLong(idRef)
val key = dataArray.getString(hashRef)
val title = dataArray.getString(titleRef)
val idThumbnail = dataArray.getInt(thumbnailRef)
val tagsList = dataArray.optJSONArray(tagsRef)
val tags = mutableSetOf<MangaTag>()
var author: String? = null
if (tagsList != null) {
var i = 0
while (i < tagsList.length()) {
val tagRefIndex = tagsList.getInt(i)
if (dataValues.containsKey(tagRefIndex) &&
dataValues[tagRefIndex] is JSONObject &&
(dataValues[tagRefIndex] as JSONObject).has("namespace")) {
val nsObj = dataValues[tagRefIndex] as JSONObject
val nsIndex = nsObj.getInt("namespace")
val nameIndex = nsObj.getInt("name")
val nsValue = if (dataValues.containsKey(nsIndex)) dataValues[nsIndex].toString() else ""
val nameValue = if (dataValues.containsKey(nameIndex)) dataValues[nameIndex].toString() else ""
if (nsValue == "artist") {
author = nameValue
} else if (nsValue == "tag") {
tags.add(MangaTag(
key = nameValue,
title = nameValue,
source = source
))
}
}
i++
}
}
mangaList.add(Manga(
id = generateUid(mangaId),
url = "/g/$mangaId/__data.json?x-sveltekit-invalidated=001",
publicUrl = "https://$domain/g/$mangaId",
title = title,
altTitles = emptySet(),
coverUrl = "https://$domain/image/$key/$idThumbnail?type=cover",
largeCoverUrl = null,
authors = setOfNotNull(author),
tags = tags,
state = null,
description = null,
contentRating = ContentRating.ADULT,
source = source,
rating = RATING_UNKNOWN,
))
}
return mangaList
}
/**
 * Fetches the gallery's `__data.json` and attaches its single chapter to [manga].
 *
 * The upload date comes from the first object in the flat `data` array that has a
 * `createdAt` reference pointing inside that array.
 *
 * @return a copy of [manga] holding one "Oneshot" chapter, or a plain copy of [manga]
 * when node 2 or its `data` array is missing from the response
 */
override suspend fun getDetails(manga: Manga): Manga {
    val response = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseJson()
    val galleryId = manga.url.substringAfter("/g/").substringBefore("/")

    val data = response.getJSONArray("nodes")
        .optJSONObject(2)
        ?.optJSONArray("data")
    if (data == null) {
        return manga.copy()
    }

    // Lazy pipeline: stops at the first usable `createdAt` reference, like an early `break`.
    val createdAt = (0 until data.length())
        .asSequence()
        .mapNotNull { data.opt(it) as? JSONObject }
        .filter { it.has("createdAt") }
        .map { it.getInt("createdAt") }
        .firstOrNull { it < data.length() }
        ?.let { data.optString(it, "") }
        .orEmpty()

    val uploadedAt = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US).tryParse(createdAt)

    val oneshot = MangaChapter(
        id = generateUid("/g/$galleryId/read/1"),
        url = "/g/$galleryId/read/1/__data.json?x-sveltekit-invalidated=011",
        title = "Oneshot", // every gallery is exposed as a single chapter
        number = 0f,
        uploadDate = uploadedAt,
        volume = 0,
        branch = null,
        scanlator = null,
        source = source,
    )
    return manga.copy(chapters = listOf(oneshot))
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson() val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson()
val dataArray = json.getJSONArray("nodes") val dataArray = json.getJSONArray("nodes")
.optJSONObject(2) .optJSONObject(2)
?.optJSONArray("data") ?.optJSONArray("data")
?: return emptyList() ?: return emptyList()
var compressID = "" var compressID = ""
for (i in 0 until dataArray.length()) { for (i in 0 until dataArray.length()) {
val item = dataArray.opt(i) val item = dataArray.opt(i)
if (item is JSONObject && item.has("hash")) { if (item is JSONObject && item.has("hash")) {
if (i < 20) { if (i < 20) {
val hashValue = dataArray.getString(item.getInt("hash")) val hashValue = dataArray.getString(item.getInt("hash"))
if (hashValue.length == 8) { if (hashValue.length == 8) {
compressID = hashValue compressID = hashValue
break break
} }
} }
} }
} }
var hashID = "" var hashID = ""
for (i in 0 until dataArray.length()) { for (i in 0 until dataArray.length()) {
val item = dataArray.opt(i) val item = dataArray.opt(i)
if (item is JSONObject && item.has("hash") && item.has("id")) { if (item is JSONObject && item.has("hash") && item.has("id")) {
val hashIndex = item.getInt("hash") val hashIndex = item.getInt("hash")
hashID = dataArray.getString(hashIndex) hashID = dataArray.getString(hashIndex)
break break
} }
} }
if (hashID.isEmpty()) { if (hashID.isEmpty()) {
for (i in 0 until dataArray.length()) { for (i in 0 until dataArray.length()) {
val item = dataArray.opt(i) val item = dataArray.opt(i)
if (item is JSONObject && item.has("gallery")) { if (item is JSONObject && item.has("gallery")) {
val galleryIndex = item.getInt("gallery") val galleryIndex = item.getInt("gallery")
val galleryTemplate = dataArray.optJSONObject(galleryIndex) val galleryTemplate = dataArray.optJSONObject(galleryIndex)
if (galleryTemplate != null && galleryTemplate.has("hash")) { if (galleryTemplate != null && galleryTemplate.has("hash")) {
val hashIndex = galleryTemplate.getInt("hash") val hashIndex = galleryTemplate.getInt("hash")
hashID = dataArray.getString(hashIndex) hashID = dataArray.getString(hashIndex)
break break
} }
} }
} }
} }
val imgList = mutableListOf<String>() val imgList = ArrayList<String>(dataArray.length())
for (i in 0 until dataArray.length()) { for (i in 0 until dataArray.length()) {
val item = dataArray.opt(i) val item = dataArray.opt(i)
if (item is JSONObject && item.has("filename")) { if (item is JSONObject && item.has("filename")) {
val filenameIndex = item.getInt("filename") val filenameIndex = item.getInt("filename")
if (dataArray.length() > filenameIndex) { if (dataArray.length() > filenameIndex) {
val filename = dataArray.optString(filenameIndex, "") val filename = dataArray.optString(filenameIndex, "")
if (filename.isNotEmpty()) { if (filename.isNotEmpty()) {
imgList.add(filename) imgList.add(filename)
} }
} }
} }
} }
val server = config[preferredServerKey] ?: SERVER_DATA val server = config[preferredServerKey] ?: SERVER_DATA
return imgList.map { imgEx -> return imgList.map { imgEx ->
val baseUrl = "https://$domain/image/$hashID/$imgEx" val baseUrl = "https://$domain/image/$hashID/$imgEx"
val imageUrl = when (server) { val imageUrl = when (server) {
SERVER_DATA -> baseUrl SERVER_DATA -> baseUrl
SERVER_DATA_SAVER -> baseUrl + SERVER_DATA_SAVER + compressID SERVER_DATA_SAVER -> baseUrl + SERVER_DATA_SAVER + compressID
else -> baseUrl else -> baseUrl
} }
MangaPage( MangaPage(
id = generateUid(imageUrl), id = generateUid(imageUrl),
url = imageUrl, url = imageUrl,
preview = null, preview = null,
source = source, source = source,
) )
} }
} }
/** Replaces every space in [input] with `+`. */
private fun space2plus(input: String): String = input.replace(' ', '+')
/** Returns [input] with each space character turned into a plus sign. */
private fun space2plus(input: String): String {
    val parts = input.split(' ')
    return parts.joinToString(separator = "+")
}
} }

@ -1,160 +1,160 @@
package org.koitharu.kotatsu.parsers.site.vi package org.koitharu.kotatsu.parsers.site.vi
import org.json.JSONArray import org.json.JSONArray
import org.json.JSONObject
import kotlinx.coroutines.async
import kotlinx.coroutines.coroutineScope
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser import org.koitharu.kotatsu.parsers.core.LegacyPagedMangaParser
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.* import org.koitharu.kotatsu.parsers.util.json.*
import java.text.SimpleDateFormat
import java.util.* import java.util.*
@MangaSourceParser("MIMIHENTAI", "MimiHentai", "vi", type = ContentType.HENTAI) @MangaSourceParser("MIMIHENTAI", "MimiHentai", "vi", type = ContentType.HENTAI)
internal class MimiHentai(context: MangaLoaderContext) : internal class MimiHentai(context: MangaLoaderContext) :
LegacyPagedMangaParser(context, MangaParserSource.MIMIHENTAI, 18) { LegacyPagedMangaParser(context, MangaParserSource.MIMIHENTAI, 18) {
private val apiSuffix = "api/v1/manga" private val apiSuffix = "api/v1/manga"
override val configKeyDomain = ConfigKey.Domain("mimihentai.com") override val configKeyDomain = ConfigKey.Domain("mimihentai.com")
override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED) override val availableSortOrders: Set<SortOrder> = EnumSet.of(SortOrder.UPDATED)
override suspend fun getFilterOptions() = MangaListFilterOptions(availableTags = fetchTags()) override val filterCapabilities: MangaListFilterCapabilities
override val filterCapabilities: MangaListFilterCapabilities get() = MangaListFilterCapabilities(
get() = MangaListFilterCapabilities( isSearchSupported = true,
isSearchSupported = true, isSearchWithFiltersSupported = true,
isSearchWithFiltersSupported = true, isMultipleTagsSupported = true,
isMultipleTagsSupported = true, isAuthorSearchSupported = true,
isAuthorSearchSupported = true )
)
init {
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { setFirstPage(0)
val url = buildString { }
append("https://")
append(domain) override suspend fun getFilterOptions() = MangaListFilterOptions(availableTags = fetchTags())
append("/$apiSuffix/advance-search?page=")
append(page - 1) // first page is 0, not 1 override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
append("&max=18") // page size, avoid rate limit val url = buildString {
when { append("https://")
!filter.query.isNullOrEmpty() -> { append(domain)
append("&name=") append("/$apiSuffix/advance-search?page=")
append(filter.query.urlEncoded()) append(page)
} append("&max=18") // page size, avoid rate limit
when {
!filter.author.isNullOrEmpty() -> { !filter.query.isNullOrEmpty() -> {
append("&author=") append("&name=")
append(filter.author.urlEncoded()) append(filter.query.urlEncoded())
} }
filter.tags.isNotEmpty() -> { !filter.author.isNullOrEmpty() -> {
append("&genre=") append("&author=")
append(filter.tags.joinToString(",") { it.key }) append(filter.author.urlEncoded())
} }
}
} filter.tags.isNotEmpty() -> {
append("&genre=")
val json = webClient.httpGet(url).parseJson() append(filter.tags.joinToString(",") { it.key })
val data = json.getJSONArray("data") }
return parseMangaList(data) }
} }
private suspend fun parseMangaList(data: JSONArray): List<Manga> { val json = webClient.httpGet(url).parseJson()
return data.mapJSON { jo -> val data = json.getJSONArray("data")
val id = jo.getLong("id") return parseMangaList(data)
val title = jo.getString("title") }
val description = jo.getString("description")
val authors = jo.getJSONArray("authors").asTypedList<String>().mapToSet { it } private fun parseMangaList(data: JSONArray): List<Manga> {
val differentNames = jo.getJSONArray("differentNames").asTypedList<String>().mapToSet { it } return data.mapJSON { jo ->
val state = when(description) { val id = jo.getLong("id")
"Đang Tiến Hành" -> MangaState.ONGOING val title = jo.getString("title")
"Hoàn Thành" -> MangaState.FINISHED val description = jo.getStringOrNull("description")
else -> null val authors = jo.getJSONArray("authors").asTypedList<String>().toSet()
} val differentNames = jo.getJSONArray("differentNames").asTypedList<String>().toSet()
val state = when (description) {
Manga( "Đang Tiến Hành" -> MangaState.ONGOING
id = generateUid(id), "Hoàn Thành" -> MangaState.FINISHED
title = title, else -> null
altTitles = differentNames, }
url = "/$apiSuffix/info/$id",
publicUrl = "https://$domain/g/$id", Manga(
rating = RATING_UNKNOWN, id = generateUid(id),
contentRating = ContentRating.ADULT, title = title,
coverUrl = jo.getString("coverUrl"), altTitles = differentNames,
tags = emptySet(), url = "/$apiSuffix/info/$id",
state = state, publicUrl = "https://$domain/g/$id",
authors = authors, rating = RATING_UNKNOWN,
source = source, contentRating = ContentRating.ADULT,
) coverUrl = jo.getString("coverUrl"),
} tags = emptySet(),
} state = state,
authors = authors,
override suspend fun getDetails(manga: Manga): Manga = coroutineScope { source = source,
val url = "https://" + domain + manga.url )
val json = webClient.httpGet(url).parseJson() }
}
val relationInfo = json.getJSONObject("relationInfo")
val tags = relationInfo.getJSONArray("genres").mapJSON { jo -> override suspend fun getDetails(manga: Manga): Manga {
MangaTag( val url = manga.url.toAbsoluteUrl(domain)
title = jo.getString("name"), val json = webClient.httpGet(url).parseJson()
key = jo.getLong("id").toString(),
source = source, val relationInfo = json.getJSONObject("relationInfo")
) val tags = relationInfo.getJSONArray("genres").mapJSONToSet { jo ->
}.toSet() MangaTag(
title = jo.getString("name").toTitleCase(sourceLocale),
val basicInfo = json.getJSONObject("basicInfo") key = jo.getLong("id").toString(),
val id = basicInfo.getLong("id") source = source,
val description = basicInfo.optString("fdescription").takeUnless { it.isNullOrEmpty() } )
val uploaderName = json.getString("uploaderName") }
val urlChaps = "https://$domain/$apiSuffix/gallery/$id"
val parseUrlChaps = async { JSONArray(webClient.httpGet(urlChaps).parseHtml().text()) } val basicInfo = json.getJSONObject("basicInfo")
val chapters = parseUrlChaps.await().mapJSON { jo -> val id = basicInfo.getLong("id")
MangaChapter( val description = basicInfo.getStringOrNull("fdescription")
id = generateUid(jo.getLong("id")), val uploaderName = json.getStringOrNull("uploaderName")
title = jo.getString("title"), val urlChaps = "https://$domain/$apiSuffix/gallery/$id"
number = jo.getInt("number").toFloat(), val parsedChapters = webClient.httpGet(urlChaps).parseJsonArray()
url = "/$apiSuffix/chapter?id=${jo.getLong("id")}", val chapters = parsedChapters.mapJSON { jo ->
uploadDate = 0L, MangaChapter(
source = source, id = generateUid(jo.getLong("id")),
scanlator = uploaderName, title = jo.getStringOrNull("title"),
branch = null, number = jo.getFloatOrDefault("number", 0f),
volume = 0 url = "/$apiSuffix/chapter?id=${jo.getLong("id")}",
) uploadDate = 0L,
} source = source,
scanlator = uploaderName,
manga.copy( branch = null,
tags = tags, volume = 0,
description = description, )
chapters = chapters }
)
} return manga.copy(
tags = tags,
description = description,
chapters = chapters,
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val json = webClient.httpGet("https://$domain${chapter.url}").parseJson() val json = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseJson()
val imageUrls = json.getJSONArray("pages").asTypedList<String>() val imageUrls = json.getJSONArray("pages").asTypedList<String>()
return imageUrls.map { url -> return imageUrls.map { url ->
MangaPage( MangaPage(
id = generateUid(url), id = generateUid(url),
url = url, url = url,
preview = null, preview = null,
source = source, source = source,
) )
} }
} }
private suspend fun fetchTags(): Set<MangaTag> { private suspend fun fetchTags(): Set<MangaTag> {
val url = "https://$domain/$apiSuffix/genres" val url = "https://$domain/$apiSuffix/genres"
val response = JSONArray(webClient.httpGet(url).parseHtml().text()) val response = webClient.httpGet(url).parseJsonArray()
return response.mapJSON { jo -> return response.mapJSONToSet { jo ->
MangaTag( MangaTag(
title = jo.getString("name"), title = jo.getString("name").toTitleCase(sourceLocale),
key = jo.getLong("id").toString(), key = jo.getLong("id").toString(),
source = source, source = source,
) )
}.toSet() }
} }
} }

@ -154,7 +154,7 @@ internal class MangaParserTest {
val parser = context.newParserInstance(source) val parser = context.newParserInstance(source)
val list = parser.getList(MangaSearchQuery.EMPTY) val list = parser.getList(MangaSearchQuery.EMPTY)
val manga = list[0] val manga = list.random()
parser.getDetails(manga).apply { parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" } assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }

Loading…
Cancel
Save