From 3e32a6280ac02ce32ac6894a26d1a13085306069 Mon Sep 17 00:00:00 2001
From: Koitharu
Date: Sun, 12 May 2024 13:12:51 +0300
Subject: [PATCH] [LuratoonScan] Rewrite parser

---
 .../parsers/site/madara/pt/DemonSect.kt     |   2 +-
 .../parsers/site/madara/pt/RandomScans.kt   |  36 -----
 .../parsers/site/pt/LuratoonScansParser.kt  | 150 ++++++++++++++++++
 .../kotatsu/parsers/util/FaviconParser.kt   |   6 +-
 .../koitharu/kotatsu/parsers/util/OkHttp.kt |   6 +
 5 files changed, 162 insertions(+), 38 deletions(-)
 delete mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/RandomScans.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LuratoonScansParser.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/DemonSect.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/DemonSect.kt
index 130471c0..d2c75cab 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/DemonSect.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/DemonSect.kt
@@ -7,4 +7,4 @@ import org.koitharu.kotatsu.parsers.site.madara.MadaraParser
 
 @MangaSourceParser("DEMONSECT", "DemonSect", "pt")
 internal class DemonSect(context: MangaLoaderContext) :
-	MadaraParser(context, MangaSource.DEMONSECT, "demonsect.com.br", 10)
+	MadaraParser(context, MangaSource.DEMONSECT, "dsectcomics.org", 10)
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/RandomScans.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/RandomScans.kt
deleted file mode 100644
index 7964ec7a..00000000
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/RandomScans.kt
+++ /dev/null
@@ -1,36 +0,0 @@
-package org.koitharu.kotatsu.parsers.site.madara.pt
-
-import org.jsoup.nodes.Document
-import org.koitharu.kotatsu.parsers.MangaLoaderContext
-import org.koitharu.kotatsu.parsers.MangaSourceParser
-import org.koitharu.kotatsu.parsers.model.Manga
-import org.koitharu.kotatsu.parsers.model.MangaSource
-import org.koitharu.kotatsu.parsers.site.madara.MadaraParser
-import org.koitharu.kotatsu.parsers.util.*
-
-@MangaSourceParser("RANDOMSCANS", "Luratoon Scan", "pt")
-internal class RandomScans(context: MangaLoaderContext) :
-	MadaraParser(context, MangaSource.RANDOMSCANS, "luratoon.com") {
-
-	override val listUrl = "todas-as-obras/"
-
-	override fun parseMangaList(doc: Document): List<Manga> {
-		return doc.selectFirstOrThrow(".all__comics").select(".comic").map { div ->
-			val href = div.selectFirst("a")?.attrAsRelativeUrlOrNull("href") ?: div.parseFailed("Link not found")
-			Manga(
-				id = generateUid(href),
-				url = href,
-				publicUrl = href.toAbsoluteUrl(div.host ?: domain),
-				coverUrl = div.selectFirst("img")?.src().orEmpty(),
-				title = div.selectFirstOrThrow("h2").text(),
-				altTitle = null,
-				rating = div.selectFirst("span.total_votes")?.ownText()?.toFloatOrNull()?.div(5f) ?: -1f,
-				tags = emptySet(),
-				author = null,
-				state = null,
-				source = source,
-				isNsfw = isNsfwSource,
-			)
-		}
-	}
-}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LuratoonScansParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LuratoonScansParser.kt
new file mode 100644
index 00000000..d3dd5e50
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/pt/LuratoonScansParser.kt
@@ -0,0 +1,150 @@
+package org.koitharu.kotatsu.parsers.site.pt
+
+import okhttp3.Interceptor
+import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.Response
+import okhttp3.ResponseBody.Companion.toResponseBody
+import org.json.JSONArray
+import org.koitharu.kotatsu.parsers.ErrorMessages
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaParser
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import java.text.SimpleDateFormat
+import java.util.zip.ZipInputStream
+
+@MangaSourceParser("RANDOMSCANS", "Luratoon Scan", "pt")
+internal class LuratoonScansParser(context: MangaLoaderContext) : MangaParser(context, MangaSource.RANDOMSCANS),
+	Interceptor {
+
+	override val availableSortOrders = setOf(SortOrder.ALPHABETICAL)
+
+	override val configKeyDomain = ConfigKey.Domain("luratoon.com")
+
+	override val isSearchSupported = false
+	override val isTagsExclusionSupported = false
+	override val isMultipleTagsSupported = false
+
+	override suspend fun getList(offset: Int, filter: MangaListFilter?): List<Manga> {
+		if (offset > 0) { // only the first request yields items; any later offset gets an empty list
+			return emptyList()
+		}
+		require(filter !is MangaListFilter.Search) { ErrorMessages.SEARCH_NOT_SUPPORTED }
+		val url = urlBuilder()
+		val tag = (filter as? MangaListFilter.Advanced)?.tags?.oneOrThrowIfMany()
+		if (tag == null) {
+			url.addPathSegment("todas-as-obras")
+		} else {
+			url.addPathSegment("pesquisar").addQueryParameter("category", tag.key)
+		}
+		val doc = webClient.httpGet(url.build()).parseHtml()
+		return doc.selectFirstOrThrow(".todas__as__obras").select(".comics__all__box").map { div ->
+			val a = div.selectFirstOrThrow("a")
+			val href = a.attrAsRelativeUrl("href")
+			Manga(
+				id = generateUid(href),
+				url = href,
+				publicUrl = href.toAbsoluteUrl(div.host ?: domain),
+				coverUrl = div.selectFirst("img")?.src().orEmpty(),
+				title = div.text(),
+				altTitle = null,
+				rating = RATING_UNKNOWN,
+				tags = emptySet(),
+				author = null,
+				state = null,
+				source = source,
+				isNsfw = false,
+			)
+		}
+	}
+
+	override suspend fun getDetails(manga: Manga): Manga {
+		val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body()
+		val summaryContainer = doc.selectFirstOrThrow(".sumario__container")
+		// Sample: "1 de Maio de 2024 às 20:15". Lowercase yyyy is the calendar year; uppercase YYYY is the week-based year.
+		val dateFormat = SimpleDateFormat("dd 'de' MMM 'de' yyyy 'às' HH:mm", sourceLocale)
+		return manga.copy(
+			title = doc.selectFirst("h1.desc__titulo__comic")?.textOrNull() ?: manga.title,
+			altTitle = summaryContainer.getElementsContainingOwnText("Alternativo").firstOrNull()
+				?.nextElementSibling()?.textOrNull(),
+			tags = summaryContainer.getElementsByAttributeValueContaining("href", "?category=").mapToSet {
+				MangaTag(
+					title = it.text().toTitleCase(sourceLocale),
+					key = it.attr("href").substringAfterLast('='),
+					source = source,
+				)
+			},
+			state = when (summaryContainer.getElementsContainingOwnText("Status").firstOrNull()
+				?.nextElementSibling()?.text()?.lowercase()) {
+				"em lançamento" -> MangaState.ONGOING
+				"hiato" -> MangaState.PAUSED
+				"finalizado" -> MangaState.FINISHED
+				else -> null
+			},
+			author = summaryContainer.getElementsContainingOwnText("Autor(es)").firstOrNull()
+				?.nextElementSibling()?.textOrNull(),
+			largeCoverUrl = doc.selectFirst("img.sumario__img")?.attrAsAbsoluteUrlOrNull("src"),
+			description = summaryContainer.selectFirst(".sumario__sinopse__texto")?.html(),
+			chapters = doc.selectFirstOrThrow("ul.capitulos__lista")
+				.select("li")
+				.mapChapters(reversed = true) { _, li ->
+					val href = li.parent()?.attrAsRelativeUrlOrNull("href") ?: return@mapChapters null // href is read from the parent of <li>
+					val span = li.selectFirstOrThrow(".numero__capitulo")
+					MangaChapter(
+						id = generateUid(href),
+						name = span.text(),
+						number = 0.0f,
+						volume = 0,
+						url = href,
+						scanlator = null,
+						uploadDate = dateFormat.tryParse(span.nextElementSibling()?.text()),
+						branch = null,
+						source = source,
+					)
+				},
+		)
+	}
+
+	override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+		val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
+		val regex = Regex("const\\s+urls\\s*=\\s*(\\[.*])") // group 1 is the array literal assigned to `const urls`
+		val urls = doc.select("script").firstNotNullOf {
+			regex.find(it.data())?.groupValues?.getOrNull(1)
+		}
+		val ja = JSONArray(urls)
+		return (0 until ja.length()).map { i ->
+			val url = ja.getString(i)
+			MangaPage(
+				id = generateUid(url),
+				url = url,
+				preview = null,
+				source = source,
+			)
+		}
+	}
+
+	override suspend fun getAvailableTags(): Set<MangaTag> = emptySet()
+
+	override fun intercept(chain: Interceptor.Chain): Response {
+		val response = chain.proceed(chain.request())
+		if (response.mimeType == "application/octet-stream") { // such bodies are read as a ZIP; its first entry becomes the new body
+			val (bytes, name) = ZipInputStream(checkNotNull(response.body).byteStream()).use {
+				val entry = it.nextEntry
+				it.readBytes() to entry?.name
+			}
+			val type = if (name?.endsWith(".avif", ignoreCase = true) == true) {
+				"image/avif"
+			} else {
+				"image/*"
+			}.toMediaTypeOrNull()
+			return response.newBuilder()
+				.setHeader("Content-Type", type?.toString())
+				.body(bytes.toResponseBody(type))
+				.build()
+		} else {
+			return response
+		}
+	}
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt
index b396c045..80d12b7c 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt
@@ -18,7 +18,11 @@ class FaviconParser(
 		val manifestLink = doc.getElementsByAttributeValue("rel", "manifest").firstOrNull()
 			?.attrAsAbsoluteUrlOrNull("href")
 		if (manifestLink != null) {
-			result += parseManifest(manifestLink)
+			runCatchingCancellable {
+				parseManifest(manifestLink)
+			}.onSuccess { manifest ->
+				result += manifest
+			}
 		}
 		val links = doc.getElementsByAttributeValueContaining("rel", "icon")
 		links.mapNotNullTo(result) { link ->
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/OkHttp.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/OkHttp.kt
index b8673cd5..4728cdad 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/OkHttp.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/OkHttp.kt
@@ -31,3 +31,9 @@ fun Headers.Builder.mergeWith(other: Headers, replaceExisting: Boolean): Headers
 fun Response.copy() = newBuilder()
 	.body(peekBody(Long.MAX_VALUE))
 	.build()
+
+fun Response.Builder.setHeader(name: String, value: String?) = if (value == null) {
+	removeHeader(name)
+} else {
+	header(name, value)
+}