[JapScan] New parser

source/neox^2
Koitharu 3 years ago
parent 8def9b5446
commit 66adc65a96
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -0,0 +1,193 @@
package org.koitharu.kotatsu.parsers.site
import okhttp3.Headers
import org.json.JSONObject
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.json.mapJSON
import java.text.SimpleDateFormat
import java.util.*
@MangaSourceParser("JAPSCAN", "JapScan", "fr")
internal class JapScanParser(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.JAPSCAN, 30) {
override val sortOrders: Set<SortOrder> = EnumSet.of(SortOrder.ALPHABETICAL)
override val configKeyDomain = ConfigKey.Domain("www.japscan.lol", arrayOf("www.japscan.lol", "japscan.ws"))
override val headers: Headers = Headers.Builder()
.add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0")
.build()
override suspend fun getListPage(
page: Int,
query: String?,
tags: Set<MangaTag>?,
sortOrder: SortOrder,
): List<Manga> {
require(tags.isNullOrEmpty()) { "Tags are not supported by this source" }
if (!query.isNullOrEmpty()) {
return if (page == paginator.firstPage) getListPageSearch(query) else emptyList()
}
val url = urlBuilder()
.addPathSegment("mangas")
.addPathSegment(page.toString())
.build()
val root = webClient.httpGet(url).parseHtml()
.requireElementById("main")
.selectFirstOrThrow(".flex-wrap")
return root.select(".mainTitle")
.map { p ->
val div = checkNotNull(p.parent())
val a = div.selectFirstOrThrow("a")
val href = a.attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
title = p.text(),
altTitle = null,
url = href,
publicUrl = href.toAbsoluteUrl(domain),
rating = RATING_UNKNOWN,
isNsfw = false,
coverUrl = div.selectFirstOrThrow("img").attrAsAbsoluteUrl("src"),
tags = setOf(),
state = null,
author = null,
source = source,
)
}
}
override suspend fun getDetails(manga: Manga): Manga {
val root = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().requireElementById("main")
val dateFormat = SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH)
return manga.copy(
altTitle = root.tableValue("Nom Original:"),
isNsfw = root.tableValue("Âge conseillé :")?.extractIntOrNull().let { it != null && it >= 18 },
tags = root.tableValue("Type(s):")?.split(", ")?.mapNotNullToSet {
it.toTag()
}.orEmpty() + root.tableValue("Genre(s):")?.split(", ")?.mapNotNullToSet {
it.toTag()
}.orEmpty(),
state = when (root.tableValue("Statut:")) {
"En Cours" -> MangaState.ONGOING
"Terminé", "Abondonné" -> MangaState.FINISHED
else -> null
},
author = root.tableValue("Artiste(s):")?.substringBefore(','),
description = root.selectFirst("p.list-group-item-primary")?.html(),
chapters = root.requireElementById("chapters_list")
.select("div.chapters_list")
.mapChapters(reversed = true) { i, div ->
val a = div.selectFirst("a") ?: return@mapChapters null
val href = a.attrAsRelativeUrl("href")
MangaChapter(
id = generateUid(href),
name = a.text(),
number = i,
url = href,
scanlator = null,
uploadDate = dateFormat.tryParse(div.selectFirst("span.float-right")?.text()),
branch = null,
source = source,
)
},
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(chapterUrl).parseHtml()
val scriptUrl = doc.getElementsByTag("script").firstNotNullOf { script ->
script.attrAsAbsoluteUrlOrNull("src")?.takeIf { it.contains("/zjs/") }
}
val embeddedData = doc.requireElementById("data").attr("data-data")
val script = webClient.httpGet(scriptUrl).parseRaw()
val sample = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".toList()
val keyRegex = Regex("""'([\dA-Z]{62})'""", RegexOption.IGNORE_CASE)
val keyTables = keyRegex.findAll(script)
.mapNotNullTo(ArrayList(2)) { match ->
match.groupValues[1].takeIf {
it.toList().sorted() == sample
}
}
check(keyTables.size == 2)
var error: Exception? = null
repeat(2) { i ->
val key = keyTables[i].zip(keyTables[1 - i]).toMap()
try {
val unscrambledData = embeddedData.map { key[it] ?: it }.joinToString("")
if (unscrambledData.startsWith("ey")) {
val array = JSONObject(context.decodeBase64(unscrambledData).toString(Charsets.UTF_8))
.getJSONArray("imagesLink")
val result = ArrayList<MangaPage>(array.length())
repeat(array.length()) { index ->
val url = array.getString(index)
result += MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
return result
}
} catch (e: Exception) {
error = e
}
}
throw (error ?: ParseException("Cannot decode pages list", chapterUrl))
}
override suspend fun getTags(): Set<MangaTag> {
return emptySet() // not supported
}
private suspend fun getListPageSearch(
query: String,
): List<Manga> {
val json = webClient.httpPost(
"https://$domain/live-search/",
mapOf("search" to query.urlEncoded()),
).parseJsonArray()
return json.mapJSON { jo ->
val url = jo.getString("url")
Manga(
id = generateUid(url),
title = jo.getString("name"),
altTitle = jo.getStringOrNull("alternate_names")?.substringBefore(','),
url = url,
publicUrl = url.toAbsoluteUrl(domain),
rating = RATING_UNKNOWN,
isNsfw = false,
coverUrl = jo.getString("image").toAbsoluteUrl(domain),
tags = emptySet(),
state = null,
author = null,
source = source,
)
}
}
private fun Element.tableValue(label: String): String? {
return getElementsMatchingOwnText(label).firstOrNull()?.parent()?.ownTextOrNull()
}
private fun String.extractIntOrNull(): Int? = this.filter(Char::isDigit).toIntOrNull()
private fun String.toTag() = MangaTag(
title = this.toTitleCase(sourceLocale),
key = this.replace(' ', '-').lowercase(sourceLocale),
source = source,
)
}

@ -40,8 +40,7 @@ internal abstract class GroupleParser(
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
)
override val headers: Headers
get() = Headers.Builder()
override val headers: Headers = Headers.Builder()
.add("User-Agent", config[userAgentKey])
.build()

@ -115,3 +115,7 @@ fun Element.selectLast(cssQuery: String): Element? {
fun Element.selectLastOrThrow(cssQuery: String): Element {
return selectLast(cssQuery) ?: throw ParseException("Cannot find \"$cssQuery\"", baseUri())
}
fun Element.textOrNull(): String? = text().takeUnless { it.isEmpty() }
fun Element.ownTextOrNull(): String? = ownText().takeUnless { it.isEmpty() }

@ -46,6 +46,12 @@ fun Response.parseJsonArray(): JSONArray = try {
closeQuietly()
}
fun Response.parseRaw(): String = try {
requireBody().string()
} finally {
closeQuietly()
}
/**
* Convert url to relative if it is on [domain]
* @return an url relative to the [domain] or absolute, if domain is mismatching

Loading…
Cancel
Save