[Webtoon] Fixes (#1912)

master
Naga 10 months ago committed by GitHub
parent 553615b3ff
commit 079b2346f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,50 +1,32 @@
package org.koitharu.kotatsu.parsers.site.all package org.koitharu.kotatsu.parsers.site.all
import androidx.collection.arraySetOf
import kotlinx.coroutines.async import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.coroutineScope
import okhttp3.HttpUrl import org.jsoup.nodes.Element
import okhttp3.HttpUrl.Companion.toHttpUrl
import org.json.JSONArray
import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.LegacyMangaParser import org.koitharu.kotatsu.parsers.core.LegacyMangaParser
import org.koitharu.kotatsu.parsers.exception.NotFoundException
import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.* import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy import java.util.EnumSet
import java.util.*
import javax.crypto.Mac
import javax.crypto.spec.SecretKeySpec
internal abstract class WebtoonsParser( internal abstract class WebtoonsParser(
context: MangaLoaderContext, context: MangaLoaderContext,
source: MangaParserSource, source: MangaParserSource,
) : LegacyMangaParser(context, source) { ) : LegacyMangaParser(context, source) {
private val signer by lazy {
WebtoonsUrlSigner("gUtPzJFZch4ZyAGviiyH94P99lQ3pFdRTwpJWDlSGFfwgpr6ses5ALOxWHOIT7R1")
}
// we don't __really__ support changing this domain because:
// 1. I don't think other websites have this exact API
// 2. most communication is done with other domains (hosting API and static content), which are not configurable
// 3. we rely on the HTTP client setting the referer header to webtoons.com
//
// This effectively means that changing the domain will break the source. Yikes
override val configKeyDomain = ConfigKey.Domain("webtoons.com") override val configKeyDomain = ConfigKey.Domain("webtoons.com")
private val apiDomain = "global.apis.naver.com" private val mobileApiDomain = "m.webtoons.com"
private val staticDomain = "webtoon-phinf.pstatic.net" private val staticDomain = "webtoon-phinf.pstatic.net"
override val availableSortOrders: Set<SortOrder> = EnumSet.of( override val availableSortOrders: EnumSet<SortOrder> = EnumSet.of(
SortOrder.POPULARITY, // views SortOrder.POPULARITY,
SortOrder.RATING, // star rating SortOrder.RATING,
//SortOrder.LIKE, // likes
SortOrder.UPDATED, SortOrder.UPDATED,
) )
@ -53,10 +35,10 @@ internal abstract class WebtoonsParser(
isSearchSupported = true, isSearchSupported = true,
) )
override val userAgentKey = ConfigKey.UserAgent("nApps (Android 12;; linewebtoon; 3.1.0)") override val userAgentKey = ConfigKey.UserAgent("Mozilla/5.0 (Linux; Android 12; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36")
override suspend fun getFilterOptions() = MangaListFilterOptions( override suspend fun getFilterOptions() = MangaListFilterOptions(
availableTags = getAllGenreList().values.toSet(), availableTags = availableTags()
) )
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) { override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
@ -68,7 +50,6 @@ internal abstract class WebtoonsParser(
return page.url.toAbsoluteUrl(staticDomain) return page.url.toAbsoluteUrl(staticDomain)
} }
// some language tags do not map perfectly to the ones used by the API
private val languageCode: String private val languageCode: String
get() = when (val tag = sourceLocale.toLanguageTag()) { get() = when (val tag = sourceLocale.toLanguageTag()) {
"in" -> "id" "in" -> "id"
@ -76,222 +57,216 @@ internal abstract class WebtoonsParser(
else -> tag else -> tag
} }
private suspend fun fetchEpisodes(titleNo: Long): List<MangaChapter> = coroutineScope { private suspend fun fetchEpisodes(titleNo: Long) : List<MangaChapter> {
val firstResult = val url = "https://$mobileApiDomain/api/v1/webtoon/$titleNo/episodes?pageSize=99999"
makeRequest("/lineWebtoon/webtoon/episodeList.json?v=5&titleNo=$titleNo&startIndex=0&pageSize=30") val json = webClient.httpGet(url).parseJson()
val totalEpisodeCount = firstResult.getJSONObject("episodeList").getInt("totalServiceEpisodeCount") val episodeList = json.optJSONObject("result")?.optJSONArray("episodeList")
val episodes = firstResult.getJSONObject("episodeList").getJSONArray("episode").toJSONList().toMutableList() ?: throw ParseException("No episodes found for title $titleNo", url)
val additionalEpisodes = (episodes.size until totalEpisodeCount step 30).map { startIndex ->
async {
makeRequest("/lineWebtoon/webtoon/episodeList.json?v=5&titleNo=$titleNo&startIndex=$startIndex&pageSize=30").getJSONObject(
"episodeList",
).getJSONArray("episode").toJSONList()
}
}.awaitAll().flatten()
episodes.addAll(additionalEpisodes) return episodeList.mapChapters { _, jo ->
val episodeTitle = jo.getStringOrNull("episodeTitle") ?: ""
val episodeNo = jo.getInt("episodeNo")
val viewerLink = jo.getString("viewerLink")
// Optimize object creation and sorting
episodes.mapChapters { i, jo ->
MangaChapter( MangaChapter(
id = generateUid("$titleNo-$i"), id = generateUid("$titleNo-$episodeNo"),
title = jo.getStringOrNull("episodeTitle"), title = episodeTitle,
number = jo.getInt("episodeSeq").toFloat(), number = episodeNo.toFloat(),
volume = 0, volume = 0,
url = "$titleNo-${jo.get("episodeNo")}", url = viewerLink,
uploadDate = jo.getLong("registerYmdt"), uploadDate = jo.getLong("exposureDateMillis"),
branch = null, branch = null,
scanlator = null, scanlator = null,
source = source, source = source,
) )
}.sortedBy(MangaChapter::number) }.sortedBy(MangaChapter::number)
}
private fun JSONArray.toJSONList(): List<JSONObject> {
val list = mutableListOf<JSONObject>()
for (i in 0 until length()) {
list.add(getJSONObject(i))
}
return list
} }
override suspend fun getDetails(manga: Manga): Manga = coroutineScope { override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
val titleNo = manga.url.toLong() val titleNo = manga.url.toLong()
val chaptersDeferred = async { fetchEpisodes(titleNo) } val detailsUrl = manga.publicUrl.ifBlank {
val chapters = chaptersDeferred.await() "https://$domain/$languageCode/drama/placeholder/list?title_no=$titleNo"
makeRequest("/lineWebtoon/webtoon/titleInfo.json?titleNo=${titleNo}&anyServiceStatus=false").getJSONObject("titleInfo") }
.let { jo ->
val isNsfwSource = jo.getBooleanOrDefault("ageGradeNotice", isNsfwSource)
val author = jo.getStringOrNull("writingAuthorName")
MangaWebtoon(
Manga(
id = generateUid(titleNo),
title = jo.getString("title"),
altTitles = emptySet(),
url = "$titleNo",
publicUrl = "https://$domain/$languageCode/originals/a/list?title_no=${titleNo}",
rating = jo.getFloatOrDefault("starScoreAverage", -10f) / 10f,
contentRating = if (isNsfwSource) ContentRating.ADULT else null,
coverUrl = jo.getString("thumbnail").toAbsoluteUrl(staticDomain),
largeCoverUrl = jo.getStringOrNull("thumbnailVertical")?.toAbsoluteUrl(staticDomain),
tags = setOf(parseTag(jo.getJSONObject("genreInfo"))),
authors = setOfNotNull(author),
description = jo.getString("synopsis"),
// I don't think the API provides this info,
state = null,
chapters = chapters,
source = source,
),
date = jo.getLong("lastEpisodeRegisterYmdt"),
readCount = jo.getLong("readCount"),
//likeCount = jo.getLong("likeitCount"),
).manga
}
}
private val allGenreCache = suspendLazy { val doc = webClient.httpGet(detailsUrl).parseHtml()
makeRequest("/lineWebtoon/webtoon/genreList.json").getJSONObject("genreList").getJSONArray("genres")
.mapJSON { jo -> parseTag(jo) }.associateBy { tag -> tag.key }
}
private val allTitleCache = suspendLazy(soft = true) { val title = doc.select("meta[property='og:title']").attr("content")
makeRequest("/lineWebtoon/webtoon/titleList.json?").getJSONObject("titleList").getJSONArray("titles") .ifEmpty { doc.select("h1.subj, h3.subj").text().ifEmpty { manga.title } }
.mapJSON { jo ->
val titleNo = jo.getLong("titleNo") val description = listOf(
val isNsfwSource = jo.getBooleanOrDefault("ageGradeNotice", isNsfwSource) doc.select("meta[property='og:description']").attr("content"),
val author = jo.getStringOrNull("writingAuthorName") doc.select("#_asideDetail p.summary").text(),
MangaWebtoon( doc.select(".detail_header .summary").text()
Manga( ).firstOrNull { it.isNotBlank() }.orEmpty()
id = generateUid(titleNo),
url = titleNo.toString(), val coverUrl = doc.select("meta[property=\"og:image\"]").attr("content").let { url ->
publicUrl = "https://$domain/$languageCode/originals/a/list?title_no=$titleNo", if (url.isNotBlank()) url.toAbsoluteUrl(staticDomain) else manga.coverUrl
title = jo.getString("title"), }
coverUrl = jo.getString("thumbnail").toAbsoluteUrl(staticDomain),
altTitles = emptySet(), val author = listOf(
authors = setOfNotNull(author), doc.select("meta[property='com-linewebtoon:webtoon:author']").attr("content"),
contentRating = if (isNsfwSource) ContentRating.ADULT else null, doc.select(".detail_header .info .author").firstOrNull()?.text(),
rating = jo.getFloatOrDefault("starScoreAverage", -10f) / 10f, doc.select(".author_area").text()
tags = setOfNotNull(allGenreCache.get()[jo.getString("representGenre")]), ).firstOrNull { !it.isNullOrBlank() && it != "null" }
description = jo.getString("synopsis"),
state = null, val genreElements = doc.select(".detail_header .info .genre").ifEmpty {
source = source, doc.select("h2.genre")
), }
date = jo.getLong("lastEpisodeRegisterYmdt"), val genres = genreElements.map { it.text() }.toSet()
readCount = jo.getLong("readCount"),
//likeCount = jo.getLong("likeitCount"), val dayInfo = doc.select("#_asideDetail p.day_info").text().ifEmpty {
) doc.select(".day_info").text()
} }
val state = when {
dayInfo.contains("UP") || dayInfo.contains("EVERY") || dayInfo.contains("NOUVEAU") -> MangaState.ONGOING
dayInfo.contains("END") || dayInfo.contains("COMPLETED") || dayInfo.contains("TERMINÉ") -> MangaState.FINISHED
else -> null
}
val chapters = async { fetchEpisodes(titleNo) }.await()
Manga(
id = generateUid(titleNo),
title = title,
altTitles = emptySet(),
url = "$titleNo",
publicUrl = detailsUrl,
rating = RATING_UNKNOWN,
contentRating = null,
coverUrl = coverUrl,
largeCoverUrl = null,
tags = genres.map { genre -> MangaTag(title = genre, key = genre.lowercase(), source = source) }.toSet(),
authors = setOfNotNull(author.takeIf { it != "null" }),
description = description,
state = state,
chapters = chapters,
source = source,
)
} }
private suspend fun getAllGenreList(): Map<String, MangaTag> { private fun getSortOrderParam(order: SortOrder): String {
return allGenreCache.get() return when (order) {
SortOrder.POPULARITY -> "MANA"
SortOrder.RATING -> "LIKEIT"
SortOrder.UPDATED -> "UPDATE"
else -> "MANA"
}
} }
private suspend fun getAllTitleList(): List<MangaWebtoon> { private fun availableTags() = arraySetOf(
return allTitleCache.get() MangaTag("Action", "action", source),
MangaTag("Comedy", "comedy", source),
MangaTag("Drama", "drama", source),
MangaTag("Fantasy", "fantasy", source),
MangaTag("Horror", "horror", source),
MangaTag("Romance", "romance", source),
MangaTag("Sci-Fi", "sf", source),
MangaTag("Slice of Life", "slice_of_life", source),
MangaTag("Sports", "sports", source),
MangaTag("Supernatural", "supernatural", source),
MangaTag("Thriller", "thriller", source),
MangaTag("Historical", "historical", source),
MangaTag("Mystery", "mystery", source),
MangaTag("Superhero", "super_hero", source),
MangaTag("Heartwarming", "heartwarming", source),
MangaTag("Graphic Novel", "graphic_novel", source),
MangaTag("Informative", "tiptoon", source),
)
private val genreUrlMap: Map<String, String> = availableTags().associate {
it.title.lowercase() to it.key
} }
override suspend fun getList(offset: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { override suspend fun getList(offset: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val webtoons = when { val document = when {
!filter.query.isNullOrEmpty() -> { !filter.query.isNullOrEmpty() -> {
makeRequest("/lineWebtoon/webtoon/searchWebtoon?query=${filter.query.urlEncoded()}").getJSONObject("webtoonSearch") val searchUrl = "https://$domain/$languageCode/search?keyword=${filter.query.urlEncoded()}"
.getJSONArray("titleList").mapJSON { jo -> webClient.httpGet(searchUrl).parseHtml()
val titleNo = jo.getLong("titleNo") }
val author = jo.getStringOrNull("writingAuthorName") filter.tags.isNotEmpty() -> {
MangaWebtoon( val selectedGenre = filter.tags.first()
Manga( val genreUrlPath = genreUrlMap[selectedGenre.key] ?: selectedGenre.key
id = generateUid(titleNo), val sortParam = getSortOrderParam(order)
title = jo.getString("title"), val genreUrl = "https://$domain/$languageCode/genres/$genreUrlPath?sortOrder=$sortParam"
altTitles = emptySet(), webClient.httpGet(genreUrl).parseHtml()
url = titleNo.toString(),
publicUrl = "https://$domain/$languageCode/originals/a/list?title_no=$titleNo",
rating = RATING_UNKNOWN,
contentRating = if (isNsfwSource) ContentRating.ADULT else null,
coverUrl = jo.getString("thumbnail").toAbsoluteUrl(staticDomain),
largeCoverUrl = null,
tags = emptySet(),
authors = setOfNotNull(author),
description = null,
state = null,
source = source,
),
date = 0L,
readCount = 0L,
)
}
} }
else -> { else -> {
val genre = filter.tags.oneOrThrowIfMany()?.key ?: "ALL" val rankingType = when (order) {
SortOrder.POPULARITY -> "popular"
val genres = getAllGenreList() SortOrder.RATING -> "trending"
var result = getAllTitleList() SortOrder.UPDATED -> "originals"
else -> "popular"
if (genre != "ALL") {
result = result.filter { it.manga.tags.contains(genres[genre]) }
}
when (order) {
SortOrder.UPDATED -> result.sortedByDescending { it.date }
SortOrder.POPULARITY -> result.sortedByDescending { it.readCount }
SortOrder.RATING -> result.sortedByDescending { it.manga.rating }
//SortOrder.LIKE -> result.sortedBy { it.likeitCount }
else -> throw IllegalArgumentException("Unsupported sort order: $order")
} }
val rankingUrl = "https://$domain/$languageCode/ranking/$rankingType"
webClient.httpGet(rankingUrl).parseHtml()
} }
} }
return webtoons.map { it.manga }.subList(offset, (offset + 20).coerceAtMost(webtoons.size))
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { val selectedGenreForManga = if (filter.tags.isNotEmpty()) filter.tags.first() else null
val (titleNo, episodeNo) = requireNotNull(chapter.url.splitTwoParts('-'))
return makeRequest("/lineWebtoon/webtoon/episodeInfo.json?v=4&titleNo=$titleNo&episodeNo=$episodeNo").getJSONObject( return document.select(".webtoon_list li a, .card_wrap .card_item a")
"episodeInfo", .map { element -> createMangaFromElement(element, source, selectedGenreForManga) }
).getJSONArray("imageInfo").mapJSONIndexed { i, jo -> .drop(offset)
MangaPage( .take(20)
id = generateUid("$titleNo-$episodeNo-$i"),
url = jo.getString("url"),
preview = null,
source = source,
)
}
} }
private fun parseTag(jo: JSONObject): MangaTag { private fun createMangaFromElement(element: Element, source: MangaParserSource, selectedGenre: MangaTag? = null): Manga {
return MangaTag( val href = element.absUrl("href")
title = jo.getString("name"), val titleNo = extractTitleNoFromUrl(href)
key = jo.getString("code"), val title = element.select(".title, .card_title").text()
val thumbnailUrl = element.select("img").attr("src")
return Manga(
id = generateUid(titleNo),
title = title,
altTitles = emptySet(),
url = titleNo.toString(),
publicUrl = href,
rating = RATING_UNKNOWN,
contentRating = null,
coverUrl = thumbnailUrl.toAbsoluteUrl(staticDomain),
largeCoverUrl = null,
tags = selectedGenre?.let { setOf(it) } ?: emptySet(),
authors = emptySet(),
description = null,
state = null,
source = source, source = source,
) )
} }
private suspend fun makeRequest(url: String): JSONObject { private fun extractTitleNoFromUrl(url: String): Long {
val resp = webClient.httpGet(finalizeUrl(url)) return Regex("title_no=(\\d+)").find(url)?.groupValues?.get(1)?.toLong()
val message: JSONObject? = resp.parseJson().optJSONObject("message") ?: throw ParseException("Could not extract title_no from URL: $url", url)
return when (resp.code) {
in 200..299 -> checkNotNull(message).getJSONObject("result")
404 -> throw NotFoundException(message?.getStringOrNull("message").orEmpty(), url)
else -> {
val code = message?.getIntOrDefault("code", 0)
val errorMessage = message?.getStringOrNull("message")
throw ParseException("Api error (code=$code): $errorMessage", url)
}
}
} }
private fun finalizeUrl(url: String): HttpUrl { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val httpUrl = url.toAbsoluteUrl(apiDomain).toHttpUrl() val doc = try {
val builder = httpUrl.newBuilder().addQueryParameter("serviceZone", "GLOBAL") val absUrl = chapter.url.toAbsoluteUrl(domain)
if (httpUrl.queryParameter("v") == null) { webClient.httpGet(absUrl).parseHtml()
builder.addQueryParameter("v", "1") } catch (e: Exception) {
throw ParseException("Failed to get pages for chapter: ${chapter.title}", chapter.url, e)
} }
builder.addQueryParameter("language", languageCode).addQueryParameter("locale", "languageCode")
.addQueryParameter("platform", "APP_ANDROID") fun extractImages(selector: String, attr: String = "data-url"): List<MangaPage> {
signer.makeEncryptUrl(builder) return doc.select(selector).mapIndexedNotNull { i, element ->
return builder.build() val url = element.attr(attr).takeIf { it.isNotBlank() }
?: element.attr("src").takeIf { it.contains(staticDomain) }
?: return@mapIndexedNotNull null
MangaPage(
id = generateUid("${chapter.id}-$i"),
url = url,
preview = null,
source = source
)
}
}
return extractImages("div#_imageList > img")
.ifEmpty { extractImages("canvas[data-url]") }
.ifEmpty { extractImages("img[src*='$staticDomain'], img[data-url*='$staticDomain']") }
.ifEmpty { throw ParseException("No images found in chapter.", chapter.url) }
} }
@MangaSourceParser("WEBTOONS_EN", "Webtoons English", "en", type = ContentType.MANGA) @MangaSourceParser("WEBTOONS_EN", "Webtoons English", "en", type = ContentType.MANGA)
@ -310,37 +285,8 @@ internal abstract class WebtoonsParser(
class Thai(context: MangaLoaderContext) : WebtoonsParser(context, MangaParserSource.WEBTOONS_TH) class Thai(context: MangaLoaderContext) : WebtoonsParser(context, MangaParserSource.WEBTOONS_TH)
@MangaSourceParser("WEBTOONS_ZH", "Webtoons Chinese", "zh", type = ContentType.MANGA) @MangaSourceParser("WEBTOONS_ZH", "Webtoons Chinese", "zh", type = ContentType.MANGA)
class Chinese(context: MangaLoaderContext) : LineWebtoonsParser(context, MangaParserSource.WEBTOONS_ZH) class Chinese(context: MangaLoaderContext) : WebtoonsParser(context, MangaParserSource.WEBTOONS_ZH)
@MangaSourceParser("WEBTOONS_DE", "Webtoons German", "de", type = ContentType.MANGA) @MangaSourceParser("WEBTOONS_DE", "Webtoons German", "de", type = ContentType.MANGA)
class German(context: MangaLoaderContext) : LineWebtoonsParser(context, MangaParserSource.WEBTOONS_DE) class German(context: MangaLoaderContext) : WebtoonsParser(context, MangaParserSource.WEBTOONS_DE)
private inner class WebtoonsUrlSigner(private val secret: String) {
private val mac = Mac.getInstance("HmacSHA1").apply {
this.init(SecretKeySpec(secret.encodeToByteArray(), "HmacSHA1"))
}
private fun getMessage(url: String, msgpad: String): String {
return url.substring(0, 0xFF.coerceAtMost(url.length)) + msgpad
}
private fun getMessageDigest(s: String): String {
val signedMessage = synchronized(mac) { mac.doFinal(s.toByteArray()) }
return context.encodeBase64(signedMessage)
}
fun makeEncryptUrl(urlBuilder: HttpUrl.Builder) {
val msgPad = Calendar.getInstance().timeInMillis.toString()
val digest = getMessageDigest(getMessage(urlBuilder.build().toString(), msgPad))
urlBuilder.addQueryParameter("msgpad", msgPad).addQueryParameter("md", digest)
// .addEncodedQueryParameter("md", digest.urlEncoded())
}
}
private class MangaWebtoon(
@JvmField val manga: Manga,
@JvmField val date: Long,
@JvmField val readCount: Long,
)
} }

Loading…
Cancel
Save