From fd9097017392e8cf9d94476a23b18ea4e2069adf Mon Sep 17 00:00:00 2001 From: Koitharu Date: Sun, 21 Apr 2024 16:31:22 +0300 Subject: [PATCH] [MadTheme] Improve pages parsing --- .../parsers/site/madtheme/MadthemeParser.kt | 35 +++++++++++-------- .../parsers/site/madtheme/en/MangaBuddy.kt | 30 +--------------- .../parsers/site/madtheme/en/MangaJinx.kt | 24 +------------ .../parsers/site/madtheme/en/ManhuaScan.kt | 19 +--------- 4 files changed, 23 insertions(+), 85 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/MadthemeParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/MadthemeParser.kt index e9b2ecef..7b458f29 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/MadthemeParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/MadthemeParser.kt @@ -225,14 +225,28 @@ internal abstract class MadthemeParser( override suspend fun getPages(chapter: MangaChapter): List { val fullUrl = chapter.url.toAbsoluteUrl(domain) val doc = webClient.httpGet(fullUrl).parseHtml() - - val regexPages = Regex("chapImages\\s*=\\s*'(.*)'") + val known = HashSet() + val result = ArrayList() + // html parsing + doc.select(selectPage).forEach { img -> + val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found") + if (known.add(url)) { + result += MangaPage( + id = generateUid(url), + url = url, + preview = null, + source = source, + ) + } + } + // js parsing + val regexPages = Regex("chapImages\\s*=\\s*['\"](.*?)['\"]") val pages = doc.select("script").firstNotNullOfOrNull { script -> regexPages.find(script.html())?.groupValues?.getOrNull(1) }?.split(',') - if (pages != null) { - return pages.map { url -> - MangaPage( + pages?.forEach { url -> + if (known.add(url)) { + result += MangaPage( id = generateUid(url), url = url, preview = null, @@ -240,16 +254,7 @@ internal abstract class MadthemeParser( ) } } - // fallback to html parisng - return doc.select(selectPage).map { 
img -> - val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found") - MangaPage( - id = generateUid(url), - url = url, - preview = null, - source = source, - ) - } + return result } protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long { diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaBuddy.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaBuddy.kt index 23a8331b..909099a1 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaBuddy.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaBuddy.kt @@ -2,37 +2,9 @@ package org.koitharu.kotatsu.parsers.site.madtheme.en import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser -import org.koitharu.kotatsu.parsers.model.MangaChapter -import org.koitharu.kotatsu.parsers.model.MangaPage import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser -import org.koitharu.kotatsu.parsers.util.domain -import org.koitharu.kotatsu.parsers.util.generateUid -import org.koitharu.kotatsu.parsers.util.parseHtml -import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow -import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl -import java.util.ArrayList @MangaSourceParser("MANGABUDDY", "MangaBuddy", "en") internal class MangaBuddy(context: MangaLoaderContext) : - MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com") { - - override suspend fun getPages(chapter: MangaChapter): List { - val chapterUrl = chapter.url.toAbsoluteUrl(domain) - val docs = webClient.httpGet(chapterUrl).parseHtml() - val script = docs.selectFirstOrThrow("script:containsData(chapImages)") - val images = script.data().substringAfter("'").substringBeforeLast("'").split(",") - val pages = ArrayList(images.size) - for (image in images) { - pages.add( - MangaPage( - id = generateUid(image), - url = image, 
- preview = null, - source = source, - ), - ) - } - return pages - } -} + MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaJinx.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaJinx.kt index fa8a1b8c..26ac2c4e 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaJinx.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/MangaJinx.kt @@ -4,16 +4,9 @@ import org.jsoup.nodes.Document import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.model.MangaChapter -import org.koitharu.kotatsu.parsers.model.MangaPage import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser -import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl -import org.koitharu.kotatsu.parsers.util.domain -import org.koitharu.kotatsu.parsers.util.generateUid -import org.koitharu.kotatsu.parsers.util.mapChapters -import org.koitharu.kotatsu.parsers.util.parseHtml -import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow -import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl +import org.koitharu.kotatsu.parsers.util.* import java.text.SimpleDateFormat @MangaSourceParser("MANGAJINX", "MangaJinx", "en") @@ -45,19 +38,4 @@ internal class MangaJinx(context: MangaLoaderContext) : ) } } - - override suspend fun getPages(chapter: MangaChapter): List { - val chapterUrl = chapter.url.toAbsoluteUrl(domain) - val docs = webClient.httpGet(chapterUrl).parseHtml() - val script = docs.selectFirstOrThrow("script:containsData(var chapImages)") - val images = script.data().substringAfter("= \"").substringBefore("\";").split(",") - return images.map { - MangaPage( - id = generateUid(it), - url = it, - preview = null, - source = source, - ) - } - } } diff --git 
a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/ManhuaScan.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/ManhuaScan.kt index 0d73749c..16b8f934 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/ManhuaScan.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madtheme/en/ManhuaScan.kt @@ -7,7 +7,7 @@ import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser import org.koitharu.kotatsu.parsers.util.* import java.text.SimpleDateFormat -import java.util.Locale +import java.util.* @MangaSourceParser("MANHUASCAN", "ManhuaScan.io", "en") internal class ManhuaScan(context: MangaLoaderContext) : @@ -120,21 +120,4 @@ internal class ManhuaScan(context: MangaLoaderContext) : ) } } - - override suspend fun getPages(chapter: MangaChapter): List { - val chapterUrl = chapter.url.toAbsoluteUrl(domain) - val docs = webClient.httpGet(chapterUrl).parseHtml() - - val script = docs.selectFirstOrThrow("script:containsData(var chapImages)") - val images = script.data().substringAfter("= \"").substringBefore("\";").split(",") - return images.map { - MangaPage( - id = generateUid(it), - url = it, - preview = null, - source = source, - ) - } - } - }