[MadTheme] Improve pages parsing

Koitharu 2 years ago
parent 75a55e4748
commit fd90970173
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -225,14 +225,28 @@ internal abstract class MadthemeParser(
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml()
val regexPages = Regex("chapImages\\s*=\\s*'(.*)'")
val known = HashSet<String>()
val result = ArrayList<MangaPage>()
// html parsing
doc.select(selectPage).forEach { img ->
val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
if (known.add(url)) {
result += MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
}
// js parsing
val regexPages = Regex("chapImages\\s*=\\s*['\"](.*?)['\"]")
val pages = doc.select("script").firstNotNullOfOrNull { script ->
regexPages.find(script.html())?.groupValues?.getOrNull(1)
}?.split(',')
if (pages != null) {
return pages.map { url ->
MangaPage(
pages?.forEach { url ->
if (known.add(url)) {
result += MangaPage(
id = generateUid(url),
url = url,
preview = null,
@ -240,16 +254,7 @@ internal abstract class MadthemeParser(
)
}
}
// fallback to html parsing
return doc.select(selectPage).map { img ->
val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
return result
}
protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {

@ -2,37 +2,9 @@ package org.koitharu.kotatsu.parsers.site.madtheme.en
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import java.util.ArrayList
@MangaSourceParser("MANGABUDDY", "MangaBuddy", "en")
internal class MangaBuddy(context: MangaLoaderContext) :
MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com") {
/**
 * Fetches the chapter document and builds its page list from the first script
 * whose data contains `chapImages`.
 *
 * The image list is taken as the text between the first and the last single
 * quote of the script data, split on commas; each entry becomes one [MangaPage].
 *
 * @param chapter chapter whose `url` is resolved against [domain] and requested.
 * @return one page per comma-separated entry, in script order.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val document = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
    val scriptData = document.selectFirstOrThrow("script:containsData(chapImages)").data()
    // Everything between the first and the last single quote is the comma-separated URL list.
    // NOTE(review): if the script contains no single quote, the stdlib substring helpers
    // return their receiver unchanged, so the whole script text would be split instead.
    val quotedList = scriptData.substringAfter("'").substringBeforeLast("'")
    return quotedList.split(",").map { imageUrl ->
        MangaPage(
            id = generateUid(imageUrl),
            url = imageUrl,
            preview = null,
            source = source,
        )
    }
}
}
MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com")

@ -4,16 +4,9 @@ import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.mapChapters
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
@MangaSourceParser("MANGAJINX", "MangaJinx", "en")
@ -45,19 +38,4 @@ internal class MangaJinx(context: MangaLoaderContext) :
)
}
}
/**
 * Fetches the chapter document and builds its page list from the first script
 * whose data contains `var chapImages`.
 *
 * The image list is the text after the first `= "` and before the next `";`
 * in the script data, split on commas.
 *
 * @param chapter chapter whose `url` is resolved against [domain] and requested.
 * @return one [MangaPage] per comma-separated entry, in script order.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val document = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
    val scriptData = document.selectFirstOrThrow("script:containsData(var chapImages)").data()
    // Cut out the double-quoted value of the assignment, then split it into URLs.
    val imageUrls = scriptData
        .substringAfter("= \"")
        .substringBefore("\";")
        .split(",")
    val pages = ArrayList<MangaPage>(imageUrls.size)
    for (imageUrl in imageUrls) {
        pages.add(
            MangaPage(
                id = generateUid(imageUrl),
                url = imageUrl,
                preview = null,
                source = source,
            ),
        )
    }
    return pages
}
}

@ -7,7 +7,7 @@ import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
import java.util.Locale
import java.util.*
@MangaSourceParser("MANHUASCAN", "ManhuaScan.io", "en")
internal class ManhuaScan(context: MangaLoaderContext) :
@ -120,21 +120,4 @@ internal class ManhuaScan(context: MangaLoaderContext) :
)
}
}
/**
 * Loads the chapter document and turns the `chapImages` script value into pages.
 *
 * The script is located with the selector `script:containsData(var chapImages)`;
 * the text between the first `= "` and the following `";` is split on commas and
 * every piece is used as a page URL as-is.
 *
 * @param chapter chapter whose `url` is resolved against [domain] and requested.
 * @return one [MangaPage] per comma-separated entry, in script order.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val absoluteUrl = chapter.url.toAbsoluteUrl(domain)
    val document = webClient.httpGet(absoluteUrl).parseHtml()
    val imagesScript = document.selectFirstOrThrow("script:containsData(var chapImages)")
    // Step 1: drop everything up to and including the opening `= "`.
    val afterAssignment = imagesScript.data().substringAfter("= \"")
    // Step 2: keep only the part before the closing `";`.
    val joinedUrls = afterAssignment.substringBefore("\";")
    return joinedUrls.split(",").map { pageUrl ->
        MangaPage(
            id = generateUid(pageUrl),
            url = pageUrl,
            preview = null,
            source = source,
        )
    }
}
}

Loading…
Cancel
Save