[MadTheme] Improve pages parsing

Koitharu 2 years ago
parent 75a55e4748
commit fd90970173
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -225,14 +225,28 @@ internal abstract class MadthemeParser(
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.toAbsoluteUrl(domain) val fullUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml() val doc = webClient.httpGet(fullUrl).parseHtml()
val known = HashSet<String>()
val regexPages = Regex("chapImages\\s*=\\s*'(.*)'") val result = ArrayList<MangaPage>()
// html parsing
doc.select(selectPage).forEach { img ->
val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
if (known.add(url)) {
result += MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
}
// js parsing
val regexPages = Regex("chapImages\\s*=\\s*['\"](.*?)['\"]")
val pages = doc.select("script").firstNotNullOfOrNull { script -> val pages = doc.select("script").firstNotNullOfOrNull { script ->
regexPages.find(script.html())?.groupValues?.getOrNull(1) regexPages.find(script.html())?.groupValues?.getOrNull(1)
}?.split(',') }?.split(',')
if (pages != null) { pages?.forEach { url ->
return pages.map { url -> if (known.add(url)) {
MangaPage( result += MangaPage(
id = generateUid(url), id = generateUid(url),
url = url, url = url,
preview = null, preview = null,
@ -240,16 +254,7 @@ internal abstract class MadthemeParser(
) )
} }
} }
// fallback to html parsing return result
return doc.select(selectPage).map { img ->
val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
} }
protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long { protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {

@ -2,37 +2,9 @@ package org.koitharu.kotatsu.parsers.site.madtheme.en
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import java.util.ArrayList
@MangaSourceParser("MANGABUDDY", "MangaBuddy", "en") @MangaSourceParser("MANGABUDDY", "MangaBuddy", "en")
internal class MangaBuddy(context: MangaLoaderContext) : internal class MangaBuddy(context: MangaLoaderContext) :
MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com") { MadthemeParser(context, MangaSource.MANGABUDDY, "mangabuddy.com")
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
val docs = webClient.httpGet(chapterUrl).parseHtml()
val script = docs.selectFirstOrThrow("script:containsData(chapImages)")
val images = script.data().substringAfter("'").substringBeforeLast("'").split(",")
val pages = ArrayList<MangaPage>(images.size)
for (image in images) {
pages.add(
MangaPage(
id = generateUid(image),
url = image,
preview = null,
source = source,
),
)
}
return pages
}
}

@ -4,16 +4,9 @@ import org.jsoup.nodes.Document
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaChapter import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.mapChapters
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
@MangaSourceParser("MANGAJINX", "MangaJinx", "en") @MangaSourceParser("MANGAJINX", "MangaJinx", "en")
@ -45,19 +38,4 @@ internal class MangaJinx(context: MangaLoaderContext) :
) )
} }
} }
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
val docs = webClient.httpGet(chapterUrl).parseHtml()
val script = docs.selectFirstOrThrow("script:containsData(var chapImages)")
val images = script.data().substringAfter("= \"").substringBefore("\";").split(",")
return images.map {
MangaPage(
id = generateUid(it),
url = it,
preview = null,
source = source,
)
}
}
} }

@ -7,7 +7,7 @@ import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser import org.koitharu.kotatsu.parsers.site.madtheme.MadthemeParser
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.Locale import java.util.*
@MangaSourceParser("MANHUASCAN", "ManhuaScan.io", "en") @MangaSourceParser("MANHUASCAN", "ManhuaScan.io", "en")
internal class ManhuaScan(context: MangaLoaderContext) : internal class ManhuaScan(context: MangaLoaderContext) :
@ -120,21 +120,4 @@ internal class ManhuaScan(context: MangaLoaderContext) :
) )
} }
} }
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
val docs = webClient.httpGet(chapterUrl).parseHtml()
val script = docs.selectFirstOrThrow("script:containsData(var chapImages)")
val images = script.data().substringAfter("= \"").substringBefore("\";").split(",")
return images.map {
MangaPage(
id = generateUid(it),
url = it,
preview = null,
source = source,
)
}
}
} }

Loading…
Cancel
Save