Batch parsers fixes

pull/168/head
Koitharu 3 years ago
parent e8d299782b
commit 413f4a2f10
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -25,8 +25,8 @@ abstract class MangaParser @InternalParsersApi constructor(
val config by lazy { context.getConfig(source) }
open val sourceLocale: Locale?
get() = source.locale?.let { Locale(it) }
open val sourceLocale: Locale
get() = source.locale?.let { Locale(it) } ?: Locale.ROOT
/**
* Provide default domain and available alternatives, if any.

@ -34,9 +34,9 @@ internal class CloneMangaParser(context: MangaLoaderContext) : PagedMangaParser(
val doc = webClient.httpGet(link).parseHtml()
val mangas = doc.getElementsByClass("comicPreviewContainer")
return mangas.mapNotNull { item ->
val attr = item.getElementsByClass("comicPreview").attr("style")
val background = item.selectFirstOrThrow(".comicPreview").styleValueOrNull("background")
val href = item.selectFirst("a")?.attrAsAbsoluteUrl("href") ?: return@mapNotNull null
val cover = attr.substring(attr.indexOf("site/themes"), attr.indexOf(")"))
val cover = background?.substring(background.indexOf("site/themes"), background.indexOf(")"))
Manga(
id = generateUid(href),
title = item.selectFirst("h3")?.text() ?: return@mapNotNull null,
@ -74,7 +74,7 @@ internal class CloneMangaParser(context: MangaLoaderContext) : PagedMangaParser(
scanlator = null,
branch = null,
uploadDate = 0L,
source = MangaSource.DUMMY,
source = source,
)
chapters.add(chapter)
}
@ -92,12 +92,10 @@ internal class CloneMangaParser(context: MangaLoaderContext) : PagedMangaParser(
url = imgUrl,
referer = imgUrl,
preview = null,
source = MangaSource.DUMMY,
source = source,
),
)
}
override suspend fun getTags(): Set<MangaTag> {
return emptySet()
}
override suspend fun getTags(): Set<MangaTag> = emptySet()
}

@ -1,5 +1,6 @@
package org.koitharu.kotatsu.parsers.site
import androidx.collection.ArrayMap
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
@ -16,7 +17,9 @@ class Manhwa18Parser(context: MangaLoaderContext) :
get() = ConfigKey.Domain("manhwa18.net", null)
override val sortOrders: Set<SortOrder>
get() = EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL)
get() = EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL, SortOrder.NEWEST)
private val tagsMap = SuspendLazy(::parseTags)
override suspend fun getFavicons(): Favicons {
return Favicons(
@ -33,9 +36,10 @@ class Manhwa18Parser(context: MangaLoaderContext) :
val author = cardInfoElement?.selectFirst("b:contains(Author(s))")?.parent()
?.select("a.btn")
?.joinToString(", ") { it.text() }
val availableTags = tagsMap.get()
val tags = cardInfoElement?.selectFirst("b:contains(Genre(s))")?.parent()
?.select("a.btn")
?.mapToSet { MangaTag(it.text(), it.text().lowercase(), MangaSource.MANHWA18) }
?.mapNotNullToSet { availableTags[it.text().lowercase(Locale.ENGLISH)] }
val state = cardInfoElement?.selectFirst("b:contains(Status)")?.parent()
?.selectFirst("a.btn")
?.let {
@ -104,34 +108,29 @@ class Manhwa18Parser(context: MangaLoaderContext) :
sortOrder: SortOrder,
): List<Manga> {
val sortQuery = when (sortOrder) {
SortOrder.ALPHABETICAL -> "name"
SortOrder.POPULARITY -> "views"
SortOrder.UPDATED -> "last_update"
SortOrder.ALPHABETICAL -> "az"
SortOrder.POPULARITY -> "top"
SortOrder.UPDATED -> "update"
SortOrder.NEWEST -> "new"
else -> ""
}
val sortType = if (sortOrder == SortOrder.ALPHABETICAL) "ASC" else "DESC"
val tagQuery = tags?.joinToString(",") { it.key }.orEmpty()
val url = buildString {
append("https://")
append(domain)
append("/manga-list.html?listType=pagination&page=")
append("/tim-kiem?page=")
append(page)
append("&artist=&author=&group=&m_status=&name=")
append(query?.urlEncoded().orEmpty())
append("&genre=$tagQuery")
append("&ungenre=")
if (!query.isNullOrEmpty()) {
append("&q=")
append(query.urlEncoded())
}
append("&accept_genres=$tagQuery")
append("&sort=")
append(sortQuery)
append("&sort_type=")
append(sortType)
}
val docs = webClient.httpGet(url).parseHtml()
val actualPage = docs.selectFirst("ul.pagination a.active")?.text()?.toIntOrNull()
if (actualPage != page) {
return emptyList()
}
return docs.select(".card-body .thumb-item-flow")
.map {
@ -158,9 +157,11 @@ class Manhwa18Parser(context: MangaLoaderContext) :
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
return webClient.httpGet(chapterUrl).parseHtml()
.select("div.chapter-content > img").map {
val url = it.attrAsRelativeUrlOrNull("src").orEmpty()
val doc = webClient.httpGet(chapterUrl).parseHtml()
return doc.requireElementById("chapter-content").select("img").mapNotNull {
val url = it.attrAsRelativeUrlOrNull("data-src")
?: it.attrAsRelativeUrlOrNull("src")
?: return@mapNotNull null
MangaPage(
id = generateUid(url),
url = url,
@ -172,14 +173,25 @@ class Manhwa18Parser(context: MangaLoaderContext) :
}
override suspend fun getTags(): Set<MangaTag> {
return webClient.httpGet("https://${domain}/").parseHtml().selectFirstOrThrow(".genres-menu")
.select("a.genres-item").orEmpty()
.mapToSet {
MangaTag(
title = it.text(),
key = it.text().lowercase(),
source = MangaSource.MANHWA18,
return tagsMap.get().values.toSet()
}
/**
 * Fetches the search page and builds a lookup table of the genres listed on it.
 *
 * Every element carrying a `data-genre-id` attribute becomes one [MangaTag]; the attribute
 * value is used as the tag key and the element text (title-cased) as the tag title.
 *
 * @return a map from the lowercased genre name to its tag, or an empty map when the page
 * contains no element with a `data-genre-id` attribute.
 */
private suspend fun parseTags(): Map<String, MangaTag> {
    val genreElements = webClient.httpGet("https://$domain/tim-kiem?q=")
        .parseHtml()
        .getElementsByAttribute("data-genre-id")
    if (genreElements.isEmpty()) {
        return emptyMap()
    }
    // A later element with the same lowercased name replaces an earlier one.
    return genreElements.associateTo(ArrayMap<String, MangaTag>(genreElements.size)) { element ->
        val genreId = element.attr("data-genre-id")
        val genreName = element.text()
        genreName.lowercase(Locale.ENGLISH) to MangaTag(
            title = genreName.toTitleCase(Locale.ENGLISH),
            key = genreId,
            source = source,
        )
    }
}
}

@ -170,7 +170,7 @@ class UnionMangasParser(context: MangaLoaderContext) : PagedMangaParser(context,
}
private fun Element.toMangaTag() = MangaTag(
title = text().toTitleCase(sourceLocale ?: Locale.ROOT),
title = text().toTitleCase(sourceLocale),
key = attr("href").removeSuffix('/').substringAfterLast('/'),
source = source,
)

@ -6,13 +6,14 @@ import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.model.MangaTag
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
import java.util.*
@MangaSourceParser("ISEKAISCAN_EU", "IsekaiScan (eu)", "en")
@MangaSourceParser("ISEKAISCAN_EU", "IsekaiScan", "en")
internal class IsekaiScanEuParser(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.ISEKAISCAN_EU, "isekaiscan.eu") {
MadaraParser(context, MangaSource.ISEKAISCAN_EU, "isekaiscan.to") {
override val datePattern = "MM/dd/yyyy"
@ -44,4 +45,29 @@ internal class IsekaiScanEuParser(context: MangaLoaderContext) :
)
}
}
/**
 * Loads the `/mangax/` page and collects the tags linked from its menus.
 *
 * Two containers are inspected: `ul.second-menu` inside `header` and `ul.list-unstyled`
 * inside `div.genres_wrap`. Parsing fails only when both are missing. The tag key is the
 * part of the link's `href` after the last occurrence of `tagPrefix`; items without a link,
 * with an empty key, or with a key that was already seen are skipped.
 */
override suspend fun getTags(): Set<MangaTag> {
    val doc = webClient.httpGet("https://$domain/mangax/").parseHtml()
    val body = doc.body()
    val headerMenu = body.selectFirst("header")?.selectFirst("ul.second-menu")
    val genresMenu = body.selectFirst("div.genres_wrap")?.selectFirst("ul.list-unstyled")
    if (headerMenu == null && genresMenu == null) {
        doc.parseFailed("Root not found")
    }
    val items = headerMenu?.select("li").orEmpty() + genresMenu?.select("li").orEmpty()
    // Tracks keys that were already emitted so that each key yields at most one tag.
    val seenKeys = HashSet<String>(items.size)
    return items.mapNotNullToSet { li ->
        val link = li.selectFirst("a") ?: return@mapNotNullToSet null
        val tagKey = link.attr("href")
            .removeSuffix("/")
            .substringAfterLast(tagPrefix, "")
        if (tagKey.isNotEmpty() && seenKeys.add(tagKey)) {
            MangaTag(
                key = tagKey,
                title = link.ownText().toTitleCase(Locale.ENGLISH),
                source = source,
            )
        } else {
            null
        }
    }
}
}

@ -126,7 +126,7 @@ abstract class Madara5Parser @InternalParsersApi constructor(
}
private suspend fun loadChapters(mangaId: Long): List<MangaChapter> {
val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.US)
val dateFormat = SimpleDateFormat(datePattern, sourceLocale)
val doc = webClient.httpGet("https://${domain}/ajax-list-chapter?mangaID=$mangaId").parseHtml()
return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li ->
val a = li.selectFirstOrThrow("a")
@ -166,7 +166,7 @@ abstract class Madara5Parser @InternalParsersApi constructor(
parseFailed("Cannot find tableValue for node ${text()}")
}
private fun String.asMangaState() = when (trim().lowercase(sourceLocale ?: Locale.US)) {
private fun String.asMangaState() = when (trim().lowercase(sourceLocale)) {
"ongoing" -> MangaState.ONGOING
"completed" -> MangaState.FINISHED
else -> null

@ -7,7 +7,6 @@ import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
import java.util.*
internal abstract class Madara6Parser(
context: MangaLoaderContext,
@ -46,7 +45,7 @@ internal abstract class Madara6Parser(
protected open suspend fun loadChapters(mangaUrl: String): List<MangaChapter> {
val url = mangaUrl.toAbsoluteUrl(domain).removeSuffix('/') + "/ajax/chapters/"
val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.ROOT)
val dateFormat = SimpleDateFormat(datePattern, sourceLocale)
val doc = webClient.httpPost(url, emptyMap()).parseHtml()
return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li ->
val a = li.selectFirstOrThrow("a")

@ -153,7 +153,7 @@ internal abstract class MadaraParser(
protected open suspend fun getChapters(manga: Manga, doc: Document): List<MangaChapter> {
val root2 = doc.body().selectFirstOrThrow("div.content-area")
.selectFirstOrThrow("div.c-page")
val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.US)
val dateFormat = SimpleDateFormat(datePattern, sourceLocale)
return root2.select("li").asReversed().mapChapters { i, li ->
val a = li.selectFirst("a")
val href = a?.attrAsRelativeUrlOrNull("href") ?: li.parseFailed("Link is missing")
@ -285,11 +285,11 @@ internal abstract class MadaraParser(
}
}
private fun Element.src(): String? {
protected fun Element.src(): String? {
var result = absUrl("data-src")
if (result.isEmpty()) result = absUrl("data-cfsrc")
if (result.isEmpty()) result = absUrl("src")
return if (result.isEmpty()) null else result
return result.ifEmpty { null }
}
private fun createRequestTemplate() =

@ -5,10 +5,7 @@ import kotlinx.coroutines.coroutineScope
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.model.MangaTag
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
import java.util.*
@ -16,6 +13,8 @@ import java.util.*
@MangaSourceParser("NEATMANGA", "NeatManga", "en")
internal class NeatManga(context: MangaLoaderContext) : MadaraParser(context, MangaSource.NEATMANGA, "neatmangas.com") {
override val datePattern = "dd MMMM yyyy"
override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
val chaptersDeferred = async { getChapters(manga) }
val fullUrl = manga.url.toAbsoluteUrl(domain)
@ -72,4 +71,26 @@ internal class NeatManga(context: MangaLoaderContext) : MadaraParser(context, Ma
)
}
}
/**
 * Loads the chapter page and extracts its images as [MangaPage]s.
 *
 * Pages are taken from the `div.page-break` elements inside
 * `div.main-col-inner div.reading-content`. A page break without an `img`, or whose `img`
 * has an empty `id` attribute, is skipped.
 *
 * @throws ParseException when the reading-content container is not found.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val fullUrl = chapter.url.toAbsoluteUrl(domain)
    val body = webClient.httpGet(fullUrl).parseHtml().body()
    val readingContent = body.selectFirst("div.main-col-inner")
        ?.selectFirst("div.reading-content")
        ?: throw ParseException("Root not found", fullUrl)
    return readingContent.select("div.page-break").mapNotNull { pageBreak ->
        val image = pageBreak.selectFirst("img")
            ?.takeUnless { it.attr("id").isNullOrEmpty() }
            ?: return@mapNotNull null
        // An accepted image without any usable source attribute is treated as a parse failure.
        val pageUrl = image.src()?.toRelativeUrl(domain) ?: pageBreak.parseFailed("Image src not found")
        MangaPage(
            id = generateUid(pageUrl),
            url = pageUrl,
            preview = null,
            referer = fullUrl,
            source = source,
        )
    }
}
}

@ -10,7 +10,6 @@ import org.koitharu.kotatsu.parsers.model.MangaState
import org.koitharu.kotatsu.parsers.util.attrAsAbsoluteUrlOrNull
import org.koitharu.kotatsu.parsers.util.mapToSet
import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
import java.util.*
@MangaSourceParser("PRISMA_SCANS", "Prisma Scans", "pt")
internal class PrismaScansParser(context: MangaLoaderContext) :
@ -43,7 +42,7 @@ internal class PrismaScansParser(context: MangaLoaderContext) :
)
}
override fun String.asMangaState() = when (trim().lowercase(sourceLocale ?: Locale.ROOT)) {
override fun String.asMangaState() = when (trim().lowercase(sourceLocale)) {
"em lançamento" -> MangaState.ONGOING
"completo",
"cancelado",

@ -223,6 +223,7 @@ internal abstract class MangaReaderParser(
/**
 * Resolves the image URL of this element as an absolute URL.
 *
 * The `src`, `data-src` and `data-cfsrc` attributes are tried in that order and the first
 * one that yields a URL wins; an empty string is returned when none of them does.
 */
private fun Element.imageUrl(): String =
    listOf("src", "data-src", "data-cfsrc")
        .firstNotNullOfOrNull { attributeKey -> attrAsAbsoluteUrlOrNull(attributeKey) }
        .orEmpty()
@ -258,6 +259,7 @@ internal abstract class MangaReaderParser(
override val configKeyDomain: ConfigKey.Domain
get() = ConfigKey.Domain("manhwaindo.id", null)
override val chapterDateFormat = SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH)
override val listUrl: String get() = "/series"
override val tableMode: Boolean get() = false
}
@ -499,7 +501,7 @@ internal abstract class MangaReaderParser(
override val tableMode: Boolean
get() = false
override val chapterDateFormat: SimpleDateFormat = SimpleDateFormat("MMM d, yyyy", Locale.ENGLISH)
override val chapterDateFormat: SimpleDateFormat = SimpleDateFormat("MMM d, yyyy", sourceLocale)
override suspend fun parseInfoList(docs: Document, manga: Manga, chapters: List<MangaChapter>): Manga {
val infoElement = docs.selectFirst("div.infox")
@ -542,7 +544,7 @@ internal abstract class MangaReaderParser(
class KomikLokalParser(context: MangaLoaderContext) :
MangaReaderParser(context, MangaSource.KOMIKLOKAL, pageSize = 20, searchPageSize = 10) {
override val configKeyDomain: ConfigKey.Domain
get() = ConfigKey.Domain("komiklokal.pics", null)
get() = ConfigKey.Domain("komikmirror.art", null)
override val listUrl: String
get() = "/manga"
@ -592,7 +594,7 @@ internal abstract class MangaReaderParser(
class KomikDewasaParser(context: MangaLoaderContext) :
MangaReaderParser(context, MangaSource.KOMIKDEWASA, pageSize = 20, searchPageSize = 10) {
override val configKeyDomain: ConfigKey.Domain
get() = ConfigKey.Domain("komikdewasa.club", null)
get() = ConfigKey.Domain("komikdewasa.info", null)
override val listUrl: String
get() = "/manga"
@ -656,7 +658,7 @@ internal abstract class MangaReaderParser(
class KomikMangaParser(context: MangaLoaderContext) :
MangaReaderParser(context, MangaSource.KOMIKMANGA, pageSize = 20, searchPageSize = 10) {
override val configKeyDomain: ConfigKey.Domain
get() = ConfigKey.Domain("komikmanga.xyz", null)
get() = ConfigKey.Domain("komikhentai.co", null)
override val listUrl: String
get() = "/project"

@ -32,7 +32,7 @@ fun Element.attrOrNull(attributeKey: String) = attr(attributeKey).takeUnless { i
*/
fun Element.attrAsRelativeUrlOrNull(attributeKey: String): String? {
val attr = attr(attributeKey).trim()
if (attr.isEmpty()) {
if (attr.isEmpty() || attr.startsWith("data:")) {
return null
}
if (attr.startsWith("/")) {
@ -63,7 +63,7 @@ fun Element.attrAsRelativeUrl(attributeKey: String): String {
*/
fun Element.attrAsAbsoluteUrlOrNull(attributeKey: String): String? {
val attr = attr(attributeKey).trim()
if (attr.isEmpty()) {
if (attr.isEmpty() || attr.startsWith("data:")) {
return null
}
return (baseUri().toHttpUrlOrNull()?.newBuilder(attr) ?: return null).toString()

@ -23,7 +23,7 @@ internal class MangaParserTest {
@MangaSources
fun list(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null)
checkMangaList(list, "list")
assert(list.all { it.source == source })
}
@ -46,7 +46,7 @@ internal class MangaParserTest {
@MangaSources
fun search(source: MangaSource) = runTest {
val parser = source.newParser(context)
val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull {
val subject = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull {
it.title.length
} ?: error("No manga found")
val query = subject.title
@ -84,7 +84,7 @@ internal class MangaParserTest {
@MangaSources
fun details(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list[3]
parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
@ -113,7 +113,7 @@ internal class MangaParserTest {
@MangaSources
fun pages(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list.first()
val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null")
val pages = parser.getPages(chapter)

Loading…
Cancel
Save