Rework WpComicsParser

Adds sources
master
devi 2 years ago
parent 7bcc624a74
commit 53ca9c9677

@ -31,7 +31,6 @@ internal class AsuraScansParser(context: MangaLoaderContext) :
override val isMultipleTagsSupported = true
// &types=-1&order=desc
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
val url = buildString {
append("https://")
@ -72,7 +71,7 @@ internal class AsuraScansParser(context: MangaLoaderContext) :
SortOrder.NEWEST -> append("latest")
SortOrder.ALPHABETICAL_DESC -> append("desc")
SortOrder.ALPHABETICAL -> append("asc")
else -> append("Updated")
else -> append("update")
}
}

@ -0,0 +1,13 @@
package org.koitharu.kotatsu.parsers.site.madara.pt
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.ContentType
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.site.madara.MadaraParser
/**
 * Parser for the VillainessScan source hosted at `villainessscan.xyz`.
 *
 * Registered through [MangaSourceParser] under the id `VILLAINESSSCAN` with the
 * `"pt"` tag and [ContentType.HENTAI]. All scraping behavior is inherited from
 * [MadaraParser]; this class only supplies the domain, a page size of 10 and a
 * site-specific chapter date pattern.
 */
@MangaSourceParser("VILLAINESSSCAN", "VillainessScan", "pt", ContentType.HENTAI)
internal class VillainessScan(context: MangaLoaderContext) :
MadaraParser(context, MangaParserSource.VILLAINESSSCAN, "villainessscan.xyz", pageSize = 10) {
// Dates are written as "<day> de <month name> de <year>"; the quoted 'de' parts are literals.
// NOTE(review): presumably consumed as a SimpleDateFormat pattern by MadaraParser — confirm there.
override val datePattern: String = "dd 'de' MMMM 'de' yyyy"
}

@ -35,7 +35,9 @@ internal abstract class WpComicsParser(
override val availableStates: Set<MangaState> = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED)
protected open val listUrl = "/tim-truyen-nang-cao"
override val isMultipleTagsSupported = false
protected open val listUrl = "/tim-truyen"
protected open val datePattern = "dd/MM/yy"
@ -49,12 +51,16 @@ internal abstract class WpComicsParser(
protected val ongoing: Set<String> = setOf(
"Đang tiến hành",
"Ongoing",
"Updating",
"連載中",
)
@JvmField
protected val finished: Set<String> = setOf(
"Hoàn thành",
"Complete",
"Completed",
"完結済み",
)
override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
@ -64,7 +70,8 @@ internal abstract class WpComicsParser(
val url = buildString {
append("https://")
append(domain)
append("/tim-truyen?keyword=")
append(listUrl)
append("?keyword=")
append(filter.query.urlEncoded())
append("&page=")
append(page.toString())
@ -82,10 +89,14 @@ internal abstract class WpComicsParser(
val url = buildString {
append("https://")
append(domain)
val tagQuery = filter.tags.joinToString(",") { it.key }
append("/tim-truyen-nang-cao?genres=")
append(tagQuery)
append("&notgenres=&gender=-1&minchapter=1&sort=")
append(listUrl)
if (filter.tags.isNotEmpty()) {
append('/')
filter.tags.oneOrThrowIfMany()?.let {
append(it.key)
}
}
append("?sort=")
append(
when (filter.sortOrder) {
SortOrder.UPDATED -> 0
@ -116,32 +127,33 @@ internal abstract class WpComicsParser(
val url = buildString {
append("https://")
append(domain)
append("/tim-truyen-nang-cao?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=")
append(listUrl)
append("?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=")
append(page.toString())
}
webClient.httpGet(url)
}
}
val itemsElements = response.parseHtml()
.select("div.ModuleContent > div.items")
.select("div.item")
return itemsElements.mapNotNull { item ->
val tagMap = getOrCreateTagMap()
return parseMangaList(response.parseHtml(), tagMap)
}
protected open fun parseMangaList(doc: Document, tagMap: ArrayMap<String, MangaTag>): List<Manga> {
return doc.select("div.items div.item").mapNotNull { item ->
val tooltipElement = item.selectFirst("div.box_tootip") ?: return@mapNotNull null
val absUrl = item.selectFirst("div.image > a")?.attrAsAbsoluteUrlOrNull("href") ?: return@mapNotNull null
val slug = absUrl.substringAfterLast('/')
val mangaState = when (tooltipElement.selectFirst("div.message_main > p:contains(Tình trạng)")?.ownText()) {
"Đang tiến hành" -> MangaState.ONGOING
"Hoàn thành" -> MangaState.FINISHED
in ongoing -> MangaState.ONGOING
in finished -> MangaState.FINISHED
else -> null
}
val tagMap = getOrCreateTagMap()
val tagsElement = tooltipElement.selectFirst("div.message_main > p:contains(Thể loại)")?.ownText().orEmpty()
val mangaTags = tagsElement.split(',').mapNotNullToSet { tagMap[it.trim()] }
Manga(
id = generateUid(slug),
title = tooltipElement.selectFirst("div.title")?.text().orEmpty(),
title = item.selectFirst("div.box_tootip div.title, h3 a")?.text().orEmpty(),
altTitle = null,
url = absUrl.toRelativeUrl(domain),
publicUrl = absUrl,
@ -168,18 +180,17 @@ internal abstract class WpComicsParser(
return tagSet
}
private val mutex = Mutex()
private var tagCache: ArrayMap<String, MangaTag>? = null
private suspend fun getOrCreateTagMap(): ArrayMap<String, MangaTag> = mutex.withLock {
protected open suspend fun getOrCreateTagMap(): ArrayMap<String, MangaTag> = mutex.withLock {
tagCache?.let { return@withLock it }
val doc = webClient.httpGet("/tim-truyen-nang-cao".toAbsoluteUrl(domain)).parseHtml()
val tagItems = doc.select("div.genre-item")
val doc = webClient.httpGet(listUrl.toAbsoluteUrl(domain)).parseHtml()
val tagItems = doc.select("div.dropdown-genres select option")
val result = ArrayMap<String, MangaTag>(tagItems.size)
for (item in tagItems) {
val title = item.text()
val key = item.select("span[data-id]").attr("data-id")
val key = item.attr("value").substringAfterLast('/')
if (key.isNotEmpty() && title.isNotEmpty()) {
result[title] = MangaTag(title = title, key = key, source = source)
}
@ -190,35 +201,36 @@ internal abstract class WpComicsParser(
protected open val selectDesc = "div.detail-content p"
protected open val selectState = "div.col-info li.status p:not(.name)"
protected open val selectAut = "div.col-info li.author p:not(.name)"
protected open val selectTag = "div.col-info li.kind p:not(.name) a"
protected open val selectAut = "div.col-info li.author p:not(.name), li.author p.col-xs-8"
protected open val selectTag = "div.col-info li.kind p:not(.name) a, li.kind p.col-xs-8 a"
override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
val fullUrl = manga.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml()
val chaptersDeferred = async { getChapters(doc) }
val desc = doc.selectFirstOrThrow(selectDesc).html()
val stateDiv = doc.selectFirst(selectState)
val state = stateDiv?.let {
when (it.text()) {
in ongoing -> MangaState.ONGOING
in finished -> MangaState.FINISHED
else -> null
}
}
val aut = doc.body().select(selectAut).text()
val tagMap = getOrCreateTagMap()
val tagsElement = doc.select("li.kind p.col-xs-8 a")
val mangaTags = tagsElement.mapNotNullToSet { tagMap[it.text()] }
manga.copy(
description = desc,
altTitle = null,
author = aut,
state = state,
description = doc.selectFirst(selectDesc)?.html().orEmpty(),
altTitle = doc.selectFirst("h2.other-name")?.text().orEmpty(),
author = doc.body().select(selectAut).text(),
state = doc.selectFirst(selectState)?.let {
when (it.text()) {
in ongoing -> MangaState.ONGOING
in finished -> MangaState.FINISHED
else -> null
}
},
tags = mangaTags,
rating = doc.selectFirst("div.star input")?.attr("value")?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN,
chapters = chaptersDeferred.await(),
)
}
protected open val selectDate = "div.col-xs-4"
protected open val selectChapter = "div#nt_listchapter li .chapter"
protected open val selectChapter = "div.list-chapter li.row:not(.heading)"
protected open suspend fun getChapters(doc: Document): List<MangaChapter> {
return doc.body().select(selectChapter).mapChapters(reversed = true) { i, li ->
@ -248,14 +260,11 @@ internal abstract class WpComicsParser(
}
}
protected open val selectPage = "div.reading-detail img"
protected open val selectPage = "div.page-chapter > img, li.blocks-gallery-item img"
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml()
return doc.select(selectPage).map { url ->
val img = url.src()?.toRelativeUrl(domain) ?: url.parseFailed("Image src not found")
MangaPage(
@ -268,16 +277,14 @@ internal abstract class WpComicsParser(
}
protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
// Clean date (e.g. 5th December 2019 to 5 December 2019) before parsing it
val d = date?.lowercase() ?: return 0
return when {
d.endsWith(" ago") ||
d.endsWith(" trước") // Handle translated 'ago' in Viêt Nam.
d.endsWith(" trước")
-> parseRelativeDate(date)
// Handle 'yesterday' and 'today', using midnight
d.startsWith("year") -> Calendar.getInstance().apply {
add(Calendar.DAY_OF_MONTH, -1) // yesterday
add(Calendar.DAY_OF_MONTH, -1)
set(Calendar.HOUR_OF_DAY, 0)
set(Calendar.MINUTE, 0)
set(Calendar.SECOND, 0)
@ -303,49 +310,33 @@ internal abstract class WpComicsParser(
}
}
// Parses dates in this form:
// 21 hours ago
private fun parseRelativeDate(date: String): Long {
val number = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0
val cal = Calendar.getInstance()
return when {
WordSet(
"day",
"days",
"d",
"ngày ",
).anyWordIn(date) -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis
WordSet("jam", "saat", "heure", "hora", "horas", "hour", "hours", "h").anyWordIn(date) -> cal.apply {
add(
Calendar.HOUR,
-number,
)
WordSet("second", "giây").anyWordIn(date) -> cal.apply { add(Calendar.SECOND, -number) }.timeInMillis
WordSet("min", "minute", "minutes", "mins", "phút").anyWordIn(date) -> cal.apply {
add(Calendar.MINUTE, -number)
}.timeInMillis
WordSet(
"min",
"minute",
"minutes",
"mins",
"phút",
).anyWordIn(date) -> cal.apply {
add(
Calendar.MINUTE,
-number,
)
WordSet("jam", "saat", "heure", "hora", "horas", "hour", "hours", "h", "giờ").anyWordIn(date) -> cal.apply {
add(Calendar.HOUR, -number)
}.timeInMillis
WordSet("day", "days", "d", "ngày").anyWordIn(date) -> cal.apply {
add(Calendar.DAY_OF_MONTH, -number)
}.timeInMillis
WordSet("second").anyWordIn(date) -> cal.apply {
WordSet("month", "months", "tháng").anyWordIn(date) -> cal.apply {
add(
Calendar.SECOND,
Calendar.MONTH,
-number,
)
}.timeInMillis
WordSet("month", "months").anyWordIn(date) -> cal.apply { add(Calendar.MONTH, -number) }.timeInMillis
WordSet("year").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis
WordSet("year", "năm").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis
else -> 0
}
}

@ -18,8 +18,6 @@ internal class XoxoComics(context: MangaLoaderContext) :
override val listUrl = "/comic-list"
override val datePattern = "MM/dd/yyyy"
override val isMultipleTagsSupported = false
override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
SortOrder.NEWEST,

@ -0,0 +1,99 @@
package org.koitharu.kotatsu.parsers.site.wpcomics.ja
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.exception.NotFoundException
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaListFilter
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.model.MangaState
import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.oneOrThrowIfMany
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.runCatchingCancellable
import org.koitharu.kotatsu.parsers.util.urlEncoded
/**
 * Parser for the MangaRaw source hosted at `mangaraw.xyz`, built on [WpComicsParser].
 *
 * Every listing request goes to the `/search/manga` endpoint, so [getListPage] is
 * overridden to assemble query-string based URLs for that endpoint.
 *
 * Note from the original author: this source needs to use the 0ms.dev proxy.
 */
@MangaSourceParser("MANGARAW", "MangaRaw", "ja")
internal class MangaRaw(context: MangaLoaderContext) :
    WpComicsParser(context, MangaParserSource.MANGARAW, "mangaraw.xyz") {

    override val listUrl = "/search/manga"

    /**
     * Fetches one page of the catalogue and parses it with [parseMangaList].
     *
     * - [MangaListFilter.Search]: queries by keyword; a [NotFoundException] from the
     *   request yields an empty list, any other failure is rethrown.
     * - [MangaListFilter.Advanced]: sends the sort code, at most one genre and at most
     *   one status (`oneOrThrowIfMany` is applied to both sets).
     * - `null`: requests the default listing with a fixed query string.
     *
     * @param page page number, appended verbatim as the `page` query parameter
     * @param filter search/advanced filter, or `null` for the default listing
     * @throws IllegalArgumentException if the advanced filter uses a sort order other
     *   than UPDATED, POPULARITY, NEWEST or RATING
     */
    override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> {
        // Scheme + host + endpoint path shared by all three request shapes.
        val endpoint = "https://$domain$listUrl"

        val response = when (filter) {
            is MangaListFilter.Search -> {
                val searchUrl = "${endpoint}?keyword=${filter.query.urlEncoded()}&page=$page"
                runCatchingCancellable { webClient.httpGet(searchUrl) }.getOrElse { failure ->
                    // A "not found" answer to a search means "no results", not an error.
                    if (failure is NotFoundException) {
                        return emptyList()
                    }
                    throw failure
                }
            }

            is MangaListFilter.Advanced -> {
                val sortCode = when (filter.sortOrder) {
                    SortOrder.UPDATED -> 0
                    SortOrder.POPULARITY -> 10
                    SortOrder.NEWEST -> 15
                    SortOrder.RATING -> 20
                    else -> throw IllegalArgumentException("Sort order ${filter.sortOrder.name} not supported")
                }
                val advancedUrl = buildString {
                    append(endpoint)
                    append("?sort=")
                    append(sortCode)
                    if (filter.tags.isNotEmpty()) {
                        append("&genre=")
                        filter.tags.oneOrThrowIfMany()?.let { tag -> append(tag.key) }
                    }
                    filter.states.oneOrThrowIfMany()?.let { state ->
                        val statusCode = when (state) {
                            MangaState.ONGOING -> "1"
                            MangaState.FINISHED -> "2"
                            else -> "-1"
                        }
                        append("&status=")
                        append(statusCode)
                    }
                    append("&page=")
                    append(page.toString())
                }
                webClient.httpGet(advancedUrl)
            }

            null -> webClient.httpGet(
                "${endpoint}?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=$page",
            )
        }

        // The tag map is resolved before the HTML is parsed, as in the previous implementation.
        val knownTags = getOrCreateTagMap()
        return parseMangaList(response.parseHtml(), knownTags)
    }
}

@ -0,0 +1,10 @@
package org.koitharu.kotatsu.parsers.site.wpcomics.vi
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
/**
 * Parser for the NetTruyenAA source hosted at `nettruyenaa.com`.
 *
 * Registered through [MangaSourceParser] under the id `NETTRUYENAA` with the `"vi"` tag.
 * The class declares no overrides: all behavior comes from [WpComicsParser] defaults.
 */
@MangaSourceParser("NETTRUYENAA", "NetTruyenAA", "vi")
internal class NetTruyenAA(context: MangaLoaderContext) :
WpComicsParser(context, MangaParserSource.NETTRUYENAA, "nettruyenaa.com")

@ -0,0 +1,10 @@
package org.koitharu.kotatsu.parsers.site.wpcomics.vi
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
/**
 * Parser for the NetTruyenX source hosted at `nettruyenx.com`.
 *
 * Registered through [MangaSourceParser] under the id `NETTRUYENX` with the `"vi"` tag.
 * The class declares no overrides: all behavior comes from [WpComicsParser] defaults.
 */
@MangaSourceParser("NETTRUYENX", "NetTruyenX", "vi")
internal class NetTruyenX(context: MangaLoaderContext) :
WpComicsParser(context, MangaParserSource.NETTRUYENX, "nettruyenx.com")

@ -0,0 +1,10 @@
package org.koitharu.kotatsu.parsers.site.wpcomics.vi
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
/**
 * Parser for the NhatTruyenSS source hosted at `www.nhattruyenss.net`.
 *
 * Registered through [MangaSourceParser] with the `"vi"` tag.
 * The class declares no overrides: all behavior comes from [WpComicsParser] defaults.
 *
 * NOTE(review): the source id is spelled `NETTRUYENSS` while the class and title use
 * `NhatTruyenSS` — confirm the mismatch is intentional before renaming anything, since
 * the id also appears as a [MangaParserSource] entry.
 */
@MangaSourceParser("NETTRUYENSS", "NhatTruyenSS", "vi")
internal class NhatTruyenSS(context: MangaLoaderContext) :
WpComicsParser(context, MangaParserSource.NETTRUYENSS, "www.nhattruyenss.net")

@ -1,12 +1,10 @@
package org.koitharu.kotatsu.parsers.site.wpcomics.vi
import org.koitharu.kotatsu.parsers.Broken
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
@Broken
@MangaSourceParser("NHATTRUYENMIN", "NhattruyenPlus", "vi")
@MangaSourceParser("NHATTRUYENMIN", "NhatTruyenVn", "vi")
internal class Nhattruyenmin(context: MangaLoaderContext) :
WpComicsParser(context, MangaParserSource.NHATTRUYENMIN, "nhattruyenmax.com")
WpComicsParser(context, MangaParserSource.NHATTRUYENMIN, "nhattruyenvn.com")

Loading…
Cancel
Save