From 53ca9c9677af59bb841310977a9011826454ab8c Mon Sep 17 00:00:00 2001 From: devi Date: Thu, 25 Jul 2024 20:20:34 +0200 Subject: [PATCH] Rework WpComicsParser Adds sources --- .../parsers/site/en/AsuraScansParser.kt | 3 +- .../parsers/site/madara/pt/VillainessScan.kt | 13 ++ .../parsers/site/wpcomics/WpComicsParser.kt | 143 ++++++++---------- .../parsers/site/wpcomics/en/XoxoComics.kt | 2 - .../parsers/site/wpcomics/ja/MangaRaw.kt | 99 ++++++++++++ .../parsers/site/wpcomics/vi/NetTruyenAA.kt | 10 ++ .../parsers/site/wpcomics/vi/NetTruyenX.kt | 10 ++ .../parsers/site/wpcomics/vi/NhatTruyenSS.kt | 10 ++ .../parsers/site/wpcomics/vi/Nhattruyenmin.kt | 6 +- 9 files changed, 212 insertions(+), 84 deletions(-) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/VillainessScan.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/ja/MangaRaw.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenAA.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenX.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NhatTruyenSS.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/AsuraScansParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/AsuraScansParser.kt index 4421128e..fa23aa61 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/AsuraScansParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/en/AsuraScansParser.kt @@ -31,7 +31,6 @@ internal class AsuraScansParser(context: MangaLoaderContext) : override val isMultipleTagsSupported = true - // &types=-1&order=desc override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> { val url = buildString { append("https://") @@ -72,7 +71,7 @@ internal class AsuraScansParser(context: MangaLoaderContext) : SortOrder.NEWEST -> append("latest") SortOrder.ALPHABETICAL_DESC -> append("desc") 
SortOrder.ALPHABETICAL -> append("asc") - else -> append("Updated") + else -> append("update") } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/VillainessScan.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/VillainessScan.kt new file mode 100644 index 00000000..4fca33ef --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/pt/VillainessScan.kt @@ -0,0 +1,13 @@ +package org.koitharu.kotatsu.parsers.site.madara.pt + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.ContentType +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import org.koitharu.kotatsu.parsers.site.madara.MadaraParser + +@MangaSourceParser("VILLAINESSSCAN", "VillainessScan", "pt", ContentType.HENTAI) +internal class VillainessScan(context: MangaLoaderContext) : + MadaraParser(context, MangaParserSource.VILLAINESSSCAN, "villainessscan.xyz", pageSize = 10) { + override val datePattern: String = "dd 'de' MMMM 'de' yyyy" +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt index 19e891e5..352b4d30 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt @@ -35,7 +35,9 @@ internal abstract class WpComicsParser( override val availableStates: Set<MangaState> = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED) - protected open val listUrl = "/tim-truyen-nang-cao" + override val isMultipleTagsSupported = false + + protected open val listUrl = "/tim-truyen" protected open val datePattern = "dd/MM/yy" @@ -49,12 +51,16 @@ internal abstract class WpComicsParser( protected val ongoing: Set<String> = setOf( "Đang tiến hành", "Ongoing", + "Updating", + "連載中", ) @JvmField protected val finished: Set<String> = setOf( "Hoàn thành", 
+ "Complete", "Completed", + "完結済み", ) override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> { @@ -64,7 +70,8 @@ internal abstract class WpComicsParser( val url = buildString { append("https://") append(domain) - append("/tim-truyen?keyword=") + append(listUrl) + append("?keyword=") append(filter.query.urlEncoded()) append("&page=") append(page.toString()) @@ -82,10 +89,14 @@ internal abstract class WpComicsParser( val url = buildString { append("https://") append(domain) - val tagQuery = filter.tags.joinToString(",") { it.key } - append("/tim-truyen-nang-cao?genres=") - append(tagQuery) - append("&notgenres=&gender=-1&minchapter=1&sort=") + append(listUrl) + if (filter.tags.isNotEmpty()) { + append('/') + filter.tags.oneOrThrowIfMany()?.let { + append(it.key) + } + } + append("?sort=") append( when (filter.sortOrder) { SortOrder.UPDATED -> 0 @@ -116,32 +127,33 @@ internal abstract class WpComicsParser( val url = buildString { append("https://") append(domain) - append("/tim-truyen-nang-cao?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=") + append(listUrl) + append("?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=") append(page.toString()) } webClient.httpGet(url) } } - val itemsElements = response.parseHtml() - .select("div.ModuleContent > div.items") - .select("div.item") - return itemsElements.mapNotNull { item -> + val tagMap = getOrCreateTagMap() + return parseMangaList(response.parseHtml(), tagMap) + } + + protected open fun parseMangaList(doc: Document, tagMap: ArrayMap<String, MangaTag>): List<Manga> { + return doc.select("div.items div.item").mapNotNull { item -> val tooltipElement = item.selectFirst("div.box_tootip") ?: return@mapNotNull null val absUrl = item.selectFirst("div.image > a")?.attrAsAbsoluteUrlOrNull("href") ?: return@mapNotNull null val slug = absUrl.substringAfterLast('/') val mangaState = when (tooltipElement.selectFirst("div.message_main > p:contains(Tình trạng)")?.ownText()) { - "Đang tiến hành" -> 
MangaState.ONGOING - "Hoàn thành" -> MangaState.FINISHED + in ongoing -> MangaState.ONGOING + in finished -> MangaState.FINISHED else -> null } - - val tagMap = getOrCreateTagMap() val tagsElement = tooltipElement.selectFirst("div.message_main > p:contains(Thể loại)")?.ownText().orEmpty() val mangaTags = tagsElement.split(',').mapNotNullToSet { tagMap[it.trim()] } Manga( id = generateUid(slug), - title = tooltipElement.selectFirst("div.title")?.text().orEmpty(), + title = item.selectFirst("div.box_tootip div.title, h3 a")?.text().orEmpty(), altTitle = null, url = absUrl.toRelativeUrl(domain), publicUrl = absUrl, @@ -168,18 +180,17 @@ internal abstract class WpComicsParser( return tagSet } - private val mutex = Mutex() private var tagCache: ArrayMap<String, MangaTag>? = null - private suspend fun getOrCreateTagMap(): ArrayMap<String, MangaTag> = mutex.withLock { + protected open suspend fun getOrCreateTagMap(): ArrayMap<String, MangaTag> = mutex.withLock { tagCache?.let { return@withLock it } - val doc = webClient.httpGet("/tim-truyen-nang-cao".toAbsoluteUrl(domain)).parseHtml() - val tagItems = doc.select("div.genre-item") + val doc = webClient.httpGet(listUrl.toAbsoluteUrl(domain)).parseHtml() + val tagItems = doc.select("div.dropdown-genres select option") val result = ArrayMap<String, MangaTag>(tagItems.size) for (item in tagItems) { val title = item.text() - val key = item.select("span[data-id]").attr("data-id") + val key = item.attr("value").substringAfterLast('/') if (key.isNotEmpty() && title.isNotEmpty()) { result[title] = MangaTag(title = title, key = key, source = source) } @@ -190,35 +201,36 @@ internal abstract class WpComicsParser( protected open val selectDesc = "div.detail-content p" protected open val selectState = "div.col-info li.status p:not(.name)" - protected open val selectAut = "div.col-info li.author p:not(.name)" - protected open val selectTag = "div.col-info li.kind p:not(.name) a" + protected open val selectAut = "div.col-info li.author p:not(.name), li.author p.col-xs-8" + protected open val selectTag = 
"div.col-info li.kind p:not(.name) a, li.kind p.col-xs-8 a" override suspend fun getDetails(manga: Manga): Manga = coroutineScope { val fullUrl = manga.url.toAbsoluteUrl(domain) val doc = webClient.httpGet(fullUrl).parseHtml() val chaptersDeferred = async { getChapters(doc) } - val desc = doc.selectFirstOrThrow(selectDesc).html() - val stateDiv = doc.selectFirst(selectState) - val state = stateDiv?.let { - when (it.text()) { - in ongoing -> MangaState.ONGOING - in finished -> MangaState.FINISHED - else -> null - } - } - val aut = doc.body().select(selectAut).text() + val tagMap = getOrCreateTagMap() + val tagsElement = doc.select("li.kind p.col-xs-8 a") + val mangaTags = tagsElement.mapNotNullToSet { tagMap[it.text()] } manga.copy( - description = desc, - altTitle = null, - author = aut, - state = state, + description = doc.selectFirst(selectDesc)?.html().orEmpty(), + altTitle = doc.selectFirst("h2.other-name")?.text().orEmpty(), + author = doc.body().select(selectAut).text(), + state = doc.selectFirst(selectState)?.let { + when (it.text()) { + in ongoing -> MangaState.ONGOING + in finished -> MangaState.FINISHED + else -> null + } + }, + tags = mangaTags, + rating = doc.selectFirst("div.star input")?.attr("value")?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN, chapters = chaptersDeferred.await(), ) } protected open val selectDate = "div.col-xs-4" - protected open val selectChapter = "div#nt_listchapter li .chapter" + protected open val selectChapter = "div.list-chapter li.row:not(.heading)" protected open suspend fun getChapters(doc: Document): List<MangaChapter> { return doc.body().select(selectChapter).mapChapters(reversed = true) { i, li -> @@ -248,14 +260,11 @@ internal abstract class WpComicsParser( } } - - protected open val selectPage = "div.reading-detail img" + protected open val selectPage = "div.page-chapter > img, li.blocks-gallery-item img" override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { val fullUrl = chapter.url.toAbsoluteUrl(domain) val doc = 
webClient.httpGet(fullUrl).parseHtml() - - return doc.select(selectPage).map { url -> val img = url.src()?.toRelativeUrl(domain) ?: url.parseFailed("Image src not found") MangaPage( @@ -268,16 +277,14 @@ internal abstract class WpComicsParser( } protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long { - // Clean date (e.g. 5th December 2019 to 5 December 2019) before parsing it val d = date?.lowercase() ?: return 0 return when { d.endsWith(" ago") || - d.endsWith(" trước") // Handle translated 'ago' in Viêt Nam. + d.endsWith(" trước") -> parseRelativeDate(date) - // Handle 'yesterday' and 'today', using midnight d.startsWith("year") -> Calendar.getInstance().apply { - add(Calendar.DAY_OF_MONTH, -1) // yesterday + add(Calendar.DAY_OF_MONTH, -1) set(Calendar.HOUR_OF_DAY, 0) set(Calendar.MINUTE, 0) set(Calendar.SECOND, 0) @@ -303,49 +310,33 @@ internal abstract class WpComicsParser( } } - // Parses dates in this form: - // 21 hours ago private fun parseRelativeDate(date: String): Long { val number = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0 val cal = Calendar.getInstance() - return when { - WordSet( - "day", - "days", - "d", - "ngày ", - ).anyWordIn(date) -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis - - WordSet("jam", "saat", "heure", "hora", "horas", "hour", "hours", "h").anyWordIn(date) -> cal.apply { - add( - Calendar.HOUR, - -number, - ) + + WordSet("second", "giây").anyWordIn(date) -> cal.apply { add(Calendar.SECOND, -number) }.timeInMillis + + WordSet("min", "minute", "minutes", "mins", "phút").anyWordIn(date) -> cal.apply { + add(Calendar.MINUTE, -number) }.timeInMillis - WordSet( - "min", - "minute", - "minutes", - "mins", - "phút", - ).anyWordIn(date) -> cal.apply { - add( - Calendar.MINUTE, - -number, - ) + WordSet("jam", "saat", "heure", "hora", "horas", "hour", "hours", "h", "giờ").anyWordIn(date) -> cal.apply { + add(Calendar.HOUR, -number) + }.timeInMillis + + WordSet("day", "days", "d", 
"ngày").anyWordIn(date) -> cal.apply { + add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis - WordSet("second").anyWordIn(date) -> cal.apply { + WordSet("month", "months", "tháng").anyWordIn(date) -> cal.apply { add( - Calendar.SECOND, + Calendar.MONTH, -number, ) }.timeInMillis - WordSet("month", "months").anyWordIn(date) -> cal.apply { add(Calendar.MONTH, -number) }.timeInMillis - WordSet("year").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis + WordSet("year", "năm").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis else -> 0 } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt index cd16d482..f2b93ca8 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt @@ -18,8 +18,6 @@ internal class XoxoComics(context: MangaLoaderContext) : override val listUrl = "/comic-list" override val datePattern = "MM/dd/yyyy" - override val isMultipleTagsSupported = false - override val availableSortOrders: Set<SortOrder> = EnumSet.of( SortOrder.UPDATED, SortOrder.NEWEST, diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/ja/MangaRaw.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/ja/MangaRaw.kt new file mode 100644 index 00000000..d07a7c87 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/ja/MangaRaw.kt @@ -0,0 +1,99 @@ +package org.koitharu.kotatsu.parsers.site.wpcomics.ja + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.exception.NotFoundException +import org.koitharu.kotatsu.parsers.model.Manga +import org.koitharu.kotatsu.parsers.model.MangaListFilter +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import 
org.koitharu.kotatsu.parsers.model.MangaState +import org.koitharu.kotatsu.parsers.model.SortOrder +import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser +import org.koitharu.kotatsu.parsers.util.domain +import org.koitharu.kotatsu.parsers.util.oneOrThrowIfMany +import org.koitharu.kotatsu.parsers.util.parseHtml +import org.koitharu.kotatsu.parsers.util.runCatchingCancellable +import org.koitharu.kotatsu.parsers.util.urlEncoded + +// Need to use 0ms.dev Proxy + +@MangaSourceParser("MANGARAW", "MangaRaw", "ja") +internal class MangaRaw(context: MangaLoaderContext) : + WpComicsParser(context, MangaParserSource.MANGARAW, "mangaraw.xyz") { + override val listUrl = "/search/manga" + + override suspend fun getListPage(page: Int, filter: MangaListFilter?): List<Manga> { + val response = + when (filter) { + is MangaListFilter.Search -> { + val url = buildString { + append("https://") + append(domain) + append(listUrl) + append("?keyword=") + append(filter.query.urlEncoded()) + append("&page=") + append(page.toString()) + } + + val result = runCatchingCancellable { webClient.httpGet(url) } + val exception = result.exceptionOrNull() + if (exception is NotFoundException) { + return emptyList() + } + result.getOrThrow() + } + + is MangaListFilter.Advanced -> { + val url = buildString { + append("https://") + append(domain) + append(listUrl) + append("?sort=") + append( + when (filter.sortOrder) { + SortOrder.UPDATED -> 0 + SortOrder.POPULARITY -> 10 + SortOrder.NEWEST -> 15 + SortOrder.RATING -> 20 + else -> throw IllegalArgumentException("Sort order ${filter.sortOrder.name} not supported") + }, + ) + if (filter.tags.isNotEmpty()) { + append("&genre=") + filter.tags.oneOrThrowIfMany()?.let { + append(it.key) + } + } + filter.states.oneOrThrowIfMany()?.let { + append("&status=") + append( + when (it) { + MangaState.ONGOING -> "1" + MangaState.FINISHED -> "2" + else -> "-1" + }, + ) + } + append("&page=") + append(page.toString()) + } + + webClient.httpGet(url) + } + + null 
-> { + val url = buildString { + append("https://") + append(domain) + append(listUrl) + append("?genres=&notgenres=&gender=-1&status=-1&minchapter=1&sort=0&page=") + append(page.toString()) + } + webClient.httpGet(url) + } + } + val tagMap = getOrCreateTagMap() + return parseMangaList(response.parseHtml(), tagMap) + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenAA.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenAA.kt new file mode 100644 index 00000000..68e77975 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenAA.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.wpcomics.vi + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser + +@MangaSourceParser("NETTRUYENAA", "NetTruyenAA", "vi") +internal class NetTruyenAA(context: MangaLoaderContext) : + WpComicsParser(context, MangaParserSource.NETTRUYENAA, "nettruyenaa.com") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenX.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenX.kt new file mode 100644 index 00000000..c9e4dbfe --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NetTruyenX.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.wpcomics.vi + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser + +@MangaSourceParser("NETTRUYENX", "NetTruyenX", "vi") +internal class NetTruyenX(context: MangaLoaderContext) : + WpComicsParser(context, MangaParserSource.NETTRUYENX, "nettruyenx.com") diff --git 
a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NhatTruyenSS.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NhatTruyenSS.kt new file mode 100644 index 00000000..98424c24 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/NhatTruyenSS.kt @@ -0,0 +1,10 @@ +package org.koitharu.kotatsu.parsers.site.wpcomics.vi + +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.model.MangaParserSource +import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser + +@MangaSourceParser("NETTRUYENSS", "NhatTruyenSS", "vi") +internal class NhatTruyenSS(context: MangaLoaderContext) : + WpComicsParser(context, MangaParserSource.NETTRUYENSS, "www.nhattruyenss.net") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt index 9b8f97a3..a3db3d2c 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt @@ -1,12 +1,10 @@ package org.koitharu.kotatsu.parsers.site.wpcomics.vi -import org.koitharu.kotatsu.parsers.Broken import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.model.MangaParserSource import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser -@Broken -@MangaSourceParser("NHATTRUYENMIN", "NhattruyenPlus", "vi") +@MangaSourceParser("NHATTRUYENMIN", "NhatTruyenVn", "vi") internal class Nhattruyenmin(context: MangaLoaderContext) : - WpComicsParser(context, MangaParserSource.NHATTRUYENMIN, "nhattruyenmax.com") + WpComicsParser(context, MangaParserSource.NHATTRUYENMIN, "nhattruyenvn.com")