From fefec4985dd6b05d9051ffe48f276cb2650d865c Mon Sep 17 00:00:00 2001
From: devi
Date: Sat, 29 Jul 2023 16:18:58 +0200
Subject: [PATCH] add Wpcomics and sources

---
 .../parsers/site/wpcomics/WpComicsParser.kt   | 305 ++++++++++++++++++
 .../parsers/site/wpcomics/en/XoxoComics.kt    | 120 +++++++
 .../parsers/site/wpcomics/vi/Nettruyenmax.kt  |  14 +
 .../parsers/site/wpcomics/vi/Nhattruyenmin.kt |  11 +
 4 files changed, 450 insertions(+)
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nettruyenmax.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt
new file mode 100644
index 00000000..78cd2db6
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/WpComicsParser.kt
@@ -0,0 +1,305 @@
+package org.koitharu.kotatsu.parsers.site.wpcomics
+
+import kotlinx.coroutines.async
+import kotlinx.coroutines.coroutineScope
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.PagedMangaParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import java.text.DateFormat
+import java.text.SimpleDateFormat
+import java.util.*
+
+/**
+ * Shared implementation for the sources in the `wpcomics` package.
+ *
+ * Subclasses adapt it to a concrete site by overriding [listUrl], [datePattern]
+ * and the `select*` CSS selectors.
+ */
+internal abstract class WpComicsParser(
+    context: MangaLoaderContext,
+    source: MangaSource,
+    domain: String,
+    pageSize: Int = 48,
+) : PagedMangaParser(context, source, pageSize) {
+
+    override val configKeyDomain = ConfigKey.Domain(domain)
+
+    override val sortOrders: Set<SortOrder> = EnumSet.of(
+        SortOrder.UPDATED,
+        SortOrder.NEWEST,
+        SortOrder.POPULARITY,
+    )
+
+    /** Path of the catalogue page, appended to the domain. */
+    protected open val listUrl = "/the-loai"
+
+    /** [SimpleDateFormat] pattern for chapter dates that contain no time of day. */
+    protected open val datePattern = "dd/MM/yy"
+
+    init {
+        paginator.firstPage = 1
+        searchPaginator.firstPage = 1
+    }
+
+    /** Status labels that map to [MangaState.ONGOING]. */
+    @JvmField
+    protected val ongoing: Set<String> = setOf(
+        "Đang tiến hành",
+        "Ongoing",
+    )
+
+    /** Status labels that map to [MangaState.FINISHED]. */
+    @JvmField
+    protected val finished: Set<String> = setOf(
+        "Hoàn thành",
+        // No trailing space: jsoup's Element.text() returns trimmed text.
+        "Completed",
+    )
+
+    // Compiled once; used by parseChapterDate to strip ordinal suffixes.
+    private val ordinalDateRegex = Regex("""\d(st|nd|rd|th)""")
+    private val ordinalTokenRegex = Regex("""\d\D\D""")
+    private val nonDigitRegex = Regex("""\D""")
+
+    /**
+     * Loads one page of the catalogue and maps every `div.item` to a [Manga].
+     *
+     * The URL is `https://{domain}{listUrl}[/{tag keys}]?page={page}[&keyword={query}]&sort={code}`.
+     */
+    override suspend fun getListPage(
+        page: Int,
+        query: String?,
+        tags: Set<MangaTag>?,
+        sortOrder: SortOrder,
+    ): List<Manga> {
+        val url = buildString {
+            append("https://")
+            append(domain)
+            append(listUrl)
+            if (!tags.isNullOrEmpty()) {
+                append("/")
+                // NOTE(review): keys of several tags are joined without a separator;
+                // presumably only one tag is expected here - confirm.
+                for (tag in tags) {
+                    append(tag.key)
+                }
+            }
+            append("?page=")
+            append(page.toString())
+            if (!query.isNullOrEmpty()) {
+                append("&keyword=")
+                append(query.urlEncoded())
+            }
+            append("&sort=")
+            when (sortOrder) {
+                SortOrder.POPULARITY -> append("10")
+                SortOrder.NEWEST -> append("15")
+                // UPDATED and any other order send an empty sort value.
+                else -> append("")
+            }
+        }
+        val doc = webClient.httpGet(url).parseHtml()
+        return doc.select("div.item").map { div ->
+            val href = div.selectFirstOrThrow("a").attrAsRelativeUrl("href")
+            Manga(
+                id = generateUid(href),
+                url = href,
+                publicUrl = href.toAbsoluteUrl(div.host ?: domain),
+                coverUrl = div.selectFirst("img")?.src().orEmpty(),
+                title = div.selectFirstOrThrow("h3").text().orEmpty(),
+                altTitle = null,
+                rating = RATING_UNKNOWN,
+                tags = emptySet(),
+                author = null,
+                state = null,
+                source = source,
+                isNsfw = isNsfwSource,
+            )
+        }
+    }
+
+    /** Loads the genres listed on the [listUrl] page, skipping entries marked `active`. */
+    override suspend fun getTags(): Set<MangaTag> {
+        val doc = webClient.httpGet("https://$domain$listUrl").parseHtml()
+        return doc.select("div.genres ul li:not(.active)").mapNotNullToSet { li ->
+            val a = li.selectFirst("a") ?: return@mapNotNullToSet null
+            MangaTag(
+                key = a.attr("href").removeSuffix('/').substringAfterLast('/'),
+                title = a.text(),
+                source = source,
+            )
+        }
+    }
+
+    protected open val selectDesc = "div.detail-content p"
+    protected open val selectState = "div.col-info li.status p:not(.name)"
+    protected open val selectAut = "div.col-info li.author p:not(.name)"
+    protected open val selectTag = "div.col-info li.kind p:not(.name) a"
+
+    /** Loads the details page of [manga] and returns a copy filled with its data and chapters. */
+    override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
+        val fullUrl = manga.url.toAbsoluteUrl(domain)
+        val doc = webClient.httpGet(fullUrl).parseHtml()
+        val chaptersDeferred = async { getChapters(manga, doc) }
+        val desc = doc.selectFirstOrThrow(selectDesc).html()
+        val state = doc.selectFirst(selectState)?.let {
+            when (it.text()) {
+                in ongoing -> MangaState.ONGOING
+                in finished -> MangaState.FINISHED
+                else -> null
+            }
+        }
+        val aut = doc.body().select(selectAut).text()
+        manga.copy(
+            tags = doc.body().select(selectTag).mapNotNullToSet { a ->
+                MangaTag(
+                    key = a.attr("href").removeSuffix('/').substringAfterLast('/'),
+                    title = a.text().toTitleCase(),
+                    source = source,
+                )
+            },
+            description = desc,
+            altTitle = null,
+            author = aut,
+            state = state,
+            chapters = chaptersDeferred.await(),
+        )
+    }
+
+    protected open val selectDate = "div.col-xs-4"
+    protected open val selectChapter = "div#nt_listchapter li:not(.heading)"
+
+    /** Builds the chapter list from the already loaded details page [doc]. */
+    protected open suspend fun getChapters(manga: Manga, doc: Document): List<MangaChapter> {
+        // Both formats are created once instead of once per chapter row.
+        val dateOnlyFormat = SimpleDateFormat(datePattern, sourceLocale)
+        // NOTE(review): this pattern has no year field - confirm the parsed
+        // timestamps are what callers expect.
+        val timeAndDateFormat = SimpleDateFormat("HH:mm dd/MM", sourceLocale)
+        return doc.body().select(selectChapter).mapChapters(reversed = true) { i, li ->
+            val a = li.selectFirstOrThrow("a")
+            val href = a.attrAsRelativeUrl("href")
+            val dateText = li.selectFirst(selectDate)?.text()
+            // A colon selects the pattern that includes a time of day.
+            val dateFormat = if (dateText?.contains(":") == true) timeAndDateFormat else dateOnlyFormat
+            MangaChapter(
+                id = generateUid(href),
+                name = a.text(),
+                number = i + 1,
+                url = href,
+                uploadDate = parseChapterDate(dateFormat, dateText),
+                source = source,
+                scanlator = null,
+                branch = null,
+            )
+        }
+    }
+
+    protected open val selectPage = "div.reading-detail img"
+
+    /** Loads the reader page of [chapter] and maps every [selectPage] match to a [MangaPage]. */
+    override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+        val fullUrl = chapter.url.toAbsoluteUrl(domain)
+        val doc = webClient.httpGet(fullUrl).parseHtml()
+        return doc.select(selectPage).map { url ->
+            val img = url.src()?.toRelativeUrl(domain) ?: url.parseFailed("Image src not found")
+            MangaPage(
+                id = generateUid(img),
+                url = img,
+                preview = null,
+                source = source,
+            )
+        }
+    }
+
+    /**
+     * Returns the absolute URL from the first non-empty attribute among
+     * `data-src`, `data-original`, `data-cdn` and `src`, or `null` if all are empty.
+     */
+    protected fun Element.src(): String? {
+        var result = absUrl("data-src")
+        if (result.isEmpty()) result = absUrl("data-original")
+        if (result.isEmpty()) result = absUrl("data-cdn")
+        if (result.isEmpty()) result = absUrl("src")
+        return result.ifEmpty { null }
+    }
+
+    /**
+     * Converts a chapter date label into a timestamp in milliseconds.
+     *
+     * Handles relative labels ("... ago", "... trước"), "yesterday"/"today" and
+     * absolute dates with English ordinal suffixes; anything else goes to [dateFormat].
+     *
+     * @return `0` when [date] is `null`.
+     */
+    protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
+        val d = date?.lowercase() ?: return 0
+        return when {
+            // Relative label; "trước" is the Vietnamese counterpart of "ago".
+            d.endsWith(" ago") || d.endsWith(" trước") -> parseRelativeDate(date)
+
+            // "yesterday": midnight of the previous day.
+            d.startsWith("yesterday") -> Calendar.getInstance().apply {
+                add(Calendar.DAY_OF_MONTH, -1)
+                set(Calendar.HOUR_OF_DAY, 0)
+                set(Calendar.MINUTE, 0)
+                set(Calendar.SECOND, 0)
+                set(Calendar.MILLISECOND, 0)
+            }.timeInMillis
+
+            // "today": midnight of the current day.
+            d.startsWith("today") -> Calendar.getInstance().apply {
+                set(Calendar.HOUR_OF_DAY, 0)
+                set(Calendar.MINUTE, 0)
+                set(Calendar.SECOND, 0)
+                set(Calendar.MILLISECOND, 0)
+            }.timeInMillis
+
+            // Strip ordinal suffixes ("5th December 2019" -> "5 December 2019") before parsing.
+            date.contains(ordinalDateRegex) -> date.split(" ").joinToString(" ") { token ->
+                if (token.contains(ordinalTokenRegex)) token.replace(nonDigitRegex, "") else token
+            }.let { dateFormat.tryParse(it) }
+
+            else -> dateFormat.tryParse(date)
+        }
+    }
+
+    /**
+     * Parses a relative label such as "21 hours ago" by subtracting the first number
+     * found in [date], in the recognised unit, from the current time.
+     *
+     * @return `0` when no number or no known unit is found.
+     */
+    private fun parseRelativeDate(date: String): Long {
+        val number = Regex("""(\d+)""").find(date)?.value?.toIntOrNull() ?: return 0
+        val cal = Calendar.getInstance()
+        // NOTE(review): one-letter entries ("d", "h") are tested before the longer unit
+        // names; if WordSet.anyWordIn matches substrings, labels such as "2 months ago"
+        // would be caught by an earlier branch - confirm against WordSet.
+        return when {
+            WordSet("day", "days", "d", "ngày ").anyWordIn(date) ->
+                cal.apply { add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis
+
+            WordSet("jam", "saat", "heure", "hora", "horas", "hour", "hours", "h").anyWordIn(date) ->
+                cal.apply { add(Calendar.HOUR, -number) }.timeInMillis
+
+            WordSet("min", "minute", "minutes", "mins", "phút").anyWordIn(date) ->
+                cal.apply { add(Calendar.MINUTE, -number) }.timeInMillis
+
+            WordSet("second").anyWordIn(date) ->
+                cal.apply { add(Calendar.SECOND, -number) }.timeInMillis
+
+            WordSet("month", "months").anyWordIn(date) ->
+                cal.apply { add(Calendar.MONTH, -number) }.timeInMillis
+
+            WordSet("year").anyWordIn(date) ->
+                cal.apply { add(Calendar.YEAR, -number) }.timeInMillis
+
+            else -> 0
+        }
+    }
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt
new file mode 100644
index 00000000..9a55af92
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/en/XoxoComics.kt
@@ -0,0 +1,120 @@
+package org.koitharu.kotatsu.parsers.site.wpcomics.en
+
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.model.ContentType
+import org.koitharu.kotatsu.parsers.model.Manga
+import org.koitharu.kotatsu.parsers.model.MangaChapter
+import org.koitharu.kotatsu.parsers.model.MangaPage
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.model.MangaTag
+import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN
+import org.koitharu.kotatsu.parsers.model.SortOrder
+import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
+import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
+import org.koitharu.kotatsu.parsers.util.domain
+import org.koitharu.kotatsu.parsers.util.generateUid
+import org.koitharu.kotatsu.parsers.util.host
+import org.koitharu.kotatsu.parsers.util.parseFailed
+import org.koitharu.kotatsu.parsers.util.parseHtml
+import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
+import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
+import org.koitharu.kotatsu.parsers.util.toRelativeUrl
+import org.koitharu.kotatsu.parsers.util.urlEncoded
+import java.util.EnumSet
+
+/**
+ * Parser for xoxocomics.net, built on [WpComicsParser].
+ *
+ * It uses the `/genre` catalogue with path-based sorting, a separate `/search`
+ * endpoint, and requests the `/all` URL of a chapter when loading its pages.
+ */
+@MangaSourceParser("XOXOCOMICS", "Xoxo Comics", "en", ContentType.COMICS)
+internal class XoxoComics(context: MangaLoaderContext) :
+    WpComicsParser(context, MangaSource.XOXOCOMICS, "xoxocomics.net", 50) {
+
+    override val listUrl = "/genre"
+    override val datePattern = "MM/dd/yyyy"
+
+    override val sortOrders: Set<SortOrder> = EnumSet.of(
+        SortOrder.UPDATED,
+        SortOrder.NEWEST,
+        SortOrder.POPULARITY,
+        SortOrder.ALPHABETICAL,
+    )
+
+    /**
+     * Loads one catalogue page. A non-empty [query] goes to `/search`, where
+     * [tags] and [sortOrder] are not applied; otherwise the genre listing is used.
+     */
+    override suspend fun getListPage(
+        page: Int,
+        query: String?,
+        tags: Set<MangaTag>?,
+        sortOrder: SortOrder,
+    ): List<Manga> {
+        val url = buildString {
+            append("https://")
+            append(domain)
+            if (!query.isNullOrEmpty()) {
+                append("/search?keyword=")
+                append(query.urlEncoded())
+                append("&page=")
+                append(page.toString())
+            } else {
+                append(listUrl)
+                if (!tags.isNullOrEmpty()) {
+                    append("/")
+                    // NOTE(review): keys of several tags are joined without a separator;
+                    // presumably only one tag is expected here - confirm.
+                    for (tag in tags) {
+                        append(tag.key)
+                    }
+                }
+                append("/")
+                when (sortOrder) {
+                    SortOrder.POPULARITY -> append("popular")
+                    SortOrder.NEWEST -> append("newest")
+                    SortOrder.ALPHABETICAL -> append("alphabet")
+                    // UPDATED and any other order use the bare listing path.
+                    else -> append("")
+                }
+                append("?page=")
+                append(page.toString())
+            }
+        }
+        val doc = webClient.httpGet(url).parseHtml()
+        return doc.select("div.item").map { div ->
+            val href = div.selectFirstOrThrow("a").attrAsRelativeUrl("href")
+            Manga(
+                id = generateUid(href),
+                url = href,
+                publicUrl = href.toAbsoluteUrl(div.host ?: domain),
+                coverUrl = div.selectFirst("img")?.src().orEmpty(),
+                title = div.selectFirstOrThrow("h3").text().orEmpty(),
+                altTitle = null,
+                rating = RATING_UNKNOWN,
+                tags = emptySet(),
+                author = null,
+                state = null,
+                source = source,
+                isNsfw = isNsfwSource,
+            )
+        }
+    }
+
+    /** Loads the images of [chapter] from its `/all` URL. */
+    override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+        val fullUrl = chapter.url.toAbsoluteUrl(domain) + "/all"
+        val doc = webClient.httpGet(fullUrl).parseHtml()
+        return doc.select(selectPage).map { url ->
+            val img = url.src()?.toRelativeUrl(domain) ?: url.parseFailed("Image src not found")
+            MangaPage(
+                id = generateUid(img),
+                url = img,
+                preview = null,
+                source = source,
+            )
+        }
+    }
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nettruyenmax.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nettruyenmax.kt
new file mode 100644
index 00000000..08a416ee
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nettruyenmax.kt
@@ -0,0 +1,14 @@
+package org.koitharu.kotatsu.parsers.site.wpcomics.vi
+
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
+
+/** Parser for www.nettruyenmax.com; overrides the catalogue path and page size of [WpComicsParser]. */
+@MangaSourceParser("NETTRUYENMAX", "Nettruyenmax", "vi")
+internal class Nettruyenmax(context: MangaLoaderContext) :
+    WpComicsParser(context, MangaSource.NETTRUYENMAX, "www.nettruyenmax.com", 35) {
+
+    override val listUrl = "/tim-truyen"
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt
new file mode 100644
index 00000000..99270d87
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/wpcomics/vi/Nhattruyenmin.kt
@@ -0,0 +1,11 @@
+package org.koitharu.kotatsu.parsers.site.wpcomics.vi
+
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.site.wpcomics.WpComicsParser
+
+/** Parser for nhattruyenmin.com using the [WpComicsParser] defaults. */
+@MangaSourceParser("NHATTRUYENMIN", "Nhattruyenmin", "vi")
+internal class Nhattruyenmin(context: MangaLoaderContext) :
+    WpComicsParser(context, MangaSource.NHATTRUYENMIN, "nhattruyenmin.com")