From 826563694ee82b675c046eb65d98f11dc1912783 Mon Sep 17 00:00:00 2001
From: devi
Date: Sun, 30 Jul 2023 21:53:10 +0200
Subject: [PATCH] add MangaboxParser and sources

---
 .../parsers/site/mangabox/MangaboxParser.kt   | 309 ++++++++++++++++++
 .../parsers/site/mangabox/en/Mangabat.kt      |  22 ++
 .../parsers/site/mangabox/en/Mangairo.kt      | 161 +++++++++
 .../parsers/site/mangabox/en/Manganato.kt     |  17 ++
 4 files changed, 509 insertions(+)
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/MangaboxParser.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangabat.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangairo.kt
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Manganato.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/MangaboxParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/MangaboxParser.kt
new file mode 100644
index 00000000..f2c86c3d
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/MangaboxParser.kt
@@ -0,0 +1,309 @@
+package org.koitharu.kotatsu.parsers.site.mangabox
+
+import kotlinx.coroutines.async
+import kotlinx.coroutines.coroutineScope
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.PagedMangaParser
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import java.text.DateFormat
+import java.text.SimpleDateFormat
+import java.util.*
+
+/**
+ * Base parser for sites sharing the "Mangabox" layout (Manganato, Mangabat, Mangairo).
+ *
+ * Site-specific paths, CSS selectors and the chapter date pattern are `protected open`
+ * properties, so a concrete source only overrides what differs.
+ */
+internal abstract class MangaboxParser(
+    context: MangaLoaderContext,
+    source: MangaSource,
+    pageSize: Int = 24,
+) : PagedMangaParser(context, source, pageSize) {
+
+    override val sortOrders: Set<SortOrder> = EnumSet.of(
+        SortOrder.UPDATED,
+        SortOrder.POPULARITY,
+        SortOrder.NEWEST,
+    )
+
+    /** Path of the unfiltered listing; it is appended to the host, so it starts with `/`. */
+    protected open val listUrl = "/genre-all"
+
+    /** Path prefix of the search endpoint; the URL-encoded query is appended to it. */
+    protected open val searchUrl = "/search/story/"
+
+    /** [SimpleDateFormat] pattern used for absolute chapter dates. */
+    protected open val datePattern = "MMM dd,yy"
+
+    init {
+        paginator.firstPage = 1
+        searchPaginator.firstPage = 1
+    }
+
+    /** Status labels mapped to [MangaState.ONGOING]. */
+    @JvmField
+    protected val ongoing: Set<String> = setOf(
+        "Ongoing",
+    )
+
+    /** Status labels mapped to [MangaState.FINISHED]. */
+    @JvmField
+    protected val finished: Set<String> = setOf(
+        "Completed",
+    )
+
+    /**
+     * Loads one page of the catalogue.
+     *
+     * A non-empty [query] takes precedence over [tags]; [sortOrder] is only applied to
+     * the unfiltered listing.
+     */
+    override suspend fun getListPage(
+        page: Int,
+        query: String?,
+        tags: Set<MangaTag>?,
+        sortOrder: SortOrder,
+    ): List<Manga> {
+        val url = buildString {
+            append("https://")
+            append(domain)
+            if (!query.isNullOrEmpty()) {
+                append(searchUrl)
+                append(query.urlEncoded())
+                append("?page=")
+                append(page)
+            } else if (!tags.isNullOrEmpty()) {
+                append('/')
+                // NOTE(review): keys of several tags are concatenated without a separator,
+                // so only a single selected tag yields a meaningful URL.
+                tags.forEach { append(it.key) }
+                append('/')
+                append(page)
+            } else {
+                append(listUrl)
+                append('/')
+                if (page > 1) {
+                    append(page)
+                }
+                when (sortOrder) {
+                    SortOrder.POPULARITY -> append("?type=topview")
+                    SortOrder.NEWEST -> append("?type=newest")
+                    else -> Unit // UPDATED and any other order add no parameter
+                }
+            }
+        }
+
+        val doc = webClient.httpGet(url).parseHtml()
+        return doc.select("div.content-genres-item, div.list-story-item").ifEmpty {
+            doc.select("div.search-story-item")
+        }.map { div ->
+            val href = div.selectFirstOrThrow("a").attrAsRelativeUrl("href")
+            Manga(
+                id = generateUid(href),
+                url = href,
+                publicUrl = href.toAbsoluteUrl(div.host ?: domain),
+                coverUrl = div.selectFirst("img")?.src().orEmpty(),
+                title = div.selectFirstOrThrow("h3").text(),
+                altTitle = null,
+                rating = RATING_UNKNOWN,
+                tags = emptySet(),
+                author = null,
+                state = null,
+                source = source,
+                isNsfw = isNsfwSource,
+            )
+        }
+    }
+
+    /** Selector of the genre links on the listing page. */
+    protected open val selectTagMap = "div.panel-genres-list a:not(.genres-select)"
+
+    /** Collects the available genres from the page at [listUrl]. */
+    override suspend fun getTags(): Set<MangaTag> {
+        // listUrl already starts with '/', so no extra separator is inserted
+        val doc = webClient.httpGet("https://$domain$listUrl").parseHtml()
+        return doc.select(selectTagMap).mapNotNullToSet { a ->
+            val key = a.attr("href").removeSuffix('/').substringAfterLast('/')
+            val name = a.attr("title").replace(" Manga", "")
+            MangaTag(
+                key = key,
+                title = name,
+                source = source,
+            )
+        }
+    }
+
+    protected open val selectDesc = "div#noidungm, div#panel-story-info-description"
+    protected open val selectState = "li:contains(status), td:containsOwn(status) + td"
+    protected open val selectAlt = ".story-alternative, tr:has(.info-alternative) h2"
+    protected open val selectAut = "li:contains(author) a, td:contains(author) + td a"
+    protected open val selectTag = "div.manga-info-top li:contains(genres) a , td:containsOwn(genres) + td a"
+
+    /**
+     * Loads the details page of [manga] and returns a copy enriched with description,
+     * alternative title, author, state, tags and chapters. The element matched by
+     * [selectDesc] is mandatory.
+     */
+    override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
+        val fullUrl = manga.url.toAbsoluteUrl(domain)
+        val doc = webClient.httpGet(fullUrl).parseHtml()
+        val chaptersDeferred = async { getChapters(manga, doc) }
+        val desc = doc.selectFirstOrThrow(selectDesc).html()
+        val state = when (doc.select(selectState).text()) {
+            in ongoing -> MangaState.ONGOING
+            in finished -> MangaState.FINISHED
+            else -> null
+        }
+        val alt = doc.body().select(selectAlt).text().replace("Alternative : ", "")
+        val aut = doc.body().select(selectAut).eachText().joinToString()
+        manga.copy(
+            tags = doc.body().select(selectTag).mapNotNullToSet { a ->
+                MangaTag(
+                    key = a.attr("href").substringAfterLast("category=").substringBefore("&"),
+                    title = a.text().toTitleCase(),
+                    source = source,
+                )
+            },
+            description = desc,
+            altTitle = alt,
+            author = aut,
+            state = state,
+            chapters = chaptersDeferred.await(),
+            isNsfw = manga.isNsfw,
+        )
+    }
+
+    protected open val selectDate = "span"
+    protected open val selectChapter = "div.chapter-list div.row, ul.row-content-chapter li"
+
+    /**
+     * Extracts the chapter list from the already loaded details page [doc]; the upload
+     * date is read from the last [selectDate] element of each [selectChapter] row.
+     */
+    protected open suspend fun getChapters(manga: Manga, doc: Document): List<MangaChapter> {
+        val dateFormat = SimpleDateFormat(datePattern, sourceLocale)
+        return doc.body().select(selectChapter).mapChapters(reversed = true) { i, li ->
+            val a = li.selectFirstOrThrow("a")
+            val href = a.attrAsRelativeUrl("href")
+            val dateText = li.select(selectDate).last()?.text()
+            MangaChapter(
+                id = generateUid(href),
+                name = a.text(),
+                number = i + 1,
+                url = href,
+                uploadDate = parseChapterDate(dateFormat, dateText),
+                source = source,
+                scanlator = null,
+                branch = null,
+            )
+        }
+    }
+
+    protected open val selectPage = "div#vungdoc img, div.container-chapter-reader img"
+
+    /** Alternative reader host, tried when [domain] returns no page images; empty disables it. */
+    protected open val otherDomain = ""
+
+    /**
+     * Loads the page images of [chapter]. If the reader page on [domain] contains no
+     * image matching [selectPage], the same URL is requested from [otherDomain].
+     */
+    override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+        val fullUrl = chapter.url.toAbsoluteUrl(domain)
+        val primary = webClient.httpGet(fullUrl).parseHtml().select(selectPage)
+        // Without a fallback host, replacing the domain would produce a URL with no host.
+        val images = if (primary.isEmpty() && otherDomain.isNotEmpty()) {
+            webClient.httpGet(fullUrl.replace(domain, otherDomain)).parseHtml().select(selectPage)
+        } else {
+            primary
+        }
+        return images.map { img ->
+            val url = img.src()?.toRelativeUrl(domain) ?: img.parseFailed("Image src not found")
+            MangaPage(
+                id = generateUid(url),
+                url = url,
+                preview = null,
+                source = source,
+            )
+        }
+    }
+
+    /**
+     * Returns the first non-empty absolute URL among the `data-src`, `data-cfsrc` and
+     * `src` attributes, or `null` if none is set.
+     */
+    protected fun Element.src(): String? = sequenceOf("data-src", "data-cfsrc", "src")
+        .map { absUrl(it) }
+        .firstOrNull { it.isNotEmpty() }
+
+    /**
+     * Converts the date label of a chapter into epoch milliseconds.
+     *
+     * Handles relative labels ("21 hours ago"), "yesterday"/"today" (midnight), dates with
+     * ordinal suffixes ("5th December 2019") and plain dates in [dateFormat].
+     *
+     * @return the timestamp, or 0 if [date] is `null`
+     */
+    protected fun parseChapterDate(dateFormat: DateFormat, date: String?): Long {
+        val d = date?.lowercase() ?: return 0
+        return when {
+            d.endsWith(" ago") ||
+                d.endsWith(" h") || // short hours
+                d.endsWith(" d") -> parseRelativeDate(date) // short days
+
+            d.startsWith("yesterday") -> startOfToday().apply {
+                add(Calendar.DAY_OF_MONTH, -1)
+            }.timeInMillis
+
+            d.startsWith("today") -> startOfToday().timeInMillis
+
+            // Strip ordinal suffixes ("5th December 2019" -> "5 December 2019") before parsing
+            ORDINAL_DAY.containsMatchIn(date) -> dateFormat.tryParse(
+                date.split(" ").joinToString(" ") { part ->
+                    if (ORDINAL_TOKEN.containsMatchIn(part)) part.replace(NON_DIGIT, "") else part
+                },
+            )
+
+            else -> dateFormat.tryParse(date)
+        }
+    }
+
+    /** Returns a calendar set to 00:00:00.000 of the current day. */
+    private fun startOfToday(): Calendar = Calendar.getInstance().apply {
+        set(Calendar.HOUR_OF_DAY, 0)
+        set(Calendar.MINUTE, 0)
+        set(Calendar.SECOND, 0)
+        set(Calendar.MILLISECOND, 0)
+    }
+
+    /**
+     * Parses relative dates such as "21 hours ago" by subtracting the amount from the
+     * current time. Returns 0 if [date] contains no number or no known unit.
+     */
+    private fun parseRelativeDate(date: String): Long {
+        val number = NUMBER.find(date)?.value?.toIntOrNull() ?: return 0
+        val field = when {
+            WordSet("day", "days").anyWordIn(date) -> Calendar.DAY_OF_MONTH
+            // Checked before hours: the short token "h" would also be found inside
+            // "month" if WordSet matches substrings - TODO confirm against WordSet.
+            WordSet("month", "months").anyWordIn(date) -> Calendar.MONTH
+            WordSet("hour", "hours", "h").anyWordIn(date) -> Calendar.HOUR
+            WordSet("min", "minute", "minutes").anyWordIn(date) -> Calendar.MINUTE
+            WordSet("second").anyWordIn(date) -> Calendar.SECOND
+            WordSet("year").anyWordIn(date) -> Calendar.YEAR
+            else -> return 0
+        }
+        return Calendar.getInstance().apply { add(field, -number) }.timeInMillis
+    }
+
+    private companion object {
+        val ORDINAL_DAY = Regex("""\d(st|nd|rd|th)""")
+        val ORDINAL_TOKEN = Regex("""\d\D\D""")
+        val NON_DIGIT = Regex("""\D""")
+        val NUMBER = Regex("""\d+""")
+    }
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangabat.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangabat.kt
new file mode 100644
index 00000000..7e69a7f0
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangabat.kt
@@ -0,0 +1,22 @@
+package org.koitharu.kotatsu.parsers.site.mangabox.en
+
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.site.mangabox.MangaboxParser
+
+/** Mangabat source: overrides the domains, list/search paths and genre selector of [MangaboxParser]. */
+@MangaSourceParser("HMANGABAT", "Mangabat", "en")
+internal class Mangabat(context: MangaLoaderContext) :
+    MangaboxParser(context, MangaSource.HMANGABAT) {
+
+    override val configKeyDomain = ConfigKey.Domain("h.mangabat.com", "readmangabat.com")
+
+    override val otherDomain = "readmangabat.com"
+
+    override val searchUrl = "/search/manga/"
+
+    override val listUrl = "/manga-list-all"
+    override val selectTagMap = "div.panel-category p.pn-category-row:not(.pn-category-row-border) a"
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangairo.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangairo.kt
new file mode 100644
index 00000000..6cb498a1
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Mangairo.kt
@@ -0,0 +1,161 @@
+package org.koitharu.kotatsu.parsers.site.mangabox.en
+
+import kotlinx.coroutines.async
+import kotlinx.coroutines.coroutineScope
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.Manga
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.model.MangaState
+import org.koitharu.kotatsu.parsers.model.MangaTag
+import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN
+import org.koitharu.kotatsu.parsers.model.SortOrder
+import org.koitharu.kotatsu.parsers.site.mangabox.MangaboxParser
+import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
+import org.koitharu.kotatsu.parsers.util.domain
+import org.koitharu.kotatsu.parsers.util.generateUid
+import org.koitharu.kotatsu.parsers.util.host
+import org.koitharu.kotatsu.parsers.util.mapNotNullToSet
+import org.koitharu.kotatsu.parsers.util.parseHtml
+import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow
+import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
+import org.koitharu.kotatsu.parsers.util.toTitleCase
+import org.koitharu.kotatsu.parsers.util.urlEncoded
+
+/**
+ * Mangairo source. Its listing encodes sort order, genre and page as path segments
+ * (`/type-.../ctg-.../state-all/page-N`), so list, tag and details parsing are overridden.
+ */
+@MangaSourceParser("MANGAIRO", "Mangairo", "en")
+internal class Mangairo(context: MangaLoaderContext) :
+    MangaboxParser(context, MangaSource.MANGAIRO) {
+
+    override val configKeyDomain = ConfigKey.Domain("w.mangairo.com", "chap.mangairo.com")
+
+    override val otherDomain = "chap.mangairo.com"
+
+    override val datePattern = "MMM-dd-yy"
+    override val listUrl = "/manga-list"
+    override val searchUrl = "/list/search/"
+
+    override val selectDesc = "div#story_discription p"
+    override val selectState = "ul.story_info_right li:contains(Status) a"
+    override val selectAlt = "ul.story_info_right li:contains(Alter) h2"
+    override val selectAut = "ul.story_info_right li:contains(Author) a"
+    override val selectTag = "ul.story_info_right li:contains(Genres) a"
+
+    override val selectChapter = "div.chapter_list li"
+    override val selectDate = "p"
+
+    override val selectPage = "div.panel-read-story img"
+
+    /**
+     * Loads one page of the catalogue. A non-empty [query] switches to the search
+     * endpoint, where [tags] and [sortOrder] are not applied.
+     */
+    override suspend fun getListPage(
+        page: Int,
+        query: String?,
+        tags: Set<MangaTag>?,
+        sortOrder: SortOrder,
+    ): List<Manga> {
+        val url = buildString {
+            append("https://")
+            append(domain)
+            if (!query.isNullOrEmpty()) {
+                append(searchUrl)
+                append(query.urlEncoded())
+                append("?page=")
+                append(page)
+            } else {
+                append(listUrl)
+                append("/type-")
+                append(
+                    when (sortOrder) {
+                        SortOrder.POPULARITY -> "topview"
+                        SortOrder.NEWEST -> "newest"
+                        else -> "latest"
+                    },
+                )
+                append("/ctg-")
+                if (tags.isNullOrEmpty()) {
+                    append("all")
+                } else {
+                    // NOTE(review): keys of several tags are concatenated without a separator.
+                    tags.forEach { append(it.key) }
+                }
+                append("/state-all/page-")
+                append(page)
+            }
+        }
+
+        val doc = webClient.httpGet(url).parseHtml()
+        return doc.select("div.story-item").map { div ->
+            val href = div.selectFirstOrThrow("a").attrAsRelativeUrl("href")
+            Manga(
+                id = generateUid(href),
+                url = href,
+                publicUrl = href.toAbsoluteUrl(div.host ?: domain),
+                coverUrl = div.selectFirst("img")?.src().orEmpty(),
+                title = (div.selectFirst("h2")?.text() ?: div.selectFirst("h3")?.text()).orEmpty(),
+                altTitle = null,
+                rating = RATING_UNKNOWN,
+                tags = emptySet(),
+                author = null,
+                state = null,
+                source = source,
+                isNsfw = isNsfwSource,
+            )
+        }
+    }
+
+    /** Collects the available genres from the first page of the default listing. */
+    override suspend fun getTags(): Set<MangaTag> {
+        // listUrl already starts with '/', so it is appended directly to the host
+        val doc = webClient.httpGet("https://$domain$listUrl/type-latest/ctg-all/state-all/page-1").parseHtml()
+        return doc.select("div.panel_category a:not(.ctg_select)").mapNotNullToSet { a ->
+            val key = a.attr("href").substringAfterLast("ctg-").substringBefore("/")
+            val name = a.attr("title").replace("Category ", "")
+            MangaTag(
+                key = key,
+                title = name,
+                source = source,
+            )
+        }
+    }
+
+    /**
+     * Same steps as the base implementation, except that the tag key is taken from
+     * the part of the genre link that follows `page-`.
+     */
+    override suspend fun getDetails(manga: Manga): Manga = coroutineScope {
+        val fullUrl = manga.url.toAbsoluteUrl(domain)
+        val doc = webClient.httpGet(fullUrl).parseHtml()
+        val chaptersDeferred = async { getChapters(manga, doc) }
+        val desc = doc.selectFirstOrThrow(selectDesc).html()
+        val state = when (doc.select(selectState).text()) {
+            in ongoing -> MangaState.ONGOING
+            in finished -> MangaState.FINISHED
+            else -> null
+        }
+        val alt = doc.body().select(selectAlt).text().replace("Alternative : ", "")
+        val aut = doc.body().select(selectAut).eachText().joinToString()
+        manga.copy(
+            tags = doc.body().select(selectTag).mapNotNullToSet { a ->
+                MangaTag(
+                    // Per the original author's note, the site mixes up page and tag id in this link.
+                    key = a.attr("href").substringAfterLast("page-"),
+                    title = a.text().toTitleCase(),
+                    source = source,
+                )
+            },
+            description = desc,
+            altTitle = alt,
+            author = aut,
+            state = state,
+            chapters = chaptersDeferred.await(),
+            isNsfw = manga.isNsfw,
+        )
+    }
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Manganato.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Manganato.kt
new file mode 100644
index 00000000..520f3d66
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangabox/en/Manganato.kt
@@ -0,0 +1,17 @@
+package org.koitharu.kotatsu.parsers.site.mangabox.en
+
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.MangaSource
+import org.koitharu.kotatsu.parsers.site.mangabox.MangaboxParser
+
+/** Manganato source: uses the [MangaboxParser] defaults and only configures its domains. */
+@MangaSourceParser("MANGANATO", "Manganato", "en")
+internal class Manganato(context: MangaLoaderContext) :
+    MangaboxParser(context, MangaSource.MANGANATO) {
+
+    override val configKeyDomain = ConfigKey.Domain("chapmanganato.com", "manganato.com")
+
+    override val otherDomain = "chapmanganato.com"
+}