From c67b255ba99285625aedfe9be0ccf5cbba6217ad Mon Sep 17 00:00:00 2001
From: Koitharu
Date: Sat, 18 Jun 2022 18:17:53 +0300
Subject: [PATCH] [NHentai] New manga source #8

---
 .../kotatsu/parsers/site/NHentaiParser.kt     | 216 ++++++++++++++++++
 .../kotatsu/parsers/util/Collection.kt        |   8 +
 .../koitharu/kotatsu/parsers/util/Jsoup.kt    |  17 ++
 .../koitharu/kotatsu/parsers/MangaSources.kt  |   2 +-
 4 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/NHentaiParser.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NHentaiParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NHentaiParser.kt
new file mode 100644
index 00000000..b664acfd
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NHentaiParser.kt
@@ -0,0 +1,216 @@
+package org.koitharu.kotatsu.parsers.site
+
+import androidx.collection.ArraySet
+import kotlinx.coroutines.async
+import kotlinx.coroutines.awaitAll
+import kotlinx.coroutines.coroutineScope
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaParser
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import java.text.SimpleDateFormat
+import java.util.*
+
+/**
+ * Parser for the nhentai.net gallery site: builds listing/search URLs and scrapes the returned HTML.
+ */
+@MangaSourceParser("NHENTAI", "N-Hentai")
+class NHentaiParser(override val context: MangaLoaderContext) : MangaParser(MangaSource.NHENTAI) {
+
+	override val configKeyDomain: ConfigKey.Domain
+		get() = ConfigKey.Domain("nhentai.net", null)
+
+	override val sortOrders: Set<SortOrder>
+		get() = EnumSet.of(SortOrder.NEWEST, SortOrder.POPULARITY)
+
+	/**
+	 * Loads one page of galleries.
+	 *
+	 * A non-empty [query] uses the search endpoint (any [tags] are then ignored), a single tag uses the
+	 * tag listing, and several tags without a query are rewritten into a search query via [buildQuery].
+	 * The page number is derived from [offset] assuming 25 galleries per page.
+	 */
+	override suspend fun getList(
+		offset: Int,
+		query: String?,
+		tags: Set<MangaTag>?,
+		sortOrder: SortOrder,
+	): List<Manga> {
+		if (query.isNullOrEmpty() && tags != null && tags.size > 1) {
+			return getList(offset, buildQuery(tags), emptySet(), sortOrder)
+		}
+		val domain = getDomain()
+		val page = (offset / 25) + 1
+		val url = buildString {
+			append("https://")
+			append(domain)
+			if (!query.isNullOrEmpty()) {
+				append("/search/?q=")
+				append(query.urlEncoded())
+				append("&page=")
+				append(page)
+				if (sortOrder == SortOrder.POPULARITY) {
+					append("&sort=popular")
+				}
+			} else {
+				append('/')
+				if (!tags.isNullOrEmpty()) {
+					val tag = tags.single()
+					append("tag/")
+					append(tag.key)
+					append('/')
+					if (sortOrder == SortOrder.POPULARITY) {
+						append("popular")
+					}
+					append("?page=")
+					append(page)
+				} else {
+					if (sortOrder == SortOrder.POPULARITY) {
+						append("?sort=popular&page=")
+					} else {
+						append("?page=")
+					}
+					append(page)
+				}
+			}
+		}
+		val root = context.httpGet(url).parseHtml().body().getElementById("content")
+			?.selectLast("div.index-container") ?: parseFailed("Root not found")
+		// captions carry "[...]" and "(...)" groups; they are removed and the leftover whitespace collapsed
+		val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
+		val regexSpaces = Regex("\\s+")
+		return root.select(".gallery").map { div ->
+			val a = div.selectFirstOrThrow("a.cover")
+			val href = a.attrAsRelativeUrl("href")
+			val img = div.selectFirstOrThrow("img")
+			val title = div.selectFirstOrThrow(".caption").text()
+			Manga(
+				id = generateUid(href),
+				title = title.replace(regexBrackets, "")
+					.replace(regexSpaces, " ")
+					.trim(),
+				altTitle = null,
+				url = href,
+				publicUrl = href.toAbsoluteUrl(domain),
+				rating = RATING_UNKNOWN,
+				isNsfw = true,
+				coverUrl = img.attrAsAbsoluteUrlOrNull("data-src")
+					?: img.attrAsAbsoluteUrl("src"),
+				tags = setOf(),
+				state = null,
+				author = null,
+				largeCoverUrl = null,
+				description = null,
+				chapters = listOf(),
+				source = source,
+			)
+		}
+	}
+
+	/**
+	 * Loads the gallery page of [manga] and fills in tags, author, large cover and a single chapter
+	 * that stands for the whole gallery (it reuses the id, title and url of [manga]).
+	 */
+	override suspend fun getDetails(manga: Manga): Manga {
+		val root = context.httpGet(
+			url = manga.url.toAbsoluteUrl(getDomain())
+		).parseHtml().body().requireElementById("bigcontainer")
+		val img = root.requireElementById("cover").selectFirstOrThrow("img")
+		val tagContainers = root.requireElementById("tags").select(".tag-container")
+		// "datetime" is expected to look like "2022-06-18T15:17:53.123456+00:00" (UTC, microsecond fraction).
+		// Only whole seconds are parsed (the fraction is cut off below) because SimpleDateFormat reads a
+		// six-digit "S" field as milliseconds; the zone is pinned to UTC instead of the device default.
+		val dateFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT).apply {
+			timeZone = TimeZone.getTimeZone("UTC")
+		}
+		return manga.copy(
+			tags = tagContainers.find { x -> x.ownText() == "Tags:" }?.parseTags() ?: manga.tags,
+			author = tagContainers.find { x -> x.ownText() == "Artists:" }
+				?.selectFirst("span.name")?.text()?.toCamelCase(),
+			largeCoverUrl = img.attrAsAbsoluteUrlOrNull("data-src")
+				?: img.attrAsAbsoluteUrl("src"),
+			description = null,
+			chapters = listOf(
+				MangaChapter(
+					id = manga.id,
+					name = manga.title,
+					number = 1,
+					url = manga.url,
+					scanlator = null,
+					uploadDate = dateFormat.tryParse(
+						tagContainers.find { x -> x.ownText() == "Uploaded:" }
+							?.selectFirst("time")
+							?.attr("datetime")
+							?.substringBefore('.')
+					),
+					branch = null,
+					source = source,
+				)
+			)
+		)
+	}
+
+	/**
+	 * Lists the pages of [chapter] from its thumbnail grid; each [MangaPage.url] is the link taken from
+	 * a thumbnail, i.e. an HTML page that [getPageUrl] loads to find the actual image.
+	 */
+	override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+		val url = chapter.url.toAbsoluteUrl(getDomain())
+		val root = context.httpGet(url).parseHtml().requireElementById("thumbnail-container")
+		return root.select(".thumb-container").map { div ->
+			val a = div.selectFirstOrThrow("a")
+			val img = div.selectFirstOrThrow("img")
+			val href = a.attrAsRelativeUrl("href")
+			MangaPage(
+				id = generateUid(href),
+				url = href,
+				referer = url,
+				preview = img.attrAsAbsoluteUrlOrNull("data-src")
+					?: img.attrAsAbsoluteUrl("src"),
+				source = source,
+			)
+		}
+	}
+
+	/** Loads the HTML page referenced by [page] and returns the `src` of the image inside `#image-container`. */
+	override suspend fun getPageUrl(page: MangaPage): String {
+		val root = context.httpGet(page.url.toAbsoluteUrl(getDomain())).parseHtml().body()
+			.requireElementById("image-container")
+		return root.selectFirstOrThrow("img").attrAsAbsoluteUrl("src")
+	}
+
+	/** Collects the tags of the first three pages of the popular tags listing; the pages are requested concurrently. */
+	override suspend fun getTags(): Set<MangaTag> {
+		return coroutineScope {
+			// parse first 3 pages of tags
+			(1..3).map { page ->
+				async { getTags(page) }
+			}
+		}.awaitAll().flattenTo(ArraySet(360))
+	}
+
+	/** Parses the tags of one listing [page]; yields an empty set when `#tag-container` is missing. */
+	private suspend fun getTags(page: Int): Set<MangaTag> {
+		val root = context.httpGet("https://${getDomain()}/tags/popular?page=$page").parseHtml().body()
+			.getElementById("tag-container")
+		return root?.parseTags().orEmpty()
+	}
+
+	/** Converts every `a.tag` link under this element into a [MangaTag] keyed by the last path segment of its href. */
+	private fun Element.parseTags() = select("a.tag").mapToSet { a ->
+		val href = a.attr("href").removeSuffix('/')
+		MangaTag(
+			title = a.selectFirstOrThrow(".name").text().toTitleCase(),
+			key = href.substringAfterLast('/'),
+			source = source,
+		)
+	}
+
+	/** Builds a search query of the form `tag:"a" tag:"b"` from the keys of [tags]. */
+	private fun buildQuery(tags: Collection<MangaTag>) = tags.joinToString(separator = " ") { tag ->
+		"tag:\"${tag.key}\""
+	}
+}
\ No newline at end of file
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Collection.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Collection.kt
index df3d4b85..fe4ef0e3 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Collection.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Collection.kt
@@ -11,6 +11,14 @@ fun <T> MutableCollection<T>.replaceWith(subject: Iterable<T>) {
 	addAll(subject)
 }
 
+/** Adds the elements of every nested iterable to [destination], in iteration order, and returns [destination]. */
+fun <T, C : MutableCollection<in T>> Iterable<Iterable<T>>.flattenTo(destination: C): C {
+	for (element in this) {
+		destination.addAll(element)
+	}
+	return destination
+}
+
 fun <T> List<T>.medianOrNull(): T? = when {
 	isEmpty() -> null
 	else -> get((size / 2).coerceIn(indices))
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
index d75c5bfd..8eb92055 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt
@@ -4,6 +4,8 @@ package org.koitharu.kotatsu.parsers.util
 
 import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
 import org.jsoup.nodes.Element
+import org.jsoup.select.Selector
+import org.koitharu.kotatsu.parsers.exception.ParseException
 
 val Element.host: String?
 	get() {
@@ -86,4 +88,19 @@ fun Element.styleValueOrNull(property: String): String? {
 	val regex = Regex("${Regex.escape(property)}\\s*:\\s*[^;]+")
 	val css = attr("style").find(regex) ?: return null
 	return css.substringAfter(':').removeSuffix(';').trim()
+}
+
+/** Returns the first element matching [cssQuery], or throws [ParseException] when nothing matches. */
+fun Element.selectFirstOrThrow(cssQuery: String): Element {
+	return Selector.selectFirst(cssQuery, this) ?: throw ParseException("Cannot find \"$cssQuery\"")
+}
+
+/** Returns the element with the given [id], or throws [ParseException] when it is absent. */
+fun Element.requireElementById(id: String): Element {
+	return getElementById(id) ?: throw ParseException("Cannot find \"#$id\"")
+}
+
+/** Returns the last element matching [cssQuery], or `null` when nothing matches. */
+fun Element.selectLast(cssQuery: String): Element? {
+	return select(cssQuery).lastOrNull()
 }
\ No newline at end of file
diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
index 6b63c4e1..5795da96 100644
--- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
+++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
@@ -4,4 +4,4 @@ import org.junit.jupiter.params.provider.EnumSource
 import org.koitharu.kotatsu.parsers.model.MangaSource
 
 @EnumSource(MangaSource::class, names = ["LOCAL", "DUMMY"], mode = EnumSource.Mode.EXCLUDE)
-internal annotation class MangaSources()
\ No newline at end of file
+internal annotation class MangaSources
\ No newline at end of file