[NHentai] New manga source #8

pull/19/head
Koitharu 4 years ago
parent 0ed35a4b21
commit c67b255ba9
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -0,0 +1,189 @@
package org.koitharu.kotatsu.parsers.site
import androidx.collection.ArraySet
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import java.text.SimpleDateFormat
import java.util.*
@MangaSourceParser("NHENTAI", "N-Hentai")
class NHentaiParser(override val context: MangaLoaderContext) : MangaParser(MangaSource.NHENTAI) {
override val configKeyDomain: ConfigKey.Domain
get() = ConfigKey.Domain("nhentai.net", null)
override val sortOrders: Set<SortOrder>
get() = EnumSet.of(SortOrder.NEWEST, SortOrder.POPULARITY)
override suspend fun getList(
offset: Int,
query: String?,
tags: Set<MangaTag>?,
sortOrder: SortOrder,
): List<Manga> {
if (query.isNullOrEmpty() && tags != null && tags.size > 1) {
return getList(offset, buildQuery(tags), emptySet(), sortOrder)
}
val domain = getDomain()
val page = (offset / 25) + 1
val url = buildString {
append("https://")
append(domain)
if (!query.isNullOrEmpty()) {
append("/search/?q=")
append(query.urlEncoded())
append("&page=")
append(page)
if (sortOrder == SortOrder.POPULARITY) {
append("&sort=popular")
}
} else {
append('/')
if (!tags.isNullOrEmpty()) {
val tag = tags.single()
append("tag/")
append(tag.key)
append('/')
if (sortOrder == SortOrder.POPULARITY) {
append("popular")
}
append("?page=")
append(page)
} else {
if (sortOrder == SortOrder.POPULARITY) {
append("?sort=popular&page=")
} else {
append("?page=")
}
append(page)
}
}
}
val root = context.httpGet(url).parseHtml().body().getElementById("content")
?.selectLast("div.index-container") ?: parseFailed("Root not found")
val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
val regexSpaces = Regex("\\s+")
return root.select(".gallery").map { div ->
val a = div.selectFirstOrThrow("a.cover")
val href = a.attrAsRelativeUrl("href")
val img = div.selectFirstOrThrow("img")
val title = div.selectFirstOrThrow(".caption").text()
Manga(
id = generateUid(href),
title = title.replace(regexBrackets, "")
.replace(regexSpaces, " ")
.trim(),
altTitle = null,
url = href,
publicUrl = href.toAbsoluteUrl(domain),
rating = RATING_UNKNOWN,
isNsfw = true,
coverUrl = img.attrAsAbsoluteUrlOrNull("data-src")
?: img.attrAsAbsoluteUrl("src"),
tags = setOf(),
state = null,
author = null,
largeCoverUrl = null,
description = null,
chapters = listOf(),
source = source,
)
}
}
override suspend fun getDetails(manga: Manga): Manga {
val root = context.httpGet(
url = manga.url.toAbsoluteUrl(getDomain())
).parseHtml().body().requireElementById("bigcontainer")
val img = root.requireElementById("cover").selectFirstOrThrow("img")
val tagContainers = root.requireElementById("tags").select(".tag-container")
val dateFormat = SimpleDateFormat(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSS'+00:00'",
Locale.ROOT,
)
return manga.copy(
tags = tagContainers.find { x -> x.ownText() == "Tags:" }?.parseTags() ?: manga.tags,
author = tagContainers.find { x -> x.ownText() == "Artists:" }
?.selectFirst("span.name")?.text()?.toCamelCase(),
largeCoverUrl = img.attrAsAbsoluteUrlOrNull("data-src")
?: img.attrAsAbsoluteUrl("src"),
description = null,
chapters = listOf(
MangaChapter(
id = manga.id,
name = manga.title,
number = 1,
url = manga.url,
scanlator = null,
uploadDate = dateFormat.tryParse(
tagContainers.find { x -> x.ownText() == "Uploaded:" }
?.selectFirst("time")
?.attr("datetime")
),
branch = null,
source = source,
)
)
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val url = chapter.url.toAbsoluteUrl(getDomain())
val root = context.httpGet(url).parseHtml().requireElementById("thumbnail-container")
return root.select(".thumb-container").map { div ->
val a = div.selectFirstOrThrow("a")
val img = div.selectFirstOrThrow("img")
val href = a.attrAsRelativeUrl("href")
MangaPage(
id = generateUid(href),
url = href,
referer = url,
preview = img.attrAsAbsoluteUrlOrNull("data-src")
?: img.attrAsAbsoluteUrl("src"),
source = source,
)
}
}
override suspend fun getPageUrl(page: MangaPage): String {
val root = context.httpGet(page.url.toAbsoluteUrl(getDomain())).parseHtml().body()
.requireElementById("image-container")
return root.selectFirstOrThrow("img").attrAsAbsoluteUrl("src")
}
override suspend fun getTags(): Set<MangaTag> {
return coroutineScope {
// parse first 3 pages of tags
(1..3).map { page ->
async { getTags(page) }
}
}.awaitAll().flattenTo(ArraySet(360))
}
private suspend fun getTags(page: Int): Set<MangaTag> {
val root = context.httpGet("https://${getDomain()}/tags/popular?page=$page").parseHtml().body()
.getElementById("tag-container")
return root?.parseTags().orEmpty()
}
private fun Element.parseTags() = select("a.tag").mapToSet { a ->
val href = a.attr("href").removeSuffix('/')
MangaTag(
title = a.selectFirstOrThrow(".name").text().toTitleCase(),
key = href.substringAfterLast('/'),
source = source,
)
}
private fun buildQuery(tags: Collection<MangaTag>) = tags.joinToString(separator = " ") { tag ->
"tag:\"${tag.key}\""
}
}

@ -11,6 +11,13 @@ fun <T> MutableCollection<T>.replaceWith(subject: Iterable<T>) {
addAll(subject) addAll(subject)
} }
fun <T, C : MutableCollection<in T>> Iterable<Iterable<T>>.flattenTo(destination: C): C {
for (element in this) {
destination.addAll(element)
}
return destination
}
fun <T> List<T>.medianOrNull(): T? = when { fun <T> List<T>.medianOrNull(): T? = when {
isEmpty() -> null isEmpty() -> null
else -> get((size / 2).coerceIn(indices)) else -> get((size / 2).coerceIn(indices))

@ -4,6 +4,8 @@ package org.koitharu.kotatsu.parsers.util
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.select.Selector
import org.koitharu.kotatsu.parsers.exception.ParseException
val Element.host: String? val Element.host: String?
get() { get() {
@ -86,4 +88,16 @@ fun Element.styleValueOrNull(property: String): String? {
val regex = Regex("${Regex.escape(property)}\\s*:\\s*[^;]+") val regex = Regex("${Regex.escape(property)}\\s*:\\s*[^;]+")
val css = attr("style").find(regex) ?: return null val css = attr("style").find(regex) ?: return null
return css.substringAfter(':').removeSuffix(';').trim() return css.substringAfter(':').removeSuffix(';').trim()
}
fun Element.selectFirstOrThrow(cssQuery: String): Element {
return Selector.selectFirst(cssQuery, this) ?: throw ParseException("Cannot find \"$cssQuery\"")
}
fun Element.requireElementById(id: String): Element {
return getElementById(id) ?: throw ParseException("Cannot find \"#$id\"")
}
fun Element.selectLast(cssQuery: String): Element? {
return select(cssQuery).lastOrNull()
} }

@ -4,4 +4,4 @@ import org.junit.jupiter.params.provider.EnumSource
import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.model.MangaSource
@EnumSource(MangaSource::class, names = ["LOCAL", "DUMMY"], mode = EnumSource.Mode.EXCLUDE) @EnumSource(MangaSource::class, names = ["LOCAL", "DUMMY"], mode = EnumSource.Mode.EXCLUDE)
internal annotation class MangaSources() internal annotation class MangaSources
Loading…
Cancel
Save