[Grouple] Add related manga implementation

Koitharu 3 years ago
parent b65e1c498e
commit 9861a9aa6b
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -124,8 +124,8 @@ internal class JapScanParser(context: MangaLoaderContext) : PagedMangaParser(con
}.toList() }.toList()
val keyTables = listOf( val keyTables = listOf(
ShortTables[1].reversed() + LongTables[5].reversed() + LongTables[2].reversed() + LongTables[0].reversed(), shortTables[1].reversed() + longTables[5].reversed() + longTables[2].reversed() + longTables[0].reversed(),
ShortTables[2].reversed() + LongTables[3].reversed() + LongTables[4].reversed() + LongTables[1].reversed(), shortTables[2].reversed() + longTables[3].reversed() + longTables[4].reversed() + longTables[1].reversed(),
) )
var error: Exception? = null var error: Exception? = null

@ -4,12 +4,12 @@ import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.first import kotlinx.coroutines.flow.first
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import okhttp3.Interceptor import okhttp3.Interceptor
import okhttp3.Response import okhttp3.Response
import okhttp3.internal.headersContentLength import okhttp3.internal.headersContentLength
import org.json.JSONArray import org.json.JSONArray
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaParserAuthProvider import org.koitharu.kotatsu.parsers.MangaParserAuthProvider
@ -28,6 +28,7 @@ private const val NSFW_ALERT = "сексуальные сцены"
private const val NOTHING_FOUND = "Ничего не найдено" private const val NOTHING_FOUND = "Ничего не найдено"
private const val MIN_IMAGE_SIZE = 1024L private const val MIN_IMAGE_SIZE = 1024L
private const val HEADER_ACCEPT = "Accept" private const val HEADER_ACCEPT = "Accept"
private const val RELATED_TITLE = "Связанные произведения"
internal abstract class GroupleParser( internal abstract class GroupleParser(
context: MangaLoaderContext, context: MangaLoaderContext,
@ -43,9 +44,7 @@ internal abstract class GroupleParser(
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0", "Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
) )
override val headers: Headers = Headers.Builder() override val headers: Headers = Headers.Builder().add("User-Agent", config[userAgentKey]).build()
.add("User-Agent", config[userAgentKey])
.build()
override val sortOrders: Set<SortOrder> = EnumSet.of( override val sortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED, SortOrder.UPDATED,
@ -97,83 +96,32 @@ internal abstract class GroupleParser(
}.parseHtml().body() }.parseHtml().body()
val root = (doc.getElementById("mangaBox") ?: doc.getElementById("mangaResults")) val root = (doc.getElementById("mangaBox") ?: doc.getElementById("mangaResults"))
?: doc.parseFailed("Cannot find root") ?: doc.parseFailed("Cannot find root")
val tiles = root.selectFirst("div.tiles.row") ?: if ( val tiles =
root.select(".alert").any { it.ownText() == NOTHING_FOUND } root.selectFirst("div.tiles.row") ?: if (root.select(".alert").any { it.ownText() == NOTHING_FOUND }) {
) { return emptyList()
return emptyList() } else {
} else { doc.parseFailed("No tiles found")
doc.parseFailed("No tiles found")
}
val baseHost = root.baseUri().toHttpUrl().host
return tiles.select("div.tile").mapNotNull { node ->
val imgDiv = node.selectFirst("div.img") ?: return@mapNotNull null
val descDiv = node.selectFirst("div.desc") ?: return@mapNotNull null
if (descDiv.selectFirst("i.fa-user") != null) {
return@mapNotNull null // skip author
}
val href = imgDiv.selectFirst("a")?.attrAsAbsoluteUrlOrNull("href")
if (href == null || href.toHttpUrl().host != baseHost) {
return@mapNotNull null // skip external links
} }
val title = descDiv.selectFirst("h3")?.selectFirst("a")?.text() return tiles.select("div.tile").mapNotNull(::parseManga)
?: return@mapNotNull null
val tileInfo = descDiv.selectFirst("div.tile-info")
val relUrl = href.toRelativeUrl(baseHost)
Manga(
id = generateUid(relUrl),
url = relUrl,
publicUrl = href,
title = title,
altTitle = descDiv.selectFirst("h4")?.text(),
coverUrl = imgDiv.selectFirst("img.lazy")?.attr("data-original")?.replace("_p.", ".").orEmpty(),
rating = runCatching {
node.selectFirst(".compact-rate")
?.attr("title")
?.toFloatOrNull()
?.div(5f)
}.getOrNull() ?: RATING_UNKNOWN,
author = tileInfo?.selectFirst("a.person-link")?.text(),
isNsfw = defaultIsNsfw,
tags = runCatching {
tileInfo?.select("a.element-link")
?.mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}
}.getOrNull().orEmpty(),
state = when {
node.selectFirst("div.tags")
?.selectFirst("span.mangaCompleted") != null -> MangaState.FINISHED
else -> null
},
source = source,
)
}
} }
override suspend fun getDetails(manga: Manga): Manga { override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml() val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml()
val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent") val root = doc.body().requireElementById("mangaBox").selectFirstOrThrow("div.leftContent")
?: doc.parseFailed("Cannot find root")
val dateFormat = SimpleDateFormat("dd.MM.yy", Locale.US) val dateFormat = SimpleDateFormat("dd.MM.yy", Locale.US)
val coverImg = root.selectFirst("div.subject-cover")?.selectFirst("img") val coverImg = root.selectFirst("div.subject-cover")?.selectFirst("img")
return manga.copy( return manga.copy(
description = root.selectFirst("div.manga-description")?.html(), description = root.selectFirst("div.manga-description")?.html(),
largeCoverUrl = coverImg?.attr("data-full"), largeCoverUrl = coverImg?.attr("data-full"),
coverUrl = coverImg?.attr("data-thumb") ?: manga.coverUrl, coverUrl = coverImg?.attr("data-thumb") ?: manga.coverUrl,
tags = manga.tags + root.select("div.subject-meta").select("span.elem_genre ") tags = manga.tags + root.select("div.subject-meta").select("span.elem_genre ").mapNotNull {
.mapNotNull { val a = it.selectFirst("a.element-link") ?: return@mapNotNull null
val a = it.selectFirst("a.element-link") ?: return@mapNotNull null MangaTag(
MangaTag( title = a.text().toTitleCase(),
title = a.text().toTitleCase(), key = a.attr("href").substringAfterLast('/'),
key = a.attr("href").substringAfterLast('/'), source = source,
source = source, )
) },
},
author = root.selectFirst("a.person-link")?.text() ?: manga.author, author = root.selectFirst("a.person-link")?.text() ?: manga.author,
isNsfw = manga.isNsfw || root.select(".alert-warning").any { it.ownText().contains(NSFW_ALERT) }, isNsfw = manga.isNsfw || root.select(".alert-warning").any { it.ownText().contains(NSFW_ALERT) },
chapters = root.requireElementById("chapters-list").select("a.chapter-link") chapters = root.requireElementById("chapters-list").select("a.chapter-link")
@ -183,9 +131,7 @@ internal abstract class GroupleParser(
var translators = "" var translators = ""
val translatorElement = a.attr("title") val translatorElement = a.attr("title")
if (!translatorElement.isNullOrBlank()) { if (!translatorElement.isNullOrBlank()) {
translators = translatorElement translators = translatorElement.replace("(Переводчик),", "&").removeSuffix(" (Переводчик)")
.replace("(Переводчик),", "&")
.removeSuffix(" (Переводчик)")
} }
MangaChapter( MangaChapter(
id = generateUid(href), id = generateUid(href),
@ -202,9 +148,7 @@ internal abstract class GroupleParser(
} }
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain) + "?mtr=1") val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain) + "?mtr=1").checkAuthRequired().parseHtml()
.checkAuthRequired()
.parseHtml()
val scripts = doc.select("script") val scripts = doc.select("script")
for (script in scripts) { for (script in scripts) {
val data = script.html() val data = script.html()
@ -212,10 +156,7 @@ internal abstract class GroupleParser(
if (pos == -1) { if (pos == -1) {
continue continue
} }
val json = data.substring(pos) val json = data.substring(pos).substringAfter('(').substringBefore('\n').substringBeforeLast(')')
.substringAfter('(')
.substringBefore('\n')
.substringBeforeLast(')')
if (json.isEmpty()) { if (json.isEmpty()) {
continue continue
} }
@ -280,8 +221,8 @@ internal abstract class GroupleParser(
override suspend fun getTags(): Set<MangaTag> { override suspend fun getTags(): Set<MangaTag> {
val doc = webClient.httpGet("https://${domain}/list/genres/sort_name").parseHtml() val doc = webClient.httpGet("https://${domain}/list/genres/sort_name").parseHtml()
val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent") val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent")?.selectFirst("table.table")
?.selectFirst("table.table") ?: doc.parseFailed("Cannot find root") ?: doc.parseFailed("Cannot find root")
return root.select("a.element-link").mapToSet { a -> return root.select("a.element-link").mapToSet { a ->
MangaTag( MangaTag(
title = a.text().toTitleCase(), title = a.text().toTitleCase(),
@ -308,9 +249,7 @@ internal abstract class GroupleParser(
val ext = request.url.pathSegments.lastOrNull()?.substringAfterLast('.', "")?.lowercase(Locale.ROOT) val ext = request.url.pathSegments.lastOrNull()?.substringAfterLast('.', "")?.lowercase(Locale.ROOT)
return if (ext == "jpg" || ext == "jpeg" || ext == "png" || ext == "webp") { return if (ext == "jpg" || ext == "jpeg" || ext == "png" || ext == "webp") {
chain.proceed( chain.proceed(
request.newBuilder() request.newBuilder().header(HEADER_ACCEPT, "image/webp,image/png;q=0.9,image/jpeg,*/*;q=0.8").build(),
.header(HEADER_ACCEPT, "image/webp,image/png;q=0.9,image/jpeg,*/*;q=0.8")
.build(),
) )
} else { } else {
chain.proceed(request) chain.proceed(request)
@ -323,38 +262,33 @@ internal abstract class GroupleParser(
} }
/**
 * Loads the page of [seed] and collects the titles shown in its related-works section.
 *
 * The section is located through an `h4` heading inside `#mangaBox` whose own text equals
 * [RELATED_TITLE]; tiles are read from the element that directly follows that heading and
 * converted with [parseManga].
 *
 * @param seed manga whose related titles are requested
 * @return the manga parsed from the section's `div.tile` elements, or an empty list when the
 * page contains no heading with the [RELATED_TITLE] text
 * @throws AuthRequiredException when the request ended up on the login page (see checkAuthRequired)
 */
override suspend fun getRelatedManga(seed: Manga): List<Manga> {
    val doc = webClient.httpGet(seed.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml()
    // firstOrNull instead of first: `first { }` throws NoSuchElementException when nothing
    // matches; a page without the heading is assumed to simply have no related works listed.
    val header = doc.body().requireElementById("mangaBox")
        .select("h4")
        .firstOrNull { it.ownText() == RELATED_TITLE }
        ?: return emptyList()
    // A heading with nothing after it is unexpected markup, so it is still reported as a parse failure.
    val root = header.nextElementSibling() ?: doc.parseFailed("Cannot find root")
    return root.select("div.tile").mapNotNull(::parseManga)
}
/**
 * Maps a [SortOrder] to the sort key string used in the site's requests.
 *
 * @param sortOrder the requested ordering
 * @return the key string associated with [sortOrder]
 */
private fun getSortKey(sortOrder: SortOrder): String {
    return when (sortOrder) {
        SortOrder.UPDATED -> "updated"
        SortOrder.NEWEST -> "created"
        SortOrder.POPULARITY -> "rate"
        SortOrder.RATING -> "votes"
        SortOrder.ALPHABETICAL -> "name"
    }
}
private suspend fun advancedSearch(domain: String, tags: Set<MangaTag>): Response { private suspend fun advancedSearch(domain: String, tags: Set<MangaTag>): Response {
val url = "https://$domain/search/advanced" val url = "https://$domain/search/advanced"
// Step 1: map catalog genres names to advanced-search genres ids // Step 1: map catalog genres names to advanced-search genres ids
val tagsIndex = webClient.httpGet(url).parseHtml() val tagsIndex =
.body().selectFirst("form.search-form") webClient.httpGet(url).parseHtml().body().selectFirst("form.search-form")?.select("div.form-group")
?.select("div.form-group") ?.find { it.selectFirst("li.property") != null }
?.find { it.selectFirst("li.property") != null } ?: throw ParseException("Genres filter element not found", url)
?: throw ParseException("Genres filter element not found", url)
val tagNames = tags.map { it.title.lowercase() } val tagNames = tags.map { it.title.lowercase() }
val payload = HashMap<String, String>() val payload = HashMap<String, String>()
var foundGenres = 0 var foundGenres = 0
tagsIndex.select("li.property").forEach { li -> tagsIndex.select("li.property").forEach { li ->
val name = li.text().trim().lowercase() val name = li.text().trim().lowercase()
val id = li.selectFirst("input")?.id() val id = li.selectFirst("input")?.id() ?: li.parseFailed("Id for tag $name not found")
?: li.parseFailed("Id for tag $name not found")
payload[id] = if (name in tagNames) { payload[id] = if (name in tagNames) {
foundGenres++ foundGenres++
"in" "in"
@ -383,11 +317,54 @@ internal abstract class GroupleParser(
response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE
}.getOrDefault(false) }.getOrDefault(false)
/**
 * Verifies that the response did not land on the login page.
 *
 * The check looks only at the last path segment of the request URL: when it equals `login`
 * an [AuthRequiredException] is raised for this parser's source. A URL without path segments
 * passes the check.
 *
 * @return the receiver itself, so the call can be chained
 * @throws AuthRequiredException when the last path segment is `login`
 */
private fun Response.checkAuthRequired(): Response {
    val endedOnLogin = request.url.pathSegments.lastOrNull() == "login"
    if (endedOnLogin) {
        throw AuthRequiredException(source)
    }
    return this
}
/**
 * Converts a single tile element into a [Manga].
 *
 * The tile is skipped (null is returned) when:
 * - it has no `div.img` or no `div.desc` child;
 * - its description contains an `i.fa-user` or `i.fa-external-link` icon
 *   (author tiles and, judging by the icon name, links leading outside the site);
 * - the cover link has no resolvable `href`, or the `h3 > a` title is missing;
 * - the URL made relative to [domain] still contains `://`
 *   (presumably a link to another host — confirm against `toRelativeUrl`).
 *
 * @param node the tile element to read
 * @return the parsed manga, or null when the tile does not describe a manga of this source
 */
private fun parseManga(node: Element): Manga? {
    val cover = node.selectFirst("div.img") ?: return null
    val info = node.selectFirst("div.desc") ?: return null
    val skipMarkers = listOf("i.fa-user", "i.fa-external-link")
    if (skipMarkers.any { marker -> info.selectFirst(marker) != null }) {
        return null // author or external-link tile
    }
    val publicLink = cover.selectFirst("a")?.attrAsAbsoluteUrlOrNull("href") ?: return null
    val name = info.selectFirst("h3")?.selectFirst("a")?.text() ?: return null
    val meta = info.selectFirst("div.tile-info")
    val relativeLink = publicLink.toRelativeUrl(domain)
    if ("://" in relativeLink) {
        return null
    }
    // Rating comes from the `title` attribute of `.compact-rate`, parsed as a float and divided by 5.
    val score = runCatching {
        node.selectFirst(".compact-rate")?.attr("title")?.toFloatOrNull()?.div(5f)
    }.getOrNull() ?: RATING_UNKNOWN
    // Tag key is the last path segment of the tag link.
    val genres = runCatching {
        meta?.select("a.element-link")?.mapToSet { link ->
            MangaTag(
                title = link.text().toTitleCase(),
                key = link.attr("href").substringAfterLast('/'),
                source = source,
            )
        }
    }.getOrNull().orEmpty()
    val isCompleted = node.selectFirst("div.tags")?.selectFirst("span.mangaCompleted") != null
    return Manga(
        id = generateUid(relativeLink),
        url = relativeLink,
        publicUrl = publicLink,
        title = name,
        altTitle = info.selectFirst("h4")?.text(),
        // "_p." is replaced with "." in the lazy-loaded image URL (presumably a preview suffix — confirm)
        coverUrl = cover.selectFirst("img.lazy")?.attr("data-original")?.replace("_p.", ".").orEmpty(),
        rating = score,
        author = meta?.selectFirst("a.person-link")?.text(),
        isNsfw = defaultIsNsfw,
        tags = genres,
        state = if (isCompleted) MangaState.FINISHED else null,
        source = source,
    )
}
} }

Loading…
Cancel
Save