Improve link resolution

Koitharu 2 years ago
parent 07b0a2da9e
commit d8cb38a9be
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -9,6 +9,7 @@ import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.network.OkHttpWebClient import org.koitharu.kotatsu.parsers.network.OkHttpWebClient
import org.koitharu.kotatsu.parsers.network.WebClient import org.koitharu.kotatsu.parsers.network.WebClient
import org.koitharu.kotatsu.parsers.util.FaviconParser import org.koitharu.kotatsu.parsers.util.FaviconParser
import org.koitharu.kotatsu.parsers.util.LinkResolver
import org.koitharu.kotatsu.parsers.util.RelatedMangaFinder import org.koitharu.kotatsu.parsers.util.RelatedMangaFinder
import org.koitharu.kotatsu.parsers.util.domain import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
@ -111,5 +112,5 @@ public abstract class MangaParser @InternalParsersApi constructor(
* Return [Manga] object by web link to it * Return [Manga] object by web link to it
* @see [Manga.publicUrl] * @see [Manga.publicUrl]
*/ */
public open suspend fun resolveLink(link: HttpUrl): Manga? = null internal open suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? = null
} }

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.all
import androidx.collection.ArraySet import androidx.collection.ArraySet
import androidx.collection.SparseArrayCompat import androidx.collection.SparseArrayCompat
import okhttp3.HttpUrl
import org.json.JSONArray import org.json.JSONArray
import org.json.JSONObject import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
@ -250,6 +251,11 @@ internal class ComickFunParser(context: MangaLoaderContext) :
} }
} }
/**
 * Resolves a manga from a web [link] by treating the last non-empty path segment as the slug.
 *
 * The slug is used both as the manga url and as the input for [generateUid].
 *
 * @param resolver the resolver that builds the seed manga and loads its details
 * @param link the link to resolve
 * @return the resolved manga, or `null` if the link has no non-empty path segment
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
    // HttpUrl reports a trailing slash as an empty last path segment,
    // so skip empty segments to keep links like ".../slug/" resolvable
    val slug = link.pathSegments.lastOrNull { it.isNotEmpty() } ?: return null
    return resolver.resolveManga(this, url = slug, id = generateUid(slug))
}
private val tagsArray = SuspendLazy(::loadTags) private val tagsArray = SuspendLazy(::loadTags)
private suspend fun fetchAvailableTags(): Set<MangaTag> { private suspend fun fetchAvailableTags(): Set<MangaTag> {

@ -236,6 +236,11 @@ internal abstract class LineWebtoonsParser(
} }
} }
/**
 * Resolves a manga from a web [link] using its `title_no` query parameter as the manga url.
 *
 * @param resolver the resolver that builds the seed manga and loads its details
 * @param link the link to resolve
 * @return the resolved manga, or `null` if `title_no` is missing or blank
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
    // A blank value (e.g. "?title_no=") cannot identify a title, so treat it as missing
    val titleNo = link.queryParameter("title_no")?.takeIf { it.isNotBlank() } ?: return null
    return resolver.resolveManga(this, url = titleNo)
}
private fun parseTag(jo: JSONObject): MangaTag { private fun parseTag(jo: JSONObject): MangaTag {
return MangaTag( return MangaTag(
title = jo.getString("name"), title = jo.getString("name"),

@ -209,7 +209,7 @@ internal class MangaDexParser(context: MangaLoaderContext) : MangaParser(context
return getDetails(mangaId) return getDetails(mangaId)
} }
override suspend fun resolveLink(link: HttpUrl): Manga? { override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
val regex = Regex("[0-9a-f\\-]{10,}", RegexOption.IGNORE_CASE) val regex = Regex("[0-9a-f\\-]{10,}", RegexOption.IGNORE_CASE)
val mangaId = link.pathSegments.find { regex.matches(it) } ?: return null val mangaId = link.pathSegments.find { regex.matches(it) } ?: return null
return getDetails(mangaId) return getDetails(mangaId)

@ -100,17 +100,15 @@ internal abstract class GalleryAdultsParser(
protected open val selectGalleryLink = ".inner_thumb a" protected open val selectGalleryLink = ".inner_thumb a"
protected open val selectGalleryImg = "img" protected open val selectGalleryImg = "img"
protected open val selectGalleryTitle = "h2" protected open val selectGalleryTitle = "h2"
// Matches a non-empty group enclosed in square brackets or in parentheses, e.g. "[abc]" or "(abc)"
private val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
// Matches a run of one or more whitespace characters
private val regexSpaces = Regex("\\s+")
protected open fun parseMangaList(doc: Document): List<Manga> { protected open fun parseMangaList(doc: Document): List<Manga> {
val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
val regexSpaces = Regex("\\s+")
return doc.select(selectGallery).map { div -> return doc.select(selectGallery).map { div ->
val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href") val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href")
Manga( Manga(
id = generateUid(href), id = generateUid(href),
title = div.select(selectGalleryTitle).text().replace(regexBrackets, "") title = div.select(selectGalleryTitle).text().cleanupTitle(),
.replace(regexSpaces, " ")
.trim(),
altTitle = null, altTitle = null,
url = href, url = href,
publicUrl = href.toAbsoluteUrl(domain), publicUrl = href.toAbsoluteUrl(domain),
@ -168,6 +166,7 @@ internal abstract class GalleryAdultsParser(
} }
return manga.copy( return manga.copy(
tags = tag.orEmpty(), tags = tag.orEmpty(),
title = doc.selectFirst("h1.title")?.textOrNull()?.cleanupTitle() ?: manga.title,
author = doc.selectFirst(selectAuthor)?.html()?.substringBefore("<span"), author = doc.selectFirst(selectAuthor)?.html()?.substringBefore("<span"),
chapters = listOf( chapters = listOf(
MangaChapter( MangaChapter(
@ -213,6 +212,10 @@ internal abstract class GalleryAdultsParser(
return doc.requireElementById(idImg).src() ?: doc.parseFailed("Image src not found") return doc.requireElementById(idImg).src() ?: doc.parseFailed("Image src not found")
} }
/**
 * Returns this title with every [regexBrackets] match removed, every [regexSpaces] match
 * replaced by a single space, and leading/trailing whitespace trimmed.
 */
protected fun String.cleanupTitle(): String {
    val withoutBrackets = replace(regexBrackets, "")
    val singleSpaced = withoutBrackets.replace(regexSpaces, " ")
    return singleSpaced.trim()
}
protected open fun Locale.toLanguagePath() = when (language) { protected open fun Locale.toLanguagePath() = when (language) {
else -> getDisplayLanguage(Locale.ENGLISH).lowercase() else -> getDisplayLanguage(Locale.ENGLISH).lowercase()
} }

@ -89,7 +89,7 @@ internal class NHentaiParser(context: MangaLoaderContext) :
val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href") val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href")
Manga( Manga(
id = generateUid(href), id = generateUid(href),
title = div.select(selectGalleryTitle).text().trim(), title = div.select(selectGalleryTitle).text().cleanupTitle(),
altTitle = null, altTitle = null,
url = href, url = href,
publicUrl = href.toAbsoluteUrl(domain), publicUrl = href.toAbsoluteUrl(domain),

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.ru
import androidx.collection.ArrayMap import androidx.collection.ArrayMap
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser import org.koitharu.kotatsu.parsers.PagedMangaParser
@ -159,6 +160,15 @@ internal class DesuMeParser(context: MangaLoaderContext) : PagedMangaParser(cont
} }
} }
/**
 * Resolves a manga by loading the page at [link] and reading its identifier and title from the markup.
 *
 * The identifier is the first non-null `data-manga_id` attribute value found in the document;
 * the title is taken from `metaValue("headline")`.
 *
 * @param resolver the resolver that builds the seed manga and loads its details
 * @param link the link to resolve
 * @return the resolved manga, or `null` if the page has no manga id or no headline value
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
    val page = webClient.httpGet(link).parseHtml()
    val mangaId = page.getElementsByAttribute("data-manga_id")
        .firstNotNullOfOrNull { it.attrOrNull("data-manga_id") }
        ?: return null
    val headline = page.metaValue("headline") ?: return null
    return resolver.resolveManga(
        this,
        id = generateUid(mangaId),
        url = "/manga/api/$mangaId",
        title = headline,
    )
}
private fun getSortKey(sortOrder: SortOrder) = private fun getSortKey(sortOrder: SortOrder) =
when (sortOrder) { when (sortOrder) {
SortOrder.ALPHABETICAL -> "name" SortOrder.ALPHABETICAL -> "name"

@ -52,7 +52,10 @@ internal abstract class GroupleParser(
private val splitTranslationsKey = ConfigKey.SplitByTranslations(false) private val splitTranslationsKey = ConfigKey.SplitByTranslations(false)
private val tagsIndex = SuspendLazy(::fetchTagsMap) private val tagsIndex = SuspendLazy(::fetchTagsMap)
override fun getRequestHeaders(): Headers = Headers.Builder().add("User-Agent", config[userAgentKey]).build() override fun getRequestHeaders(): Headers = Headers.Builder()
.add("User-Agent", config[userAgentKey])
.add("Accept-Language", "ru,en-US;q=0.7,en;q=0.3")
.build()
override val availableSortOrders: Set<SortOrder> = EnumSet.of( override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED, SortOrder.UPDATED,
@ -130,6 +133,7 @@ internal abstract class GroupleParser(
} }
return manga.copy( return manga.copy(
source = newSource, source = newSource,
title = doc.metaValue("name") ?: manga.title,
altTitle = root.selectFirst(".all-names-popover")?.select(".name")?.joinToString { it.text() } altTitle = root.selectFirst(".all-names-popover")?.select(".name")?.joinToString { it.text() }
?: manga.altTitle, ?: manga.altTitle,
publicUrl = response.request.url.toString(), publicUrl = response.request.url.toString(),

@ -179,3 +179,8 @@ public fun Element.src(
} }
return null return null
} }
/**
 * Looks up elements under this element whose `itemprop` attribute equals [itemprop]
 * and returns the first non-null result of `attrOrNull("content")` among them,
 * or `null` if there is none.
 */
public fun Element.metaValue(itemprop: String) =
    getElementsByAttributeValue("itemprop", itemprop)
        .firstNotNullOfOrNull { it.attrOrNull("content") }

@ -19,7 +19,7 @@ public class LinkResolver internal constructor(
public suspend fun getManga(): Manga? { public suspend fun getManga(): Manga? {
val parser = context.newParserInstance(source.get() ?: return null) val parser = context.newParserInstance(source.get() ?: return null)
return parser.resolveLink(link) ?: parser.resolveLinkLongPath() return parser.resolveLink(this, link) ?: resolveManga(parser)
} }
private suspend fun resolveSource(): MangaParserSource? = runInterruptible(Dispatchers.Default) { private suspend fun resolveSource(): MangaParserSource? = runInterruptible(Dispatchers.Default) {
@ -35,13 +35,18 @@ public class LinkResolver internal constructor(
null null
} }
private suspend fun MangaParser.resolveLinkLongPath(): Manga? { internal suspend fun resolveManga(
val stubTitle = link.pathSegments.lastOrNull().orEmpty() parser: MangaParser,
val seed = Manga( url: String = link.toString().toRelativeUrl(link.host),
id = 0L, id: Long = parser.generateUid(url),
title = stubTitle, title: String = STUB_TITLE,
): Manga? = resolveBySeed(
parser,
Manga(
id = id,
title = title,
altTitle = null, altTitle = null,
url = link.toString().toRelativeUrl(link.host), url = url,
publicUrl = link.toString(), publicUrl = link.toString(),
rating = RATING_UNKNOWN, rating = RATING_UNKNOWN,
isNsfw = false, isNsfw = false,
@ -52,22 +57,47 @@ public class LinkResolver internal constructor(
largeCoverUrl = null, largeCoverUrl = null,
description = null, description = null,
chapters = null, chapters = null,
source = source, source = parser.source,
).let { manga -> ),
getDetails(manga) )
private suspend fun resolveBySeed(parser: MangaParser, s: Manga): Manga? {
val seed = parser.getDetails(s)
if (!parser.filterCapabilities.isSearchSupported) {
return seed.takeUnless { it.chapters.isNullOrEmpty() }
} }
val query = when { val query = when {
seed.title != stubTitle && seed.title.isNotEmpty() -> seed.title seed.title != STUB_TITLE && seed.title.isNotEmpty() -> seed.title
!seed.altTitle.isNullOrEmpty() -> seed.altTitle !seed.altTitle.isNullOrEmpty() -> seed.altTitle
!seed.author.isNullOrEmpty() -> seed.author !seed.author.isNullOrEmpty() -> seed.author
else -> return seed // unfortunately we do not know a real manga title so unable to find it else -> return seed // unfortunately we do not know a real manga title so unable to find it
} }
val resolved = runCatchingCancellable {
val order = if (SortOrder.RELEVANCE in parser.availableSortOrders) {
SortOrder.RELEVANCE
} else {
parser.defaultSortOrder
}
val list = parser.getList(0, order, MangaListFilter(query = query))
list.singleOrNull { manga -> isSameUrl(manga.publicUrl) }
}.getOrNull()
if (resolved == null) {
return seed
}
return runCatchingCancellable { return runCatchingCancellable {
val order = if (SortOrder.RELEVANCE in availableSortOrders) SortOrder.RELEVANCE else defaultSortOrder parser.getDetails(resolved)
val list = getList(0, order, MangaListFilter(query = query)) }.getOrElse {
val result = list.single { manga -> isSameUrl(manga.publicUrl) } resolved.copy(
getDetails(result) chapters = seed.chapters ?: resolved.chapters,
}.getOrDefault(seed) description = seed.description ?: resolved.description,
author = seed.author ?: resolved.author,
tags = seed.tags + resolved.tags,
state = seed.state ?: resolved.state,
coverUrl = seed.coverUrl.ifEmpty { resolved.coverUrl },
largeCoverUrl = seed.largeCoverUrl ?: resolved.largeCoverUrl,
altTitle = seed.altTitle ?: resolved.altTitle,
)
}
} }
private fun isSameUrl(publicUrl: String): Boolean { private fun isSameUrl(publicUrl: String): Boolean {
@ -78,4 +108,9 @@ public class LinkResolver internal constructor(
return link.host == httpUrl.host return link.host == httpUrl.host
&& link.encodedPath == httpUrl.encodedPath && link.encodedPath == httpUrl.encodedPath
} }
private companion object {

    // Default title of the seed Manga built by resolveManga when the caller supplies none.
    // resolveBySeed compares the loaded title against it to decide whether a real title is known.
    const val STUB_TITLE = "Unknown manga"
}
} }

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers
import kotlinx.coroutines.test.runTest import kotlinx.coroutines.test.runTest
import okhttp3.HttpUrl import okhttp3.HttpUrl
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.Disabled
import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.ParameterizedTest
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
@ -215,6 +216,20 @@ internal class MangaParserTest {
} }
} }
/**
 * Takes the first manga from the parser's default listing, resolves it back from its public URL
 * through a link resolver, and checks that id, publicUrl, url and title all match.
 */
@ParameterizedTest(name = "{index}|link|{0}")
@MangaSources
fun link(source: MangaParserSource) = runTest(timeout = timeout) {
    val parser = context.newParserInstance(source)
    val expected = parser.getList(0, parser.defaultSortOrder, MangaListFilter.EMPTY).first()
    val actual = context.newLinkResolver(expected.publicUrl).getManga()
    Assertions.assertNotNull(actual)
    // assertNotNull does not smart-cast, so bail out explicitly to get a non-null value below
    if (actual == null) {
        return@runTest
    }
    Assertions.assertEquals(expected.id, actual.id)
    Assertions.assertEquals(expected.publicUrl, actual.publicUrl)
    Assertions.assertEquals(expected.url, actual.url)
    Assertions.assertEquals(expected.title, actual.title)
}
@ParameterizedTest(name = "{index}|authorization|{0}") @ParameterizedTest(name = "{index}|authorization|{0}")
@MangaSources @MangaSources
@Disabled @Disabled

Loading…
Cancel
Save