From d8cb38a9beb9d0f3efda18561250170039afe45c Mon Sep 17 00:00:00 2001 From: Koitharu Date: Wed, 16 Oct 2024 11:10:01 +0300 Subject: [PATCH] Improve links resolving --- .../koitharu/kotatsu/parsers/MangaParser.kt | 3 +- .../parsers/site/all/ComickFunParser.kt | 6 ++ .../parsers/site/all/LineWebtoonsParser.kt | 5 ++ .../parsers/site/all/MangaDexParser.kt | 2 +- .../site/galleryadults/GalleryAdultsParser.kt | 13 ++-- .../site/galleryadults/all/NHentaiParser.kt | 2 +- .../kotatsu/parsers/site/ru/DesuMeParser.kt | 10 +++ .../parsers/site/ru/grouple/GroupleParser.kt | 6 +- .../koitharu/kotatsu/parsers/util/Jsoup.kt | 5 ++ .../kotatsu/parsers/util/LinkResolver.kt | 67 ++++++++++++++----- .../kotatsu/parsers/MangaParserTest.kt | 15 +++++ 11 files changed, 109 insertions(+), 25 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index 3c78d855..cc3a920c 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -9,6 +9,7 @@ import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.network.OkHttpWebClient import org.koitharu.kotatsu.parsers.network.WebClient import org.koitharu.kotatsu.parsers.util.FaviconParser +import org.koitharu.kotatsu.parsers.util.LinkResolver import org.koitharu.kotatsu.parsers.util.RelatedMangaFinder import org.koitharu.kotatsu.parsers.util.domain import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl @@ -111,5 +112,5 @@ public abstract class MangaParser @InternalParsersApi constructor( * Return [Manga] object by web link to it * @see [Manga.publicUrl] */ - public open suspend fun resolveLink(link: HttpUrl): Manga? = null + internal open suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? = null } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ComickFunParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ComickFunParser.kt index 9a943bde..b0a61443 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ComickFunParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ComickFunParser.kt @@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.all import androidx.collection.ArraySet import androidx.collection.SparseArrayCompat +import okhttp3.HttpUrl import org.json.JSONArray import org.json.JSONObject import org.koitharu.kotatsu.parsers.MangaLoaderContext @@ -250,6 +251,11 @@ internal class ComickFunParser(context: MangaLoaderContext) : } } + override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? { + val slug = link.pathSegments.lastOrNull() ?: return null + return resolver.resolveManga(this, url = slug, id = generateUid(slug)) + } + private val tagsArray = SuspendLazy(::loadTags) private suspend fun fetchAvailableTags(): Set { diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/LineWebtoonsParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/LineWebtoonsParser.kt index 98ab058a..9a91442c 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/LineWebtoonsParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/LineWebtoonsParser.kt @@ -236,6 +236,11 @@ internal abstract class LineWebtoonsParser( } } + override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? { + val titleNo = link.queryParameter("title_no") ?: return null + return resolver.resolveManga(this, url = titleNo.toString()) + } + private fun parseTag(jo: JSONObject): MangaTag { return MangaTag( title = jo.getString("name"), diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/MangaDexParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/MangaDexParser.kt index ee59270a..9c02e92a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/MangaDexParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/MangaDexParser.kt @@ -209,7 +209,7 @@ internal class MangaDexParser(context: MangaLoaderContext) : MangaParser(context return getDetails(mangaId) } - override suspend fun resolveLink(link: HttpUrl): Manga? { + override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? { val regex = Regex("[0-9a-f\\-]{10,}", RegexOption.IGNORE_CASE) val mangaId = link.pathSegments.find { regex.matches(it) } ?: return null return getDetails(mangaId) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/GalleryAdultsParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/GalleryAdultsParser.kt index 2a96c1af..21b5a907 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/GalleryAdultsParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/GalleryAdultsParser.kt @@ -100,17 +100,15 @@ internal abstract class GalleryAdultsParser( protected open val selectGalleryLink = ".inner_thumb a" protected open val selectGalleryImg = "img" protected open val selectGalleryTitle = "h2" + private val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)") + private val regexSpaces = Regex("\\s+") protected open fun parseMangaList(doc: Document): List { - val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)") - val regexSpaces = Regex("\\s+") return doc.select(selectGallery).map { div -> val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href") Manga( id = generateUid(href), - title = div.select(selectGalleryTitle).text().replace(regexBrackets, "") - .replace(regexSpaces, " ") - .trim(), + title = div.select(selectGalleryTitle).text().cleanupTitle(), altTitle = null, url = href, publicUrl = href.toAbsoluteUrl(domain), @@ -168,6 +166,7 @@ internal abstract class GalleryAdultsParser( } return manga.copy( tags = tag.orEmpty(), + title = doc.selectFirst("h1.title")?.textOrNull()?.cleanupTitle() ?: manga.title, author = doc.selectFirst(selectAuthor)?.html()?.substringBefore(" getDisplayLanguage(Locale.ENGLISH).lowercase() } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt index f583f59b..fa553330 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt @@ -89,7 +89,7 @@ internal class NHentaiParser(context: MangaLoaderContext) : val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href") Manga( id = generateUid(href), - title = div.select(selectGalleryTitle).text().trim(), + title = div.select(selectGalleryTitle).text().cleanupTitle(), altTitle = null, url = href, publicUrl = href.toAbsoluteUrl(domain), diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/DesuMeParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/DesuMeParser.kt index 151f6548..38437443 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/DesuMeParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/DesuMeParser.kt @@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.ru import androidx.collection.ArrayMap import okhttp3.Headers +import okhttp3.HttpUrl import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.PagedMangaParser @@ -159,6 +160,15 @@ internal class DesuMeParser(context: MangaLoaderContext) : PagedMangaParser(cont } } + override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? { + val doc = webClient.httpGet(link).parseHtml() + val mangaId = doc.getElementsByAttribute("data-manga_id").firstNotNullOfOrNull { element -> + element.attrOrNull("data-manga_id") + } ?: return null + val title = doc.metaValue("headline") ?: return null + return resolver.resolveManga(this, id = generateUid(mangaId), url = "/manga/api/$mangaId", title = title) + } + private fun getSortKey(sortOrder: SortOrder) = when (sortOrder) { SortOrder.ALPHABETICAL -> "name" diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/grouple/GroupleParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/grouple/GroupleParser.kt index 2fbbb606..f77d83a7 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/grouple/GroupleParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/ru/grouple/GroupleParser.kt @@ -52,7 +52,10 @@ internal abstract class GroupleParser( private val splitTranslationsKey = ConfigKey.SplitByTranslations(false) private val tagsIndex = SuspendLazy(::fetchTagsMap) - override fun getRequestHeaders(): Headers = Headers.Builder().add("User-Agent", config[userAgentKey]).build() + override fun getRequestHeaders(): Headers = Headers.Builder() + .add("User-Agent", config[userAgentKey]) + .add("Accept-Language", "ru,en-US;q=0.7,en;q=0.3") + .build() override val availableSortOrders: Set = EnumSet.of( SortOrder.UPDATED, @@ -130,6 +133,7 @@ internal abstract class GroupleParser( } return manga.copy( source = newSource, + title = doc.metaValue("name") ?: manga.title, altTitle = root.selectFirst(".all-names-popover")?.select(".name")?.joinToString { it.text() } ?: manga.altTitle, publicUrl = response.request.url.toString(), diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt index 1a431000..8f727b90 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt @@ -179,3 +179,8 @@ public fun Element.src( } return null } + +public fun Element.metaValue(itemprop: String) = getElementsByAttributeValue("itemprop", itemprop) + .firstNotNullOfOrNull { element -> + element.attrOrNull("content") + } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/LinkResolver.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/LinkResolver.kt index f219a020..0ee115b9 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/LinkResolver.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/LinkResolver.kt @@ -19,7 +19,7 @@ public class LinkResolver internal constructor( public suspend fun getManga(): Manga? { val parser = context.newParserInstance(source.get() ?: return null) - return parser.resolveLink(link) ?: parser.resolveLinkLongPath() + return parser.resolveLink(this, link) ?: resolveManga(parser) } private suspend fun resolveSource(): MangaParserSource? = runInterruptible(Dispatchers.Default) { @@ -35,13 +35,18 @@ public class LinkResolver internal constructor( null } - private suspend fun MangaParser.resolveLinkLongPath(): Manga? { - val stubTitle = link.pathSegments.lastOrNull().orEmpty() - val seed = Manga( - id = 0L, - title = stubTitle, + internal suspend fun resolveManga( + parser: MangaParser, + url: String = link.toString().toRelativeUrl(link.host), + id: Long = parser.generateUid(url), + title: String = STUB_TITLE, + ): Manga? = resolveBySeed( + parser, + Manga( + id = id, + title = title, altTitle = null, - url = link.toString().toRelativeUrl(link.host), + url = url, publicUrl = link.toString(), rating = RATING_UNKNOWN, isNsfw = false, @@ -52,22 +57,47 @@ public class LinkResolver internal constructor( largeCoverUrl = null, description = null, chapters = null, - source = source, - ).let { manga -> - getDetails(manga) + source = parser.source, + ), + ) + + private suspend fun resolveBySeed(parser: MangaParser, s: Manga): Manga? { + val seed = parser.getDetails(s) + if (!parser.filterCapabilities.isSearchSupported) { + return seed.takeUnless { it.chapters.isNullOrEmpty() } } val query = when { - seed.title != stubTitle && seed.title.isNotEmpty() -> seed.title + seed.title != STUB_TITLE && seed.title.isNotEmpty() -> seed.title !seed.altTitle.isNullOrEmpty() -> seed.altTitle !seed.author.isNullOrEmpty() -> seed.author else -> return seed // unfortunately we do not know a real manga title so unable to find it } + val resolved = runCatchingCancellable { + val order = if (SortOrder.RELEVANCE in parser.availableSortOrders) { + SortOrder.RELEVANCE + } else { + parser.defaultSortOrder + } + val list = parser.getList(0, order, MangaListFilter(query = query)) + list.singleOrNull { manga -> isSameUrl(manga.publicUrl) } + }.getOrNull() + if (resolved == null) { + return seed + } return runCatchingCancellable { - val order = if (SortOrder.RELEVANCE in availableSortOrders) SortOrder.RELEVANCE else defaultSortOrder - val list = getList(0, order, MangaListFilter(query = query)) - val result = list.single { manga -> isSameUrl(manga.publicUrl) } - getDetails(result) - }.getOrDefault(seed) + parser.getDetails(resolved) + }.getOrElse { + resolved.copy( + chapters = seed.chapters ?: resolved.chapters, + description = seed.description ?: resolved.description, + author = seed.author ?: resolved.author, + tags = seed.tags + resolved.tags, + state = seed.state ?: resolved.state, + coverUrl = seed.coverUrl.ifEmpty { resolved.coverUrl }, + largeCoverUrl = seed.largeCoverUrl ?: resolved.largeCoverUrl, + altTitle = seed.altTitle ?: resolved.altTitle, + ) + } } private fun isSameUrl(publicUrl: String): Boolean { @@ -78,4 +108,9 @@ public class LinkResolver internal constructor( return link.host == httpUrl.host && link.encodedPath == httpUrl.encodedPath } + + private companion object { + + const val STUB_TITLE = "Unknown manga" + } } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index 33819830..c41168bb 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers import kotlinx.coroutines.test.runTest import okhttp3.HttpUrl +import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Disabled import org.junit.jupiter.params.ParameterizedTest import org.koitharu.kotatsu.parsers.model.* @@ -215,6 +216,20 @@ internal class MangaParserTest { } } + @ParameterizedTest(name = "{index}|link|{0}") + @MangaSources + fun link(source: MangaParserSource) = runTest(timeout = timeout) { + val parser = context.newParserInstance(source) + val manga = parser.getList(0, parser.defaultSortOrder, MangaListFilter.EMPTY).first() + val resolved = context.newLinkResolver(manga.publicUrl).getManga() + Assertions.assertNotNull(resolved) + resolved ?: return@runTest + Assertions.assertEquals(manga.id, resolved.id) + Assertions.assertEquals(manga.publicUrl, resolved.publicUrl) + Assertions.assertEquals(manga.url, resolved.url) + Assertions.assertEquals(manga.title, resolved.title) + } + @ParameterizedTest(name = "{index}|authorization|{0}") @MangaSources @Disabled