Improve links resolving

master
Koitharu 2 years ago
parent 07b0a2da9e
commit d8cb38a9be
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -9,6 +9,7 @@ import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.network.OkHttpWebClient
import org.koitharu.kotatsu.parsers.network.WebClient
import org.koitharu.kotatsu.parsers.util.FaviconParser
import org.koitharu.kotatsu.parsers.util.LinkResolver
import org.koitharu.kotatsu.parsers.util.RelatedMangaFinder
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
@ -111,5 +112,5 @@ public abstract class MangaParser @InternalParsersApi constructor(
* Returns the [Manga] that the given web link points to, or `null` if this parser does not resolve the link
* @see [Manga.publicUrl]
*/
public open suspend fun resolveLink(link: HttpUrl): Manga? = null
internal open suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? = null
}

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.all
import androidx.collection.ArraySet
import androidx.collection.SparseArrayCompat
import okhttp3.HttpUrl
import org.json.JSONArray
import org.json.JSONObject
import org.koitharu.kotatsu.parsers.MangaLoaderContext
@ -250,6 +251,11 @@ internal class ComickFunParser(context: MangaLoaderContext) :
}
}
/**
 * Resolves a web [link] into a [Manga] by treating the last non-empty path segment as the slug.
 *
 * The slug is passed to [LinkResolver.resolveManga] as the manga url, and the id is derived
 * from it with `generateUid`.
 *
 * @param resolver the resolver used to build and load the manga
 * @param link the web link to resolve
 * @return the resolved manga, or `null` when the link has no non-empty path segment
 * or the resolver returns `null`
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
    // HttpUrl.pathSegments ends with an empty string when the URL has a trailing slash
    // (and is [""] for a bare host), so skip empty segments instead of taking the raw last one.
    val slug = link.pathSegments.lastOrNull { it.isNotEmpty() } ?: return null
    return resolver.resolveManga(this, url = slug, id = generateUid(slug))
}
private val tagsArray = SuspendLazy(::loadTags)
private suspend fun fetchAvailableTags(): Set<MangaTag> {

@ -236,6 +236,11 @@ internal abstract class LineWebtoonsParser(
}
}
/**
 * Resolves a web [link] into a [Manga] using the value of its `title_no` query parameter
 * as the manga url.
 *
 * @return the manga produced by [LinkResolver.resolveManga], or `null` when the link
 * carries no `title_no` parameter
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? =
    link.queryParameter("title_no")?.let { titleNo ->
        resolver.resolveManga(this, url = titleNo)
    }
private fun parseTag(jo: JSONObject): MangaTag {
return MangaTag(
title = jo.getString("name"),

@ -209,7 +209,7 @@ internal class MangaDexParser(context: MangaLoaderContext) : MangaParser(context
return getDetails(mangaId)
}
override suspend fun resolveLink(link: HttpUrl): Manga? {
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
val regex = Regex("[0-9a-f\\-]{10,}", RegexOption.IGNORE_CASE)
val mangaId = link.pathSegments.find { regex.matches(it) } ?: return null
return getDetails(mangaId)

@ -100,17 +100,15 @@ internal abstract class GalleryAdultsParser(
protected open val selectGalleryLink = ".inner_thumb a"
protected open val selectGalleryImg = "img"
protected open val selectGalleryTitle = "h2"
private val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
private val regexSpaces = Regex("\\s+")
protected open fun parseMangaList(doc: Document): List<Manga> {
val regexBrackets = Regex("\\[[^]]+]|\\([^)]+\\)")
val regexSpaces = Regex("\\s+")
return doc.select(selectGallery).map { div ->
val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
title = div.select(selectGalleryTitle).text().replace(regexBrackets, "")
.replace(regexSpaces, " ")
.trim(),
title = div.select(selectGalleryTitle).text().cleanupTitle(),
altTitle = null,
url = href,
publicUrl = href.toAbsoluteUrl(domain),
@ -168,6 +166,7 @@ internal abstract class GalleryAdultsParser(
}
return manga.copy(
tags = tag.orEmpty(),
title = doc.selectFirst("h1.title")?.textOrNull()?.cleanupTitle() ?: manga.title,
author = doc.selectFirst(selectAuthor)?.html()?.substringBefore("<span"),
chapters = listOf(
MangaChapter(
@ -213,6 +212,10 @@ internal abstract class GalleryAdultsParser(
return doc.requireElementById(idImg).src() ?: doc.parseFailed("Image src not found")
}
/**
 * Normalizes a gallery title: removes every match of `regexBrackets` (bracketed `[...]`
 * and parenthesized `(...)` groups), replaces each match of `regexSpaces` (a whitespace run)
 * with a single space, and trims the result.
 */
protected fun String.cleanupTitle() = regexSpaces
    .replace(regexBrackets.replace(this, ""), " ")
    .trim()
protected open fun Locale.toLanguagePath() = when (language) {
else -> getDisplayLanguage(Locale.ENGLISH).lowercase()
}

@ -89,7 +89,7 @@ internal class NHentaiParser(context: MangaLoaderContext) :
val href = div.selectFirstOrThrow(selectGalleryLink).attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
title = div.select(selectGalleryTitle).text().trim(),
title = div.select(selectGalleryTitle).text().cleanupTitle(),
altTitle = null,
url = href,
publicUrl = href.toAbsoluteUrl(domain),

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers.site.ru
import androidx.collection.ArrayMap
import okhttp3.Headers
import okhttp3.HttpUrl
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
@ -159,6 +160,15 @@ internal class DesuMeParser(context: MangaLoaderContext) : PagedMangaParser(cont
}
}
/**
 * Resolves a web [link] into a [Manga] by downloading the linked page and reading
 * the manga id and title from it.
 *
 * The id comes from the first element that yields a `data-manga_id` attribute value,
 * the title from the `headline` itemprop value. The manga url is built as `/manga/api/<id>`.
 *
 * @return the manga produced by [LinkResolver.resolveManga], or `null` when either
 * the id or the title is absent from the page
 */
override suspend fun resolveLink(resolver: LinkResolver, link: HttpUrl): Manga? {
    val page = webClient.httpGet(link).parseHtml()
    val mangaId = page.getElementsByAttribute("data-manga_id")
        .firstNotNullOfOrNull { it.attrOrNull("data-manga_id") }
        ?: return null
    val headline = page.metaValue("headline") ?: return null
    return resolver.resolveManga(
        this,
        id = generateUid(mangaId),
        url = "/manga/api/$mangaId",
        title = headline,
    )
}
private fun getSortKey(sortOrder: SortOrder) =
when (sortOrder) {
SortOrder.ALPHABETICAL -> "name"

@ -52,7 +52,10 @@ internal abstract class GroupleParser(
private val splitTranslationsKey = ConfigKey.SplitByTranslations(false)
private val tagsIndex = SuspendLazy(::fetchTagsMap)
override fun getRequestHeaders(): Headers = Headers.Builder().add("User-Agent", config[userAgentKey]).build()
/**
 * Builds the request headers: the configured `User-Agent` plus a fixed `Accept-Language`
 * value that lists Russian first.
 */
override fun getRequestHeaders(): Headers {
    val headers = Headers.Builder()
    headers.add("User-Agent", config[userAgentKey])
    headers.add("Accept-Language", "ru,en-US;q=0.7,en;q=0.3")
    return headers.build()
}
override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
@ -130,6 +133,7 @@ internal abstract class GroupleParser(
}
return manga.copy(
source = newSource,
title = doc.metaValue("name") ?: manga.title,
altTitle = root.selectFirst(".all-names-popover")?.select(".name")?.joinToString { it.text() }
?: manga.altTitle,
publicUrl = response.request.url.toString(),

@ -179,3 +179,8 @@ public fun Element.src(
}
return null
}
/**
 * Finds the elements under this one whose `itemprop` attribute equals [itemprop] and returns
 * the first non-null `content` attribute value among them, or `null` if there is none.
 */
public fun Element.metaValue(itemprop: String) = getElementsByAttributeValue("itemprop", itemprop)
    .asSequence()
    .mapNotNull { candidate -> candidate.attrOrNull("content") }
    .firstOrNull()

@ -19,7 +19,7 @@ public class LinkResolver internal constructor(
public suspend fun getManga(): Manga? {
val parser = context.newParserInstance(source.get() ?: return null)
return parser.resolveLink(link) ?: parser.resolveLinkLongPath()
return parser.resolveLink(this, link) ?: resolveManga(parser)
}
private suspend fun resolveSource(): MangaParserSource? = runInterruptible(Dispatchers.Default) {
@ -35,13 +35,18 @@ public class LinkResolver internal constructor(
null
}
private suspend fun MangaParser.resolveLinkLongPath(): Manga? {
val stubTitle = link.pathSegments.lastOrNull().orEmpty()
val seed = Manga(
id = 0L,
title = stubTitle,
internal suspend fun resolveManga(
parser: MangaParser,
url: String = link.toString().toRelativeUrl(link.host),
id: Long = parser.generateUid(url),
title: String = STUB_TITLE,
): Manga? = resolveBySeed(
parser,
Manga(
id = id,
title = title,
altTitle = null,
url = link.toString().toRelativeUrl(link.host),
url = url,
publicUrl = link.toString(),
rating = RATING_UNKNOWN,
isNsfw = false,
@ -52,22 +57,47 @@ public class LinkResolver internal constructor(
largeCoverUrl = null,
description = null,
chapters = null,
source = source,
).let { manga ->
getDetails(manga)
source = parser.source,
),
)
private suspend fun resolveBySeed(parser: MangaParser, s: Manga): Manga? {
val seed = parser.getDetails(s)
if (!parser.filterCapabilities.isSearchSupported) {
return seed.takeUnless { it.chapters.isNullOrEmpty() }
}
val query = when {
seed.title != stubTitle && seed.title.isNotEmpty() -> seed.title
seed.title != STUB_TITLE && seed.title.isNotEmpty() -> seed.title
!seed.altTitle.isNullOrEmpty() -> seed.altTitle
!seed.author.isNullOrEmpty() -> seed.author
else -> return seed // unfortunately we do not know a real manga title so unable to find it
}
val resolved = runCatchingCancellable {
val order = if (SortOrder.RELEVANCE in parser.availableSortOrders) {
SortOrder.RELEVANCE
} else {
parser.defaultSortOrder
}
val list = parser.getList(0, order, MangaListFilter(query = query))
list.singleOrNull { manga -> isSameUrl(manga.publicUrl) }
}.getOrNull()
if (resolved == null) {
return seed
}
return runCatchingCancellable {
val order = if (SortOrder.RELEVANCE in availableSortOrders) SortOrder.RELEVANCE else defaultSortOrder
val list = getList(0, order, MangaListFilter(query = query))
val result = list.single { manga -> isSameUrl(manga.publicUrl) }
getDetails(result)
}.getOrDefault(seed)
parser.getDetails(resolved)
}.getOrElse {
resolved.copy(
chapters = seed.chapters ?: resolved.chapters,
description = seed.description ?: resolved.description,
author = seed.author ?: resolved.author,
tags = seed.tags + resolved.tags,
state = seed.state ?: resolved.state,
coverUrl = seed.coverUrl.ifEmpty { resolved.coverUrl },
largeCoverUrl = seed.largeCoverUrl ?: resolved.largeCoverUrl,
altTitle = seed.altTitle ?: resolved.altTitle,
)
}
}
private fun isSameUrl(publicUrl: String): Boolean {
@ -78,4 +108,9 @@ public class LinkResolver internal constructor(
return link.host == httpUrl.host
&& link.encodedPath == httpUrl.encodedPath
}
private companion object {
// Placeholder title used as the default for the seed manga built in resolveManga;
// resolveBySeed compares the loaded title against it to tell whether a real title is known.
const val STUB_TITLE = "Unknown manga"
}
}

@ -2,6 +2,7 @@ package org.koitharu.kotatsu.parsers
import kotlinx.coroutines.test.runTest
import okhttp3.HttpUrl
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.params.ParameterizedTest
import org.koitharu.kotatsu.parsers.model.*
@ -215,6 +216,20 @@ internal class MangaParserTest {
}
}
/**
 * Takes the first manga of the default list for [source], resolves it back from its public URL
 * through the link resolver and checks that id, public URL, url and title are unchanged.
 */
@ParameterizedTest(name = "{index}|link|{0}")
@MangaSources
fun link(source: MangaParserSource) = runTest(timeout = timeout) {
    val parser = context.newParserInstance(source)
    val expected = parser.getList(0, parser.defaultSortOrder, MangaListFilter.EMPTY).first()
    val linkResolver = context.newLinkResolver(expected.publicUrl)
    val actual = linkResolver.getManga()
    Assertions.assertNotNull(actual)
    // assertNotNull already fails the test on null; the early return only enables the smart cast
    if (actual == null) {
        return@runTest
    }
    Assertions.assertEquals(expected.id, actual.id)
    Assertions.assertEquals(expected.publicUrl, actual.publicUrl)
    Assertions.assertEquals(expected.url, actual.url)
    Assertions.assertEquals(expected.title, actual.title)
}
@ParameterizedTest(name = "{index}|authorization|{0}")
@MangaSources
@Disabled

Loading…
Cancel
Save