From c6b91800133ef91126ec59704a1fc50a20a84ab7 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 11 Jul 2022 11:42:38 +0300 Subject: [PATCH 1/4] Parse favicons from websites --- .../koitharu/kotatsu/parsers/MangaParser.kt | 9 ++ .../koitharu/kotatsu/parsers/model/Favicon.kt | 51 +++++++++++ .../kotatsu/parsers/model/Favicons.kt | 45 ++++++++++ .../kotatsu/parsers/util/FaviconParser.kt | 89 +++++++++++++++++++ .../kotatsu/parsers/MangaParserTest.kt | 13 ++- 5 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicon.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicons.kt create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index 7ae0228c2..32457ddcc 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -4,6 +4,7 @@ import androidx.annotation.CallSuper import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.FaviconParser import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl import java.util.* @@ -105,8 +106,16 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour /** * Returns direct link to the website favicon */ + @Deprecated( + message = "Use parseFavicons() to get multiple favicons with different size", + replaceWith = ReplaceWith("parseFavicons()"), + ) open fun getFaviconUrl() = "https://${getDomain()}/favicon.ico" + suspend fun parseFavicons(): Favicons { + return FaviconParser(context, getDomain()).parseFavicons() + } + @CallSuper open fun onCreateConfig(keys: MutableCollection>) { keys.add(configKeyDomain) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicon.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicon.kt new file mode 100644 index 000000000..7256524e5 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicon.kt @@ -0,0 +1,51 @@ +package org.koitharu.kotatsu.parsers.model + +import okhttp3.HttpUrl.Companion.toHttpUrl + +class Favicon internal constructor( + val url: String, + val size: Int, + internal val rel: String?, +) : Comparable { + + val type: String = url.toHttpUrl().pathSegments.last() + .substringAfterLast('.', "").lowercase() + + override fun compareTo(other: Favicon): Int { + val res = size.compareTo(other.size) + if (res != 0) { + return res + } + return relWeightOf(rel).compareTo(relWeightOf(other.rel)) + } + + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (javaClass != other?.javaClass) return false + + other as Favicon + + if (url != other.url) return false + if (size != other.size) return false + if (rel != other.rel) return false + + return true + } + + override fun hashCode(): Int { + var result = url.hashCode() + result = 31 * result + size + result = 31 * result + rel.hashCode() + return result + } + + override fun toString(): String { + return "Favicon(size=$size, type='$type', rel='$rel', url='$url')" + } + + private fun relWeightOf(rel: String?) = when (rel) { + "apple-touch-icon" -> 1 // Prefer apple-touch-icon because it has a better quality + "mask-icon" -> -1 + else -> 0 + } +} \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicons.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicons.kt new file mode 100644 index 000000000..f9c9aab7d --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/model/Favicons.kt @@ -0,0 +1,45 @@ +package org.koitharu.kotatsu.parsers.model + +class Favicons internal constructor( + favicons: Collection, + val referer: String, +) : Collection { + + private val icons = favicons.sortedDescending() + + override val size: Int + get() = icons.size + + override fun contains(element: Favicon): Boolean = icons.contains(element) + + override fun containsAll(elements: Collection): Boolean = icons.containsAll(elements) + + override fun isEmpty(): Boolean = icons.isEmpty() + + override fun iterator(): Iterator = icons.iterator() + + /** + * Finds a favicon whose size in pixels is greater than or equal to the specified size. + * If such icon is not available returns the largest icon + * @param size in pixels + * @param types supported file types, e.g. png, svg, ico. May be null but not empty + */ + @JvmOverloads + fun find(size: Int, types: Set? = null): Favicon? { + if (icons.isEmpty()) { + return null + } + var result: Favicon? = null + for (icon in icons) { + if (types != null && icon.type !in types) { + continue + } + if (result == null || icon.size >= size) { + result = icon + } else { + break + } + } + return result + } +} \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt new file mode 100644 index 000000000..cd49588ae --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt @@ -0,0 +1,89 @@ +package org.koitharu.kotatsu.parsers.util + +import org.jsoup.nodes.Element +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.model.Favicon +import org.koitharu.kotatsu.parsers.model.Favicons +import org.koitharu.kotatsu.parsers.util.json.mapJSON + +class FaviconParser(private val context: MangaLoaderContext, private val domain: String) { + + suspend fun parseFavicons(): Favicons { + val url = "https://$domain" + val doc = context.httpGet(url).parseHtml() + val result = HashSet() + val manifestLink = doc.getElementsByAttributeValue("rel", "manifest").firstOrNull() + ?.attrAsAbsoluteUrlOrNull("href") + if (manifestLink != null) { + result += parseManifest(manifestLink) + } + val links = doc.getElementsByAttributeValueContaining("rel", "icon") + links.mapNotNullTo(result) { link -> + parseLink(link) + } + if (result.isEmpty()) { + result.add(createFallback()) + } + return Favicons(result, url) + } + + private fun parseLink(link: Element): Favicon? { + val href = link.attrAsAbsoluteUrlOrNull("href") + if (href == null || href.endsWith('/')) { + return null + } + val sizes = link.attr("sizes") + return Favicon( + url = href, + size = parseSize(sizes), + rel = link.attrOrNull("rel"), + ) + } + + private fun parseSize(sizes: String): Int { + if (sizes.isEmpty() || sizes == "any") { + return 0 + } + return sizes.substringBefore(' ') + .split('x', 'X', '*') + .firstNotNullOfOrNull { it.toIntOrNull() } + ?: 0 + } + + private suspend fun parseManifest(url: String): List { + val json = context.httpGet(url).parseJson() + val icons = json.getJSONArray("icons") + return icons.mapJSON { jo -> + Favicon( + url = jo.getString("src").resolveLink(), + size = parseSize(jo.getString("sizes")), + rel = null, + ) + } + } + + private fun createFallback(): Favicon { + val href = "https://$domain/favicon.ico" + return Favicon( + url = href, + size = 0, + rel = null, + ) + } + + private fun String.resolveLink(): String { + return when { + startsWith("http:") || startsWith("https:") -> { + this + } + + startsWith('/') -> { + "https://$domain$this" + } + + else -> { + "https://$domain/$this" + } + } + } +} \ No newline at end of file diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index f062d890b..d02d3d6f9 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -120,9 +120,16 @@ internal class MangaParserTest { @MangaSources fun favicon(source: MangaSource) = runTest { val parser = source.newParser(context) - val faviconUrl = parser.getFaviconUrl() - assert(faviconUrl.isUrlAbsolute()) - checkImageRequest(faviconUrl, null) + val favicons = parser.parseFavicons() + val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") + assert(favicons.isNotEmpty()) + favicons.forEach { + assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" } + assert(it.type in types) { "Unknown icon type: ${it.type}" } + } + val favicon = favicons.find(24) + checkNotNull(favicon) + checkImageRequest(favicon.url, favicons.referer) } @ParameterizedTest(name = "{index}|domain|{0}") From 358910bafea7160936258f9d81919557cb40579e Mon Sep 17 00:00:00 2001 From: Koitharu Date: Tue, 12 Jul 2022 08:58:55 +0300 Subject: [PATCH 2/4] [NineManga] Fix favicons redirect --- .../org/koitharu/kotatsu/parsers/MangaParser.kt | 2 +- .../koitharu/kotatsu/parsers/site/NineMangaParser.kt | 4 ++++ .../koitharu/kotatsu/parsers/util/FaviconParser.kt | 12 ++++++++++-- .../kotatsu/parsers/CloudFlareInterceptor.kt | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index 32457ddcc..0ee27c34d 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -112,7 +112,7 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour ) open fun getFaviconUrl() = "https://${getDomain()}/favicon.ico" - suspend fun parseFavicons(): Favicons { + open suspend fun parseFavicons(): Favicons { return FaviconParser(context, getDomain()).parseFavicons() } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt index af5e2a55e..20c88be1b 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt @@ -171,6 +171,10 @@ internal abstract class NineMangaParser( } ?: parseFailed("Root not found") } + override suspend fun parseFavicons(): Favicons { + return FaviconParser(context, getDomain()).addHeaders(headers).parseFavicons() + } + private fun parseStatus(status: String) = when { status.contains("Ongoing") -> MangaState.ONGOING status.contains("Completed") -> MangaState.FINISHED diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt index cd49588ae..b7767a33f 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt @@ -1,5 +1,6 @@ package org.koitharu.kotatsu.parsers.util +import okhttp3.Headers import org.jsoup.nodes.Element import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.model.Favicon @@ -8,9 +9,16 @@ import org.koitharu.kotatsu.parsers.util.json.mapJSON class FaviconParser(private val context: MangaLoaderContext, private val domain: String) { + private val headers = Headers.Builder() + + fun addHeaders(headers: Headers): FaviconParser { + this.headers.addAll(headers) + return this + } + suspend fun parseFavicons(): Favicons { val url = "https://$domain" - val doc = context.httpGet(url).parseHtml() + val doc = context.httpGet(url, headers.build()).parseHtml() val result = HashSet() val manifestLink = doc.getElementsByAttributeValue("rel", "manifest").firstOrNull() ?.attrAsAbsoluteUrlOrNull("href") @@ -51,7 +59,7 @@ class FaviconParser(private val context: MangaLoaderContext, private val domain: } private suspend fun parseManifest(url: String): List { - val json = context.httpGet(url).parseJson() + val json = context.httpGet(url, headers.build()).parseJson() val icons = json.getJSONArray("icons") return icons.mapJSON { jo -> Favicon( diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt index 50a3e9d40..5f9a64c33 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt @@ -17,7 +17,7 @@ class CloudFlareInterceptor : Interceptor { if (response.code == HTTP_FORBIDDEN || response.code == HTTP_UNAVAILABLE) { if (response.header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE) == true) { response.closeQuietly() - throw CloudFlareProtectedException(chain.request().url.toString()) + throw CloudFlareProtectedException(response.request.url.toString()) } } return response From 330495556ad23ed8a1bf43ac9e23aa58a988fa43 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Tue, 12 Jul 2022 09:31:18 +0300 Subject: [PATCH 3/4] Refactor extra source headers --- .idea/runConfigurations/TestsAndReport.xml | 7 +++++++ .../org/koitharu/kotatsu/parsers/MangaParser.kt | 7 ++++++- .../kotatsu/parsers/site/NineMangaParser.kt | 6 +----- .../parsers/site/grouple/GroupleParser.kt | 2 +- .../kotatsu/parsers/util/FaviconParser.kt | 17 +++++++---------- .../kotatsu/parsers/MangaLoaderContextMock.kt | 6 +++++- .../koitharu/kotatsu/parsers/MangaParserTest.kt | 2 +- 7 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 .idea/runConfigurations/TestsAndReport.xml diff --git a/.idea/runConfigurations/TestsAndReport.xml b/.idea/runConfigurations/TestsAndReport.xml new file mode 100644 index 000000000..614cfdf37 --- /dev/null +++ b/.idea/runConfigurations/TestsAndReport.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index 0ee27c34d..eb04d3dd3 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -1,6 +1,8 @@ package org.koitharu.kotatsu.parsers import androidx.annotation.CallSuper +import androidx.annotation.VisibleForTesting +import okhttp3.Headers import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.model.* @@ -31,6 +33,9 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour */ protected abstract val configKeyDomain: ConfigKey.Domain + @VisibleForTesting(otherwise = VisibleForTesting.PROTECTED) + internal open val headers: Headers? = null + /** * Used as fallback if value of `sortOrder` passed to [getList] is null */ @@ -113,7 +118,7 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour open fun getFaviconUrl() = "https://${getDomain()}/favicon.ico" open suspend fun parseFavicons(): Favicons { - return FaviconParser(context, getDomain()).parseFavicons() + return FaviconParser(context, getDomain(), headers).parseFavicons() } @CallSuper diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt index 20c88be1b..26eee91bd 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/NineMangaParser.kt @@ -26,7 +26,7 @@ internal abstract class NineMangaParser( context.cookieJar.insertCookies(getDomain(), "ninemanga_template_desk=yes") } - private val headers = Headers.Builder() + override val headers = Headers.Builder() .add("Accept-Language", "en-US;q=0.7,en;q=0.3") .build() @@ -171,10 +171,6 @@ internal abstract class NineMangaParser( } ?: parseFailed("Root not found") } - override suspend fun parseFavicons(): Favicons { - return FaviconParser(context, getDomain()).addHeaders(headers).parseFavicons() - } - private fun parseStatus(status: String) = when { status.contains("Ongoing") -> MangaState.ONGOING status.contains("Completed") -> MangaState.FINISHED diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt index 073b68bfd..f7425c1d1 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt @@ -24,7 +24,7 @@ internal abstract class GroupleParser( private val siteId: Int, ) : MangaParser(source), MangaParserAuthProvider { - private val headers = Headers.Builder() + override val headers = Headers.Builder() .add("User-Agent", userAgent) .build() diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt index b7767a33f..fec7f706a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/FaviconParser.kt @@ -7,18 +7,15 @@ import org.koitharu.kotatsu.parsers.model.Favicon import org.koitharu.kotatsu.parsers.model.Favicons import org.koitharu.kotatsu.parsers.util.json.mapJSON -class FaviconParser(private val context: MangaLoaderContext, private val domain: String) { - - private val headers = Headers.Builder() - - fun addHeaders(headers: Headers): FaviconParser { - this.headers.addAll(headers) - return this - } +class FaviconParser( + private val context: MangaLoaderContext, + private val domain: String, + private val headers: Headers?, +) { suspend fun parseFavicons(): Favicons { val url = "https://$domain" - val doc = context.httpGet(url, headers.build()).parseHtml() + val doc = context.httpGet(url, headers).parseHtml() val result = HashSet() val manifestLink = doc.getElementsByAttributeValue("rel", "manifest").firstOrNull() ?.attrAsAbsoluteUrlOrNull("href") @@ -59,7 +56,7 @@ class FaviconParser(private val context: MangaLoaderContext, private val domain: } private suspend fun parseManifest(url: String): List { - val json = context.httpGet(url, headers.build()).parseJson() + val json = context.httpGet(url, headers).parseJson() val icons = json.getJSONArray("icons") return icons.mapJSON { jo -> Favicon( diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt index 60357aa43..95ebb59c3 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt @@ -1,6 +1,7 @@ package org.koitharu.kotatsu.parsers import com.koushikdutta.quack.QuackContext +import okhttp3.Headers import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.Response @@ -45,10 +46,13 @@ internal class MangaLoaderContextMock : MangaLoaderContext() { return SourceConfigMock() } - suspend fun doRequest(url: String, referer: String? = null): Response { + suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response { val request = Request.Builder() .get() .url(url) + if (extraHeaders != null) { + request.headers(extraHeaders) + } if (referer != null) { request.header("Referer", referer) } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index d02d3d6f9..4055cd8e0 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -141,7 +141,7 @@ internal class MangaParserTest { .host(defaultDomain) .scheme("https") .toString() - val response = context.doRequest(url) + val response = context.doRequest(url, extraHeaders = parser.headers) val realUrl = response.request.url val realDomain = realUrl.topPrivateDomain() val realHost = realUrl.host From abc7eca40b10b7527a0c9674e1c9555f1424c4ba Mon Sep 17 00:00:00 2001 From: Koitharu Date: Wed, 13 Jul 2022 15:13:46 +0300 Subject: [PATCH 4/4] Rename parseFavicons to getFavicons --- src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt | 5 ++++- .../kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index eb04d3dd3..c229f4f8b 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -117,7 +117,10 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour ) open fun getFaviconUrl() = "https://${getDomain()}/favicon.ico" - open suspend fun parseFavicons(): Favicons { + /** + * Parse favicons from the main page of the source`s website + */ + open suspend fun getFavicons(): Favicons { return FaviconParser(context, getDomain(), headers).parseFavicons() } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index 4055cd8e0..f47f12886 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -120,7 +120,7 @@ internal class MangaParserTest { @MangaSources fun favicon(source: MangaSource) = runTest { val parser = source.newParser(context) - val favicons = parser.parseFavicons() + val favicons = parser.getFavicons() val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") assert(favicons.isNotEmpty()) favicons.forEach {