Parse favicons from websites

Koitharu 4 years ago
parent 1f5d02ddb0
commit c6b9180013
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -4,6 +4,7 @@ import androidx.annotation.CallSuper
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.FaviconParser
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import java.util.*
@ -105,8 +106,16 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour
/**
 * Builds a direct link to `favicon.ico` located at the root of the domain
 * returned by `getDomain()`.
 */
@Deprecated(
    message = "Use parseFavicons() to get multiple favicons with different size",
    replaceWith = ReplaceWith("parseFavicons()"),
)
open fun getFaviconUrl(): String {
    val host = getDomain()
    return "https://$host/favicon.ico"
}
/**
 * Creates a [FaviconParser] for this parser's context and current domain and
 * returns whatever favicons it finds.
 */
suspend fun parseFavicons(): Favicons = FaviconParser(context, getDomain()).parseFavicons()
@CallSuper
open fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
keys.add(configKeyDomain)

@ -0,0 +1,51 @@
package org.koitharu.kotatsu.parsers.model
import okhttp3.HttpUrl.Companion.toHttpUrl
/**
 * A single favicon candidate.
 *
 * @property url address of the icon; it is parsed with `toHttpUrl()` on construction
 * @property size icon size as supplied by the creator of this object
 * @property rel value of the `rel` attribute this icon was declared with, if any
 */
class Favicon internal constructor(
    val url: String,
    val size: Int,
    internal val rel: String?,
) : Comparable<Favicon> {

    /** Lowercased extension of the last path segment of [url]; empty when it has no dot. */
    val type: String = url.toHttpUrl()
        .pathSegments
        .last()
        .substringAfterLast('.', "")
        .lowercase()

    /** Orders icons by [size] first and by the weight of their `rel` value second. */
    override fun compareTo(other: Favicon): Int = compareValuesBy(
        this,
        other,
        { it.size },
        { relWeightOf(it.rel) },
    )

    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        // The class is final, so a type check is equivalent to comparing runtime classes
        other !is Favicon -> false
        else -> url == other.url && size == other.size && rel == other.rel
    }

    override fun hashCode(): Int {
        val relHash = rel?.hashCode() ?: 0
        return 31 * (31 * url.hashCode() + size) + relHash
    }

    override fun toString(): String = "Favicon(size=$size, type='$type', rel='$rel', url='$url')"

    /** Tie-breaker for icons of equal size: a higher weight means a more preferred icon. */
    private fun relWeightOf(rel: String?): Int {
        // Prefer apple-touch-icon because it has a better quality
        if (rel == "apple-touch-icon") return 1
        if (rel == "mask-icon") return -1
        return 0
    }
}

@ -0,0 +1,45 @@
package org.koitharu.kotatsu.parsers.model
/**
 * Read-only collection of [Favicon]s kept in descending natural order
 * (the order produced by `sortedDescending()`), together with the [referer]
 * supplied by the creator of this object.
 *
 * All [Collection] members are forwarded to the sorted list.
 */
class Favicons internal constructor(
    favicons: Collection<Favicon>,
    val referer: String,
) : Collection<Favicon> by favicons.sortedDescending() {

    /**
     * Finds a favicon whose size in pixels is greater than or equal to the specified size.
     * Among matching icons the smallest one is chosen; if no icon is large enough,
     * the largest available icon is returned instead.
     *
     * @param size in pixels
     * @param types supported file types, e.g. png, svg, ico. May be null but not empty
     * @return the selected icon, or null when no icon of a supported type exists
     */
    @JvmOverloads
    fun find(size: Int, types: Set<String>? = null): Favicon? {
        val candidates = if (types == null) {
            toList()
        } else {
            filter { it.type in types }
        }
        // Candidates are ordered from largest to smallest, so the icons that are big
        // enough form a prefix; the last of them is the closest match.
        return candidates.lastOrNull { it.size >= size } ?: candidates.firstOrNull()
    }
}

@ -0,0 +1,89 @@
package org.koitharu.kotatsu.parsers.util
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.model.Favicon
import org.koitharu.kotatsu.parsers.model.Favicons
import org.koitharu.kotatsu.parsers.util.json.mapJSON
/**
 * Collects favicons declared by the main page of a website.
 *
 * Icons are gathered from the linked web app manifest (if any) and from the
 * elements whose `rel` attribute contains `icon`. When nothing is found,
 * `/favicon.ico` is used as a fallback.
 *
 * @param context loader used to perform the HTTP requests
 * @param domain host name of the website, without scheme
 */
class FaviconParser(private val context: MangaLoaderContext, private val domain: String) {

    /**
     * Loads `https://<domain>` and returns all favicons found there.
     * The returned collection is never empty; its referer is the page url.
     */
    suspend fun parseFavicons(): Favicons {
        val url = "https://$domain"
        val doc = context.httpGet(url).parseHtml()
        // A set is used so that an icon declared twice is reported only once
        val result = HashSet<Favicon>()
        val manifestLink = doc.getElementsByAttributeValue("rel", "manifest").firstOrNull()
            ?.attrAsAbsoluteUrlOrNull("href")
        if (manifestLink != null) {
            result += parseManifest(manifestLink)
        }
        val links = doc.getElementsByAttributeValueContaining("rel", "icon")
        links.mapNotNullTo(result) { link ->
            parseLink(link)
        }
        if (result.isEmpty()) {
            result.add(createFallback())
        }
        return Favicons(result, url)
    }

    /**
     * Converts an element with an icon-like `rel` into a [Favicon].
     * Returns null when the element has no usable `href` or the `href` ends with a slash.
     */
    private fun parseLink(link: Element): Favicon? {
        val href = link.attrAsAbsoluteUrlOrNull("href")
        if (href == null || href.endsWith('/')) {
            return null
        }
        val sizes = link.attr("sizes")
        return Favicon(
            url = href,
            size = parseSize(sizes),
            rel = link.attrOrNull("rel"),
        )
    }

    /**
     * Extracts a single dimension from a `sizes` value such as `32x32` or `16x16 32x32`.
     * Only the first space-separated entry is considered.
     *
     * @return the first numeric part of that entry, or 0 when [sizes] is empty,
     * equals `any` or contains no number
     */
    private fun parseSize(sizes: String): Int {
        if (sizes.isEmpty() || sizes == "any") {
            return 0
        }
        return sizes.substringBefore(' ')
            .split('x', 'X', '*')
            .firstNotNullOfOrNull { it.toIntOrNull() }
            ?: 0
    }

    /**
     * Downloads the manifest at [url] and reads its `icons` array.
     *
     * A manifest without an `icons` member yields an empty list, and an icon
     * entry without `sizes` is reported with size 0, instead of failing the
     * whole favicon lookup with a JSON exception.
     */
    private suspend fun parseManifest(url: String): List<Favicon> {
        val json = context.httpGet(url).parseJson()
        // "icons" is optional: a manifest without it simply contributes nothing
        val icons = json.optJSONArray("icons") ?: return emptyList()
        return icons.mapJSON { jo ->
            Favicon(
                url = jo.getString("src").resolveLink(),
                // "sizes" is optional too; optString falls back to "" which parseSize maps to 0
                size = parseSize(jo.optString("sizes")),
                rel = null,
            )
        }
    }

    /** Default icon location, used when the page declares no icons at all. */
    private fun createFallback(): Favicon {
        val href = "https://$domain/favicon.ico"
        return Favicon(
            url = href,
            size = 0,
            rel = null,
        )
    }

    /**
     * Turns an icon reference taken from the manifest into an absolute url.
     * Relative references are resolved against the root of [domain].
     */
    private fun String.resolveLink(): String {
        return when {
            startsWith("http:") || startsWith("https:") -> {
                this
            }

            // Protocol-relative reference (//host/path): only the scheme is missing.
            // Must be checked before the single-slash case, which it would otherwise match.
            startsWith("//") -> {
                "https:$this"
            }

            startsWith('/') -> {
                "https://$domain$this"
            }

            else -> {
                "https://$domain/$this"
            }
        }
    }
}

@ -120,9 +120,16 @@ internal class MangaParserTest {
@MangaSources
fun favicon(source: MangaSource) = runTest {
    val parser = source.newParser(context)

    // Single-url API: the link must be absolute and must pass the image request check
    val legacyUrl = parser.getFaviconUrl()
    assert(legacyUrl.isUrlAbsolute())
    checkImageRequest(legacyUrl, null)

    // Multi-icon API: every icon must have an absolute url and a known file type
    val icons = parser.parseFavicons()
    val knownTypes = setOf("png", "svg", "ico", "gif", "jpg", "jpeg")
    assert(icons.isNotEmpty())
    for (icon in icons) {
        assert(icon.url.isUrlAbsolute()) { "Favicon url is not absolute: ${icon.url}" }
        assert(icon.type in knownTypes) { "Unknown icon type: ${icon.type}" }
    }

    // The icon selected for a 24px request is checked with the reported referer
    val picked = checkNotNull(icons.find(24))
    checkImageRequest(picked.url, icons.referer)
}
@ParameterizedTest(name = "{index}|domain|{0}")

Loading…
Cancel
Save