Refactor extensions

pull/17/head
Koitharu 4 years ago
parent 45c9df0cd6
commit 4710bd9f02
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -50,7 +50,7 @@ dependencies {
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.1'
implementation 'com.squareup.okhttp3:okhttp:4.9.3'
implementation 'com.squareup.okio:okio:3.1.0'
implementation 'org.jsoup:jsoup:1.14.3'
implementation 'org.jsoup:jsoup:1.15.1'
implementation 'org.json:json:20220320'
implementation 'androidx.collection:collection-ktx:1.2.0'

@ -58,7 +58,7 @@ internal class AnibelParser(override val context: MangaLoaderContext) : MangaPar
genres
slug
mediaType
status
status
}
}
""".trimIndent(),

@ -189,7 +189,7 @@ internal class BatoToParser(override val context: MangaLoaderContext) : MangaPar
val root = body.getElementById("series-list") ?: parseFailed("Cannot find root")
return root.children().map { div ->
val a = div.selectFirst("a") ?: parseFailed()
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
val title = div.selectFirst(".item-title")?.text() ?: parseFailed("Title not found")
Manga(
id = generateUid(href),
@ -222,7 +222,7 @@ internal class BatoToParser(override val context: MangaLoaderContext) : MangaPar
private fun Element.parseChapter(index: Int): MangaChapter? {
val a = selectFirst("a.chapt") ?: return null
val extra = selectFirst(".extra")
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
return MangaChapter(
id = generateUid(href),
name = a.text(),

@ -42,11 +42,11 @@ internal abstract class ChanParser(source: MangaSource) : MangaParser(source) {
return root.select("div.content_row").mapNotNull { row ->
val a = row.selectFirst("div.manga_row1")?.selectFirst("h2")?.selectFirst("a")
?: return@mapNotNull null
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
url = href,
publicUrl = href.inContextOf(a),
publicUrl = href.toAbsoluteUrl(a.host ?: domain),
altTitle = a.attr("title"),
title = a.text().substringAfterLast('(').substringBeforeLast(')'),
author = row.getElementsByAttributeValueStarting(
@ -81,7 +81,8 @@ internal abstract class ChanParser(source: MangaSource) : MangaParser(source) {
description = root.getElementById("description")?.html()?.substringBeforeLast("<div"),
largeCoverUrl = root.getElementById("cover")?.absUrl("src"),
chapters = root.select("table.table_cha tr:gt(1)").reversed().mapIndexedNotNull { i, tr ->
val href = tr?.selectFirst("a")?.relUrl("href") ?: return@mapIndexedNotNull null
val href = tr?.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: return@mapIndexedNotNull null
MangaChapter(
id = generateUid(href),
name = tr.selectFirst("a")?.text().orEmpty(),

@ -106,7 +106,7 @@ internal class ExHentaiParser(
val (td1, td2) = tr.children()
val glink = td2.selectFirst("div.glink") ?: parseFailed("glink not found")
val a = glink.parents().select("a").first() ?: parseFailed("link not found")
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
val tagsDiv = glink.nextElementSibling() ?: parseFailed("tags div not found")
val mainTag = td2.selectFirst("div.cn")?.let { div ->
MangaTag(
@ -148,7 +148,7 @@ internal class ExHentaiParser(
?.substringAfterLast(' ')
?.toFloatOrNull()
?.div(5f) ?: manga.rating,
largeCoverUrl = cover?.css("background")?.cssUrl(),
largeCoverUrl = cover?.styleValueOrNull("background")?.cssUrl(),
description = taglist?.select("tr")?.joinToString("<br>") { tr ->
val (tc, td) = tr.children()
val subtags = td.select("a").joinToString { it.html() }
@ -181,7 +181,7 @@ internal class ExHentaiParser(
val doc = context.httpGet(chapter.url.withDomain()).parseHtml()
val root = doc.body().getElementById("gdt") ?: parseFailed("Root not found")
return root.select("a").map { a ->
val url = a.relUrl("href")
val url = a.attrAsRelativeUrl("href")
MangaPage(
id = generateUid(url),
url = url,

@ -81,7 +81,7 @@ internal abstract class GroupleParser(source: MangaSource, userAgent: String) :
if (descDiv.selectFirst("i.fa-user") != null) {
return@mapNotNull null // skip author
}
val href = imgDiv.selectFirst("a")?.attr("href")?.inContextOf(node)
val href = imgDiv.selectFirst("a")?.attrAsAbsoluteUrlOrNull("href")
if (href == null || href.toHttpUrl().host != baseHost) {
return@mapNotNull null // skip external links
}
@ -148,7 +148,7 @@ internal abstract class GroupleParser(source: MangaSource, userAgent: String) :
chapters = root.selectFirst("div.chapters-link")?.selectFirst("table")
?.select("tr:has(td > a)")?.asReversed()?.mapIndexedNotNull { i, tr ->
val a = tr.selectFirst("a") ?: return@mapIndexedNotNull null
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
var translators = ""
val translatorElement = a.attr("title")
if (!translatorElement.isNullOrBlank()) {

@ -55,12 +55,13 @@ internal abstract class MadaraParser(
payload,
).parseHtml()
return doc.select("div.row.c-tabs-item__content").map { div ->
val href = div.selectFirst("a")?.relUrl("href") ?: parseFailed("Link not found")
val href = div.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: parseFailed("Link not found")
val summary = div.selectFirst(".tab-summary")
Manga(
id = generateUid(href),
url = href,
publicUrl = href.inContextOf(div),
publicUrl = href.toAbsoluteUrl(div.host ?: getDomain()),
coverUrl = div.selectFirst("img")?.src().orEmpty(),
title = (summary?.selectFirst("h3") ?: summary?.selectFirst("h4"))?.text().orEmpty(),
altTitle = null,
@ -140,12 +141,10 @@ internal abstract class MadaraParser(
?.joinToString { it.html() },
chapters = root2.select("li").asReversed().mapIndexed { i, li ->
val a = li.selectFirst("a")
val href = a?.relUrl("href").orEmpty().ifEmpty {
parseFailed("Link is missing")
}
val href = a?.attrAsRelativeUrlOrNull("href") ?: parseFailed("Link is missing")
MangaChapter(
id = generateUid(href),
name = a!!.ownText(),
name = a.ownText(),
number = i + 1,
url = href,
uploadDate = parseChapterDate(
@ -361,7 +360,8 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en")
class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
class Hentai4Free(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
override val tagPrefix = "hentai-tag/"

@ -65,7 +65,7 @@ internal open class MangaLibParser(
?: return emptyList()
return items.mapNotNull { card ->
val a = card.selectFirst("a.media-card") ?: return@mapNotNull null
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
title = card.selectFirst("h3")?.text().orEmpty(),
@ -74,7 +74,7 @@ internal open class MangaLibParser(
author = null,
rating = RATING_UNKNOWN,
url = href,
publicUrl = href.inContextOf(a),
publicUrl = href.toAbsoluteUrl(a.host ?: getDomain()),
tags = emptySet(),
state = null,
isNsfw = false,

@ -53,7 +53,7 @@ internal class MangaOwlParser(override val context: MangaLoaderContext) : MangaP
val slides = doc.body().select("ul.slides") ?: parseFailed("An error occurred while parsing")
val items = slides.select("div.col-md-2")
return items.mapNotNull { item ->
val href = item.selectFirst("h6 a")?.relUrl("href") ?: return@mapNotNull null
val href = item.selectFirst("h6 a")?.attrAsRelativeUrlOrNull("href") ?: return@mapNotNull null
Manga(
id = generateUid(href),
title = item.selectFirst("h6 a")?.text() ?: return@mapNotNull null,
@ -134,7 +134,7 @@ internal class MangaOwlParser(override val context: MangaLoaderContext) : MangaP
val doc = context.httpGet(fullUrl).parseHtml()
val root = doc.body().select("div.item img.owl-lazy") ?: throw ParseException("Root not found")
return root.map { div ->
val url = div?.relUrl("data-src") ?: parseFailed("Page image not found")
val url = div?.attrAsRelativeUrlOrNull("data-src") ?: parseFailed("Page image not found")
MangaPage(
id = generateUid(url),
url = url,

@ -58,10 +58,10 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
?: throw ParseException("Root not found")
return root.select("li").mapNotNull { li ->
val a = li.selectFirst("a.manga_cover")
val href = a?.relUrl("href")
val href = a?.attrAsRelativeUrlOrNull("href")
?: return@mapNotNull null
val views = li.select("p.view")
val status = views.findOwnText { x -> x.startsWith("Status:") }
val status = views.firstNotNullOfOrNull { it.ownText().takeIf { x -> x.startsWith("Status:") } }
?.substringAfter(':')?.trim()?.lowercase(Locale.ROOT)
Manga(
id = generateUid(href),
@ -71,7 +71,8 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
altTitle = null,
rating = li.selectFirst("p.score")?.selectFirst("b")
?.ownText()?.toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN,
author = views.findText { x -> x.startsWith("Author:") }?.substringAfter(':')
author = views.firstNotNullOfOrNull { it.text().takeIf { x -> x.startsWith("Author:") } }
?.substringAfter(':')
?.trim(),
state = when (status) {
"ongoing" -> MangaState.ONGOING
@ -87,7 +88,7 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
}.orEmpty(),
url = href,
isNsfw = false,
publicUrl = href.inContextOf(a),
publicUrl = href.toAbsoluteUrl(a.host ?: getDomain()),
)
}
}
@ -112,9 +113,10 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
}.orEmpty(),
description = info?.getElementById("show")?.ownText(),
chapters = chaptersList?.mapIndexedNotNull { i, li ->
val href = li.selectFirst("a")?.relUrl("href")
val href = li.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: return@mapIndexedNotNull null
val name = li.select("span").filter { it.className().isEmpty() }
val name = li.select("span")
.filter { x -> x.className().isEmpty() }
.joinToString(" - ") { it.text() }.trim()
MangaChapter(
id = generateUid(href),
@ -139,8 +141,8 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
val root = doc.body().selectFirst("div.page_select")
?: throw ParseException("Cannot find root")
return root.selectFirst("select")?.select("option")?.mapNotNull {
val href = it.relUrl("value")
if (href.endsWith("featured.html")) {
val href = it.attrAsRelativeUrlOrNull("value")
if (href == null || href.endsWith("featured.html")) {
return@mapNotNull null
}
MangaPage(
@ -193,7 +195,7 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
val dateFormat = SimpleDateFormat("MMM dd,yyyy", Locale.US)
return list.select("li").asReversed().mapIndexedNotNull { i, li ->
val a = li.selectFirst("a") ?: return@mapIndexedNotNull null
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
val name = a.selectFirst("span.vol")?.text().orEmpty().ifEmpty {
a.ownText()
}

@ -117,7 +117,8 @@ internal abstract class NineMangaParser(
chapters = root.selectFirst("div.chapterbox")?.select("ul.sub_vol_ul > li")
?.asReversed()?.mapIndexed { i, li ->
val a = li.selectFirst("a.chapter_list_a")
val href = a?.relUrl("href")?.replace("%20", " ") ?: parseFailed("Link not found")
val href = a?.attrAsRelativeUrlOrNull("href")
?.replace("%20", " ") ?: parseFailed("Link not found")
MangaChapter(
id = generateUid(href),
name = a.text(),

@ -68,7 +68,7 @@ internal class NudeMoonParser(
return root.select("table.news_pic2").mapNotNull { row ->
val a = row.selectFirst("td.bg_style1")?.selectFirst("a")
?: return@mapNotNull null
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
val title = a.selectFirst("h2")?.text().orEmpty()
val info = row.selectFirst("td[width=100%]") ?: return@mapNotNull null
Manga(

@ -7,8 +7,8 @@ import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.relUrl
@MangaSourceParser("YAOICHAN", "Яой-тян", "ru")
internal class YaoiChanParser(override val context: MangaLoaderContext) : ChanParser(MangaSource.YAOICHAN) {
@ -25,7 +25,7 @@ internal class YaoiChanParser(override val context: MangaLoaderContext) : ChanPa
chapters = root.select("table.table_cha").flatMap { table ->
table.select("div.manga")
}.mapNotNull { it.selectFirst("a") }.reversed().mapIndexed { i, a ->
val href = a.relUrl("href")
val href = a.attrAsRelativeUrl("href")
MangaChapter(
id = generateUid(href),
name = a.text().trim(),

@ -1,3 +1,5 @@
@file:JvmName("CollectionUtils")
package org.koitharu.kotatsu.parsers.util
import androidx.collection.ArrayMap

@ -1,3 +1,5 @@
@file:JvmName("CookieJarUtils")
package org.koitharu.kotatsu.parsers.util
import okhttp3.Cookie

@ -0,0 +1,56 @@
@file:JvmName("JsoupUtils")
package org.koitharu.kotatsu.parsers.util
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import org.jsoup.nodes.Element
/**
 * Host component of this element's base URI.
 *
 * Evaluates to `null` when the base URI is empty or when OkHttp cannot parse it
 * into an `HttpUrl`.
 */
val Element.host: String?
    get() = baseUri()
        .takeUnless { it.isEmpty() }
        ?.toHttpUrlOrNull()
        ?.host
/**
 * Returns the value of the [attributeKey] attribute, or `null` when that value
 * is an empty string.
 */
fun Element.attrOrNull(attributeKey: String): String? {
    val value = attr(attributeKey)
    return if (value.isEmpty()) null else value
}
/**
 * Reads the [attributeKey] attribute and reduces it to a URL relative to the
 * host of this element's base URI.
 *
 * - A blank attribute yields `null`.
 * - A value that already starts with `/` is returned as-is (after trimming).
 * - Otherwise everything up to and including the first occurrence of the base
 *   URI's host is cut off. If the host does not occur in the value, the trimmed
 *   value is returned unchanged.
 *
 * @return the relative URL, or `null` when the attribute is blank or the base
 * URI has no parseable host.
 */
fun Element.attrAsRelativeUrlOrNull(attributeKey: String): String? {
    val raw = attr(attributeKey).trim()
    return when {
        raw.isEmpty() -> null
        raw.startsWith("/") -> raw
        else -> baseUri().toHttpUrlOrNull()?.host?.let { baseHost ->
            raw.substringAfter(baseHost)
        }
    }
}
/**
 * Strict variant of [attrAsRelativeUrlOrNull].
 *
 * @throws IllegalArgumentException when no relative URL can be derived from
 * the [attributeKey] attribute.
 */
fun Element.attrAsRelativeUrl(attributeKey: String): String =
    attrAsRelativeUrlOrNull(attributeKey)
        ?: throw IllegalArgumentException(
            "Cannot get relative url for $attributeKey: \"${attr(attributeKey)}\"",
        )
/**
 * Reads the [attributeKey] attribute and resolves it against this element's
 * base URI.
 *
 * @return the resolved URL as a string, or `null` when the attribute is blank,
 * the base URI cannot be parsed, or the link cannot be resolved against it.
 */
fun Element.attrAsAbsoluteUrlOrNull(attributeKey: String): String? {
    val link = attr(attributeKey).trim()
    if (link.isEmpty()) return null
    val base = baseUri().toHttpUrlOrNull() ?: return null
    // newBuilder(link) yields null when the link cannot be resolved against the base
    val resolved = base.newBuilder(link) ?: return null
    return resolved.toString()
}
/**
 * Strict variant of [attrAsAbsoluteUrlOrNull].
 *
 * @throws IllegalArgumentException when no absolute URL can be derived from
 * the [attributeKey] attribute.
 */
fun Element.attrAsAbsoluteUrl(attributeKey: String): String =
    attrAsAbsoluteUrlOrNull(attributeKey)
        ?: throw IllegalArgumentException(
            "Cannot get absolute url for $attributeKey: \"${attr(attributeKey)}\"",
        )
/**
 * Extracts the value of a single CSS [property] from this element's inline
 * `style` attribute.
 *
 * The property name must match exactly: it may not be the tail of a longer
 * name, so asking for `color` does not pick up `background-color`.
 *
 * @param property CSS property name, matched literally (case-sensitive).
 * @return the trimmed declaration value (everything between the property's `:`
 * and the next `;` or the end of the attribute), or `null` when the property
 * is not declared.
 */
fun Element.styleValueOrNull(property: String): String? {
    // The negative lookbehind rejects hits that are only the suffix of another
    // property name (e.g. "color" inside "background-color").
    val regex = Regex("(?<![\\w-])${Regex.escape(property)}\\s*:\\s*[^;]+")
    val declaration = regex.find(attr("style"))?.value ?: return null
    // The match cannot contain ';' ([^;]+), so no trailing separator needs stripping.
    return declaration.substringAfter(':').trim()
}

@ -1,3 +1,5 @@
@file:JvmName("NumberUtils")
package org.koitharu.kotatsu.parsers.util
import java.text.DecimalFormat

@ -1,3 +1,5 @@
@file:JvmName("OkHttpUtils")
package org.koitharu.kotatsu.parsers.util
import kotlinx.coroutines.suspendCancellableCoroutine

@ -1,3 +1,5 @@
@file:JvmName("ParseUtils")
package org.koitharu.kotatsu.parsers.util
import okhttp3.Response
@ -45,6 +47,11 @@ fun Response.parseJsonArray(): JSONArray {
}
}
@Deprecated(
message = "",
level = DeprecationLevel.ERROR,
replaceWith = ReplaceWith("firstNotNullOfOrNull { it.ownText().takeIf(predicate) }"),
)
inline fun Elements.findOwnText(predicate: (String) -> Boolean): String? {
for (x in this) {
val ownText = x.ownText()
@ -55,6 +62,11 @@ inline fun Elements.findOwnText(predicate: (String) -> Boolean): String? {
return null
}
@Deprecated(
message = "",
level = DeprecationLevel.ERROR,
replaceWith = ReplaceWith("firstNotNullOfOrNull { it.text().takeIf(predicate) }"),
)
inline fun Elements.findText(predicate: (String) -> Boolean): String? {
for (x in this) {
val text = x.text()
@ -65,6 +77,11 @@ inline fun Elements.findText(predicate: (String) -> Boolean): String? {
return null
}
@Deprecated(
message = "Use toAbsoluteUrl() instead",
level = DeprecationLevel.ERROR,
replaceWith = ReplaceWith("toAbsoluteUrl(node.host)"),
)
fun String.inContextOf(node: Node): String {
return if (this.isEmpty()) {
""
@ -86,6 +103,11 @@ fun String.toAbsoluteUrl(domain: String): String = when {
else -> this
}
@Deprecated(
message = "",
level = DeprecationLevel.ERROR,
replaceWith = ReplaceWith("attrAsRelativeUrl(attributeKey)"),
)
fun Element.relUrl(attributeKey: String): String {
val attr = attr(attributeKey).trim()
if (attr.isEmpty()) {
@ -100,11 +122,12 @@ fun Element.relUrl(attributeKey: String): String {
private val REGEX_URL_BASE = Regex("^[^/]{2,6}://[^/]+/", RegexOption.IGNORE_CASE)
fun Element.css(property: String): String? {
val regex = Regex("${Regex.escape(property)}\\s*:\\s*[^;]+")
val css = attr("style").find(regex) ?: return null
return css.substringAfter(':').removeSuffix(';').trim()
}
/**
 * Former name of [styleValueOrNull]. Retained only so that remaining call
 * sites fail to compile with a migration hint.
 */
@Deprecated(
    message = "Use styleValueOrNull() instead",
    level = DeprecationLevel.ERROR,
    replaceWith = ReplaceWith("styleValueOrNull(property)"),
)
fun Element.css(property: String): String? = styleValueOrNull(property)
fun DateFormat.tryParse(str: String?): Long = if (str.isNullOrEmpty()) {
0L

@ -1,3 +1,5 @@
@file:JvmName("StringUtils")
package org.koitharu.kotatsu.parsers.util
import androidx.collection.arraySetOf
Loading…
Cancel
Save