From d17024b690997e112943a05e99fa68c1b4bb1edf Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 2 May 2022 10:59:48 +0300 Subject: [PATCH] [Madara] Fix Madara parsers --- .../exception/CloudFlareProtectedException.kt | 2 +- .../kotatsu/parsers/site/HenChanParser.kt | 4 +-- .../kotatsu/parsers/site/MadaraParser.kt | 23 ++++++++++------- .../kotatsu/parsers/CloudFlareInterceptor.kt | 25 +++++++++++++++++++ .../kotatsu/parsers/MangaLoaderContextMock.kt | 3 ++- .../kotatsu/parsers/MangaParserTest.kt | 25 ++++++++----------- .../org/koitharu/kotatsu/test_util/Util.kt | 14 ++++------- 7 files changed, 60 insertions(+), 36 deletions(-) create mode 100644 src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt index 8a3b6b59..75337a12 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt @@ -4,4 +4,4 @@ import okio.IOException class CloudFlareProtectedException( val url: String, -) : IOException("Protected by CloudFlare") \ No newline at end of file +) : IOException("Protected by CloudFlare: $url") \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/HenChanParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/HenChanParser.kt index de98849c..d71c6b53 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/HenChanParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/HenChanParser.kt @@ -13,8 +13,8 @@ import org.koitharu.kotatsu.parsers.util.toTitleCase internal class HenChanParser(override val context: MangaLoaderContext) : ChanParser(MangaSource.HENCHAN) { override val configKeyDomain = ConfigKey.Domain( - "xx.hentaichan.live", - arrayOf("xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"), + "xxx.hentaichan.live", + arrayOf("xxx.hentaichan.live", "xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"), ) override suspend fun getList( diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/MadaraParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/MadaraParser.kt index 3a31ee4e..79e71680 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/MadaraParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/MadaraParser.kt @@ -28,6 +28,7 @@ internal abstract class MadaraParser( ) protected open val tagPrefix = "manga-genre/" + protected open val isNsfwSource = false override suspend fun getList( offset: Int, @@ -54,15 +55,14 @@ internal abstract class MadaraParser( payload, ).parseHtml() return doc.select("div.row.c-tabs-item__content").map { div -> - val href = div.selectFirst("a")?.relUrl("href") - ?: parseFailed("Link not found") + val href = div.selectFirst("a")?.relUrl("href") ?: parseFailed("Link not found") val summary = div.selectFirst(".tab-summary") Manga( id = generateUid(href), url = href, publicUrl = href.inContextOf(div), coverUrl = div.selectFirst("img")?.src().orEmpty(), - title = summary?.selectFirst("h3")?.text().orEmpty(), + title = (summary?.selectFirst("h3") ?: summary?.selectFirst("h4"))?.text().orEmpty(), altTitle = null, rating = div.selectFirst("span.total_votes")?.ownText() ?.toFloatOrNull()?.div(5f) ?: -1f, @@ -76,14 +76,14 @@ internal abstract class MadaraParser( author = summary?.selectFirst(".mg_author")?.selectFirst("a")?.ownText(), state = when ( summary?.selectFirst(".mg_status")?.selectFirst(".summary-content") - ?.ownText()?.trim() + ?.ownText()?.trim()?.lowercase() ) { - "OnGoing" -> MangaState.ONGOING - "Completed" -> MangaState.FINISHED + "ongoing" -> MangaState.ONGOING + "completed" -> MangaState.FINISHED else -> null }, source = source, - isNsfw = false, + isNsfw = isNsfwSource, ) } } @@ -337,7 +337,7 @@ internal abstract class MadaraParser( } @MangaSourceParser("TOPMANHUA", "Top Manhua", "en") - class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "topmanhua.com") { + class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "www.topmanhua.com") { override val tagPrefix = "manhua-genre/" } @@ -354,12 +354,17 @@ internal abstract class MadaraParser( MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com") @MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en") - class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") + class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") { + + override val isNsfwSource = true + } @MangaSourceParser("ALLPORN_COMIC", "All Porn Comic", "en") class AllPornComic(context: MangaLoaderContext) : MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") { + override val isNsfwSource = true + override fun getFaviconUrl(): String { return "https://cdn.${getDomain()}/wp-content/uploads/2019/01/cropped-cropped-pcround-32x32.png" } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt new file mode 100644 index 00000000..50a3e9d4 --- /dev/null +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt @@ -0,0 +1,25 @@ +package org.koitharu.kotatsu.parsers + +import okhttp3.Interceptor +import okhttp3.Response +import okhttp3.internal.closeQuietly +import org.koitharu.kotatsu.parsers.exception.CloudFlareProtectedException +import java.net.HttpURLConnection.HTTP_FORBIDDEN +import java.net.HttpURLConnection.HTTP_UNAVAILABLE + +private const val HEADER_SERVER = "Server" +private const val SERVER_CLOUDFLARE = "cloudflare" + +class CloudFlareInterceptor : Interceptor { + + override fun intercept(chain: Interceptor.Chain): Response { + val response = chain.proceed(chain.request()) + if (response.code == HTTP_FORBIDDEN || response.code == HTTP_UNAVAILABLE) { + if (response.header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE) == true) { + response.closeQuietly() + throw CloudFlareProtectedException(chain.request().url.toString()) + } + } + return response + } +} \ No newline at end of file diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt index dfb7f22e..60357aa4 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt @@ -25,6 +25,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() { override val httpClient: OkHttpClient = OkHttpClient.Builder() .cookieJar(cookieJar) .addInterceptor(UserAgentInterceptor(userAgent)) + .addInterceptor(CloudFlareInterceptor()) .connectTimeout(20, TimeUnit.SECONDS) .readTimeout(60, TimeUnit.SECONDS) .writeTimeout(20, TimeUnit.SECONDS) @@ -49,7 +50,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() { .get() .url(url) if (referer != null) { - request.header("Referrer", referer) + request.header("Referer", referer) } return httpClient.newCall(request.build()).await() } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index f6b89ac1..a0e125b2 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -11,7 +11,7 @@ import org.koitharu.kotatsu.parsers.util.medianOrNull import org.koitharu.kotatsu.parsers.util.mimeType import org.koitharu.kotatsu.test_util.isDistinct import org.koitharu.kotatsu.test_util.isDistinctBy -import org.koitharu.kotatsu.test_util.isUrlAbsoulte +import org.koitharu.kotatsu.test_util.isUrlAbsolute import org.koitharu.kotatsu.test_util.maxDuplicates @ExtendWith(AuthCheckExtension::class) @@ -50,7 +50,7 @@ internal class MangaParserTest { fun tags(source: MangaSource) = runTest { val parser = source.newParser(context) val tags = parser.getTags() - assert(tags.isNotEmpty()) + assert(tags.isNotEmpty()) { "No tags found" } val keys = tags.map { it.key } assert(keys.isDistinct()) assert("" !in keys) @@ -73,8 +73,8 @@ internal class MangaParserTest { val manga = list[3] parser.getDetails(manga).apply { assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } - assert(publicUrl.isUrlAbsoulte()) { "Manga public url is not absolute: '$publicUrl'" } - assert(description != null) { "Detailed description is null" } + assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" } + assert(description != null) { "Detailed description is null: '$publicUrl'" } assert(title.startsWith(manga.title)) { "Titles are mismatch: '$title' and '${manga.title}' for $publicUrl" } @@ -86,9 +86,6 @@ internal class MangaParserTest { assert(c.isDistinctBy { it.number to it.branch }) { "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" } - assert(c.isDistinctBy { it.name to it.branch }) { - "Chapters are not distinct by name: ${c.maxDuplicates { it.name to it.branch }} for $publicUrl" - } assert(c.all { it.source == source }) checkImageRequest(coverUrl, publicUrl) largeCoverUrl?.let { @@ -113,7 +110,7 @@ internal class MangaParserTest { val page = pages.medianOrNull() ?: error("No page") val pageUrl = parser.getPageUrl(page) assert(pageUrl.isNotEmpty()) - assert(pageUrl.isUrlAbsoulte()) + assert(pageUrl.isUrlAbsolute()) checkImageRequest(pageUrl, page.referer) } @@ -122,7 +119,7 @@ internal class MangaParserTest { fun favicon(source: MangaSource) = runTest { val parser = source.newParser(context) val faviconUrl = parser.getFaviconUrl() - assert(faviconUrl.isUrlAbsoulte()) + assert(faviconUrl.isUrlAbsolute()) checkImageRequest(faviconUrl, null) } @@ -142,11 +139,11 @@ internal class MangaParserTest { assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" } assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" } for (item in list) { - assert(item.url.isNotEmpty()) - assert(!item.url.isUrlAbsoulte()) - assert(item.coverUrl.isUrlAbsoulte()) { "Cover url is not absolute: ${item.coverUrl}" } + assert(item.url.isNotEmpty()) { "Url is empty" } + assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" } + assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" } assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" } - assert(item.publicUrl.isUrlAbsoulte()) + assert(item.publicUrl.isUrlAbsolute()) } val testItem = list.random() checkImageRequest(testItem.coverUrl, testItem.publicUrl) @@ -154,7 +151,7 @@ internal class MangaParserTest { private suspend fun checkImageRequest(url: String, referer: String?) { context.doRequest(url, referer).use { - assert(it.isSuccessful) { "Request failed: ${it.code}: ${it.message}" } + assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" } assert(it.mimeType?.startsWith("image/") == true) { "Wrong response mime type: ${it.mimeType}" } diff --git a/src/test/kotlin/org/koitharu/kotatsu/test_util/Util.kt b/src/test/kotlin/org/koitharu/kotatsu/test_util/Util.kt index 095357ba..f31c2777 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/test_util/Util.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/test_util/Util.kt @@ -2,8 +2,8 @@ package org.koitharu.kotatsu.test_util import androidx.collection.ArraySet -private val PATTERN_URL_ABSOLUTE = Regex("https?://\\S+", setOf(RegexOption.IGNORE_CASE)) -private val PATTERN_URL_RELATIVE = Regex("^/\\S+", setOf(RegexOption.IGNORE_CASE)) +private val PATTERN_URL_ABSOLUTE = Regex("^https?://[\\s\\S]+", setOf(RegexOption.IGNORE_CASE)) +private val PATTERN_URL_RELATIVE = Regex("^/[\\s\\S]+", setOf(RegexOption.IGNORE_CASE)) internal fun Collection.isDistinct(): Boolean { val set = ArraySet(size) @@ -26,18 +26,14 @@ internal fun Collection.isDistinctBy(selector: (T) -> K): Boolean { } internal fun String.isUrlRelative() = matches(PATTERN_URL_RELATIVE) -internal fun String.isUrlAbsoulte() = matches(PATTERN_URL_ABSOLUTE) +internal fun String.isUrlAbsolute() = matches(PATTERN_URL_ABSOLUTE) internal inline fun Collection.maxDuplicates(selector: (T) -> K): K? { return groupBy(selector).maxByOrNull { it.value.size }?.key } @Suppress("NOTHING_TO_INLINE") -inline operator fun List.component6(): T { - return get(5) -} +inline operator fun List.component6(): T = get(5) @Suppress("NOTHING_TO_INLINE") -inline operator fun List.component7(): T { - return get(6) -} \ No newline at end of file +inline operator fun List.component7(): T = get(6) \ No newline at end of file