[Madara] Fix Madara parsers

Koitharu 4 years ago
parent 4673fb7ec7
commit d17024b690
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -4,4 +4,4 @@ import okio.IOException
class CloudFlareProtectedException( class CloudFlareProtectedException(
val url: String, val url: String,
) : IOException("Protected by CloudFlare") ) : IOException("Protected by CloudFlare: $url")

@ -13,8 +13,8 @@ import org.koitharu.kotatsu.parsers.util.toTitleCase
internal class HenChanParser(override val context: MangaLoaderContext) : ChanParser(MangaSource.HENCHAN) { internal class HenChanParser(override val context: MangaLoaderContext) : ChanParser(MangaSource.HENCHAN) {
override val configKeyDomain = ConfigKey.Domain( override val configKeyDomain = ConfigKey.Domain(
"xx.hentaichan.live", "xxx.hentaichan.live",
arrayOf("xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"), arrayOf("xxx.hentaichan.live", "xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"),
) )
override suspend fun getList( override suspend fun getList(

@ -28,6 +28,7 @@ internal abstract class MadaraParser(
) )
protected open val tagPrefix = "manga-genre/" protected open val tagPrefix = "manga-genre/"
protected open val isNsfwSource = false
override suspend fun getList( override suspend fun getList(
offset: Int, offset: Int,
@ -54,15 +55,14 @@ internal abstract class MadaraParser(
payload, payload,
).parseHtml() ).parseHtml()
return doc.select("div.row.c-tabs-item__content").map { div -> return doc.select("div.row.c-tabs-item__content").map { div ->
val href = div.selectFirst("a")?.relUrl("href") val href = div.selectFirst("a")?.relUrl("href") ?: parseFailed("Link not found")
?: parseFailed("Link not found")
val summary = div.selectFirst(".tab-summary") val summary = div.selectFirst(".tab-summary")
Manga( Manga(
id = generateUid(href), id = generateUid(href),
url = href, url = href,
publicUrl = href.inContextOf(div), publicUrl = href.inContextOf(div),
coverUrl = div.selectFirst("img")?.src().orEmpty(), coverUrl = div.selectFirst("img")?.src().orEmpty(),
title = summary?.selectFirst("h3")?.text().orEmpty(), title = (summary?.selectFirst("h3") ?: summary?.selectFirst("h4"))?.text().orEmpty(),
altTitle = null, altTitle = null,
rating = div.selectFirst("span.total_votes")?.ownText() rating = div.selectFirst("span.total_votes")?.ownText()
?.toFloatOrNull()?.div(5f) ?: -1f, ?.toFloatOrNull()?.div(5f) ?: -1f,
@ -76,14 +76,14 @@ internal abstract class MadaraParser(
author = summary?.selectFirst(".mg_author")?.selectFirst("a")?.ownText(), author = summary?.selectFirst(".mg_author")?.selectFirst("a")?.ownText(),
state = when ( state = when (
summary?.selectFirst(".mg_status")?.selectFirst(".summary-content") summary?.selectFirst(".mg_status")?.selectFirst(".summary-content")
?.ownText()?.trim() ?.ownText()?.trim()?.lowercase()
) { ) {
"OnGoing" -> MangaState.ONGOING "ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED "completed" -> MangaState.FINISHED
else -> null else -> null
}, },
source = source, source = source,
isNsfw = false, isNsfw = isNsfwSource,
) )
} }
} }
@ -337,7 +337,7 @@ internal abstract class MadaraParser(
} }
@MangaSourceParser("TOPMANHUA", "Top Manhua", "en") @MangaSourceParser("TOPMANHUA", "Top Manhua", "en")
class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "topmanhua.com") { class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "www.topmanhua.com") {
override val tagPrefix = "manhua-genre/" override val tagPrefix = "manhua-genre/"
} }
@ -354,12 +354,17 @@ internal abstract class MadaraParser(
MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com") MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com")
@MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en") @MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en")
class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
override val isNsfwSource = true
}
@MangaSourceParser("ALLPORN_COMIC", "All Porn Comic", "en") @MangaSourceParser("ALLPORN_COMIC", "All Porn Comic", "en")
class AllPornComic(context: MangaLoaderContext) : class AllPornComic(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") { MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") {
override val isNsfwSource = true
override fun getFaviconUrl(): String { override fun getFaviconUrl(): String {
return "https://cdn.${getDomain()}/wp-content/uploads/2019/01/cropped-cropped-pcround-32x32.png" return "https://cdn.${getDomain()}/wp-content/uploads/2019/01/cropped-cropped-pcround-32x32.png"
} }

@ -0,0 +1,25 @@
package org.koitharu.kotatsu.parsers
import okhttp3.Interceptor
import okhttp3.Response
import okhttp3.internal.closeQuietly
import org.koitharu.kotatsu.parsers.exception.CloudFlareProtectedException
import java.net.HttpURLConnection.HTTP_FORBIDDEN
import java.net.HttpURLConnection.HTTP_UNAVAILABLE
private const val HEADER_SERVER = "Server"
private const val SERVER_CLOUDFLARE = "cloudflare"
/**
 * OkHttp [Interceptor] that converts a CloudFlare-served error response into a
 * [CloudFlareProtectedException].
 *
 * A response is rejected only when both conditions hold:
 * - its status code is [HTTP_FORBIDDEN] or [HTTP_UNAVAILABLE];
 * - its `Server` header is present and starts with [SERVER_CLOUDFLARE] (case-sensitive).
 *
 * Every other response is passed through to the caller untouched.
 */
class CloudFlareInterceptor : Interceptor {

    /**
     * Proceeds with the chain's request and inspects the resulting response.
     *
     * @param chain the interceptor chain supplying the request to execute.
     * @return the response unchanged when it does not match the CloudFlare criteria.
     * @throws CloudFlareProtectedException carrying the request URL when the response matches.
     */
    override fun intercept(chain: Interceptor.Chain): Response {
        val request = chain.request()
        val response = chain.proceed(request)

        val isSuspiciousStatus = when (response.code) {
            HTTP_FORBIDDEN, HTTP_UNAVAILABLE -> true
            else -> false
        }
        if (!isSuspiciousStatus) {
            return response
        }

        // A missing Server header is treated the same as a non-CloudFlare one.
        val serverName = response.header(HEADER_SERVER) ?: return response
        if (!serverName.startsWith(SERVER_CLOUDFLARE)) {
            return response
        }

        // The caller never receives this response, so close it here before throwing.
        response.closeQuietly()
        throw CloudFlareProtectedException(request.url.toString())
    }
}

@ -25,6 +25,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() {
override val httpClient: OkHttpClient = OkHttpClient.Builder() override val httpClient: OkHttpClient = OkHttpClient.Builder()
.cookieJar(cookieJar) .cookieJar(cookieJar)
.addInterceptor(UserAgentInterceptor(userAgent)) .addInterceptor(UserAgentInterceptor(userAgent))
.addInterceptor(CloudFlareInterceptor())
.connectTimeout(20, TimeUnit.SECONDS) .connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS) .readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS) .writeTimeout(20, TimeUnit.SECONDS)
@ -49,7 +50,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() {
.get() .get()
.url(url) .url(url)
if (referer != null) { if (referer != null) {
request.header("Referrer", referer) request.header("Referer", referer)
} }
return httpClient.newCall(request.build()).await() return httpClient.newCall(request.build()).await()
} }

@ -11,7 +11,7 @@ import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct import org.koitharu.kotatsu.test_util.isDistinct
import org.koitharu.kotatsu.test_util.isDistinctBy import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsoulte import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates import org.koitharu.kotatsu.test_util.maxDuplicates
@ExtendWith(AuthCheckExtension::class) @ExtendWith(AuthCheckExtension::class)
@ -50,7 +50,7 @@ internal class MangaParserTest {
fun tags(source: MangaSource) = runTest { fun tags(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val tags = parser.getTags() val tags = parser.getTags()
assert(tags.isNotEmpty()) assert(tags.isNotEmpty()) { "No tags found" }
val keys = tags.map { it.key } val keys = tags.map { it.key }
assert(keys.isDistinct()) assert(keys.isDistinct())
assert("" !in keys) assert("" !in keys)
@ -73,8 +73,8 @@ internal class MangaParserTest {
val manga = list[3] val manga = list[3]
parser.getDetails(manga).apply { parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
assert(publicUrl.isUrlAbsoulte()) { "Manga public url is not absolute: '$publicUrl'" } assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }
assert(description != null) { "Detailed description is null" } assert(description != null) { "Detailed description is null: '$publicUrl'" }
assert(title.startsWith(manga.title)) { assert(title.startsWith(manga.title)) {
"Titles are mismatch: '$title' and '${manga.title}' for $publicUrl" "Titles are mismatch: '$title' and '${manga.title}' for $publicUrl"
} }
@ -86,9 +86,6 @@ internal class MangaParserTest {
assert(c.isDistinctBy { it.number to it.branch }) { assert(c.isDistinctBy { it.number to it.branch }) {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
} }
assert(c.isDistinctBy { it.name to it.branch }) {
"Chapters are not distinct by name: ${c.maxDuplicates { it.name to it.branch }} for $publicUrl"
}
assert(c.all { it.source == source }) assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl) checkImageRequest(coverUrl, publicUrl)
largeCoverUrl?.let { largeCoverUrl?.let {
@ -113,7 +110,7 @@ internal class MangaParserTest {
val page = pages.medianOrNull() ?: error("No page") val page = pages.medianOrNull() ?: error("No page")
val pageUrl = parser.getPageUrl(page) val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty()) assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsoulte()) assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer) checkImageRequest(pageUrl, page.referer)
} }
@ -122,7 +119,7 @@ internal class MangaParserTest {
fun favicon(source: MangaSource) = runTest { fun favicon(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val faviconUrl = parser.getFaviconUrl() val faviconUrl = parser.getFaviconUrl()
assert(faviconUrl.isUrlAbsoulte()) assert(faviconUrl.isUrlAbsolute())
checkImageRequest(faviconUrl, null) checkImageRequest(faviconUrl, null)
} }
@ -142,11 +139,11 @@ internal class MangaParserTest {
assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" } assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" }
assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" } assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" }
for (item in list) { for (item in list) {
assert(item.url.isNotEmpty()) assert(item.url.isNotEmpty()) { "Url is empty" }
assert(!item.url.isUrlAbsoulte()) assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" }
assert(item.coverUrl.isUrlAbsoulte()) { "Cover url is not absolute: ${item.coverUrl}" } assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" }
assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" } assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" }
assert(item.publicUrl.isUrlAbsoulte()) assert(item.publicUrl.isUrlAbsolute())
} }
val testItem = list.random() val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl) checkImageRequest(testItem.coverUrl, testItem.publicUrl)
@ -154,7 +151,7 @@ internal class MangaParserTest {
private suspend fun checkImageRequest(url: String, referer: String?) { private suspend fun checkImageRequest(url: String, referer: String?) {
context.doRequest(url, referer).use { context.doRequest(url, referer).use {
assert(it.isSuccessful) { "Request failed: ${it.code}: ${it.message}" } assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) { assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}" "Wrong response mime type: ${it.mimeType}"
} }

@ -2,8 +2,8 @@ package org.koitharu.kotatsu.test_util
import androidx.collection.ArraySet import androidx.collection.ArraySet
private val PATTERN_URL_ABSOLUTE = Regex("https?://\\S+", setOf(RegexOption.IGNORE_CASE)) private val PATTERN_URL_ABSOLUTE = Regex("^https?://[\\s\\S]+", setOf(RegexOption.IGNORE_CASE))
private val PATTERN_URL_RELATIVE = Regex("^/\\S+", setOf(RegexOption.IGNORE_CASE)) private val PATTERN_URL_RELATIVE = Regex("^/[\\s\\S]+", setOf(RegexOption.IGNORE_CASE))
internal fun <T> Collection<T>.isDistinct(): Boolean { internal fun <T> Collection<T>.isDistinct(): Boolean {
val set = ArraySet<T>(size) val set = ArraySet<T>(size)
@ -26,18 +26,14 @@ internal fun <T, K> Collection<T>.isDistinctBy(selector: (T) -> K): Boolean {
} }
internal fun String.isUrlRelative() = matches(PATTERN_URL_RELATIVE) internal fun String.isUrlRelative() = matches(PATTERN_URL_RELATIVE)
internal fun String.isUrlAbsoulte() = matches(PATTERN_URL_ABSOLUTE) internal fun String.isUrlAbsolute() = matches(PATTERN_URL_ABSOLUTE)
internal inline fun <T, K> Collection<T>.maxDuplicates(selector: (T) -> K): K? { internal inline fun <T, K> Collection<T>.maxDuplicates(selector: (T) -> K): K? {
return groupBy(selector).maxByOrNull { it.value.size }?.key return groupBy(selector).maxByOrNull { it.value.size }?.key
} }
@Suppress("NOTHING_TO_INLINE") @Suppress("NOTHING_TO_INLINE")
inline operator fun <T> List<T>.component6(): T { inline operator fun <T> List<T>.component6(): T = get(5)
return get(5)
}
@Suppress("NOTHING_TO_INLINE") @Suppress("NOTHING_TO_INLINE")
inline operator fun <T> List<T>.component7(): T { inline operator fun <T> List<T>.component7(): T = get(6)
return get(6)
}
Loading…
Cancel
Save