[Madara] Fix Madara parsers

pull/13/head
Koitharu 4 years ago
parent 4673fb7ec7
commit d17024b690
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -4,4 +4,4 @@ import okio.IOException
class CloudFlareProtectedException(
val url: String,
) : IOException("Protected by CloudFlare")
) : IOException("Protected by CloudFlare: $url")

@ -13,8 +13,8 @@ import org.koitharu.kotatsu.parsers.util.toTitleCase
internal class HenChanParser(override val context: MangaLoaderContext) : ChanParser(MangaSource.HENCHAN) {
override val configKeyDomain = ConfigKey.Domain(
"xx.hentaichan.live",
arrayOf("xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"),
"xxx.hentaichan.live",
arrayOf("xxx.hentaichan.live", "xx.hentaichan.live", "hentaichan.live", "hentaichan.pro"),
)
override suspend fun getList(

@ -28,6 +28,7 @@ internal abstract class MadaraParser(
)
protected open val tagPrefix = "manga-genre/"
protected open val isNsfwSource = false
override suspend fun getList(
offset: Int,
@ -54,15 +55,14 @@ internal abstract class MadaraParser(
payload,
).parseHtml()
return doc.select("div.row.c-tabs-item__content").map { div ->
val href = div.selectFirst("a")?.relUrl("href")
?: parseFailed("Link not found")
val href = div.selectFirst("a")?.relUrl("href") ?: parseFailed("Link not found")
val summary = div.selectFirst(".tab-summary")
Manga(
id = generateUid(href),
url = href,
publicUrl = href.inContextOf(div),
coverUrl = div.selectFirst("img")?.src().orEmpty(),
title = summary?.selectFirst("h3")?.text().orEmpty(),
title = (summary?.selectFirst("h3") ?: summary?.selectFirst("h4"))?.text().orEmpty(),
altTitle = null,
rating = div.selectFirst("span.total_votes")?.ownText()
?.toFloatOrNull()?.div(5f) ?: -1f,
@ -76,14 +76,14 @@ internal abstract class MadaraParser(
author = summary?.selectFirst(".mg_author")?.selectFirst("a")?.ownText(),
state = when (
summary?.selectFirst(".mg_status")?.selectFirst(".summary-content")
?.ownText()?.trim()
?.ownText()?.trim()?.lowercase()
) {
"OnGoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
"ongoing" -> MangaState.ONGOING
"completed" -> MangaState.FINISHED
else -> null
},
source = source,
isNsfw = false,
isNsfw = isNsfwSource,
)
}
}
@ -337,7 +337,7 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("TOPMANHUA", "Top Manhua", "en")
class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "topmanhua.com") {
class TopManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.TOPMANHUA, "www.topmanhua.com") {
override val tagPrefix = "manhua-genre/"
}
@ -354,12 +354,17 @@ internal abstract class MadaraParser(
MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com")
@MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en")
class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net")
class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
override val isNsfwSource = true
}
@MangaSourceParser("ALLPORN_COMIC", "All Porn Comic", "en")
class AllPornComic(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") {
override val isNsfwSource = true
override fun getFaviconUrl(): String {
return "https://cdn.${getDomain()}/wp-content/uploads/2019/01/cropped-cropped-pcround-32x32.png"
}

@ -0,0 +1,25 @@
package org.koitharu.kotatsu.parsers
import okhttp3.Interceptor
import okhttp3.Response
import okhttp3.internal.closeQuietly
import org.koitharu.kotatsu.parsers.exception.CloudFlareProtectedException
import java.net.HttpURLConnection.HTTP_FORBIDDEN
import java.net.HttpURLConnection.HTTP_UNAVAILABLE
private const val HEADER_SERVER = "Server"
private const val SERVER_CLOUDFLARE = "cloudflare"
/**
 * OkHttp [Interceptor] that converts a CloudFlare-protected response into a
 * [CloudFlareProtectedException].
 *
 * A response is treated as protected when its status code is
 * [HTTP_FORBIDDEN] or [HTTP_UNAVAILABLE] and its `Server` header starts with
 * `cloudflare` (compared case-insensitively). Any other response is returned untouched.
 */
class CloudFlareInterceptor : Interceptor {

    /**
     * Proceeds with the chain's request and inspects the resulting response.
     *
     * @param chain the interceptor chain supplying the request.
     * @return the response produced by the rest of the chain when it is not CloudFlare-protected.
     * @throws CloudFlareProtectedException carrying the request URL when the response is protected.
     */
    override fun intercept(chain: Interceptor.Chain): Response {
        val request = chain.request()
        val response = chain.proceed(request)
        if (response.isProtectedByCloudFlare()) {
            // The response is not handed to the caller on this path, so release it before throwing.
            // The public Response.close() is used instead of the okhttp3.internal helper.
            response.close()
            throw CloudFlareProtectedException(request.url.toString())
        }
        return response
    }

    /** Returns `true` when the status code and `Server` header match the CloudFlare criteria. */
    private fun Response.isProtectedByCloudFlare(): Boolean {
        if (code != HTTP_FORBIDDEN && code != HTTP_UNAVAILABLE) {
            return false
        }
        return header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE, ignoreCase = true) == true
    }
}

@ -25,6 +25,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() {
override val httpClient: OkHttpClient = OkHttpClient.Builder()
.cookieJar(cookieJar)
.addInterceptor(UserAgentInterceptor(userAgent))
.addInterceptor(CloudFlareInterceptor())
.connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS)
@ -49,7 +50,7 @@ internal class MangaLoaderContextMock : MangaLoaderContext() {
.get()
.url(url)
if (referer != null) {
request.header("Referrer", referer)
request.header("Referer", referer)
}
return httpClient.newCall(request.build()).await()
}

@ -11,7 +11,7 @@ import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct
import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsoulte
import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates
@ExtendWith(AuthCheckExtension::class)
@ -50,7 +50,7 @@ internal class MangaParserTest {
fun tags(source: MangaSource) = runTest {
val parser = source.newParser(context)
val tags = parser.getTags()
assert(tags.isNotEmpty())
assert(tags.isNotEmpty()) { "No tags found" }
val keys = tags.map { it.key }
assert(keys.isDistinct())
assert("" !in keys)
@ -73,8 +73,8 @@ internal class MangaParserTest {
val manga = list[3]
parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
assert(publicUrl.isUrlAbsoulte()) { "Manga public url is not absolute: '$publicUrl'" }
assert(description != null) { "Detailed description is null" }
assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }
assert(description != null) { "Detailed description is null: '$publicUrl'" }
assert(title.startsWith(manga.title)) {
"Titles are mismatch: '$title' and '${manga.title}' for $publicUrl"
}
@ -86,9 +86,6 @@ internal class MangaParserTest {
assert(c.isDistinctBy { it.number to it.branch }) {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
}
assert(c.isDistinctBy { it.name to it.branch }) {
"Chapters are not distinct by name: ${c.maxDuplicates { it.name to it.branch }} for $publicUrl"
}
assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl)
largeCoverUrl?.let {
@ -113,7 +110,7 @@ internal class MangaParserTest {
val page = pages.medianOrNull() ?: error("No page")
val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsoulte())
assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer)
}
@ -122,7 +119,7 @@ internal class MangaParserTest {
fun favicon(source: MangaSource) = runTest {
val parser = source.newParser(context)
val faviconUrl = parser.getFaviconUrl()
assert(faviconUrl.isUrlAbsoulte())
assert(faviconUrl.isUrlAbsolute())
checkImageRequest(faviconUrl, null)
}
@ -142,11 +139,11 @@ internal class MangaParserTest {
assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" }
assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" }
for (item in list) {
assert(item.url.isNotEmpty())
assert(!item.url.isUrlAbsoulte())
assert(item.coverUrl.isUrlAbsoulte()) { "Cover url is not absolute: ${item.coverUrl}" }
assert(item.url.isNotEmpty()) { "Url is empty" }
assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" }
assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" }
assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" }
assert(item.publicUrl.isUrlAbsoulte())
assert(item.publicUrl.isUrlAbsolute())
}
val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl)
@ -154,7 +151,7 @@ internal class MangaParserTest {
private suspend fun checkImageRequest(url: String, referer: String?) {
context.doRequest(url, referer).use {
assert(it.isSuccessful) { "Request failed: ${it.code}: ${it.message}" }
assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}"
}

@ -2,8 +2,8 @@ package org.koitharu.kotatsu.test_util
import androidx.collection.ArraySet
private val PATTERN_URL_ABSOLUTE = Regex("https?://\\S+", setOf(RegexOption.IGNORE_CASE))
private val PATTERN_URL_RELATIVE = Regex("^/\\S+", setOf(RegexOption.IGNORE_CASE))
private val PATTERN_URL_ABSOLUTE = Regex("^https?://[\\s\\S]+", setOf(RegexOption.IGNORE_CASE))
private val PATTERN_URL_RELATIVE = Regex("^/[\\s\\S]+", setOf(RegexOption.IGNORE_CASE))
internal fun <T> Collection<T>.isDistinct(): Boolean {
val set = ArraySet<T>(size)
@ -26,18 +26,14 @@ internal fun <T, K> Collection<T>.isDistinctBy(selector: (T) -> K): Boolean {
}
/** Returns `true` when the entire string matches [PATTERN_URL_RELATIVE]. */
internal fun String.isUrlRelative(): Boolean = PATTERN_URL_RELATIVE.matches(this)
internal fun String.isUrlAbsoulte() = matches(PATTERN_URL_ABSOLUTE)
internal fun String.isUrlAbsolute() = matches(PATTERN_URL_ABSOLUTE)
/**
 * Groups the elements by [selector] and returns the key of the largest group.
 *
 * @param selector computes the grouping key for an element.
 * @return the key shared by the most elements, or `null` when the collection is empty.
 */
internal inline fun <T, K> Collection<T>.maxDuplicates(selector: (T) -> K): K? =
    groupBy(selector)
        .entries
        .maxByOrNull { (_, members) -> members.size }
        ?.key
@Suppress("NOTHING_TO_INLINE")
inline operator fun <T> List<T>.component6(): T {
return get(5)
}
inline operator fun <T> List<T>.component6(): T = get(5)
@Suppress("NOTHING_TO_INLINE")
inline operator fun <T> List<T>.component7(): T {
return get(6)
}
inline operator fun <T> List<T>.component7(): T = get(6)
Loading…
Cancel
Save