diff --git a/.github/summary.yaml b/.github/summary.yaml index 64b4a2b3..7d6d6314 100644 --- a/.github/summary.yaml +++ b/.github/summary.yaml @@ -1 +1 @@ -total: 1120 \ No newline at end of file +total: 1121 \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/network/CloudFlareHelper.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/network/CloudFlareHelper.kt new file mode 100644 index 00000000..f5f514db --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/network/CloudFlareHelper.kt @@ -0,0 +1,47 @@ +package org.koitharu.kotatsu.parsers.network + +import okhttp3.CookieJar +import okhttp3.HttpUrl.Companion.toHttpUrl +import okhttp3.Response +import org.jsoup.Jsoup +import java.net.HttpURLConnection.HTTP_FORBIDDEN +import java.net.HttpURLConnection.HTTP_UNAVAILABLE + +public object CloudFlareHelper { + + public const val PROTECTION_NOT_DETECTED: Int = 0 + public const val PROTECTION_CAPTCHA: Int = 1 + public const val PROTECTION_BLOCKED: Int = 2 + + private const val CF_CLEARANCE = "cf_clearance" + + public fun checkResponseForProtection(response: Response): Int { + if (response.code != HTTP_FORBIDDEN && response.code != HTTP_UNAVAILABLE) { + return PROTECTION_NOT_DETECTED + } + val content = if (response.body != null) { + response.peekBody(Long.MAX_VALUE).use { + Jsoup.parse(it.byteStream(), Charsets.UTF_8.name(), response.request.url.toString()) + } + } else { + return PROTECTION_NOT_DETECTED + } + return when { + content.selectFirst("h2[data-translate=\"blocked_why_headline\"]") != null -> PROTECTION_BLOCKED + content.getElementById("challenge-error-title") != null || content.getElementById("challenge-error-text") != null -> PROTECTION_CAPTCHA + + else -> PROTECTION_NOT_DETECTED + } + } + + public fun getClearanceCookie(cookieJar: CookieJar, url: String): String? { + return cookieJar.loadForRequest(url.toHttpUrl()).find { it.name == CF_CLEARANCE }?.value + } + + public fun isCloudFlareCookie(name: String): Boolean { + return name.startsWith("cf_") + || name.startsWith("_cf") + || name.startsWith("__cf") + || name == "csrftoken" + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ExHentaiParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ExHentaiParser.kt index 076250b4..e8025121 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ExHentaiParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/ExHentaiParser.kt @@ -22,7 +22,6 @@ import org.koitharu.kotatsu.parsers.util.* import java.util.* import java.util.Collections.emptyList import java.util.concurrent.TimeUnit -import kotlin.math.pow private const val DOMAIN_UNAUTHORIZED = "e-hentai.org" private const val DOMAIN_AUTHORIZED = "exhentai.org" @@ -46,7 +45,6 @@ internal class ExHentaiParser( private val ratingPattern = Regex("-?[0-9]+px") private val authCookies = arrayOf("ipb_member_id", "ipb_pass_hash") - private var updateDm = false private val nextPages = SparseArrayCompat() private val suspiciousContentKey = ConfigKey.ShowSuspiciousContent(false) @@ -113,6 +111,15 @@ internal class ExHentaiParser( ) override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List { + return getListPage(page, order, filter, updateDm = false) + } + + private suspend fun getListPage( + page: Int, + order: SortOrder, + filter: MangaListFilter, + updateDm: Boolean, + ): List { val next = nextPages.get(page, 0L) if (page > 0 && next == 0L) { @@ -137,15 +144,18 @@ internal class ExHentaiParser( url.addQueryParameter("f_sh", "on") } val body = webClient.httpGet(url.build()).parseHtml().body() - val root = body.selectFirst("table.itg") - ?.selectFirst("tbody") - ?: if (updateDm) { - body.parseFailed("Cannot find root") + val root = body.selectFirst("table.itg")?.selectFirst("tbody") + if (root == null) { + if (updateDm) { + if (body.getElementsContainingText("No hits found").isNotEmpty()) { + return emptyList() + } else { + body.parseFailed("Cannot find root") + } } else { - updateDm = true - return getListPage(page, order, filter) + return getListPage(page, order, filter, updateDm = true) } - updateDm = false + } nextPages[page + 1] = getNextTimestamp(body) return root.children().mapNotNull { tr -> @@ -155,13 +165,6 @@ internal class ExHentaiParser( val a = gLink.parents().select("a").first() ?: gLink.parseFailed("link not found") val href = a.attrAsRelativeUrl("href") val tagsDiv = gLink.nextElementSibling() ?: gLink.parseFailed("tags div not found") - val mainTag = td2.selectFirst("div.cn")?.let { div -> - MangaTag( - title = div.text().toTitleCase(Locale.ENGLISH), - key = tagIdByClass(div.classNames()) ?: return@let null, - source = source, - ) - } Manga( id = generateUid(href), title = gLink.text().cleanupTitle(), @@ -171,7 +174,7 @@ internal class ExHentaiParser( rating = td2.selectFirst("div.ir")?.parseRating() ?: RATING_UNKNOWN, isNsfw = true, coverUrl = td1.selectFirst("img")?.absUrl("src").orEmpty(), - tags = setOfNotNull(mainTag) + tagsDiv.parseTags(), + tags = tagsDiv.parseTags(), state = null, author = tagsDiv.getElementsContainingOwnText("artist:").first() ?.nextElementSibling()?.text(), @@ -250,8 +253,9 @@ internal class ExHentaiParser( return doc.body().requireElementById("img").attrAsAbsoluteUrl("src") } - private val tags = - "ahegao,anal,angel,apron,bandages,bbw,bdsm,beauty mark,big areolae,big ass,big breasts,big clit,big lips," + + @Suppress("SpellCheckingInspection") + private val tags: String + get() = "ahegao,anal,angel,apron,bandages,bbw,bdsm,beauty mark,big areolae,big ass,big breasts,big clit,big lips," + "big nipples,bikini,blackmail,bloomers,blowjob,bodysuit,bondage,breast expansion,bukkake,bunny girl,business suit," + "catgirl,centaur,cheating,chinese dress,christmas,collar,corset,cosplaying,cowgirl,crossdressing,cunnilingus," + "dark skin,daughter,deepthroat,defloration,demon girl,double penetration,dougi,dragon,drunk,elf,exhibitionism,farting," + @@ -387,12 +391,6 @@ internal class ExHentaiParser( return result } - private fun tagIdByClass(classNames: Collection): String? { - val className = classNames.find { x -> x.startsWith("ct") } ?: return null - val num = className.drop(2).toIntOrNull(16) ?: return null - return 2.0.pow(num).toInt().toString() - } - private fun getNextTimestamp(root: Element): Long { return root.getElementById("unext") ?.attrAsAbsoluteUrlOrNull("href") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt index 9cc9f8ba..f583f59b 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/galleryadults/all/NHentaiParser.kt @@ -115,7 +115,7 @@ internal class NHentaiParser(context: MangaLoaderContext) : val name = it.selectFirst(".name")?.text() ?: it.text() MangaTag( key = key, - title = name, + title = name.toTitleCase(sourceLocale), source = source, ) } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/CuuTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/CuuTruyenParser.kt index c6f5bc1a..7db0bbdb 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/CuuTruyenParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/vi/CuuTruyenParser.kt @@ -101,7 +101,7 @@ internal class CuuTruyenParser(context: MangaLoaderContext) : throw e } } - val data = json.getJSONArray("data") + val data = json.optJSONArray("data") ?: json.getJSONObject("data").getJSONArray("mangas") return data.mapJSON { jo -> Manga( @@ -227,7 +227,6 @@ internal class CuuTruyenParser(context: MangaLoaderContext) : } private fun availableTags() = arraySetOf( - MangaTag("Tất cả", "", source), MangaTag("Manga", "manga", source), MangaTag("Đang tiến hành", "dang-tien-hanh", source), MangaTag("Thể thao", "the-thao", source), diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt index 7d106750..69efa9a4 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt @@ -3,24 +3,19 @@ package org.koitharu.kotatsu.parsers import okhttp3.Interceptor import okhttp3.Response import okhttp3.internal.closeQuietly -import java.net.HttpURLConnection - -private const val HEADER_SERVER = "Server" -private const val SERVER_CLOUDFLARE = "cloudflare" +import org.koitharu.kotatsu.parsers.network.CloudFlareHelper internal class CloudFlareInterceptor : Interceptor { override fun intercept(chain: Interceptor.Chain): Response { val request = chain.request() val response = chain.proceed(request) - if (response.code == HttpURLConnection.HTTP_FORBIDDEN || response.code == HttpURLConnection.HTTP_UNAVAILABLE) { - if (response.header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE) == true) { - response.closeQuietly() - throw CloudFlareProtectedException( - url = response.request.url.toString(), - headers = request.headers, - ) - } + if (CloudFlareHelper.checkResponseForProtection(response) != CloudFlareHelper.PROTECTION_NOT_DETECTED) { + response.closeQuietly() + throw CloudFlareProtectedException( + url = response.request.url.toString(), + headers = request.headers, + ) } return response }