From 74ffe9418be89e566de830a1acc09e5dc604b667 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Fri, 28 Jul 2023 10:49:20 +0300 Subject: [PATCH 1/2] Fix html descriptions --- .../parsers/site/madara/MadaraParser.kt | 68 ++++++++----------- .../parsers/site/madara/en/MangaDass.kt | 10 +-- .../parsers/site/madara/en/MangaDna.kt | 10 +-- .../parsers/site/madara/ja/HachiManga.kt | 10 +-- 4 files changed, 33 insertions(+), 65 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt index c8d767be..3e82266a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt @@ -278,15 +278,7 @@ internal abstract class MadaraParser( async { getChapters(manga, doc) } } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)") ?: body.selectFirst("div.post-content_item:contains(Statut)") @@ -438,31 +430,31 @@ internal abstract class MadaraParser( val d = date?.lowercase() ?: return 0 return when { d.endsWith(" ago") || d.endsWith(" atrás") || // Handle translated 'ago' in Portuguese. - d.startsWith("há ") || // other translated 'ago' in Portuguese. - d.endsWith(" hace") || // other translated 'ago' in Spanish - d.endsWith(" назад") || // other translated 'ago' in Russian - d.endsWith(" önce") || // Handle translated 'ago' in Turkish. - d.endsWith(" trước") || // Handle translated 'ago' in Viêt Nam. - d.startsWith("il y a") || // Handle translated 'ago' in French. - //If there is no ago but just a motion of time - // short Hours - d.endsWith(" h") || - // short Day - d.endsWith(" d") || - // Day in Portuguese - d.endsWith(" días") || d.endsWith(" día") || - // Day in French - d.endsWith(" jour") || d.endsWith(" jours") || - // Hours in Portuguese - d.endsWith(" horas") || d.endsWith(" hora") || - // Hours in french - d.endsWith(" heure") || d.endsWith(" heures") || - // Minutes in English - d.endsWith(" mins") || - // Minutes in Portuguese - d.endsWith(" minutos") || d.endsWith(" minuto") || - //Minutes in French - d.endsWith(" minute") || d.endsWith(" minutes") -> parseRelativeDate(date) + d.startsWith("há ") || // other translated 'ago' in Portuguese. + d.endsWith(" hace") || // other translated 'ago' in Spanish + d.endsWith(" назад") || // other translated 'ago' in Russian + d.endsWith(" önce") || // Handle translated 'ago' in Turkish. + d.endsWith(" trước") || // Handle translated 'ago' in Viêt Nam. + d.startsWith("il y a") || // Handle translated 'ago' in French. + //If there is no ago but just a motion of time + // short Hours + d.endsWith(" h") || + // short Day + d.endsWith(" d") || + // Day in Portuguese + d.endsWith(" días") || d.endsWith(" día") || + // Day in French + d.endsWith(" jour") || d.endsWith(" jours") || + // Hours in Portuguese + d.endsWith(" horas") || d.endsWith(" hora") || + // Hours in french + d.endsWith(" heure") || d.endsWith(" heures") || + // Minutes in English + d.endsWith(" mins") || + // Minutes in Portuguese + d.endsWith(" minutos") || d.endsWith(" minuto") || + //Minutes in French + d.endsWith(" minute") || d.endsWith(" minutes") -> parseRelativeDate(date) // Handle 'yesterday' and 'today', using midnight d.startsWith("year") -> Calendar.getInstance().apply { @@ -559,10 +551,10 @@ internal abstract class MadaraParser( private fun createRequestTemplate() = ("action=madara_load_more&page=1&template=madara-core%2Fcontent%2Fcontent-search&vars%5Bs%5D=&vars%5B" + - "orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" + - "%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" + - "%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" + - "%5D=desc&vars%5Bmanga_archives_item_layout%5D=default").split( + "orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" + + "%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" + + "%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" + + "%5D=desc&vars%5Bmanga_archives_item_layout%5D=default").split( '&', ).map { val pos = it.indexOf('=') diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt index 67b2c6ec..0d6b7bb4 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt @@ -110,15 +110,7 @@ internal class MangaDass(context: MangaLoaderContext) : val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)"))?.selectLast("div.summary-content") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt index bb29e38d..cc4cac05 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt @@ -108,15 +108,7 @@ internal class MangaDna(context: MangaLoaderContext) : val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)"))?.selectLast("div.summary-content") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt index c4c7e3ec..80082117 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt @@ -119,15 +119,7 @@ internal class HachiManga(context: MangaLoaderContext) : MadaraParser(context, M val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(状態)"))?.selectLast("div.summary-content") From d24ab347cbe18c30268203197659576ef1c60d0b Mon Sep 17 00:00:00 2001 From: Koitharu Date: Fri, 28 Jul 2023 14:35:13 +0300 Subject: [PATCH 2/2] Move CloudFlareProtectedException to test sources --- buildSrc/src/main/resources/report.html | 27 ++--------------- .../kotatsu/parsers/CloudFlareInterceptor.kt | 29 +++++++++---------- .../parsers}/CloudFlareProtectedException.kt | 4 +-- 3 files changed, 18 insertions(+), 42 deletions(-) rename src/{main/kotlin/org/koitharu/kotatsu/parsers/exception => test/kotlin/org/koitharu/kotatsu/parsers}/CloudFlareProtectedException.kt (57%) diff --git a/buildSrc/src/main/resources/report.html b/buildSrc/src/main/resources/report.html index b18a0dad..d297533e 100644 --- a/buildSrc/src/main/resources/report.html +++ b/buildSrc/src/main/resources/report.html @@ -57,7 +57,7 @@ data-bs-toggle="modal" style="cursor: pointer;"> - {% elseif case.failure.type == 'org.koitharu.kotatsu.parsers.exception.CloudFlareProtectedException' %} + {% elseif case.failure.type == 'org.koitharu.kotatsu.parsers.CloudFlareProtectedException' %} @@ -101,29 +101,6 @@ feather.replace() - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt index 8878779d..7d106750 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareInterceptor.kt @@ -3,7 +3,6 @@ package org.koitharu.kotatsu.parsers import okhttp3.Interceptor import okhttp3.Response import okhttp3.internal.closeQuietly -import org.koitharu.kotatsu.parsers.exception.CloudFlareProtectedException import java.net.HttpURLConnection private const val HEADER_SERVER = "Server" @@ -11,18 +10,18 @@ private const val SERVER_CLOUDFLARE = "cloudflare" internal class CloudFlareInterceptor : Interceptor { - override fun intercept(chain: Interceptor.Chain): Response { - val request = chain.request() - val response = chain.proceed(request) - if (response.code == HttpURLConnection.HTTP_FORBIDDEN || response.code == HttpURLConnection.HTTP_UNAVAILABLE) { - if (response.header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE) == true) { - response.closeQuietly() - throw CloudFlareProtectedException( - url = response.request.url.toString(), - headers = request.headers, - ) - } - } - return response - } + override fun intercept(chain: Interceptor.Chain): Response { + val request = chain.request() + val response = chain.proceed(request) + if (response.code == HttpURLConnection.HTTP_FORBIDDEN || response.code == HttpURLConnection.HTTP_UNAVAILABLE) { + if (response.header(HEADER_SERVER)?.startsWith(SERVER_CLOUDFLARE) == true) { + response.closeQuietly() + throw CloudFlareProtectedException( + url = response.request.url.toString(), + headers = request.headers, + ) + } + } + return response + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareProtectedException.kt similarity index 57% rename from src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt rename to src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareProtectedException.kt index e06415e3..bbfdae8a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/exception/CloudFlareProtectedException.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/CloudFlareProtectedException.kt @@ -1,4 +1,4 @@ -package org.koitharu.kotatsu.parsers.exception +package org.koitharu.kotatsu.parsers import okhttp3.Headers import okio.IOException @@ -6,4 +6,4 @@ import okio.IOException class CloudFlareProtectedException( val url: String, val headers: Headers, -) : IOException("Protected by CloudFlare: $url") \ No newline at end of file +) : IOException("Protected by CloudFlare: $url")