From 74ffe9418be89e566de830a1acc09e5dc604b667 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Fri, 28 Jul 2023 10:49:20 +0300 Subject: [PATCH] Fix html descriptions --- .../parsers/site/madara/MadaraParser.kt | 68 ++++++++----------- .../parsers/site/madara/en/MangaDass.kt | 10 +-- .../parsers/site/madara/en/MangaDna.kt | 10 +-- .../parsers/site/madara/ja/HachiManga.kt | 10 +-- 4 files changed, 33 insertions(+), 65 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt index c8d767be..3e82266a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt @@ -278,15 +278,7 @@ internal abstract class MadaraParser( async { getChapters(manga, doc) } } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)") ?: body.selectFirst("div.post-content_item:contains(Statut)") @@ -438,31 +430,31 @@ internal abstract class MadaraParser( val d = date?.lowercase() ?: return 0 return when { d.endsWith(" ago") || d.endsWith(" atrás") || // Handle translated 'ago' in Portuguese. - d.startsWith("há ") || // other translated 'ago' in Portuguese. - d.endsWith(" hace") || // other translated 'ago' in Spanish - d.endsWith(" назад") || // other translated 'ago' in Russian - d.endsWith(" önce") || // Handle translated 'ago' in Turkish. - d.endsWith(" trước") || // Handle translated 'ago' in Viêt Nam. - d.startsWith("il y a") || // Handle translated 'ago' in French. - //If there is no ago but just a motion of time - // short Hours - d.endsWith(" h") || - // short Day - d.endsWith(" d") || - // Day in Portuguese - d.endsWith(" días") || d.endsWith(" día") || - // Day in French - d.endsWith(" jour") || d.endsWith(" jours") || - // Hours in Portuguese - d.endsWith(" horas") || d.endsWith(" hora") || - // Hours in french - d.endsWith(" heure") || d.endsWith(" heures") || - // Minutes in English - d.endsWith(" mins") || - // Minutes in Portuguese - d.endsWith(" minutos") || d.endsWith(" minuto") || - //Minutes in French - d.endsWith(" minute") || d.endsWith(" minutes") -> parseRelativeDate(date) + d.startsWith("há ") || // other translated 'ago' in Portuguese. + d.endsWith(" hace") || // other translated 'ago' in Spanish + d.endsWith(" назад") || // other translated 'ago' in Russian + d.endsWith(" önce") || // Handle translated 'ago' in Turkish. + d.endsWith(" trước") || // Handle translated 'ago' in Viêt Nam. + d.startsWith("il y a") || // Handle translated 'ago' in French. + //If there is no ago but just a motion of time + // short Hours + d.endsWith(" h") || + // short Day + d.endsWith(" d") || + // Day in Portuguese + d.endsWith(" días") || d.endsWith(" día") || + // Day in French + d.endsWith(" jour") || d.endsWith(" jours") || + // Hours in Portuguese + d.endsWith(" horas") || d.endsWith(" hora") || + // Hours in french + d.endsWith(" heure") || d.endsWith(" heures") || + // Minutes in English + d.endsWith(" mins") || + // Minutes in Portuguese + d.endsWith(" minutos") || d.endsWith(" minuto") || + //Minutes in French + d.endsWith(" minute") || d.endsWith(" minutes") -> parseRelativeDate(date) // Handle 'yesterday' and 'today', using midnight d.startsWith("year") -> Calendar.getInstance().apply { @@ -559,10 +551,10 @@ internal abstract class MadaraParser( private fun createRequestTemplate() = ("action=madara_load_more&page=1&template=madara-core%2Fcontent%2Fcontent-search&vars%5Bs%5D=&vars%5B" + - "orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" + - "%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" + - "%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" + - "%5D=desc&vars%5Bmanga_archives_item_layout%5D=default").split( + "orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" + + "%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" + + "%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" + + "%5D=desc&vars%5Bmanga_archives_item_layout%5D=default").split( '&', ).map { val pos = it.indexOf('=') diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt index 67b2c6ec..0d6b7bb4 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDass.kt @@ -110,15 +110,7 @@ internal class MangaDass(context: MangaLoaderContext) : val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)"))?.selectLast("div.summary-content") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt index bb29e38d..cc4cac05 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/en/MangaDna.kt @@ -108,15 +108,7 @@ internal class MangaDna(context: MangaLoaderContext) : val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)"))?.selectLast("div.summary-content") diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt index c4c7e3ec..80082117 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ja/HachiManga.kt @@ -119,15 +119,7 @@ internal class HachiManga(context: MangaLoaderContext) : MadaraParser(context, M val chaptersDeferred = async { getChapters(manga, doc) } - val desc = body.select(selectdesc).let { - if (it.select("p").text().isNotEmpty()) { - it.select("p").joinToString(separator = "\n\n") { p -> - p.text().replace("
", "\n") - } - } else { - it.text() - } - } + val desc = body.select(selectdesc).html() val stateDiv = (body.selectFirst("div.post-content_item:contains(状態)"))?.selectLast("div.summary-content")