From 06a043d290c73a0aceae654816787872f10c36f2 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Tue, 11 Jul 2023 10:36:21 +0300 Subject: [PATCH] Refactor Madara parser --- .../parsers/site/madara/MadaraParser.kt | 104 ++++++------------ .../parsers/site/madara/ru/BestManga.kt | 2 +- 2 files changed, 36 insertions(+), 70 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt index 48c1ad30..69431f75 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt @@ -8,35 +8,11 @@ import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.PagedMangaParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.exception.ParseException -import org.koitharu.kotatsu.parsers.model.Manga -import org.koitharu.kotatsu.parsers.model.MangaChapter -import org.koitharu.kotatsu.parsers.model.MangaPage -import org.koitharu.kotatsu.parsers.model.MangaSource -import org.koitharu.kotatsu.parsers.model.MangaState -import org.koitharu.kotatsu.parsers.model.MangaTag -import org.koitharu.kotatsu.parsers.model.SortOrder -import org.koitharu.kotatsu.parsers.model.WordSet -import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrlOrNull -import org.koitharu.kotatsu.parsers.util.domain -import org.koitharu.kotatsu.parsers.util.generateUid -import org.koitharu.kotatsu.parsers.util.host -import org.koitharu.kotatsu.parsers.util.mapChapters -import org.koitharu.kotatsu.parsers.util.mapNotNullToSet -import org.koitharu.kotatsu.parsers.util.oneOrThrowIfMany -import org.koitharu.kotatsu.parsers.util.parseFailed -import org.koitharu.kotatsu.parsers.util.parseHtml -import org.koitharu.kotatsu.parsers.util.removeSuffix -import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow -import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl -import org.koitharu.kotatsu.parsers.util.toMutableMap -import org.koitharu.kotatsu.parsers.util.toRelativeUrl -import org.koitharu.kotatsu.parsers.util.toTitleCase -import org.koitharu.kotatsu.parsers.util.tryParse -import org.koitharu.kotatsu.parsers.util.urlEncoded +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* import java.text.DateFormat import java.text.SimpleDateFormat -import java.util.Calendar -import java.util.EnumSet +import java.util.* internal abstract class MadaraParser( @@ -78,7 +54,8 @@ internal abstract class MadaraParser( } - protected val ongoing: Array = arrayOf( + @JvmField + protected val ongoing: Set = hashSetOf( "مستمرة", "En curso", "En Curso", @@ -118,7 +95,8 @@ internal abstract class MadaraParser( "Devam ediyor", ) - protected val finished: Array = arrayOf( + @JvmField + protected val finished: Set = hashSetOf( "Completed", "Completo", "Complété", @@ -144,7 +122,6 @@ internal abstract class MadaraParser( "Bitmiş", ) - override suspend fun getListPage( page: Int, query: String?, @@ -194,19 +171,10 @@ internal abstract class MadaraParser( ) }.orEmpty(), author = summary?.selectFirst(".mg_author")?.selectFirst("a")?.ownText(), - state = when (summary?.selectFirst(".mg_status")?.selectFirst(".summary-content")?.ownText()?.trim() + state = when (summary?.selectFirst(".mg_status")?.selectFirst(".summary-content")?.ownText() ?.lowercase()) { - "مستمرة", "En curso", "En Curso", "Ongoing", "OnGoing", "On going", "Ativo", "En Cours", "En cours", "Activo", - "En cours \uD83D\uDFE2", "En cours de publication", "Đang tiến hành", "Em lançamento", "em lançamento", "Em Lançamento", - "Онгоінг", "Publishing", "Devam Ediyor", "Em Andamento", "Em andamento", "In Corso", "Güncel", "Berjalan", "Продолжается", "Updating", - "Lançando", "In Arrivo", "Emision", "En emision", "مستمر", "Curso", "En marcha", "Publicandose", "Publicando", "连载中", - "Devam ediyor", - -> MangaState.ONGOING - - "Completed", "Completo", "Complété", "Fini", "Achevé", "Terminé", "Terminé ⚫", "Tamamlandı", "Đã hoàn thành", "Hoàn Thành", "مكتملة", - "Завершено", "Finished", "Finalizado", "Completata", "One-Shot", "Bitti", "Tamat", "Completado", "Concluído", "Concluido", "已完结", "Bitmiş", - -> MangaState.FINISHED - + in ongoing -> MangaState.ONGOING + in finished -> MangaState.FINISHED else -> null }, source = source, @@ -250,8 +218,9 @@ internal abstract class MadaraParser( override suspend fun getDetails(manga: Manga): Manga = coroutineScope { val fullUrl = manga.url.toAbsoluteUrl(domain) val doc = webClient.httpGet(fullUrl).parseHtml() + val body = doc.body() - val testchekasync = doc.body().select("div.listing-chapters_wrap") + val testchekasync = body.select("div.listing-chapters_wrap") val chaptersDeferred = if (testchekasync.isNullOrEmpty()) { async { loadChapters(manga.url, doc) } @@ -259,7 +228,7 @@ internal abstract class MadaraParser( async { getChapters(manga, doc) } } - val desc = doc.select(selectdesc).let { + val desc = body.select(selectdesc).let { if (it.select("p").text().isNotEmpty()) { it.select("p").joinToString(separator = "\n\n") { p -> p.text().replace("
", "\n") @@ -269,26 +238,24 @@ internal abstract class MadaraParser( } } - val stateselect = - doc.body().select("div.post-content_item:contains(Status) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(Statut) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(حالة العمل) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(Estado) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(สถานะ) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(Stato) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(Durum) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(Statüsü) > div.summary-content").last() ?: doc.body() - .select("div.post-content_item:contains(状态) > div.summary-content").last() ?: doc.body() - .select("div.summary-content").last() - - val state = - stateselect?.let { - when (it.text()) { - in ongoing -> MangaState.ONGOING - in finished -> MangaState.FINISHED - else -> null - } + val stateDiv = (body.selectFirst("div.post-content_item:contains(Status)") + ?: body.selectFirst("div.post-content_item:contains(Statut)") + ?: body.selectFirst("div.post-content_item:contains(حالة العمل)") + ?: body.selectFirst("div.post-content_item:contains(Estado)") + ?: body.selectFirst("div.post-content_item:contains(สถานะ)") + ?: body.selectFirst("div.post-content_item:contains(Stato)") + ?: body.selectFirst("div.post-content_item:contains(Durum)") + ?: body.selectFirst("div.post-content_item:contains(Statüsü)") + ?: body.selectFirst("div.post-content_item:contains(Статус)") + ?: body.selectFirst("div.post-content_item:contains(状态)"))?.selectLast("div.summary-content") + + val state = stateDiv?.let { + when (it.text()) { + in ongoing -> MangaState.ONGOING + in finished -> MangaState.FINISHED + else -> null } + } val alt = doc.body().select(".post-content_item:contains(Alt) .summary-content").firstOrNull()?.tableValue()?.text() @@ -335,15 +302,14 @@ internal abstract class MadaraParser( protected open suspend fun loadChapters(mangaUrl: String, document: Document): List { - val doc = if (postreq == false) { - val url = mangaUrl.toAbsoluteUrl(domain).removeSuffix('/') + "/ajax/chapters/" - webClient.httpPost(url, emptyMap()).parseHtml() - } else { + val doc = if (postreq) { val mangaId = document.select("div#manga-chapters-holder").attr("data-id") val url = "https://$domain/wp-admin/admin-ajax.php" val postdata = "action=manga_get_chapters&manga=$mangaId" - webClient.httpPost(url, postdata).parseHtml() + } else { + val url = mangaUrl.toAbsoluteUrl(domain).removeSuffix('/') + "/ajax/chapters/" + webClient.httpPost(url, emptyMap()).parseHtml() } val dateFormat = SimpleDateFormat(datePattern, sourceLocale) @@ -406,7 +372,7 @@ internal abstract class MadaraParser( date.endsWith(" trước", ignoreCase = true) -> { parseRelativeDate(date) } - // Handle translated 'ago' in french. + // Handle translated 'ago' in French. date.startsWith("il y a", ignoreCase = true) -> { parseRelativeDate(date) } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ru/BestManga.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ru/BestManga.kt index 41666ea2..1945c505 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ru/BestManga.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/ru/BestManga.kt @@ -7,7 +7,7 @@ import org.koitharu.kotatsu.parsers.model.MangaSource import org.koitharu.kotatsu.parsers.site.madara.MadaraParser -@MangaSourceParser("BEST_MANGA", "best manga", "ru") +@MangaSourceParser("BEST_MANGA", "BestManga", "ru") internal class BestManga(context: MangaLoaderContext) : MadaraParser(context, MangaSource.BEST_MANGA, "bestmanga.club") {