From dd6de64c44fcb626b95ddd05aa6905717fc3d236 Mon Sep 17 00:00:00 2001
From: Koitharu
Date: Wed, 27 Jul 2022 14:04:29 +0300
Subject: [PATCH] New manga source: UnionManga

---
Reviewer notes (everything between the three-dash line and the first file
header is ignored when the patch is applied):

 * MangaLoaderContext.httpGet now takes an okhttp3.HttpUrl. A String overload
   is kept and delegates through String.toHttpUrl(), so existing String-based
   call sites keep compiling.
 * MangaParser.urlBuilder() returns an HttpUrl.Builder preset with the "https"
   scheme and the host returned by getDomain().
 * UnionMangasParser builds list URLs as
   /lista-mangas/<tag key | "a-z" | "visualizacoes">/<page> and searches via
   /assets/busca.php?nomeManga=<query>. Search results are returned only for
   the first page; later pages yield an empty list.
 * This copy of the patch was rebuilt from a whitespace-collapsed rendering:
   line breaks were restored and the generic type arguments that had been
   stripped (Set<MangaTag>, List<Manga>, List<MangaPage>) were put back.
   Indentation is assumed to be tabs - TODO confirm against the repository;
   use --ignore-whitespace if context lines do not match.
 * The author e-mail on the From: line was lost in that rendering and has not
   been reconstructed.

 .../kotatsu/parsers/MangaLoaderContext.kt     |   8 +-
 .../koitharu/kotatsu/parsers/MangaParser.kt   |   7 +
 .../kotatsu/parsers/site/UnionMangasParser.kt | 177 ++++++++++++++++++
 3 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt

diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContext.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContext.kt
index 17316e94..04753783 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContext.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContext.kt
@@ -1,6 +1,7 @@
 package org.koitharu.kotatsu.parsers
 
 import okhttp3.*
+import okhttp3.HttpUrl.Companion.toHttpUrl
 import okhttp3.MediaType.Companion.toMediaType
 import okhttp3.RequestBody.Companion.toRequestBody
 import org.json.JSONObject
@@ -24,7 +25,7 @@ abstract class MangaLoaderContext {
 	 * @param url
 	 * @param headers an additional headers for request, may be null
 	 */
-	suspend fun httpGet(url: String, headers: Headers? = null): Response {
+	suspend fun httpGet(url: HttpUrl, headers: Headers? = null): Response {
 		val request = Request.Builder()
 			.get()
 			.url(url)
@@ -34,6 +35,11 @@ abstract class MangaLoaderContext {
 		return httpClient.newCall(request.build()).await().ensureSuccess()
 	}
 
+	suspend fun httpGet(url: String, headers: Headers? = null): Response {
+		return httpGet(url.toHttpUrl(), headers)
+	}
+
+
 	/**
 	 * Do a HEAD http request to specific url
 	 * @param url
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt
index d1af2aa2..2788529c 100644
--- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt
@@ -3,6 +3,7 @@ package org.koitharu.kotatsu.parsers
 import androidx.annotation.CallSuper
 import androidx.annotation.VisibleForTesting
 import okhttp3.Headers
+import okhttp3.HttpUrl
 import org.jsoup.nodes.Element
 import org.koitharu.kotatsu.parsers.config.ConfigKey
 import org.koitharu.kotatsu.parsers.exception.ParseException
@@ -141,6 +142,12 @@ abstract class MangaParser @InternalParsersApi constructor(val source: MangaSour
 		return subdomain + "." + domain.removePrefix("www.")
 	}
 
+	fun urlBuilder(): HttpUrl.Builder {
+		return HttpUrl.Builder()
+			.scheme("https")
+			.host(getDomain())
+	}
+
 	/**
 	 * Create a unique id for [Manga]/[MangaChapter]/[MangaPage].
 	 * @param url must be relative url, without a domain
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt
new file mode 100644
index 00000000..8cf6bd1f
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt
@@ -0,0 +1,177 @@
+package org.koitharu.kotatsu.parsers.site
+
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.PagedMangaParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
+import org.koitharu.kotatsu.parsers.util.json.mapJSON
+import java.text.SimpleDateFormat
+import java.util.*
+
+@MangaSourceParser("UNION_MANGAS", "Union Mangás", "pt")
+class UnionMangasParser(override val context: MangaLoaderContext) : PagedMangaParser(MangaSource.UNION_MANGAS, 40) {
+
+	override val sortOrders = EnumSet.of(
+		SortOrder.ALPHABETICAL,
+		SortOrder.POPULARITY,
+	)
+
+	override val configKeyDomain = ConfigKey.Domain("unionleitor.top", emptyArray())
+
+	override suspend fun getListPage(
+		page: Int,
+		query: String?,
+		tags: Set<MangaTag>?,
+		sortOrder: SortOrder,
+	): List<Manga> {
+		if (!query.isNullOrEmpty()) {
+			return if (page == searchPaginator.firstPage) {
+				search(query)
+			} else {
+				emptyList()
+			}
+		}
+		val tag = tags.oneOrThrowIfMany()
+		val url = urlBuilder()
+			.addPathSegment("lista-mangas")
+			.addPathSegment(
+				when {
+					tag != null -> tag.key
+					sortOrder == SortOrder.ALPHABETICAL -> "a-z"
+					else -> "visualizacoes"
+				},
+			).addPathSegment(page.toString())
+		val doc = context.httpGet(url.build()).parseHtml()
+		val root = doc.selectFirstOrThrow("div.tamanho-bloco-perfil")
+		return root.select(".lista-mangas-novos").map { div ->
+			val a = div.selectFirstOrThrow("a")
+			val img = div.selectFirstOrThrow("img")
+			val href = a.attrAsRelativeUrl("href")
+			Manga(
+				id = generateUid(href),
+				url = href,
+				publicUrl = a.attrAsAbsoluteUrl("href"),
+				title = div.selectLastOrThrow("a").text(),
+				coverUrl = img.attrAsAbsoluteUrl("src"),
+				altTitle = null,
+				rating = RATING_UNKNOWN,
+				tags = emptySet(),
+				description = div.selectLast("div")?.ownText(),
+				state = null,
+				author = null,
+				isNsfw = false,
+				source = source,
+			)
+		}
+	}
+
+	override suspend fun getDetails(manga: Manga): Manga {
+		val doc = context.httpGet(manga.url.toAbsoluteUrl(getDomain())).parseHtml()
+		val root = doc.selectFirstOrThrow(".perfil-manga")
+		val dateFormat = SimpleDateFormat("dd/MM/yyyy", Locale.ROOT)
+		return manga.copy(
+			rating = root.select("h2")
+				.find { it.ownText().startsWith('#') }
+				?.ownText()?.drop(1)?.toFloatOrNull()?.div(10f) ?: manga.rating,
+			largeCoverUrl = root.selectFirst("img.img-thumbnail")?.attrAsAbsoluteUrlOrNull("src"),
+			description = root.selectFirst(".panel-default")?.selectFirst(".panel-body")?.html(),
+			author = root.tableValue("Autor")?.ownText(),
+			altTitle = root.tableValue("Título(s) Alternativo(s)")?.ownText(),
+			state = when (root.tableValue("Status")?.selectLast(".label")?.text()) {
+				"Completo" -> MangaState.FINISHED
+				"Ativo" -> MangaState.ONGOING
+				else -> null
+			},
+			tags = root.tableValue("Gênero(s)")?.select("a")?.mapToSet {
+				it.toMangaTag()
+			} ?: manga.tags,
+			isNsfw = root.selectFirst(".alert-danger")?.html()?.contains("18 anos") == true,
+			chapters = root.select("div.row.capitulos").asReversed().mapChapters { i, div ->
+				val a = div.selectFirstOrThrow("a")
+				val href = a.attrAsRelativeUrl("href")
+				val title = a.text()
+				MangaChapter(
+					id = generateUid(href),
+					name = title,
+					number = i + 1,
+					url = href,
+					scanlator = div.selectLast("a")?.text()?.takeUnless { it == title },
+					uploadDate = dateFormat.tryParse(
+						a.nextElementSibling()?.text()?.removeSurrounding('(', ')'),
+					),
+					branch = null,
+					source = source,
+				)
+			},
+		)
+	}
+
+	override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+		val fullUrl = chapter.url.toAbsoluteUrl(getDomain())
+		val doc = context.httpGet(fullUrl).parseHtml()
+		val root = doc.body().selectFirstOrThrow("article")
+		return root.selectOrThrow("img[pag]").mapNotNull { img ->
+			val href = img.attrAsRelativeUrl("src")
+			if (href.startsWith("/images/banner")) {
+				return@mapNotNull null
+			}
+			MangaPage(
+				id = generateUid(href),
+				url = href,
+				referer = fullUrl,
+				preview = null,
+				source = source,
+			)
+		}
+	}
+
+	override suspend fun getTags(): Set<MangaTag> {
+		val doc = context.httpGet(urlBuilder().addPathSegment("lista-mangas").build()).parseHtml()
+		val ul = doc.body().selectFirstOrThrow(".nav-tabs").selectFirstOrThrow("ul.dropdown-menu")
+		return ul.select("li").mapToSet { li ->
+			li.selectFirstOrThrow("a").toMangaTag()
+		}
+	}
+
+	private suspend fun search(query: String): List<Manga> {
+		val domain = getDomain()
+		val json = context.httpGet(
+			urlBuilder()
+				.addPathSegments("assets/busca.php")
+				.addQueryParameter("nomeManga", query)
+				.build(),
+		).parseJson()
+		return json.getJSONArray("items").mapJSON { jo ->
+			val href = "/pagina-manga/" + jo.getString("url")
+			Manga(
+				id = generateUid(href),
+				url = href,
+				publicUrl = href.toAbsoluteUrl(domain),
+				title = jo.getString("titulo"),
+				rating = RATING_UNKNOWN,
+				tags = emptySet(),
+				author = jo.getStringOrNull("autor"),
+				coverUrl = jo.getString("imagem"),
+				state = null,
+				isNsfw = false,
+				altTitle = null,
+				source = source,
+			)
+		}
+	}
+
+	private fun Element.tableValue(title: String): Element? {
+		return select("h4.media-heading")
+			.find { it.selectFirst("label.subtit-manga")?.text()?.contains(title, ignoreCase = true) == true }
+	}
+
+	private fun Element.toMangaTag() = MangaTag(
+		title = text().toTitleCase(sourceLocale ?: Locale.ROOT),
+		key = attr("href").removeSuffix('/').substringAfterLast('/'),
+		source = source,
+	)
+}
\ No newline at end of file