diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt index a99d6d64..84ee386a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/MangaParser.kt @@ -12,115 +12,115 @@ import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl import java.util.* abstract class MangaParser @InternalParsersApi constructor( - @property:InternalParsersApi val context: MangaLoaderContext, - val source: MangaSource, + @property:InternalParsersApi val context: MangaLoaderContext, + val source: MangaSource, ) { - /** - * Supported [SortOrder] variants. Must not be empty. - * - * For better performance use [EnumSet] for more than one item. - */ - abstract val sortOrders: Set - - val config by lazy { context.getConfig(source) } - - open val sourceLocale: Locale? - get() = source.locale?.let { Locale(it) } - - /** - * Provide default domain and available alternatives, if any. - * - * Never hardcode domain in requests, use [getDomain] instead. - */ - @InternalParsersApi - abstract val configKeyDomain: ConfigKey.Domain - - open val headers: Headers? = null - - /** - * Used as fallback if value of `sortOrder` passed to [getList] is null - */ - protected open val defaultSortOrder: SortOrder - get() { - val supported = sortOrders - return SortOrder.values().first { it in supported } - } - - protected val webClient: WebClient = OkHttpWebClient(context.httpClient, source) - - /** - * Parse list of manga by specified criteria - * - * @param offset starting from 0 and used for pagination. - * Note than passed value may not be divisible by internal page size, so you should adjust it manually. - * @param query search query, may be null or empty if no search needed - * @param tags genres for filtering, values from [getTags] and [Manga.tags]. May be null or empty - * @param sortOrder one of [sortOrders] or null for default value - */ - @JvmSynthetic - @InternalParsersApi - abstract suspend fun getList( - offset: Int, - query: String?, - tags: Set?, - sortOrder: SortOrder, - ): List - - /** - * Parse list of manga with search by text query - * - * @param offset starting from 0 and used for pagination. - * @param query search query - */ - open suspend fun getList(offset: Int, query: String): List { - return getList(offset, query, null, defaultSortOrder) - } - - /** - * Parse list of manga by specified criteria - * - * @param offset starting from 0 and used for pagination. - * Note than passed value may not be divisible by internal page size, so you should adjust it manually. - * @param tags genres for filtering, values from [getTags] and [Manga.tags]. May be null or empty - * @param sortOrder one of [sortOrders] or null for default value - */ - open suspend fun getList(offset: Int, tags: Set?, sortOrder: SortOrder?): List { - return getList(offset, null, tags, sortOrder ?: defaultSortOrder) - } - - /** - * Parse details for [Manga]: chapters list, description, large cover, etc. - * Must return the same manga, may change any fields excepts id, url and source - * @see Manga.copy - */ - abstract suspend fun getDetails(manga: Manga): Manga - - /** - * Parse pages list for specified chapter. - * @see MangaPage for details - */ - abstract suspend fun getPages(chapter: MangaChapter): List - - /** - * Fetch direct link to the page image. - */ - open suspend fun getPageUrl(page: MangaPage): String = page.url.toAbsoluteUrl(domain) - - /** - * Fetch available tags (genres) for source - */ - abstract suspend fun getTags(): Set - - /** - * Parse favicons from the main page of the source`s website - */ - open suspend fun getFavicons(): Favicons { - return FaviconParser(webClient, domain).parseFavicons() - } - - @CallSuper - open fun onCreateConfig(keys: MutableCollection>) { - keys.add(configKeyDomain) - } + /** + * Supported [SortOrder] variants. Must not be empty. + * + * For better performance use [EnumSet] for more than one item. + */ + abstract val sortOrders: Set + + val config by lazy { context.getConfig(source) } + + open val sourceLocale: Locale + get() = source.locale?.let { Locale(it) } ?: Locale.ROOT + + /** + * Provide default domain and available alternatives, if any. + * + * Never hardcode domain in requests, use [getDomain] instead. + */ + @InternalParsersApi + abstract val configKeyDomain: ConfigKey.Domain + + open val headers: Headers? = null + + /** + * Used as fallback if value of `sortOrder` passed to [getList] is null + */ + protected open val defaultSortOrder: SortOrder + get() { + val supported = sortOrders + return SortOrder.values().first { it in supported } + } + + protected val webClient: WebClient = OkHttpWebClient(context.httpClient, source) + + /** + * Parse list of manga by specified criteria + * + * @param offset starting from 0 and used for pagination. + * Note than passed value may not be divisible by internal page size, so you should adjust it manually. + * @param query search query, may be null or empty if no search needed + * @param tags genres for filtering, values from [getTags] and [Manga.tags]. May be null or empty + * @param sortOrder one of [sortOrders] or null for default value + */ + @JvmSynthetic + @InternalParsersApi + abstract suspend fun getList( + offset: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List + + /** + * Parse list of manga with search by text query + * + * @param offset starting from 0 and used for pagination. + * @param query search query + */ + open suspend fun getList(offset: Int, query: String): List { + return getList(offset, query, null, defaultSortOrder) + } + + /** + * Parse list of manga by specified criteria + * + * @param offset starting from 0 and used for pagination. + * Note than passed value may not be divisible by internal page size, so you should adjust it manually. + * @param tags genres for filtering, values from [getTags] and [Manga.tags]. May be null or empty + * @param sortOrder one of [sortOrders] or null for default value + */ + open suspend fun getList(offset: Int, tags: Set?, sortOrder: SortOrder?): List { + return getList(offset, null, tags, sortOrder ?: defaultSortOrder) + } + + /** + * Parse details for [Manga]: chapters list, description, large cover, etc. + * Must return the same manga, may change any fields excepts id, url and source + * @see Manga.copy + */ + abstract suspend fun getDetails(manga: Manga): Manga + + /** + * Parse pages list for specified chapter. + * @see MangaPage for details + */ + abstract suspend fun getPages(chapter: MangaChapter): List + + /** + * Fetch direct link to the page image. + */ + open suspend fun getPageUrl(page: MangaPage): String = page.url.toAbsoluteUrl(domain) + + /** + * Fetch available tags (genres) for source + */ + abstract suspend fun getTags(): Set + + /** + * Parse favicons from the main page of the source`s website + */ + open suspend fun getFavicons(): Favicons { + return FaviconParser(webClient, domain).parseFavicons() + } + + @CallSuper + open fun onCreateConfig(keys: MutableCollection>) { + keys.add(configKeyDomain) + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/CloneMangaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/CloneMangaParser.kt index 889aa81c..56fd66e0 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/CloneMangaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/CloneMangaParser.kt @@ -10,94 +10,92 @@ import java.util.* @MangaSourceParser("CLONEMANGA", "CloneManga", "en") internal class CloneMangaParser(context: MangaLoaderContext) : PagedMangaParser( - context, - MangaSource.CLONEMANGA, - pageSize = 1, + context, + MangaSource.CLONEMANGA, + pageSize = 1, ) { - override val sortOrders: Set = Collections.singleton( - SortOrder.POPULARITY, - ) + override val sortOrders: Set = Collections.singleton( + SortOrder.POPULARITY, + ) - override val configKeyDomain = ConfigKey.Domain("manga.clone-army.org", null) + override val configKeyDomain = ConfigKey.Domain("manga.clone-army.org", null) - override suspend fun getListPage( - page: Int, - query: String?, - tags: Set?, - sortOrder: SortOrder, - ): List { - if (query != null || page > 1) { - return emptyList() - } - val link = "https://${domain}/viewer_landing.php" - val doc = webClient.httpGet(link).parseHtml() - val mangas = doc.getElementsByClass("comicPreviewContainer") - return mangas.mapNotNull { item -> - val attr = item.getElementsByClass("comicPreview").attr("style") - val href = item.selectFirst("a")?.attrAsAbsoluteUrl("href") ?: return@mapNotNull null - val cover = attr.substring(attr.indexOf("site/themes"), attr.indexOf(")")) - Manga( - id = generateUid(href), - title = item.selectFirst("h3")?.text() ?: return@mapNotNull null, - coverUrl = "https://${domain}/$cover", - altTitle = null, - author = "Dan Kim", - rating = RATING_UNKNOWN, - url = href, - isNsfw = false, - tags = emptySet(), - state = null, - publicUrl = href.toAbsoluteUrl(domain), - source = source, - ) - } - } + override suspend fun getListPage( + page: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List { + if (query != null || page > 1) { + return emptyList() + } + val link = "https://${domain}/viewer_landing.php" + val doc = webClient.httpGet(link).parseHtml() + val mangas = doc.getElementsByClass("comicPreviewContainer") + return mangas.mapNotNull { item -> + val background = item.selectFirstOrThrow(".comicPreview").styleValueOrNull("background") + val href = item.selectFirst("a")?.attrAsAbsoluteUrl("href") ?: return@mapNotNull null + val cover = background?.substring(background.indexOf("site/themes"), background.indexOf(")")) + Manga( + id = generateUid(href), + title = item.selectFirst("h3")?.text() ?: return@mapNotNull null, + coverUrl = "https://${domain}/$cover", + altTitle = null, + author = "Dan Kim", + rating = RATING_UNKNOWN, + url = href, + isNsfw = false, + tags = emptySet(), + state = null, + publicUrl = href.toAbsoluteUrl(domain), + source = source, + ) + } + } - override suspend fun getDetails(manga: Manga): Manga { - val doc = webClient.httpGet(manga.publicUrl).parseHtml() - val series = doc.location() - val numChapters = Regex( - pattern = "&page=(.*)&lang=", - ).findAll( - input = doc.getElementsByTag("script")[3].toString(), - ) - .elementAt(3).destructured.component1() - .toInt() - val chapters = ArrayList() - for (i in 0..numChapters) { - val chapter = MangaChapter( - id = generateUid("$series&page=$i"), - name = "Chapter ${i + 1}", - number = i + 1, - url = "$series&page=$i", - scanlator = null, - branch = null, - uploadDate = 0L, - source = MangaSource.DUMMY, - ) - chapters.add(chapter) - } - return manga.copy(chapters = chapters) - } + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.publicUrl).parseHtml() + val series = doc.location() + val numChapters = Regex( + pattern = "&page=(.*)&lang=", + ).findAll( + input = doc.getElementsByTag("script")[3].toString(), + ) + .elementAt(3).destructured.component1() + .toInt() + val chapters = ArrayList() + for (i in 0..numChapters) { + val chapter = MangaChapter( + id = generateUid("$series&page=$i"), + name = "Chapter ${i + 1}", + number = i + 1, + url = "$series&page=$i", + scanlator = null, + branch = null, + uploadDate = 0L, + source = source, + ) + chapters.add(chapter) + } + return manga.copy(chapters = chapters) + } - override suspend fun getPages(chapter: MangaChapter): List { - val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() - val imgUrl = doc.getElementsByClass("subsectionContainer")[0] - .selectFirst("img") - ?.attrAsAbsoluteUrlOrNull("src") ?: doc.parseFailed("Something broken") - return listOf( - MangaPage( - id = generateUid(imgUrl), - url = imgUrl, - referer = imgUrl, - preview = null, - source = MangaSource.DUMMY, - ), - ) - } + override suspend fun getPages(chapter: MangaChapter): List { + val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() + val imgUrl = doc.getElementsByClass("subsectionContainer")[0] + .selectFirst("img") + ?.attrAsAbsoluteUrlOrNull("src") ?: doc.parseFailed("Something broken") + return listOf( + MangaPage( + id = generateUid(imgUrl), + url = imgUrl, + referer = imgUrl, + preview = null, + source = source, + ), + ) + } - override suspend fun getTags(): Set { - return emptySet() - } + override suspend fun getTags(): Set = emptySet() } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/Manhwa18Parser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/Manhwa18Parser.kt index 97a40b94..55bc37cd 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/Manhwa18Parser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/Manhwa18Parser.kt @@ -1,5 +1,6 @@ package org.koitharu.kotatsu.parsers.site +import androidx.collection.ArrayMap import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.PagedMangaParser @@ -10,176 +11,187 @@ import java.util.* @MangaSourceParser("MANHWA18", "Manhwa18", "en") class Manhwa18Parser(context: MangaLoaderContext) : - PagedMangaParser(context, MangaSource.MANHWA18, pageSize = 20, searchPageSize = 20) { - - override val configKeyDomain: ConfigKey.Domain - get() = ConfigKey.Domain("manhwa18.net", null) - - override val sortOrders: Set - get() = EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL) - - override suspend fun getFavicons(): Favicons { - return Favicons( - listOf( - Favicon("https://${domain}/uploads/logos/logo-mini.png", 92, null), - ), - domain, - ) - } - - override suspend fun getDetails(manga: Manga): Manga { - val docs = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - val cardInfoElement = docs.selectFirst(".card .manga-info") - val author = cardInfoElement?.selectFirst("b:contains(Author(s))")?.parent() - ?.select("a.btn") - ?.joinToString(", ") { it.text() } - val tags = cardInfoElement?.selectFirst("b:contains(Genre(s))")?.parent() - ?.select("a.btn") - ?.mapToSet { MangaTag(it.text(), it.text().lowercase(), MangaSource.MANHWA18) } - val state = cardInfoElement?.selectFirst("b:contains(Status)")?.parent() - ?.selectFirst("a.btn") - ?.let { - when (it.text()) { - "On going" -> MangaState.ONGOING - "Completed" -> MangaState.FINISHED - else -> null - } - } - - return manga.copy( - altTitle = cardInfoElement?.selectFirst("b:contains(Other names)")?.parent()?.ownText()?.removePrefix(": "), - author = author, - description = docs.selectFirst(".series-summary .summary-content")?.html(), - tags = tags.orEmpty(), - state = state, - chapters = docs.select(".card-body > .list-chapters > a").asReversed().mapChapters { index, element -> - // attrAsRelativeUrl only return page url without the '/' - val chapterUrl = element.attrAsAbsoluteUrlOrNull("href")?.toRelativeUrl(domain) - ?: return@mapChapters null - val uploadDate = parseUploadDate(element.selectFirst(".chapter-time")?.text()) - MangaChapter( - id = generateUid(chapterUrl), - name = element.selectFirst(".chapter-name")?.text().orEmpty(), - number = index + 1, - url = chapterUrl, - scanlator = null, - uploadDate = uploadDate, - branch = null, - source = MangaSource.MANHWA18, - ) - }, - ) - } - - // 7 minutes ago - // 5 hours ago - // 2 days ago - // 2 weeks ago - // 4 years ago - private fun parseUploadDate(timeStr: String?): Long { - timeStr ?: return 0 - - val timeWords = timeStr.split(' ') - if (timeWords.size != 3) return 0 - val timeWord = timeWords[1] - val timeAmount = timeWords[0].toIntOrNull() ?: return 0 - val timeUnit = when (timeWord) { - "minute", "minutes" -> Calendar.MINUTE - "hour", "hours" -> Calendar.HOUR - "day", "days" -> Calendar.DAY_OF_YEAR - "week", "weeks" -> Calendar.WEEK_OF_YEAR - "month", "months" -> Calendar.MONTH - "year", "years" -> Calendar.YEAR - else -> return 0 - } - val cal = Calendar.getInstance() - cal.add(timeUnit, -timeAmount) - return cal.time.time - } - - override suspend fun getListPage( - page: Int, - query: String?, - tags: Set?, - sortOrder: SortOrder, - ): List { - val sortQuery = when (sortOrder) { - SortOrder.ALPHABETICAL -> "name" - SortOrder.POPULARITY -> "views" - SortOrder.UPDATED -> "last_update" - else -> "" - } - - val sortType = if (sortOrder == SortOrder.ALPHABETICAL) "ASC" else "DESC" - val tagQuery = tags?.joinToString(",") { it.key }.orEmpty() - val url = buildString { - append("https://") - append(domain) - append("/manga-list.html?listType=pagination&page=") - append(page) - append("&artist=&author=&group=&m_status=&name=") - append(query?.urlEncoded().orEmpty()) - append("&genre=$tagQuery") - append("&ungenre=") - append("&sort=") - append(sortQuery) - append("&sort_type=") - append(sortType) - } - - val docs = webClient.httpGet(url).parseHtml() - val actualPage = docs.selectFirst("ul.pagination a.active")?.text()?.toIntOrNull() - if (actualPage != page) { - return emptyList() - } - - return docs.select(".card-body .thumb-item-flow") - .map { - val titleElement = it.selectFirstOrThrow(".thumb_attr.series-title > a") - val absUrl = titleElement.attrAsAbsoluteUrl("href") - Manga( - id = generateUid(absUrl.toRelativeUrl(domain)), - title = titleElement.text(), - altTitle = null, - url = absUrl.toRelativeUrl(domain), - publicUrl = absUrl, - rating = RATING_UNKNOWN, - isNsfw = true, - coverUrl = it.selectFirst("div.img-in-ratio")?.attrAsAbsoluteUrl("data-bg").orEmpty(), - tags = emptySet(), - state = null, - author = null, - largeCoverUrl = null, - description = null, - source = MangaSource.MANHWA18, - ) - } - } - - override suspend fun getPages(chapter: MangaChapter): List { - val chapterUrl = chapter.url.toAbsoluteUrl(domain) - return webClient.httpGet(chapterUrl).parseHtml() - .select("div.chapter-content > img").map { - val url = it.attrAsRelativeUrlOrNull("src").orEmpty() - MangaPage( - id = generateUid(url), - url = url, - referer = chapterUrl, - preview = null, - source = MangaSource.MANHWA18, - ) - } - } - - override suspend fun getTags(): Set { - return webClient.httpGet("https://${domain}/").parseHtml().selectFirstOrThrow(".genres-menu") - .select("a.genres-item").orEmpty() - .mapToSet { - MangaTag( - title = it.text(), - key = it.text().lowercase(), - source = MangaSource.MANHWA18, - ) - } - } + PagedMangaParser(context, MangaSource.MANHWA18, pageSize = 20, searchPageSize = 20) { + + override val configKeyDomain: ConfigKey.Domain + get() = ConfigKey.Domain("manhwa18.net", null) + + override val sortOrders: Set + get() = EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL, SortOrder.NEWEST) + + private val tagsMap = SuspendLazy(::parseTags) + + override suspend fun getFavicons(): Favicons { + return Favicons( + listOf( + Favicon("https://${domain}/uploads/logos/logo-mini.png", 92, null), + ), + domain, + ) + } + + override suspend fun getDetails(manga: Manga): Manga { + val docs = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val cardInfoElement = docs.selectFirst(".card .manga-info") + val author = cardInfoElement?.selectFirst("b:contains(Author(s))")?.parent() + ?.select("a.btn") + ?.joinToString(", ") { it.text() } + val availableTags = tagsMap.get() + val tags = cardInfoElement?.selectFirst("b:contains(Genre(s))")?.parent() + ?.select("a.btn") + ?.mapNotNullToSet { availableTags[it.text().lowercase(Locale.ENGLISH)] } + val state = cardInfoElement?.selectFirst("b:contains(Status)")?.parent() + ?.selectFirst("a.btn") + ?.let { + when (it.text()) { + "On going" -> MangaState.ONGOING + "Completed" -> MangaState.FINISHED + else -> null + } + } + + return manga.copy( + altTitle = cardInfoElement?.selectFirst("b:contains(Other names)")?.parent()?.ownText()?.removePrefix(": "), + author = author, + description = docs.selectFirst(".series-summary .summary-content")?.html(), + tags = tags.orEmpty(), + state = state, + chapters = docs.select(".card-body > .list-chapters > a").asReversed().mapChapters { index, element -> + // attrAsRelativeUrl only return page url without the '/' + val chapterUrl = element.attrAsAbsoluteUrlOrNull("href")?.toRelativeUrl(domain) + ?: return@mapChapters null + val uploadDate = parseUploadDate(element.selectFirst(".chapter-time")?.text()) + MangaChapter( + id = generateUid(chapterUrl), + name = element.selectFirst(".chapter-name")?.text().orEmpty(), + number = index + 1, + url = chapterUrl, + scanlator = null, + uploadDate = uploadDate, + branch = null, + source = MangaSource.MANHWA18, + ) + }, + ) + } + + // 7 minutes ago + // 5 hours ago + // 2 days ago + // 2 weeks ago + // 4 years ago + private fun parseUploadDate(timeStr: String?): Long { + timeStr ?: return 0 + + val timeWords = timeStr.split(' ') + if (timeWords.size != 3) return 0 + val timeWord = timeWords[1] + val timeAmount = timeWords[0].toIntOrNull() ?: return 0 + val timeUnit = when (timeWord) { + "minute", "minutes" -> Calendar.MINUTE + "hour", "hours" -> Calendar.HOUR + "day", "days" -> Calendar.DAY_OF_YEAR + "week", "weeks" -> Calendar.WEEK_OF_YEAR + "month", "months" -> Calendar.MONTH + "year", "years" -> Calendar.YEAR + else -> return 0 + } + val cal = Calendar.getInstance() + cal.add(timeUnit, -timeAmount) + return cal.time.time + } + + override suspend fun getListPage( + page: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List { + val sortQuery = when (sortOrder) { + SortOrder.ALPHABETICAL -> "az" + SortOrder.POPULARITY -> "top" + SortOrder.UPDATED -> "update" + SortOrder.NEWEST -> "new" + else -> "" + } + + val tagQuery = tags?.joinToString(",") { it.key }.orEmpty() + val url = buildString { + append("https://") + append(domain) + append("/tim-kiem?page=") + append(page) + if (!query.isNullOrEmpty()) { + append("&q=") + append(query.urlEncoded()) + } + append("&accept_genres=$tagQuery") + append("&sort=") + append(sortQuery) + } + + val docs = webClient.httpGet(url).parseHtml() + + return docs.select(".card-body .thumb-item-flow") + .map { + val titleElement = it.selectFirstOrThrow(".thumb_attr.series-title > a") + val absUrl = titleElement.attrAsAbsoluteUrl("href") + Manga( + id = generateUid(absUrl.toRelativeUrl(domain)), + title = titleElement.text(), + altTitle = null, + url = absUrl.toRelativeUrl(domain), + publicUrl = absUrl, + rating = RATING_UNKNOWN, + isNsfw = true, + coverUrl = it.selectFirst("div.img-in-ratio")?.attrAsAbsoluteUrl("data-bg").orEmpty(), + tags = emptySet(), + state = null, + author = null, + largeCoverUrl = null, + description = null, + source = MangaSource.MANHWA18, + ) + } + } + + override suspend fun getPages(chapter: MangaChapter): List { + val chapterUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(chapterUrl).parseHtml() + return doc.requireElementById("chapter-content").select("img").mapNotNull { + val url = it.attrAsRelativeUrlOrNull("data-src") + ?: it.attrAsRelativeUrlOrNull("src") + ?: return@mapNotNull null + MangaPage( + id = generateUid(url), + url = url, + referer = chapterUrl, + preview = null, + source = MangaSource.MANHWA18, + ) + } + } + + override suspend fun getTags(): Set { + return tagsMap.get().values.toSet() + } + + private suspend fun parseTags(): Map { + val doc = webClient.httpGet("https://$domain/tim-kiem?q=").parseHtml() + val list = doc.getElementsByAttribute("data-genre-id") + if (list.isEmpty()) { + return emptyMap() + } + val result = ArrayMap(list.size) + for (item in list) { + val id = item.attr("data-genre-id") + val name = item.text() + result[name.lowercase(Locale.ENGLISH)] = MangaTag( + title = name.toTitleCase(Locale.ENGLISH), + key = id, + source = source, + ) + } + return result + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt index ea07c0fc..53b959d0 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/UnionMangasParser.kt @@ -15,163 +15,163 @@ import java.util.* @MangaSourceParser("UNION_MANGAS", "Union Mangás", "pt") class UnionMangasParser(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.UNION_MANGAS, 40) { - override val sortOrders = EnumSet.of( - SortOrder.ALPHABETICAL, - SortOrder.POPULARITY, - ) + override val sortOrders = EnumSet.of( + SortOrder.ALPHABETICAL, + SortOrder.POPULARITY, + ) - override val configKeyDomain = ConfigKey.Domain("unionleitor.top", emptyArray()) + override val configKeyDomain = ConfigKey.Domain("unionleitor.top", emptyArray()) - override suspend fun getListPage( - page: Int, - query: String?, - tags: Set?, - sortOrder: SortOrder, - ): List { - if (!query.isNullOrEmpty()) { - return if (page == searchPaginator.firstPage) { - search(query) - } else { - emptyList() - } - } - val tag = tags.oneOrThrowIfMany() - val url = urlBuilder() - .addPathSegment("lista-mangas") - .addPathSegment( - when { - tag != null -> tag.key - sortOrder == SortOrder.ALPHABETICAL -> "a-z" - else -> "visualizacoes" - }, - ).addPathSegment(page.toString()) - val doc = webClient.httpGet(url.build()).parseHtml() - val root = doc.selectFirstOrThrow("div.tamanho-bloco-perfil") - return root.select(".lista-mangas-novos").map { div -> - val a = div.selectFirstOrThrow("a") - val img = div.selectFirstOrThrow("img") - val href = a.attrAsRelativeUrl("href") - Manga( - id = generateUid(href), - url = href, - publicUrl = a.attrAsAbsoluteUrl("href"), - title = div.selectLastOrThrow("a").text(), - coverUrl = img.attrAsAbsoluteUrl("src"), - altTitle = null, - rating = RATING_UNKNOWN, - tags = emptySet(), - description = div.selectLast("div")?.ownText(), - state = null, - author = null, - isNsfw = false, - source = source, - ) - } - } + override suspend fun getListPage( + page: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List { + if (!query.isNullOrEmpty()) { + return if (page == searchPaginator.firstPage) { + search(query) + } else { + emptyList() + } + } + val tag = tags.oneOrThrowIfMany() + val url = urlBuilder() + .addPathSegment("lista-mangas") + .addPathSegment( + when { + tag != null -> tag.key + sortOrder == SortOrder.ALPHABETICAL -> "a-z" + else -> "visualizacoes" + }, + ).addPathSegment(page.toString()) + val doc = webClient.httpGet(url.build()).parseHtml() + val root = doc.selectFirstOrThrow("div.tamanho-bloco-perfil") + return root.select(".lista-mangas-novos").map { div -> + val a = div.selectFirstOrThrow("a") + val img = div.selectFirstOrThrow("img") + val href = a.attrAsRelativeUrl("href") + Manga( + id = generateUid(href), + url = href, + publicUrl = a.attrAsAbsoluteUrl("href"), + title = div.selectLastOrThrow("a").text(), + coverUrl = img.attrAsAbsoluteUrl("src"), + altTitle = null, + rating = RATING_UNKNOWN, + tags = emptySet(), + description = div.selectLast("div")?.ownText(), + state = null, + author = null, + isNsfw = false, + source = source, + ) + } + } - override suspend fun getDetails(manga: Manga): Manga { - val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() - val root = doc.selectFirstOrThrow(".perfil-manga") - val dateFormat = SimpleDateFormat("dd/MM/yyyy", Locale.ROOT) - return manga.copy( - rating = root.select("h2") - .find { it.ownText().startsWith('#') } - ?.ownText()?.drop(1)?.toFloatOrNull()?.div(10f) ?: manga.rating, - largeCoverUrl = root.selectFirst("img.img-thumbnail")?.attrAsAbsoluteUrlOrNull("src"), - description = root.selectFirst(".panel-default")?.selectFirst(".panel-body")?.html(), - author = root.tableValue("Autor")?.ownText(), - altTitle = root.tableValue("Título(s) Alternativo(s)")?.ownText(), - state = when (root.tableValue("Status")?.selectLast(".label")?.text()) { - "Completo" -> MangaState.FINISHED - "Ativo" -> MangaState.ONGOING - else -> null - }, - tags = root.tableValue("Gênero(s)")?.select("a")?.mapToSet { - it.toMangaTag() - } ?: manga.tags, - isNsfw = root.selectFirst(".alert-danger")?.html()?.contains("18 anos") == true, - chapters = root.select("div.row.capitulos").asReversed().mapChapters { i, div -> - val a = div.selectFirstOrThrow("a") - val href = a.attrAsRelativeUrl("href") - val title = a.text() - MangaChapter( - id = generateUid(href), - name = title, - number = i + 1, - url = href, - scanlator = div.selectLast("a")?.text()?.takeUnless { it == title }, - uploadDate = dateFormat.tryParse( - a.nextElementSibling()?.text()?.removeSurrounding("(", ")"), - ), - branch = null, - source = source, - ) - }, - ) - } + override suspend fun getDetails(manga: Manga): Manga { + val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() + val root = doc.selectFirstOrThrow(".perfil-manga") + val dateFormat = SimpleDateFormat("dd/MM/yyyy", Locale.ROOT) + return manga.copy( + rating = root.select("h2") + .find { it.ownText().startsWith('#') } + ?.ownText()?.drop(1)?.toFloatOrNull()?.div(10f) ?: manga.rating, + largeCoverUrl = root.selectFirst("img.img-thumbnail")?.attrAsAbsoluteUrlOrNull("src"), + description = root.selectFirst(".panel-default")?.selectFirst(".panel-body")?.html(), + author = root.tableValue("Autor")?.ownText(), + altTitle = root.tableValue("Título(s) Alternativo(s)")?.ownText(), + state = when (root.tableValue("Status")?.selectLast(".label")?.text()) { + "Completo" -> MangaState.FINISHED + "Ativo" -> MangaState.ONGOING + else -> null + }, + tags = root.tableValue("Gênero(s)")?.select("a")?.mapToSet { + it.toMangaTag() + } ?: manga.tags, + isNsfw = root.selectFirst(".alert-danger")?.html()?.contains("18 anos") == true, + chapters = root.select("div.row.capitulos").asReversed().mapChapters { i, div -> + val a = div.selectFirstOrThrow("a") + val href = a.attrAsRelativeUrl("href") + val title = a.text() + MangaChapter( + id = generateUid(href), + name = title, + number = i + 1, + url = href, + scanlator = div.selectLast("a")?.text()?.takeUnless { it == title }, + uploadDate = dateFormat.tryParse( + a.nextElementSibling()?.text()?.removeSurrounding("(", ")"), + ), + branch = null, + source = source, + ) + }, + ) + } - override suspend fun getPages(chapter: MangaChapter): List { - val fullUrl = chapter.url.toAbsoluteUrl(domain) - val doc = webClient.httpGet(fullUrl).parseHtml() - val root = doc.body().selectFirstOrThrow("article") - return root.selectOrThrow("img[pag]").mapNotNull { img -> - val href = img.attrAsRelativeUrl("src") - if (href.startsWith("/images/banner")) { - return@mapNotNull null - } - MangaPage( - id = generateUid(href), - url = href, - referer = fullUrl, - preview = null, - source = source, - ) - } - } + override suspend fun getPages(chapter: MangaChapter): List { + val fullUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(fullUrl).parseHtml() + val root = doc.body().selectFirstOrThrow("article") + return root.selectOrThrow("img[pag]").mapNotNull { img -> + val href = img.attrAsRelativeUrl("src") + if (href.startsWith("/images/banner")) { + return@mapNotNull null + } + MangaPage( + id = generateUid(href), + url = href, + referer = fullUrl, + preview = null, + source = source, + ) + } + } - override suspend fun getTags(): Set { - val doc = webClient.httpGet(urlBuilder().addPathSegment("lista-mangas").build()).parseHtml() - val ul = doc.body().selectFirstOrThrow(".nav-tabs").selectFirstOrThrow("ul.dropdown-menu") - return ul.select("li").mapToSet { li -> - li.selectFirstOrThrow("a").toMangaTag() - } - } + override suspend fun getTags(): Set { + val doc = webClient.httpGet(urlBuilder().addPathSegment("lista-mangas").build()).parseHtml() + val ul = doc.body().selectFirstOrThrow(".nav-tabs").selectFirstOrThrow("ul.dropdown-menu") + return ul.select("li").mapToSet { li -> + li.selectFirstOrThrow("a").toMangaTag() + } + } - private suspend fun search(query: String): List { - val domain = domain - val json = webClient.httpGet( - urlBuilder() - .addPathSegments("assets/busca.php") - .addQueryParameter("nomeManga", query) - .build(), - ).parseJson() - return json.getJSONArray("items").mapJSON { jo -> - val href = "/pagina-manga/" + jo.getString("url") - Manga( - id = generateUid(href), - url = href, - publicUrl = href.toAbsoluteUrl(domain), - title = jo.getString("titulo"), - rating = RATING_UNKNOWN, - tags = emptySet(), - author = jo.getStringOrNull("autor"), - coverUrl = jo.getString("imagem"), - state = null, - isNsfw = false, - altTitle = null, - source = source, - ) - } - } + private suspend fun search(query: String): List { + val domain = domain + val json = webClient.httpGet( + urlBuilder() + .addPathSegments("assets/busca.php") + .addQueryParameter("nomeManga", query) + .build(), + ).parseJson() + return json.getJSONArray("items").mapJSON { jo -> + val href = "/pagina-manga/" + jo.getString("url") + Manga( + id = generateUid(href), + url = href, + publicUrl = href.toAbsoluteUrl(domain), + title = jo.getString("titulo"), + rating = RATING_UNKNOWN, + tags = emptySet(), + author = jo.getStringOrNull("autor"), + coverUrl = jo.getString("imagem"), + state = null, + isNsfw = false, + altTitle = null, + source = source, + ) + } + } - private fun Element.tableValue(title: String): Element? { - return select("h4.media-heading") - .find { it.selectFirst("label.subtit-manga")?.text()?.contains(title, ignoreCase = true) == true } - } + private fun Element.tableValue(title: String): Element? { + return select("h4.media-heading") + .find { it.selectFirst("label.subtit-manga")?.text()?.contains(title, ignoreCase = true) == true } + } - private fun Element.toMangaTag() = MangaTag( - title = text().toTitleCase(sourceLocale ?: Locale.ROOT), - key = attr("href").removeSuffix('/').substringAfterLast('/'), - source = source, - ) + private fun Element.toMangaTag() = MangaTag( + title = text().toTitleCase(sourceLocale), + key = attr("href").removeSuffix('/').substringAfterLast('/'), + source = source, + ) } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/IsekaiScanEuParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/IsekaiScanEuParser.kt index 3ec177c7..49b506a6 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/IsekaiScanEuParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/IsekaiScanEuParser.kt @@ -6,13 +6,14 @@ import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.model.Manga import org.koitharu.kotatsu.parsers.model.MangaChapter import org.koitharu.kotatsu.parsers.model.MangaSource +import org.koitharu.kotatsu.parsers.model.MangaTag import org.koitharu.kotatsu.parsers.util.* import java.text.SimpleDateFormat import java.util.* -@MangaSourceParser("ISEKAISCAN_EU", "IsekaiScan (eu)", "en") +@MangaSourceParser("ISEKAISCAN_EU", "IsekaiScan", "en") internal class IsekaiScanEuParser(context: MangaLoaderContext) : - MadaraParser(context, MangaSource.ISEKAISCAN_EU, "isekaiscan.eu") { + MadaraParser(context, MangaSource.ISEKAISCAN_EU, "isekaiscan.to") { override val datePattern = "MM/dd/yyyy" @@ -44,4 +45,29 @@ internal class IsekaiScanEuParser(context: MangaLoaderContext) : ) } } + + override suspend fun getTags(): Set { + val doc = webClient.httpGet("https://$domain/mangax/").parseHtml() + val body = doc.body() + val root1 = body.selectFirst("header")?.selectFirst("ul.second-menu") + val root2 = body.selectFirst("div.genres_wrap")?.selectFirst("ul.list-unstyled") + if (root1 == null && root2 == null) { + doc.parseFailed("Root not found") + } + val list = root1?.select("li").orEmpty() + root2?.select("li").orEmpty() + val keySet = HashSet(list.size) + return list.mapNotNullToSet { li -> + val a = li.selectFirst("a") ?: return@mapNotNullToSet null + val href = a.attr("href").removeSuffix("/") + .substringAfterLast(tagPrefix, "") + if (href.isEmpty() || !keySet.add(href)) { + return@mapNotNullToSet null + } + MangaTag( + key = href, + title = a.ownText().toTitleCase(Locale.ENGLISH), + source = source, + ) + } + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara5Parser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara5Parser.kt index 3cc26196..1b480d01 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara5Parser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara5Parser.kt @@ -13,174 +13,174 @@ import java.text.SimpleDateFormat import java.util.* abstract class Madara5Parser @InternalParsersApi constructor( - context: MangaLoaderContext, - source: MangaSource, - domain: String, + context: MangaLoaderContext, + source: MangaSource, + domain: String, ) : PagedMangaParser(context, source, pageSize = 22) { - protected open val datePattern = "MMMM dd, HH:mm" - protected open val tagPrefix = "/mangas/" - protected open val nsfwTags = arraySetOf("yaoi", "yuri", "mature") - - override val sortOrders: Set = EnumSet.of(SortOrder.UPDATED) - - override val configKeyDomain = ConfigKey.Domain(domain, null) - - override suspend fun getListPage( - page: Int, - query: String?, - tags: Set?, - sortOrder: SortOrder, - ): List { - val domain = domain - val url = buildString { - append("https://") - append(domain) - append("/search?s=") - if (!query.isNullOrEmpty()) { - append(query.urlEncoded()) - } - append("&post_type=wp-manga") - if (!tags.isNullOrEmpty()) { - for (tag in tags) { - append("&genre%5B%5D=") - append(tag.key) - } - } - append("&op=1&author=&artist=&page=") - append(page) - } - val root = webClient.httpGet(url).parseHtml().body().selectFirstOrThrow(".search-wrap") - return root.select(".c-tabs-item__content").map { div -> - val a = div.selectFirstOrThrow("a") - val img = div.selectLastOrThrow("img") - val href = a.attrAsRelativeUrl("href") - val postContent = root.selectFirstOrThrow(".post-content") - val tagSet = postContent.getElementsContainingOwnText("Genre") - .firstOrNull()?.tableValue() - ?.getElementsByAttributeValueContaining("href", tagPrefix) - ?.mapToSet { it.asMangaTag() }.orEmpty() - Manga( - id = generateUid(href), - title = a.attr("title"), - altTitle = postContent.getElementsContainingOwnText("Alternative") - .firstOrNull()?.tableValue()?.text()?.trim(), - url = href, - publicUrl = a.attrAsAbsoluteUrl("href"), - coverUrl = img.src().orEmpty(), - author = postContent.getElementsContainingOwnText("Author") - .firstOrNull()?.tableValue()?.text()?.trim(), - state = postContent.getElementsContainingOwnText("Status") - .firstOrNull()?.tableValue()?.text()?.asMangaState(), - isNsfw = isNsfw(tagSet), - rating = div.selectFirstOrThrow(".score").text() - .toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN, - tags = tagSet, - source = source, - ) - } - } - - override suspend fun getDetails(manga: Manga): Manga { - val root = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body() - .selectFirstOrThrow(".site-content") - val postContent = root.selectFirstOrThrow(".post-content") - val tags = postContent.getElementsContainingOwnText("Genre") - .firstOrNull()?.tableValue() - ?.getElementsByAttributeValueContaining("href", tagPrefix) - ?.mapToSet { a -> a.asMangaTag() } ?: manga.tags - val mangaId = root.getElementById("manga-chapters-holder")?.attr("data-id")?.toLongOrNull() - ?: root.parseFailed("Cannot find mangaId") - return manga.copy( - description = (root.selectFirst(".detail-content") - ?: root.selectFirstOrThrow(".description-summary")).html(), - author = postContent.getElementsContainingOwnText("Author") - .firstOrNull()?.tableValue()?.text()?.trim(), - state = postContent.getElementsContainingOwnText("Status") - .firstOrNull()?.tableValue()?.text()?.asMangaState(), - tags = tags, - isNsfw = isNsfw(tags), - chapters = loadChapters(mangaId), - ) - } - - override suspend fun getPages(chapter: MangaChapter): List { - val fullUrl = chapter.url.toAbsoluteUrl(domain) - val doc = webClient.httpGet(fullUrl).parseHtml() - val arrayData = doc.getElementById("arraydata") ?: doc.parseFailed("#arraydata not found") - return arrayData.html().split(',').map { url -> - MangaPage( - id = generateUid(url), - url = url, - referer = fullUrl, - preview = null, - source = source, - ) - } - } - - override suspend fun getTags(): Set { - val doc = webClient.httpGet("http://${domain}/").parseHtml().body() - return doc.getElementsByAttributeValueContaining("href", tagPrefix) - .mapToSet { it.asMangaTag() } - } - - private suspend fun loadChapters(mangaId: Long): List { - val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.US) - val doc = webClient.httpGet("https://${domain}/ajax-list-chapter?mangaID=$mangaId").parseHtml() - return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li -> - val a = li.selectFirstOrThrow("a") - val href = a.attrAsRelativeUrl("href") - MangaChapter( - id = generateUid(href), - url = href, - name = a.text(), - number = i + 1, - branch = null, - uploadDate = dateFormat.tryParse( - li.selectFirst(".chapter-release-date")?.text()?.trim(), - ), - scanlator = null, - source = source, - ) - } - } - - protected fun isNsfw(tags: Set): Boolean { - return tags.any { it.key in nsfwTags } - } - - private fun Element.src(): String? { - return absUrl("data-src").ifEmpty { - absUrl("src") - }.takeUnless { it.isEmpty() } - } - - private fun Element.tableValue(): Element { - for (p in parents()) { - val children = p.children() - if (children.size == 2) { - return children[1] - } - } - parseFailed("Cannot find tableValue for node ${text()}") - } - - private fun String.asMangaState() = when (trim().lowercase(sourceLocale ?: Locale.US)) { - "ongoing" -> MangaState.ONGOING - "completed" -> MangaState.FINISHED - else -> null - } - - private fun Element.asMangaTag() = MangaTag( - title = ownText(), - key = attr("href").removeSuffix('/').substringAfterLast('/') - .replace('-', '+'), - source = source, - ) - - @MangaSourceParser("MANGAOWLS", "BeautyManga", "en") - class BeautyManga(context: MangaLoaderContext) : Madara5Parser(context, MangaSource.MANGAOWLS, "beautymanga.com") { - - } + protected open val datePattern = "MMMM dd, HH:mm" + protected open val tagPrefix = "/mangas/" + protected open val nsfwTags = arraySetOf("yaoi", "yuri", "mature") + + override val sortOrders: Set = EnumSet.of(SortOrder.UPDATED) + + override val configKeyDomain = ConfigKey.Domain(domain, null) + + override suspend fun getListPage( + page: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List { + val domain = domain + val url = buildString { + append("https://") + append(domain) + append("/search?s=") + if (!query.isNullOrEmpty()) { + append(query.urlEncoded()) + } + append("&post_type=wp-manga") + if (!tags.isNullOrEmpty()) { + for (tag in tags) { + append("&genre%5B%5D=") + append(tag.key) + } + } + append("&op=1&author=&artist=&page=") + append(page) + } + val root = webClient.httpGet(url).parseHtml().body().selectFirstOrThrow(".search-wrap") + return root.select(".c-tabs-item__content").map { div -> + val a = div.selectFirstOrThrow("a") + val img = div.selectLastOrThrow("img") + val href = a.attrAsRelativeUrl("href") + val postContent = root.selectFirstOrThrow(".post-content") + val tagSet = postContent.getElementsContainingOwnText("Genre") + .firstOrNull()?.tableValue() + ?.getElementsByAttributeValueContaining("href", tagPrefix) + ?.mapToSet { it.asMangaTag() }.orEmpty() + Manga( + id = generateUid(href), + title = a.attr("title"), + altTitle = postContent.getElementsContainingOwnText("Alternative") + .firstOrNull()?.tableValue()?.text()?.trim(), + url = href, + publicUrl = a.attrAsAbsoluteUrl("href"), + coverUrl = img.src().orEmpty(), + author = postContent.getElementsContainingOwnText("Author") + .firstOrNull()?.tableValue()?.text()?.trim(), + state = postContent.getElementsContainingOwnText("Status") + .firstOrNull()?.tableValue()?.text()?.asMangaState(), + isNsfw = isNsfw(tagSet), + rating = div.selectFirstOrThrow(".score").text() + .toFloatOrNull()?.div(5f) ?: RATING_UNKNOWN, + tags = tagSet, + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val root = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body() + .selectFirstOrThrow(".site-content") + val postContent = root.selectFirstOrThrow(".post-content") + val tags = postContent.getElementsContainingOwnText("Genre") + .firstOrNull()?.tableValue() + ?.getElementsByAttributeValueContaining("href", tagPrefix) + ?.mapToSet { a -> a.asMangaTag() } ?: manga.tags + val mangaId = root.getElementById("manga-chapters-holder")?.attr("data-id")?.toLongOrNull() + ?: root.parseFailed("Cannot find mangaId") + return manga.copy( + description = (root.selectFirst(".detail-content") + ?: root.selectFirstOrThrow(".description-summary")).html(), + author = postContent.getElementsContainingOwnText("Author") + .firstOrNull()?.tableValue()?.text()?.trim(), + state = postContent.getElementsContainingOwnText("Status") + .firstOrNull()?.tableValue()?.text()?.asMangaState(), + tags = tags, + isNsfw = isNsfw(tags), + chapters = loadChapters(mangaId), + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val fullUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(fullUrl).parseHtml() + val arrayData = doc.getElementById("arraydata") ?: doc.parseFailed("#arraydata not found") + return arrayData.html().split(',').map { url -> + MangaPage( + id = generateUid(url), + url = url, + referer = fullUrl, + preview = null, + source = source, + ) + } + } + + override suspend fun getTags(): Set { + val doc = webClient.httpGet("http://${domain}/").parseHtml().body() + return doc.getElementsByAttributeValueContaining("href", tagPrefix) + .mapToSet { it.asMangaTag() } + } + + private suspend fun loadChapters(mangaId: Long): List { + val dateFormat = SimpleDateFormat(datePattern, sourceLocale) + val doc = webClient.httpGet("https://${domain}/ajax-list-chapter?mangaID=$mangaId").parseHtml() + return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li -> + val a = li.selectFirstOrThrow("a") + val href = a.attrAsRelativeUrl("href") + MangaChapter( + id = generateUid(href), + url = href, + name = a.text(), + number = i + 1, + branch = null, + uploadDate = dateFormat.tryParse( + li.selectFirst(".chapter-release-date")?.text()?.trim(), + ), + scanlator = null, + source = source, + ) + } + } + + protected fun isNsfw(tags: Set): Boolean { + return tags.any { it.key in nsfwTags } + } + + private fun Element.src(): String? { + return absUrl("data-src").ifEmpty { + absUrl("src") + }.takeUnless { it.isEmpty() } + } + + private fun Element.tableValue(): Element { + for (p in parents()) { + val children = p.children() + if (children.size == 2) { + return children[1] + } + } + parseFailed("Cannot find tableValue for node ${text()}") + } + + private fun String.asMangaState() = when (trim().lowercase(sourceLocale)) { + "ongoing" -> MangaState.ONGOING + "completed" -> MangaState.FINISHED + else -> null + } + + private fun Element.asMangaTag() = MangaTag( + title = ownText(), + key = attr("href").removeSuffix('/').substringAfterLast('/') + .replace('-', '+'), + source = source, + ) + + @MangaSourceParser("MANGAOWLS", "BeautyManga", "en") + class BeautyManga(context: MangaLoaderContext) : Madara5Parser(context, MangaSource.MANGAOWLS, "beautymanga.com") { + + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara6Parser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara6Parser.kt index a38f4b97..a476644b 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara6Parser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/Madara6Parser.kt @@ -7,64 +7,63 @@ import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* import java.text.SimpleDateFormat -import java.util.* internal abstract class Madara6Parser( - context: MangaLoaderContext, - source: MangaSource, - domain: String, + context: MangaLoaderContext, + source: MangaSource, + domain: String, ) : MadaraParser(context, source, domain) { - override val datePattern: String = "dd MMMM yyyy" + override val datePattern: String = "dd MMMM yyyy" - override suspend fun getDetails(manga: Manga): Manga { - return coroutineScope { - val chapters = async { loadChapters(manga.url) } - val body = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body() - parseDetails(manga, body, chapters.await()) - } - } + override suspend fun getDetails(manga: Manga): Manga { + return coroutineScope { + val chapters = async { loadChapters(manga.url) } + val body = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body() + parseDetails(manga, body, chapters.await()) + } + } - protected fun Element.tableValue(): Element { - for (p in parents()) { - val children = p.children() - if (children.size == 2) { - return children[1] - } - } - parseFailed("Cannot find tableValue for node ${text()}") - } + protected fun Element.tableValue(): Element { + for (p in parents()) { + val children = p.children() + if (children.size == 2) { + return children[1] + } + } + parseFailed("Cannot find tableValue for node ${text()}") + } - protected abstract fun String.asMangaState(): MangaState? + protected abstract fun String.asMangaState(): MangaState? - protected fun Element.asMangaTag() = MangaTag( - title = ownText(), - key = attr("href").removeSuffix('/').substringAfterLast('/') - .replace('-', '+'), - source = source, - ) + protected fun Element.asMangaTag() = MangaTag( + title = ownText(), + key = attr("href").removeSuffix('/').substringAfterLast('/') + .replace('-', '+'), + source = source, + ) - protected open suspend fun loadChapters(mangaUrl: String): List { - val url = mangaUrl.toAbsoluteUrl(domain).removeSuffix('/') + "/ajax/chapters/" - val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.ROOT) - val doc = webClient.httpPost(url, emptyMap()).parseHtml() - return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li -> - val a = li.selectFirstOrThrow("a") - val href = a.attrAsRelativeUrl("href") - MangaChapter( - id = generateUid(href), - url = href, - name = a.text(), - number = i + 1, - branch = null, - uploadDate = dateFormat.tryParse( - li.selectFirst(".chapter-release-date")?.text()?.trim(), - ), - scanlator = null, - source = source, - ) - } - } + protected open suspend fun loadChapters(mangaUrl: String): List { + val url = mangaUrl.toAbsoluteUrl(domain).removeSuffix('/') + "/ajax/chapters/" + val dateFormat = SimpleDateFormat(datePattern, sourceLocale) + val doc = webClient.httpPost(url, emptyMap()).parseHtml() + return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li -> + val a = li.selectFirstOrThrow("a") + val href = a.attrAsRelativeUrl("href") + MangaChapter( + id = generateUid(href), + url = href, + name = a.text(), + number = i + 1, + branch = null, + uploadDate = dateFormat.tryParse( + li.selectFirst(".chapter-release-date")?.text()?.trim(), + ), + scanlator = null, + source = source, + ) + } + } - protected abstract fun parseDetails(manga: Manga, body: Element, chapters: List): Manga + protected abstract fun parseDetails(manga: Manga, body: Element, chapters: List): Manga } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt index d5e40952..e03e8550 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/MadaraParser.kt @@ -153,7 +153,7 @@ internal abstract class MadaraParser( protected open suspend fun getChapters(manga: Manga, doc: Document): List { val root2 = doc.body().selectFirstOrThrow("div.content-area") .selectFirstOrThrow("div.c-page") - val dateFormat = SimpleDateFormat(datePattern, sourceLocale ?: Locale.US) + val dateFormat = SimpleDateFormat(datePattern, sourceLocale) return root2.select("li").asReversed().mapChapters { i, li -> val a = li.selectFirst("a") val href = a?.attrAsRelativeUrlOrNull("href") ?: li.parseFailed("Link is missing") @@ -285,11 +285,11 @@ internal abstract class MadaraParser( } } - private fun Element.src(): String? { + protected fun Element.src(): String? { var result = absUrl("data-src") if (result.isEmpty()) result = absUrl("data-cfsrc") if (result.isEmpty()) result = absUrl("src") - return if (result.isEmpty()) null else result + return result.ifEmpty { null } } private fun createRequestTemplate() = diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/NeatManga.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/NeatManga.kt index 2e325c6b..3e8e4bb7 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/NeatManga.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/NeatManga.kt @@ -5,10 +5,7 @@ import kotlinx.coroutines.coroutineScope import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.exception.ParseException -import org.koitharu.kotatsu.parsers.model.Manga -import org.koitharu.kotatsu.parsers.model.MangaChapter -import org.koitharu.kotatsu.parsers.model.MangaSource -import org.koitharu.kotatsu.parsers.model.MangaTag +import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* import java.text.SimpleDateFormat import java.util.* @@ -16,6 +13,8 @@ import java.util.* @MangaSourceParser("NEATMANGA", "NeatManga", "en") internal class NeatManga(context: MangaLoaderContext) : MadaraParser(context, MangaSource.NEATMANGA, "neatmangas.com") { + override val datePattern = "dd MMMM yyyy" + override suspend fun getDetails(manga: Manga): Manga = coroutineScope { val chaptersDeferred = async { getChapters(manga) } val fullUrl = manga.url.toAbsoluteUrl(domain) @@ -72,4 +71,26 @@ internal class NeatManga(context: MangaLoaderContext) : MadaraParser(context, Ma ) } } + + override suspend fun getPages(chapter: MangaChapter): List { + val fullUrl = chapter.url.toAbsoluteUrl(domain) + val doc = webClient.httpGet(fullUrl).parseHtml() + val root = doc.body().selectFirst("div.main-col-inner") + ?.selectFirst("div.reading-content") + ?: throw ParseException("Root not found", fullUrl) + return root.select("div.page-break").mapNotNull { div -> + val img = div.selectFirst("img") + if (img == null || img.attr("id").isNullOrEmpty()) { + return@mapNotNull null + } + val url = img.src()?.toRelativeUrl(domain) ?: div.parseFailed("Image src not found") + MangaPage( + id = generateUid(url), + url = url, + preview = null, + referer = fullUrl, + source = source, + ) + } + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/PrismaScansParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/PrismaScansParser.kt index cea3be89..b9d0c822 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/PrismaScansParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/madara/PrismaScansParser.kt @@ -10,45 +10,44 @@ import org.koitharu.kotatsu.parsers.model.MangaState import org.koitharu.kotatsu.parsers.util.attrAsAbsoluteUrlOrNull import org.koitharu.kotatsu.parsers.util.mapToSet import org.koitharu.kotatsu.parsers.util.selectFirstOrThrow -import java.util.* @MangaSourceParser("PRISMA_SCANS", "Prisma Scans", "pt") internal class PrismaScansParser(context: MangaLoaderContext) : - Madara6Parser(context, MangaSource.PRISMA_SCANS, "prismascans.net") { + Madara6Parser(context, MangaSource.PRISMA_SCANS, "prismascans.net") { - override val tagPrefix = "manga-genre/" - override val datePattern = "MMM dd, yyyy" + override val tagPrefix = "manga-genre/" + override val datePattern = "MMM dd, yyyy" - override fun parseDetails(manga: Manga, body: Element, chapters: List): Manga { - val root = body.selectFirstOrThrow(".site-content") - val postContent = root.selectFirstOrThrow(".post-content") - val tags = postContent.getElementsContainingOwnText("Gênero") - .firstOrNull()?.tableValue() - ?.getElementsByAttributeValueContaining("href", tagPrefix) - ?.mapToSet { a -> a.asMangaTag() } ?: manga.tags - return manga.copy( - largeCoverUrl = root.selectFirst("picture") - ?.selectFirst("img[data-src]") - ?.attrAsAbsoluteUrlOrNull("data-src"), - description = root.selectFirstOrThrow(".manga-excerpt").firstElementChild()?.html(), - author = postContent.getElementsContainingOwnText("Artista") - .firstOrNull()?.tableValue()?.text()?.trim(), - altTitle = postContent.getElementsContainingOwnText("Título Alternativo") - .firstOrNull()?.tableValue()?.text()?.trim(), - state = postContent.getElementsContainingOwnText("Status") - .firstOrNull()?.tableValue()?.text()?.asMangaState(), - tags = tags, - isNsfw = body.hasClass("adult-content"), - chapters = chapters, - ) - } + override fun parseDetails(manga: Manga, body: Element, chapters: List): Manga { + val root = body.selectFirstOrThrow(".site-content") + val postContent = root.selectFirstOrThrow(".post-content") + val tags = postContent.getElementsContainingOwnText("Gênero") + .firstOrNull()?.tableValue() + ?.getElementsByAttributeValueContaining("href", tagPrefix) + ?.mapToSet { a -> a.asMangaTag() } ?: manga.tags + return manga.copy( + largeCoverUrl = root.selectFirst("picture") + ?.selectFirst("img[data-src]") + ?.attrAsAbsoluteUrlOrNull("data-src"), + description = root.selectFirstOrThrow(".manga-excerpt").firstElementChild()?.html(), + author = postContent.getElementsContainingOwnText("Artista") + .firstOrNull()?.tableValue()?.text()?.trim(), + altTitle = postContent.getElementsContainingOwnText("Título Alternativo") + .firstOrNull()?.tableValue()?.text()?.trim(), + state = postContent.getElementsContainingOwnText("Status") + .firstOrNull()?.tableValue()?.text()?.asMangaState(), + tags = tags, + isNsfw = body.hasClass("adult-content"), + chapters = chapters, + ) + } - override fun String.asMangaState() = when (trim().lowercase(sourceLocale ?: Locale.ROOT)) { - "em lançamento" -> MangaState.ONGOING - "completo", - "cancelado", - -> MangaState.FINISHED + override fun String.asMangaState() = when (trim().lowercase(sourceLocale)) { + "em lançamento" -> MangaState.ONGOING + "completo", + "cancelado", + -> MangaState.FINISHED - else -> null - } + else -> null + } } diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/MangaReaderParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/MangaReaderParser.kt index 215a6e11..ef776dc5 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/MangaReaderParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/mangareader/MangaReaderParser.kt @@ -223,6 +223,7 @@ internal abstract class MangaReaderParser( private fun Element.imageUrl(): String { return attrAsAbsoluteUrlOrNull("src") + ?: attrAsAbsoluteUrlOrNull("data-src") ?: attrAsAbsoluteUrlOrNull("data-cfsrc") ?: "" } @@ -258,6 +259,7 @@ internal abstract class MangaReaderParser( override val configKeyDomain: ConfigKey.Domain get() = ConfigKey.Domain("manhwaindo.id", null) + override val chapterDateFormat = SimpleDateFormat("MMMM dd, yyyy", Locale.ENGLISH) override val listUrl: String get() = "/series" override val tableMode: Boolean get() = false } @@ -499,7 +501,7 @@ internal abstract class MangaReaderParser( override val tableMode: Boolean get() = false - override val chapterDateFormat: SimpleDateFormat = SimpleDateFormat("MMM d, yyyy", Locale.ENGLISH) + override val chapterDateFormat: SimpleDateFormat = SimpleDateFormat("MMM d, yyyy", sourceLocale) override suspend fun parseInfoList(docs: Document, manga: Manga, chapters: List): Manga { val infoElement = docs.selectFirst("div.infox") @@ -542,7 +544,7 @@ internal abstract class MangaReaderParser( class KomikLokalParser(context: MangaLoaderContext) : MangaReaderParser(context, MangaSource.KOMIKLOKAL, pageSize = 20, searchPageSize = 10) { override val configKeyDomain: ConfigKey.Domain - get() = ConfigKey.Domain("komiklokal.pics", null) + get() = ConfigKey.Domain("komikmirror.art", null) override val listUrl: String get() = "/manga" @@ -592,7 +594,7 @@ internal abstract class MangaReaderParser( class KomikDewasaParser(context: MangaLoaderContext) : MangaReaderParser(context, MangaSource.KOMIKDEWASA, pageSize = 20, searchPageSize = 10) { override val configKeyDomain: ConfigKey.Domain - get() = ConfigKey.Domain("komikdewasa.club", null) + get() = ConfigKey.Domain("komikdewasa.info", null) override val listUrl: String get() = "/manga" @@ -656,7 +658,7 @@ internal abstract class MangaReaderParser( class KomikMangaParser(context: MangaLoaderContext) : MangaReaderParser(context, MangaSource.KOMIKMANGA, pageSize = 20, searchPageSize = 10) { override val configKeyDomain: ConfigKey.Domain - get() = ConfigKey.Domain("komikmanga.xyz", null) + get() = ConfigKey.Domain("komikhentai.co", null) override val listUrl: String get() = "/project" diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt index f76e079e..6e5faeb8 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Jsoup.kt @@ -32,7 +32,7 @@ fun Element.attrOrNull(attributeKey: String) = attr(attributeKey).takeUnless { i */ fun Element.attrAsRelativeUrlOrNull(attributeKey: String): String? { val attr = attr(attributeKey).trim() - if (attr.isEmpty()) { + if (attr.isEmpty() || attr.startsWith("data:")) { return null } if (attr.startsWith("/")) { @@ -63,7 +63,7 @@ fun Element.attrAsRelativeUrl(attributeKey: String): String { */ fun Element.attrAsAbsoluteUrlOrNull(attributeKey: String): String? { val attr = attr(attributeKey).trim() - if (attr.isEmpty()) { + if (attr.isEmpty() || attr.startsWith("data:")) { return null } return (baseUri().toHttpUrlOrNull()?.newBuilder(attr) ?: return null).toString() @@ -114,4 +114,4 @@ fun Element.selectLast(cssQuery: String): Element? { fun Element.selectLastOrThrow(cssQuery: String): Element { return selectLast(cssQuery) ?: throw ParseException("Cannot find \"$cssQuery\"", baseUri()) -} \ No newline at end of file +} diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index 337b484e..9ea50a30 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -23,7 +23,7 @@ internal class MangaParserTest { @MangaSources fun list(source: MangaSource) = runTest { val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) checkMangaList(list, "list") assert(list.all { it.source == source }) } @@ -46,7 +46,7 @@ internal class MangaParserTest { @MangaSources fun search(source: MangaSource) = runTest { val parser = source.newParser(context) - val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull { + val subject = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull { it.title.length } ?: error("No manga found") val query = subject.title @@ -84,7 +84,7 @@ internal class MangaParserTest { @MangaSources fun details(source: MangaSource) = runTest { val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) val manga = list[3] parser.getDetails(manga).apply { assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } @@ -113,7 +113,7 @@ internal class MangaParserTest { @MangaSources fun pages(source: MangaSource) = runTest { val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) val manga = list.first() val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null") val pages = parser.getPages(chapter)