From c976e0e4e0b05cab49f1aa5933094683913fcfb8 Mon Sep 17 00:00:00 2001 From: vianh Date: Thu, 1 Sep 2022 22:17:31 +0700 Subject: [PATCH 1/5] [BlogTruyen] New source --- .gitignore | 3 +- .../kotatsu/parsers/site/BlogTruyenParser.kt | 246 ++++++++++++++++++ 2 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt diff --git a/.gitignore b/.gitignore index 49187094b..04c031559 100644 --- a/.gitignore +++ b/.gitignore @@ -75,4 +75,5 @@ build/ .idea/**/ktlint.xml .idea/codeStyles/ -src/test/resources/cookies.txt \ No newline at end of file +src/test/resources/cookies.txt +local.properties \ No newline at end of file diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt new file mode 100644 index 000000000..dcf6fae73 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt @@ -0,0 +1,246 @@ +package org.koitharu.kotatsu.parsers.site + +import androidx.collection.ArrayMap +import org.json.JSONArray +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element +import org.koitharu.kotatsu.parsers.MangaLoaderContext +import org.koitharu.kotatsu.parsers.MangaParser +import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import java.text.SimpleDateFormat +import java.util.* +import kotlin.collections.HashSet + +@MangaSourceParser("BLOGTRUYEN", "BlogTruyen", "vi") +class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(MangaSource.BLOGTRUYEN) { + override val configKeyDomain: ConfigKey.Domain + get() = ConfigKey.Domain("blogtruyen.vn", null) + + override val sortOrders: Set + get() = EnumSet.of(SortOrder.UPDATED) + + private val dateFormat = SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.US) + private var cacheTags: ArrayMap? = null + + override fun getFaviconUrl() = "https://${getDomain()}/Content/themes/img/favicon.ico" + + override suspend fun getDetails(manga: Manga): Manga { + val doc = context.httpGet(manga.url.toAbsoluteUrl(getDomain())).parseHtml() + val descriptionElement = doc.selectFirstOrThrow("div.description") + val statusText = descriptionElement + .selectFirst("p:contains(Trạng thái) > span.color-red") + ?.text() + + val state = when(statusText) { + "Đang tiến hành" -> MangaState.ONGOING + "Đã hoàn thành" -> MangaState.FINISHED + else -> null + } + + val rating = doc.selectFirst("span.total-vote")?.attr("ng-init")?.let { text -> + val like = text.substringAfter("TotalLike=") + .substringBefore(';') + .toIntOrNull() ?: return@let RATING_UNKNOWN + val dislike = text.substringAfter("TotalDisLike=") + .toIntOrNull() ?: return@let RATING_UNKNOWN + + when { + like == 0 && dislike == 0 -> RATING_UNKNOWN + else -> like.toFloat() / (like + dislike) + } + } + + val tagMap = getOrCreateTagMap() + val tags = descriptionElement.select("p > span.category").mapNotNullToSet { + val tagName = it.selectFirst("a")?.text()?.trim() ?: return@mapNotNullToSet null + tagMap[tagName] + } + + return manga.copy( + tags = tags, + author = descriptionElement.selectFirst("p:contains(Tác giả) > a")?.text(), + description = doc.selectFirst(".detail .content")?.html(), + chapters = parseChapterList(doc), + largeCoverUrl = doc.selectLast("div.thumbnail > img")?.attrAsAbsoluteUrlOrNull("src"), + state = state, + rating = rating ?: RATING_UNKNOWN, + isNsfw = doc.getElementById("warningCategory") != null + ) + } + + private fun parseChapterList(doc: Document): List { + val chapterList = doc.select("#list-chapters > p") + return chapterList.asReversed().mapChapters { index, element -> + val titleElement = element.selectFirst("span.title > a") ?: return@mapChapters null + val name = titleElement.text() + val relativeUrl = titleElement.attr("href") + val id = relativeUrl.substringAfter('/').substringBefore('/') + val uploadDate = dateFormat.tryParse(element.select("span.publishedDate").text()) + MangaChapter( + id = generateUid(id), + name = name, + number = index + 1, + url = relativeUrl, + scanlator = null, + uploadDate = uploadDate, + branch = null, + source = source + ) + } + } + + override suspend fun getList( + offset: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder, + ): List { + val page = (offset / 20f).toIntUp() + 1 + return when { + !query.isNullOrEmpty() -> { + val searchUrl = "https://${getDomain()}/timkiem/nangcao/1/0/-1/-1?txt=$query&p=$page" + val searchContent = context.httpGet(searchUrl).parseHtml() + .selectFirst("section.list-manga-bycate > div.list") + parseMangaList(searchContent) + } + + !tags.isNullOrEmpty() -> { + val tag = tags.oneOrThrowIfMany()!! + val categoryAjax = "https://${getDomain()}/ajax/Category/AjaxLoadMangaByCategory?id=${tag.key}&orderBy=5&p=$page" + val listContent = context.httpGet(categoryAjax).parseHtml().selectFirst("div.list") + parseMangaList(listContent) + } + else -> getNormalList(page) + } + } + + private suspend fun getNormalList(page: Int): List { + val pageLink = "https://${getDomain()}/page-$page" + val doc = context.httpGet(pageLink).parseHtml() + val listElements = doc.selectFirstOrThrow("section.list-mainpage.listview") + .select("div.bg-white.storyitem") + + return listElements.mapNotNull { + val linkTag = it.selectFirst("div.fl-l > a") ?: return@mapNotNull null + val relativeUrl = linkTag.attr("href") + val tagMap = getOrCreateTagMap() + val tags = it.select("footer > div.category > a").mapNotNullToSet { a -> + tagMap[a.text()] + } + + Manga( + id = generateUid(relativeUrl), + title = linkTag.attr("title"), + altTitle = null, + description = it.selectFirst("p.al-j.break.line-height-15")?.text(), + url = relativeUrl, + publicUrl = relativeUrl.toAbsoluteUrl(getDomain()), + coverUrl = linkTag.selectLast("img")?.attr("src").orEmpty(), + source = source, + tags = tags, + isNsfw = false, + rating = RATING_UNKNOWN, + author = null, + state = null, + ) + } + } + + private fun parseMangaList(listElement: Element?): List { + listElement ?: return emptyList() + + return listElement.select("span.tiptip[data-tiptip]").mapNotNull { + val mangaInfo = listElement.getElementById(it.attr("data-tiptip")) ?: return@mapNotNull null + val a = it.selectFirst("a") ?: return@mapNotNull null + val relativeUrl = a.attr("href") + Manga( + id = generateUid(relativeUrl), + title = a.text(), + altTitle = null, + description = mangaInfo.select("div.al-j.fs-12").text(), + url = relativeUrl, + publicUrl = relativeUrl.toAbsoluteUrl(getDomain()), + coverUrl = mangaInfo.selectFirst("div > img.img")?.attr("src").orEmpty(), + isNsfw = false, + rating = RATING_UNKNOWN, + tags = emptySet(), + author = null, + state = null, + source = source, + ) + } + } + + override suspend fun getPages(chapter: MangaChapter): List { + fun generateImageId(index: Int) = generateUid("${chapter.url}/$index") + + val doc = context.httpGet(chapter.url.toAbsoluteUrl(getDomain())).parseHtml() + val pages = ArrayList() + val referer = chapter.url.toAbsoluteUrl(getDomain()) + doc.select("#content > img").forEach { img -> + val url = img.attr("src") + pages.add( + MangaPage( + id = generateImageId(pages.lastIndex), + url = url, + referer = referer, + preview = null, + source = source, + ) + ) + } + + // Some chapters use js script to render images + val script = doc.selectLast("#content > script") + if (script != null && script.data().contains("listImageCaption")) { + val imagesStr = script.data().substringBefore(';').substringAfterLast('=').trim() + val imageArr = JSONArray(imagesStr) + for (i in 0 until imageArr.length()) { + val imageUrl = imageArr.getJSONObject(i).getString("url") + pages.add( + MangaPage( + id = generateImageId(pages.lastIndex), + url = imageUrl, + referer = referer, + preview = null, + source = source + ) + ) + } + } + + return pages + } + + override suspend fun getTags(): Set { + val map = getOrCreateTagMap() + val tags = HashSet(map.size) + for (entry in map) { + tags.add(entry.value) + } + + return tags + } + + + private suspend fun getOrCreateTagMap(): ArrayMap { + cacheTags?.let { return it } + val doc = context.httpGet("https://${getDomain()}/timkiem/nangcao").parseHtml() + val tagItems = doc.select("li[data-id]") + val tagMap = ArrayMap(tagItems.size) + for (tag in tagItems) { + val title = tag.text().trim() + tagMap[tag.text().trim()] = MangaTag( + title = title, + key = tag.attr("data-id"), + source = source + ) + } + + cacheTags = tagMap + return tagMap + } +} \ No newline at end of file From 599f606aef8c25795db686d892dee18eaabd978a Mon Sep 17 00:00:00 2001 From: vianh Date: Thu, 1 Sep 2022 22:27:48 +0700 Subject: [PATCH 2/5] Remove deprecated favicon url --- .../org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt index dcf6fae73..84f075b39 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt @@ -25,8 +25,6 @@ class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(Ma private val dateFormat = SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.US) private var cacheTags: ArrayMap? = null - override fun getFaviconUrl() = "https://${getDomain()}/Content/themes/img/favicon.ico" - override suspend fun getDetails(manga: Manga): Manga { val doc = context.httpGet(manga.url.toAbsoluteUrl(getDomain())).parseHtml() val descriptionElement = doc.selectFirstOrThrow("div.description") From e3612ef466f22bbaa1d1f8623e7d76594b2afe9e Mon Sep 17 00:00:00 2001 From: vianh Date: Fri, 2 Sep 2022 10:57:28 +0700 Subject: [PATCH 3/5] [BlogTruyen] Update to use PagedMangaParser --- .../kotatsu/parsers/site/BlogTruyenParser.kt | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt index 84f075b39..caa669598 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt @@ -5,8 +5,8 @@ import org.json.JSONArray import org.jsoup.nodes.Document import org.jsoup.nodes.Element import org.koitharu.kotatsu.parsers.MangaLoaderContext -import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaSourceParser +import org.koitharu.kotatsu.parsers.PagedMangaParser import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.util.* @@ -15,7 +15,9 @@ import java.util.* import kotlin.collections.HashSet @MangaSourceParser("BLOGTRUYEN", "BlogTruyen", "vi") -class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(MangaSource.BLOGTRUYEN) { +class BlogTruyenParser(override val context: MangaLoaderContext) : + PagedMangaParser(MangaSource.BLOGTRUYEN, pageSize = 20) { + override val configKeyDomain: ConfigKey.Domain get() = ConfigKey.Domain("blogtruyen.vn", null) @@ -32,7 +34,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(Ma .selectFirst("p:contains(Trạng thái) > span.color-red") ?.text() - val state = when(statusText) { + val state = when (statusText) { "Đang tiến hành" -> MangaState.ONGOING "Đã hoàn thành" -> MangaState.FINISHED else -> null @@ -90,13 +92,12 @@ class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(Ma } } - override suspend fun getList( - offset: Int, + override suspend fun getListPage( + page: Int, query: String?, tags: Set?, sortOrder: SortOrder, ): List { - val page = (offset / 20f).toIntUp() + 1 return when { !query.isNullOrEmpty() -> { val searchUrl = "https://${getDomain()}/timkiem/nangcao/1/0/-1/-1?txt=$query&p=$page" @@ -226,7 +227,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(Ma private suspend fun getOrCreateTagMap(): ArrayMap { cacheTags?.let { return it } - val doc = context.httpGet("https://${getDomain()}/timkiem/nangcao").parseHtml() + val doc = context.httpGet("/timkiem/nangcao".toAbsoluteUrl(getDomain())).parseHtml() val tagItems = doc.select("li[data-id]") val tagMap = ArrayMap(tagItems.size) for (tag in tagItems) { @@ -241,4 +242,4 @@ class BlogTruyenParser(override val context: MangaLoaderContext): MangaParser(Ma cacheTags = tagMap return tagMap } -} \ No newline at end of file +} From aec404c0a1f9f1362480426759903c0a179bede4 Mon Sep 17 00:00:00 2001 From: ViAnh <37103340+VietAnh14@users.noreply.github.com> Date: Fri, 2 Sep 2022 17:23:50 +0700 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Koitharu --- .../kotatsu/parsers/site/BlogTruyenParser.kt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt index caa669598..cf9b6381a 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt @@ -76,7 +76,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : return chapterList.asReversed().mapChapters { index, element -> val titleElement = element.selectFirst("span.title > a") ?: return@mapChapters null val name = titleElement.text() - val relativeUrl = titleElement.attr("href") + val relativeUrl = titleElement.attrAsRelativeUrl("href") val id = relativeUrl.substringAfter('/').substringBefore('/') val uploadDate = dateFormat.tryParse(element.select("span.publishedDate").text()) MangaChapter( @@ -100,7 +100,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : ): List { return when { !query.isNullOrEmpty() -> { - val searchUrl = "https://${getDomain()}/timkiem/nangcao/1/0/-1/-1?txt=$query&p=$page" + val searchUrl = "https://${getDomain()}/timkiem/nangcao/1/0/-1/-1?txt=${query.urlEncoded()}&p=$page" val searchContent = context.httpGet(searchUrl).parseHtml() .selectFirst("section.list-manga-bycate > div.list") parseMangaList(searchContent) @@ -124,7 +124,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : return listElements.mapNotNull { val linkTag = it.selectFirst("div.fl-l > a") ?: return@mapNotNull null - val relativeUrl = linkTag.attr("href") + val relativeUrl = linkTag.attrAsRelativeUrl("href") val tagMap = getOrCreateTagMap() val tags = it.select("footer > div.category > a").mapNotNullToSet { a -> tagMap[a.text()] @@ -154,7 +154,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : return listElement.select("span.tiptip[data-tiptip]").mapNotNull { val mangaInfo = listElement.getElementById(it.attr("data-tiptip")) ?: return@mapNotNull null val a = it.selectFirst("a") ?: return@mapNotNull null - val relativeUrl = a.attr("href") + val relativeUrl = a.attrAsRelativeUrl("href") Manga( id = generateUid(relativeUrl), title = a.text(), @@ -162,7 +162,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : description = mangaInfo.select("div.al-j.fs-12").text(), url = relativeUrl, publicUrl = relativeUrl.toAbsoluteUrl(getDomain()), - coverUrl = mangaInfo.selectFirst("div > img.img")?.attr("src").orEmpty(), + coverUrl = mangaInfo.selectFirst("div > img.img")?.absUrl("src").orEmpty(), isNsfw = false, rating = RATING_UNKNOWN, tags = emptySet(), @@ -180,7 +180,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : val pages = ArrayList() val referer = chapter.url.toAbsoluteUrl(getDomain()) doc.select("#content > img").forEach { img -> - val url = img.attr("src") + val url = img.attrAsRelativeUrl("src") pages.add( MangaPage( id = generateImageId(pages.lastIndex), From 1577afc4a9ae75f7afdd06baaee699f8e04d7123 Mon Sep 17 00:00:00 2001 From: vianh Date: Fri, 2 Sep 2022 17:35:43 +0700 Subject: [PATCH 5/5] [BlogTruyen] Synchronize tags with mutex --- .../koitharu/kotatsu/parsers/site/BlogTruyenParser.kt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt index cf9b6381a..08c1514db 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/BlogTruyenParser.kt @@ -1,6 +1,8 @@ package org.koitharu.kotatsu.parsers.site import androidx.collection.ArrayMap +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock import org.json.JSONArray import org.jsoup.nodes.Document import org.jsoup.nodes.Element @@ -24,6 +26,7 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : override val sortOrders: Set get() = EnumSet.of(SortOrder.UPDATED) + private val mutex = Mutex() private val dateFormat = SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.US) private var cacheTags: ArrayMap? = null @@ -225,8 +228,8 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : } - private suspend fun getOrCreateTagMap(): ArrayMap { - cacheTags?.let { return it } + private suspend fun getOrCreateTagMap(): ArrayMap = mutex.withLock { + cacheTags?.let { return@withLock it } val doc = context.httpGet("/timkiem/nangcao".toAbsoluteUrl(getDomain())).parseHtml() val tagItems = doc.select("li[data-id]") val tagMap = ArrayMap(tagItems.size) @@ -240,6 +243,6 @@ class BlogTruyenParser(override val context: MangaLoaderContext) : } cacheTags = tagMap - return tagMap + tagMap } }