Fix chapters duplicates

pull/36/head
Koitharu 4 years ago
parent 8c26f3c790
commit 5be4fb1114
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -54,6 +54,7 @@ internal class BatoToParser(override val context: MangaLoaderContext) : MangaPar
when (sortOrder) {
SortOrder.UPDATED,
-> append("update.za")
SortOrder.POPULARITY -> append("views_a.za")
SortOrder.NEWEST -> append("create.za")
SortOrder.ALPHABETICAL -> append("title.az")
@ -93,7 +94,7 @@ internal class BatoToParser(override val context: MangaLoaderContext) : MangaPar
?.selectFirst(".main")
?.children()
?.reversed()
?.mapIndexedNotNull { i, div ->
?.mapChapters { i, div ->
div.parseChapter(i)
}.orEmpty(),
)

@ -158,7 +158,7 @@ internal class ExHentaiParser(
a.text().toIntOrNull() != null
}?.let { a ->
val count = a.text().toInt()
val chapters = ArrayList<MangaChapter>(count)
val chapters = ChaptersListBuilder(count)
for (i in 1..count) {
val url = "${manga.url}?p=${i - 1}"
chapters += MangaChapter(
@ -172,7 +172,7 @@ internal class ExHentaiParser(
branch = null,
)
}
chapters
chapters.toList()
},
)
}

@ -125,7 +125,7 @@ abstract class Madara5Parser @InternalParsersApi constructor(
private suspend fun loadChapters(mangaId: Long): List<MangaChapter> {
val dateFormat = SimpleDateFormat("MMMM dd, HH:mm", sourceLocale ?: Locale.US)
val doc = context.httpGet("https://${getDomain()}/ajax-list-chapter?mangaID=$mangaId").parseHtml()
return doc.select("li.wp-manga-chapter").asReversed().mapIndexed { i, li ->
return doc.select("li.wp-manga-chapter").asReversed().mapChapters { i, li ->
val a = li.selectFirstOrThrow("a")
val href = a.attrAsRelativeUrl("href")
MangaChapter(

@ -137,7 +137,7 @@ internal abstract class MadaraParser(
?.select("p")
?.filterNot { it.ownText().startsWith("A brief description") }
?.joinToString { it.html() },
chapters = root2.select("li").asReversed().mapIndexed { i, li ->
chapters = root2.select("li").asReversed().mapChapters { i, li ->
val a = li.selectFirst("a")
val href = a?.attrAsRelativeUrlOrNull("href") ?: parseFailed("Link is missing")
MangaChapter(
@ -200,6 +200,7 @@ internal abstract class MadaraParser(
set(Calendar.MILLISECOND, 0)
}.timeInMillis
}
date.startsWith("today", ignoreCase = true) -> {
Calendar.getInstance().apply {
set(Calendar.HOUR_OF_DAY, 0)
@ -208,6 +209,7 @@ internal abstract class MadaraParser(
set(Calendar.MILLISECOND, 0)
}.timeInMillis
}
date.contains(Regex("""\d(st|nd|rd|th)""")) -> {
// Clean date (e.g. 5th December 2019 to 5 December 2019) before parsing it
date.split(" ").map {
@ -219,6 +221,7 @@ internal abstract class MadaraParser(
}
.let { dateFormat.tryParse(it.joinToString(" ")) }
}
else -> dateFormat.tryParse(date)
}
}
@ -238,24 +241,28 @@ internal abstract class MadaraParser(
"dia",
"day",
).anyWordIn(date) -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) }.timeInMillis
WordSet("jam", "saat", "heure", "hora", "hour").anyWordIn(date) -> cal.apply {
add(
Calendar.HOUR,
-number,
)
}.timeInMillis
WordSet("menit", "dakika", "min", "minute", "minuto").anyWordIn(date) -> cal.apply {
add(
Calendar.MINUTE,
-number,
)
}.timeInMillis
WordSet("detik", "segundo", "second").anyWordIn(date) -> cal.apply {
add(
Calendar.SECOND,
-number,
)
}.timeInMillis
WordSet("month").anyWordIn(date) -> cal.apply { add(Calendar.MONTH, -number) }.timeInMillis
WordSet("year").anyWordIn(date) -> cal.apply { add(Calendar.YEAR, -number) }.timeInMillis
else -> 0
@ -270,12 +277,12 @@ internal abstract class MadaraParser(
private fun createRequestTemplate() =
(
"action=madara_load_more&page=1&template=madara-core%2Fcontent%2Fcontent-search&vars%5Bs%5D=&vars%5B" +
"orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" +
"%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" +
"%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" +
"%5D=desc&vars%5Bmanga_archives_item_layout%5D=default"
).split('&')
"action=madara_load_more&page=1&template=madara-core%2Fcontent%2Fcontent-search&vars%5Bs%5D=&vars%5B" +
"orderby%5D=meta_value_num&vars%5Bpaged%5D=1&vars%5Btemplate%5D=search&vars%5Bmeta_query" +
"%5D%5B0%5D%5Brelation%5D=AND&vars%5Bmeta_query%5D%5Brelation%5D=OR&vars%5Bpost_type" +
"%5D=wp-manga&vars%5Bpost_status%5D=publish&vars%5Bmeta_key%5D=_latest_update&vars%5Border" +
"%5D=desc&vars%5Bmanga_archives_item_layout%5D=default"
).split('&')
.map {
val pos = it.indexOf('=')
it.substring(0, pos) to it.substring(pos + 1)
@ -293,7 +300,8 @@ internal abstract class MadaraParser(
class KingManga(context: MangaLoaderContext) : MadaraParser(context, MangaSource.KINGMANGA, "king-manga.com")
@MangaSourceParser("MANGAHATACHI", "MangahaTachi", "ja")
class MangahaTachi(context: MangaLoaderContext) : MadaraParser(context, MangaSource.MANGAHATACHI, "mangahatachi.com")
class MangahaTachi(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.MANGAHATACHI, "mangahatachi.com")
@MangaSourceParser("PIANMANGA", "PianManga", "en")
class PianManga(context: MangaLoaderContext) : MadaraParser(context, MangaSource.PIANMANGA, "pianmanga.com")
@ -400,7 +408,8 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("MANGA_DISTRICT", "Manga District", "en")
class MangaDistrict(context: MangaLoaderContext) : MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com") {
class MangaDistrict(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.MANGA_DISTRICT, "mangadistrict.com") {
override val tagPrefix = "publication-genre/"
@ -408,7 +417,8 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("HENTAI_4FREE", "Hentai4Free", "en")
class Hentai4Free(context: MangaLoaderContext) : MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
class Hentai4Free(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.HENTAI_4FREE, "hentai4free.net") {
override val tagPrefix = "hentai-tag/"
@ -437,7 +447,8 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("ALLPORN_COMIC", "All Porn Comic", "en")
class AllPornComic(context: MangaLoaderContext) : MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") {
class AllPornComic(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.ALLPORN_COMIC, "allporncomic.com") {
override val tagPrefix = "porncomic-genre/"
@ -488,7 +499,8 @@ internal abstract class MadaraParser(
}
@MangaSourceParser("MANGA_MANHUA", "Manga Manhua", "en")
class MangaManhua(context: MangaLoaderContext) : MadaraParser(context, MangaSource.MANGA_MANHUA, "mangamanhua.online")
class MangaManhua(context: MangaLoaderContext) :
MadaraParser(context, MangaSource.MANGA_MANHUA, "mangamanhua.online")
@MangaSourceParser("MANGA_247", "247MANGA", "en")
class Manga247(context: MangaLoaderContext) : MadaraParser(context, MangaSource.MANGA_247, "247manga.com") {

@ -29,13 +29,14 @@ class MangaInUaParser(override val context: MangaLoaderContext) : MangaParser(Ma
val searchPage = (offset / 10f).toIntUp().inc()
val url = when {
!query.isNullOrEmpty() -> (
"/index.php?do=search" +
"&subaction=search" +
"&search_start=$searchPage" +
"&full_search=1" +
"&story=$query" +
"&titleonly=3"
).toAbsoluteUrl(getDomain())
"/index.php?do=search" +
"&subaction=search" +
"&search_start=$searchPage" +
"&full_search=1" +
"&story=$query" +
"&titleonly=3"
).toAbsoluteUrl(getDomain())
tags.isNullOrEmpty() -> "/mangas/page/$page".toAbsoluteUrl(getDomain())
tags.size == 1 -> "${tags.first().key}/page/$page"
tags.size > 1 -> throw IllegalArgumentException("This source supports only 1 genre")
@ -90,16 +91,16 @@ class MangaInUaParser(override val context: MangaLoaderContext) : MangaParser(Ma
description = root.selectFirst("div.item__full-description")?.text(),
largeCoverUrl = root.selectFirst("div.item__full-sidebar--poster")?.selectFirst("img")
?.attrAsAbsoluteUrlOrNull("src"),
chapters = chapterNodes.mapNotNull { item ->
chapters = chapterNodes.mapChapters { _, item ->
val href = item?.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: return@mapNotNull null
?: return@mapChapters null
val isAlternative = item.styleValueOrNull("background") != null
val name = item.selectFirst("a")?.text().orEmpty()
if (!isAlternative) i++
MangaChapter(
id = generateUid(href),
name = if (isAlternative) {
prevChapterName ?: return@mapNotNull null
prevChapterName ?: return@mapChapters null
} else {
prevChapterName = name
name

@ -92,7 +92,7 @@ internal open class MangaLibParser(
val chaptersDoc = context.httpGet("$fullUrl?section=chapters").parseHtml()
val scripts = chaptersDoc.select("script")
val dateFormat = SimpleDateFormat("yyy-MM-dd", Locale.US)
var chapters: ArrayList<MangaChapter>? = null
var chapters: ChaptersListBuilder? = null
scripts@ for (script in scripts) {
val raw = script.html().lines()
for (line in raw) {
@ -100,7 +100,7 @@ internal open class MangaLibParser(
val json = JSONObject(line.substringAfter('=').substringBeforeLast(';'))
val list = json.getJSONObject("chapters").getJSONArray("list")
val total = list.length()
chapters = ArrayList(total)
chapters = ChaptersListBuilder(total)
for (i in 0 until total) {
val item = list.getJSONObject(i)
val chapterId = item.getLong("chapter_id")
@ -111,7 +111,6 @@ internal open class MangaLibParser(
append(item.getInt("chapter_volume"))
append("/c")
append(item.getString("chapter_number"))
@Suppress("BlockingMethodInNonBlockingContext") // lint issue
append('/')
append(item.optString("chapter_string"))
}
@ -160,7 +159,7 @@ internal open class MangaLibParser(
} ?: manga.tags,
isNsfw = isNsfw(doc),
description = info?.selectFirst("div.media-description__text")?.html(),
chapters = chapters,
chapters = chapters?.toList(),
)
}

@ -38,12 +38,14 @@ internal class MangaOwlParser(override val context: MangaLoaderContext) : MangaP
append("/search/$page?search=")
append(query.urlEncoded())
}
!tags.isNullOrEmpty() -> {
for (tag in tags) {
append(tag.key)
}
append("/$page?type=${getAlternativeSortKey(sortOrder)}")
}
else -> {
append("/${getSortKey(sortOrder)}/$page")
}
@ -110,7 +112,7 @@ internal class MangaOwlParser(override val context: MangaLoaderContext) : MangaP
state = parseStatus(info.select("p.fexi_header_para:contains(status)").first()?.ownText()),
tags = manga.tags + parsedTags,
chapters = table.select("div.table.table-chapter-list").select("li.list-group-item.chapter_list")
.asReversed().mapIndexed { i, li ->
.asReversed().mapChapters { i, li ->
val a = li.select("a")
val href = a.attr("data-href").ifEmpty {
parseFailed("Link is missing")

@ -45,6 +45,7 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
}
"/search?name=${query.urlEncoded()}".toAbsoluteUrl(getDomain())
}
tags.isNullOrEmpty() -> "/directory/$page.htm$sortKey".toAbsoluteUrl(getDomain())
tags.size == 1 -> "/directory/${tags.first().key}/$page.htm$sortKey".toAbsoluteUrl(getDomain())
else -> tags.joinToString(
@ -112,9 +113,9 @@ internal class MangaTownParser(override val context: MangaLoaderContext) : Manga
)
}.orEmpty(),
description = info?.getElementById("show")?.ownText(),
chapters = chaptersList?.mapIndexedNotNull { i, li ->
chapters = chaptersList?.mapChapters { i, li ->
val href = li.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: return@mapIndexedNotNull null
?: return@mapChapters null
val name = li.select("span")
.filter { x -> x.className().isEmpty() }
.joinToString(" - ") { it.text() }.trim()

@ -47,7 +47,9 @@ class NicovideoSeigaParser(override val context: MangaLoaderContext) :
val url = when {
!query.isNullOrEmpty() -> return if (offset == 0) getSearchList(query, page) else emptyList()
tags.isNullOrEmpty() -> "https://$domain/manga/list?page=$page&sort=${getSortKey(sortOrder)}"
tags.size == 1 -> "https://$domain/manga/list?category=${tags.first().key}&page=$page&sort=${getSortKey(sortOrder)}"
tags.size == 1 -> "https://$domain/manga/list?category=${tags.first().key}&page=$page" +
"&sort=${getSortKey(sortOrder)}"
tags.size > 1 -> throw IllegalArgumentException("This source supports only 1 category")
else -> "https://$domain/manga/list?page=$page&sort=${getSortKey(sortOrder)}"
}
@ -55,7 +57,8 @@ class NicovideoSeigaParser(override val context: MangaLoaderContext) :
val comicList = doc.body().select("#comic_list > ul > li") ?: parseFailed("Container not found")
val items = comicList.select("div > .description > div > div")
return items.mapNotNull { item ->
val href = item.selectFirst(".comic_icon > div > a")?.attrAsRelativeUrlOrNull("href") ?: return@mapNotNull null
val href =
item.selectFirst(".comic_icon > div > a")?.attrAsRelativeUrlOrNull("href") ?: return@mapNotNull null
val statusText = item.selectFirst(".mg_description_header > .mg_icon > .content_status > span")?.text()
Manga(
id = generateUid(href),
@ -92,15 +95,17 @@ class NicovideoSeigaParser(override val context: MangaLoaderContext) :
.text()
return manga.copy(
description = contents.selectFirst("div.mg_work_detail > div > div.row > div.description_text")?.html(),
largeCoverUrl = contents.selectFirst("div.primaries > div.main_visual > a > img")?.attrAsAbsoluteUrlOrNull("src"),
largeCoverUrl = contents.selectFirst("div.primaries > div.main_visual > a > img")
?.attrAsAbsoluteUrlOrNull("src"),
state = when (statusText) {
STATUS_ONGOING -> MangaState.ONGOING
STATUS_FINISHED -> MangaState.FINISHED
else -> null
},
isNsfw = contents.select(".icon_adult").isNotEmpty(),
chapters = contents.select("#episode_list > ul > li").mapIndexedNotNull { i, li ->
val href = li.selectFirst("div > div.description > div.title > a")?.attrAsRelativeUrl("href") ?: parseFailed()
chapters = contents.select("#episode_list > ul > li").mapChapters { i, li ->
val href = li.selectFirst("div > div.description > div.title > a")
?.attrAsRelativeUrl("href") ?: parseFailed()
MangaChapter(
id = generateUid(href),
name = li.select("div > div.description > div.title > a").text(),
@ -151,7 +156,8 @@ class NicovideoSeigaParser(override val context: MangaLoaderContext) :
val doc = context.httpGet("https://$domain/manga/search/?q=$query&page=$page&sort=score").parseHtml()
val root = doc.body().select(".search_result__item")
return root.mapNotNull { item ->
val href = item.selectFirst(".search_result__item__thumbnail > a")?.attrAsRelativeUrl("href") ?: parseFailed()
val href = item.selectFirst(".search_result__item__thumbnail > a")
?.attrAsRelativeUrl("href") ?: parseFailed()
Manga(
id = generateUid(href),
url = href,

@ -50,6 +50,7 @@ internal abstract class NineMangaParser(
append(query.urlEncoded())
append("&page=")
}
!tags.isNullOrEmpty() -> {
append("/search/?category_id=")
for (tag in tags) {
@ -58,6 +59,7 @@ internal abstract class NineMangaParser(
}
append("&page=")
}
else -> {
append("/category/index_")
}
@ -115,7 +117,7 @@ internal abstract class NineMangaParser(
description = infoRoot.getElementsByAttributeValue("itemprop", "description").first()
?.html()?.substringAfter("</b>"),
chapters = root.selectFirst("div.chapterbox")?.select("ul.sub_vol_ul > li")
?.asReversed()?.mapIndexed { i, li ->
?.asReversed()?.mapChapters { i, li ->
val a = li.selectFirst("a.chapter_list_a")
val href = a?.attrAsRelativeUrlOrNull("href")
?.replace("%20", " ") ?: parseFailed("Link not found")

@ -141,7 +141,7 @@ internal class RemangaParser(
source = MangaSource.REMANGA,
)
},
chapters = chapters.mapIndexed { i, jo ->
chapters = chapters.mapChapters { i, jo ->
val id = jo.getLong("id")
val name = jo.getString("name").toTitleCase(Locale.ROOT)
val publishers = jo.optJSONArray("publishers")

@ -161,8 +161,8 @@ internal abstract class GroupleParser(
author = root.selectFirst("a.person-link")?.text() ?: manga.author,
isNsfw = root.select(".alert-warning").any { it.ownText().contains(NSFW_ALERT) },
chapters = root.selectFirst("div.chapters-link")?.selectFirst("table")
?.select("tr:has(td > a)")?.asReversed()?.mapIndexedNotNull { i, tr ->
val a = tr.selectFirst("a") ?: return@mapIndexedNotNull null
?.select("tr:has(td > a)")?.asReversed()?.mapChapters { i, tr ->
val a = tr.selectFirst("a") ?: return@mapChapters null
val href = a.attrAsRelativeUrl("href")
var translators = ""
val translatorElement = a.attr("title")

@ -90,9 +90,9 @@ internal abstract class ChanParser(source: MangaSource) : MangaParser(source), M
return manga.copy(
description = root.getElementById("description")?.html()?.substringBeforeLast("<div"),
largeCoverUrl = root.getElementById("cover")?.absUrl("src"),
chapters = root.select("table.table_cha tr:gt(1)").reversed().mapIndexedNotNull { i, tr ->
chapters = root.select("table.table_cha tr:gt(1)").reversed().mapChapters { i, tr ->
val href = tr?.selectFirst("a")?.attrAsRelativeUrlOrNull("href")
?: return@mapIndexedNotNull null
?: return@mapChapters null
MangaChapter(
id = generateUid(href),
name = tr.selectFirst("a")?.text().orEmpty(),

@ -8,6 +8,7 @@ import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaSource
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
import org.koitharu.kotatsu.parsers.util.mapChapters
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
@ -25,7 +26,7 @@ internal class YaoiChanParser(override val context: MangaLoaderContext) : ChanPa
largeCoverUrl = root.getElementById("cover")?.absUrl("src"),
chapters = root.select("table.table_cha").flatMap { table ->
table.select("div.manga")
}.mapNotNull { it.selectFirst("a") }.reversed().mapIndexed { i, a ->
}.mapNotNull { it.selectFirst("a") }.reversed().mapChapters { i, a ->
val href = a.attrAsRelativeUrl("href")
MangaChapter(
id = generateUid(href),

@ -0,0 +1,42 @@
package org.koitharu.kotatsu.parsers.util
import org.koitharu.kotatsu.parsers.InternalParsersApi
import org.koitharu.kotatsu.parsers.model.MangaChapter
/**
 * Maps every element of this iterable to a [MangaChapter], skipping rejected results.
 *
 * A result is rejected when [transform] returns `null` or when the underlying
 * [ChaptersListBuilder] refuses it (a chapter with the same id was already added).
 *
 * @param transform receives the index the next accepted chapter will occupy and the
 * current element. The index advances only after a chapter has actually been added,
 * so rejected elements do not leave gaps in the numbering.
 * @return the accepted chapters in iteration order.
 */
@InternalParsersApi
inline fun <T> Iterable<T>.mapChapters(transform: (index: Int, T) -> MangaChapter?): List<MangaChapter> {
    val chapters = ChaptersListBuilder(collectionSize())
    var nextIndex = 0
    val source = iterator()
    while (source.hasNext()) {
        val element = source.next()
        val accepted = chapters.add(transform(nextIndex, element))
        if (accepted) {
            nextIndex += 1
        }
    }
    return chapters.toList()
}
/**
 * Returns the number of elements when this iterable is a [Collection];
 * otherwise falls back to a fixed value of 10.
 */
@PublishedApi
internal fun <T> Iterable<T>.collectionSize(): Int =
    (this as? Collection<*>)?.size ?: 10
/**
 * Accumulates chapters in insertion order while rejecting `null` values and
 * chapters whose id has already been added.
 *
 * @param initialSize initial capacity passed to both the id set and the chapter list.
 */
@PublishedApi
internal class ChaptersListBuilder(initialSize: Int) {

    // Ids of every chapter accepted so far; used to detect duplicates.
    private val seenIds = HashSet<Long>(initialSize)

    // Accepted chapters, in the order they were added (unless reversed).
    private val chapters = ArrayList<MangaChapter>(initialSize)

    /**
     * Adds [chapter] unless it is `null` or a chapter with the same id is already present.
     *
     * @return `true` if the chapter was appended, `false` if it was rejected.
     */
    fun add(chapter: MangaChapter?): Boolean {
        if (chapter == null) {
            return false
        }
        if (!seenIds.add(chapter.id)) {
            return false
        }
        return chapters.add(chapter)
    }

    /** Operator form of [add] (`builder += chapter`); the acceptance result is discarded. */
    operator fun plusAssign(chapter: MangaChapter?) {
        add(chapter)
    }

    /** Reverses the order of the chapters collected so far, in place. */
    fun reverse() = chapters.reverse()

    /**
     * Returns the collected chapters.
     *
     * The backing list itself is returned without copying, so later calls to
     * [add] or [reverse] on this builder are visible through the returned list.
     */
    fun toList(): List<MangaChapter> = chapters
}
Loading…
Cancel
Save