[Bentomanga] New parser

source/neox^2
Koitharu 3 years ago
parent 66adc65a96
commit 9adc0c5358
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -18,11 +18,14 @@ class OkHttpWebClient(
private val mangaSource: MangaSource,
) : WebClient {
override suspend fun httpGet(url: HttpUrl): Response {
override suspend fun httpGet(url: HttpUrl, extraHeaders: Headers?): Response {
val request = Request.Builder()
.get()
.url(url)
.addTags()
if (extraHeaders != null) {
request.headers(extraHeaders)
}
return httpClient.newCall(request.build()).await().ensureSuccess()
}

@ -1,5 +1,6 @@
package org.koitharu.kotatsu.parsers.network
import okhttp3.Headers
import okhttp3.HttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response
@ -13,11 +14,15 @@ interface WebClient {
*/
suspend fun httpGet(url: String): Response = httpGet(url.toHttpUrl())
suspend fun httpGet(url: String, extraHeaders: Headers?): Response = httpGet(url.toHttpUrl(), extraHeaders)
/**
* Do a GET http request to specific url
* @param url
*/
suspend fun httpGet(url: HttpUrl): Response
suspend fun httpGet(url: HttpUrl): Response = httpGet(url, null)
suspend fun httpGet(url: HttpUrl, extraHeaders: Headers?): Response
/**
* Do a HEAD http request to specific url

@ -0,0 +1,235 @@
package org.koitharu.kotatsu.parsers.site
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import okhttp3.Headers
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.PagedMangaParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.model.*
import org.koitharu.kotatsu.parsers.util.*
import java.util.*
@MangaSourceParser("BENTOMANGA", "Bentomanga", "fr")
internal class BentomangaParser(context: MangaLoaderContext) : PagedMangaParser(context, MangaSource.BENTOMANGA, 10) {
override val sortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
SortOrder.POPULARITY,
SortOrder.RATING,
SortOrder.NEWEST,
SortOrder.ALPHABETICAL,
)
override val configKeyDomain = ConfigKey.Domain("www.bentomanga.com", null)
override val headers: Headers = Headers.Builder()
.add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0")
.build()
init {
paginator.firstPage = 0
searchPaginator.firstPage = 0
}
override suspend fun getListPage(
page: Int,
query: String?,
tags: Set<MangaTag>?,
sortOrder: SortOrder,
): List<Manga> {
val url = urlBuilder()
.addPathSegment("manga_list")
.addQueryParameter("limit", page.toString())
.addQueryParameter(
"order_by",
when (sortOrder) {
SortOrder.UPDATED -> "update"
SortOrder.POPULARITY -> "views"
SortOrder.RATING -> "top"
SortOrder.NEWEST -> "create"
SortOrder.ALPHABETICAL -> "name"
},
)
if (!tags.isNullOrEmpty()) {
url.addQueryParameter("withCategories", tags.joinToString(",") { it.key })
}
if (!query.isNullOrEmpty()) {
url.addQueryParameter("search", query)
}
val root = webClient.httpGet(url.build()).parseHtml().requireElementById("mangas_content")
return root.select(".manga[data-manga]").map { div ->
val header = div.selectFirstOrThrow(".manga_header")
val href = header.selectFirstOrThrow("a").attrAsRelativeUrl("href")
Manga(
id = generateUid(href),
title = div.selectFirstOrThrow("h1").text(),
altTitle = null,
url = href,
publicUrl = href.toAbsoluteUrl(domain),
rating = div.getElementsByAttributeValue("data-icon", "avg_rate")
.firstOrNull()
?.textOrNull()
?.toFloatOrNull()
?.div(10f)
?: RATING_UNKNOWN,
isNsfw = div.selectFirst(".badge-adult_content") != null,
coverUrl = div.selectFirstOrThrow("img").src(),
tags = div.selectFirst(".component-manga-categories")
.assertNotNull("tags")
?.select("a")
?.mapToSet { a ->
MangaTag(
title = a.text().toTitleCase(sourceLocale),
key = a.attr("href").substringAfterLast('='),
source = source,
)
}.orEmpty(),
state = null,
author = null,
description = div.selectFirst(".manga_synopsis")?.html().assertNotNull("description"),
source = source,
)
}
}
override suspend fun getDetails(manga: Manga): Manga {
val mangaUrl = manga.url.toAbsoluteUrl(domain)
val root = webClient.httpGet(mangaUrl).parseHtml()
.requireElementById("container_manga_show")
return manga.copy(
altTitle = root.selectFirst(".component-manga-title_alt")?.textOrNull().assertNotNull("altTitle"),
description = root.selectFirst(".datas_synopsis")?.html().assertNotNull("description")
?: manga.description,
state = when (root.selectFirst(".datas_more-status-data")?.textOrNull().assertNotNull("status")) {
"En cours" -> MangaState.ONGOING
else -> null
},
author = root.selectFirst(".datas_more-authors-people")?.textOrNull().assertNotNull("author"),
chapters = run {
val input = root.selectFirst("input[name=\"limit\"]") ?: return@run emptyList()
val max = input.attr("max").toInt()
if (max <= 1) {
parseChapters(root)
} else {
coroutineScope {
val result = ArrayList<MangaChapter>(parseChapters(root))
result.ensureCapacity(result.size * max)
(2..max).map { i ->
async {
loadChapters(mangaUrl, i)
}
}.awaitAll()
.flattenTo(result)
result
}
}
}.reversed(),
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val chapterUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(chapterUrl).parseHtml()
val chapterId = doc.head().getElementsByAttribute("data-chapter-id").first()!!.attr("data-chapter-id")
val json = webClient.httpGet(
"https://$domain/api/?id=$chapterId&type=chapter",
Headers.headersOf(
"Referer", chapterUrl,
"x-requested-with", "XMLHttpRequest",
),
).parseJson()
val baseUrl = json.getString("baseImagesUrl")
val pages = json.getJSONArray("page_array")
return (0 until pages.length()).map { i ->
val url = concatUrl(baseUrl, pages.getString(i))
MangaPage(
id = generateUid(url),
url = url.toAbsoluteUrl(domain),
preview = null,
source = source,
)
}
}
override suspend fun getTags(): Set<MangaTag> {
val root = webClient.httpGet(urlBuilder().addPathSegment("manga_list").build())
.parseHtml()
.requireElementById("search_options-form")
return root.getElementsByAttributeValue("name", "categories[]")
.mapToSet { input ->
val div = input.parents().first()!!
MangaTag(
title = div.text().toTitleCase(sourceLocale),
key = input.attr("value"),
source = source,
)
}
}
private suspend fun loadChapters(baseUrl: String, page: Int): List<MangaChapter> {
return parseChapters(webClient.httpGet("$baseUrl?limit=$page").parseHtml().body())
}
private fun parseChapters(root: Element): List<MangaChapter> {
return root.requireElementById("chapters_content")
.select(".component-chapter").map { div ->
val a = div.selectFirstOrThrow("a")
val href = a.attrAsRelativeUrl("href")
val title = div.selectFirstOrThrow(".chapter_volume").text()
val name = div.selectFirst(".chapter_title")?.textOrNull()
MangaChapter(
id = generateUid(href),
name = if (name != null && name != title) "$title: $name" else title,
number = href.substringAfterLast('/').toIntOrNull() ?: 0,
url = href,
scanlator = div.selectFirst(".team_link-name")?.textOrNull().assertNotNull("scanlator"),
uploadDate = div.selectFirst(".component-chapter-date")
?.ownTextOrNull()
.parseDate(),
branch = null,
source = source,
)
}
}
private fun String?.parseDate(): Long {
if (this == null) {
assert(false) { "Date is null" }
return 0L
}
val parts = split(' ')
assert(parts.size == 2) { "Wrong date $this" }
val count = parts.getOrNull(0)?.toIntOrNull() ?: return 0L
val unit = parts.getOrNull(1) ?: return 0L
val calendarUnit = when (unit) {
"s" -> Calendar.SECOND
"min" -> Calendar.MINUTE
"h" -> Calendar.HOUR
"j" -> Calendar.DAY_OF_YEAR
"sem." -> Calendar.WEEK_OF_YEAR
"mois" -> Calendar.MONTH
"ans", "an" -> Calendar.YEAR
else -> {
assert(false) { "Unknown time unit $unit" }
return 0L
}
}
val calendar = Calendar.getInstance()
calendar.add(calendarUnit, -count)
return calendar.timeInMillis
}
private fun Element.src(): String {
return attrAsAbsoluteUrlOrNull("data-cfsrc")
?: attrAsAbsoluteUrlOrNull("src")
?: attrAsAbsoluteUrlOrNull("data-src")
?: run {
assert(false) { "Image src not found" }
""
}
}
}
Loading…
Cancel
Save