[Grouple] Selecting the fastest pages server

pull/154/head
Koitharu 3 years ago
parent e51b33c74a
commit 454b24ec88
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -1,5 +1,7 @@
package org.koitharu.kotatsu.parsers.site.grouple package org.koitharu.kotatsu.parsers.site.grouple
import kotlinx.coroutines.async
import kotlinx.coroutines.coroutineScope
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response import okhttp3.Response
@ -21,298 +23,306 @@ private const val NSFW_ALERT = "сексуальные сцены"
private const val NOTHING_FOUND = "Ничего не найдено" private const val NOTHING_FOUND = "Ничего не найдено"
internal abstract class GroupleParser( internal abstract class GroupleParser(
context: MangaLoaderContext, context: MangaLoaderContext,
source: MangaSource, source: MangaSource,
userAgent: String, userAgent: String,
private val siteId: Int, private val siteId: Int,
) : MangaParser(context, source), MangaParserAuthProvider { ) : MangaParser(context, source), MangaParserAuthProvider {
override val headers = Headers.Builder() @Volatile
.add("User-Agent", userAgent) private var cachedPagesServer: String? = null
.build()
override val sortOrders: Set<SortOrder> = EnumSet.of( override val headers = Headers.Builder()
SortOrder.UPDATED, .add("User-Agent", userAgent)
SortOrder.POPULARITY, .build()
SortOrder.NEWEST,
SortOrder.RATING,
)
override val authUrl: String override val sortOrders: Set<SortOrder> = EnumSet.of(
get() { SortOrder.UPDATED,
val targetUri = "https://${domain}/".urlEncoded() SortOrder.POPULARITY,
return "https://grouple.co/internal/auth/sso?siteId=$siteId&=targetUri=$targetUri" SortOrder.NEWEST,
} SortOrder.RATING,
)
override val isAuthorized: Boolean override val authUrl: String
get() = context.cookieJar.getCookies(domain).any { it.name == "gwt" } get() {
val targetUri = "https://${domain}/".urlEncoded()
return "https://grouple.co/internal/auth/sso?siteId=$siteId&=targetUri=$targetUri"
}
override suspend fun getList( override val isAuthorized: Boolean
offset: Int, get() = context.cookieJar.getCookies(domain).any { it.name == "gwt" }
query: String?,
tags: Set<MangaTag>?,
sortOrder: SortOrder,
): List<Manga> {
val domain = domain
val doc = when {
!query.isNullOrEmpty() -> webClient.httpPost(
"https://$domain/search",
mapOf(
"q" to query.urlEncoded(),
"offset" to (offset upBy PAGE_SIZE_SEARCH).toString(),
),
)
tags.isNullOrEmpty() -> webClient.httpGet( override suspend fun getList(
"https://$domain/list?sortType=${ offset: Int,
getSortKey(sortOrder) query: String?,
}&offset=${offset upBy PAGE_SIZE}", tags: Set<MangaTag>?,
) sortOrder: SortOrder,
): List<Manga> {
val domain = domain
val doc = when {
!query.isNullOrEmpty() -> webClient.httpPost(
"https://$domain/search",
mapOf(
"q" to query.urlEncoded(),
"offset" to (offset upBy PAGE_SIZE_SEARCH).toString(),
),
)
tags.size == 1 -> webClient.httpGet( tags.isNullOrEmpty() -> webClient.httpGet(
"https://$domain/list/genre/${tags.first().key}?sortType=${ "https://$domain/list?sortType=${
getSortKey(sortOrder) getSortKey(sortOrder)
}&offset=${offset upBy PAGE_SIZE}", }&offset=${offset upBy PAGE_SIZE}",
) )
offset > 0 -> return emptyList() tags.size == 1 -> webClient.httpGet(
else -> advancedSearch(domain, tags) "https://$domain/list/genre/${tags.first().key}?sortType=${
}.parseHtml().body() getSortKey(sortOrder)
val root = (doc.getElementById("mangaBox") ?: doc.getElementById("mangaResults")) }&offset=${offset upBy PAGE_SIZE}",
?: doc.parseFailed("Cannot find root") )
val tiles = root.selectFirst("div.tiles.row") ?: if (
root.select(".alert").any { it.ownText() == NOTHING_FOUND }
) {
return emptyList()
} else {
doc.parseFailed("No tiles found")
}
val baseHost = root.baseUri().toHttpUrl().host
return tiles.select("div.tile").mapNotNull { node ->
val imgDiv = node.selectFirst("div.img") ?: return@mapNotNull null
val descDiv = node.selectFirst("div.desc") ?: return@mapNotNull null
if (descDiv.selectFirst("i.fa-user") != null) {
return@mapNotNull null // skip author
}
val href = imgDiv.selectFirst("a")?.attrAsAbsoluteUrlOrNull("href")
if (href == null || href.toHttpUrl().host != baseHost) {
return@mapNotNull null // skip external links
}
val title = descDiv.selectFirst("h3")?.selectFirst("a")?.text()
?: return@mapNotNull null
val tileInfo = descDiv.selectFirst("div.tile-info")
val relUrl = href.toRelativeUrl(baseHost)
Manga(
id = generateUid(relUrl),
url = relUrl,
publicUrl = href,
title = title,
altTitle = descDiv.selectFirst("h4")?.text(),
coverUrl = imgDiv.selectFirst("img.lazy")?.attr("data-original")?.replace("_p.", ".").orEmpty(),
rating = runCatching {
node.selectFirst(".compact-rate")
?.attr("title")
?.toFloatOrNull()
?.div(5f)
}.getOrNull() ?: RATING_UNKNOWN,
author = tileInfo?.selectFirst("a.person-link")?.text(),
isNsfw = false,
tags = runCatching {
tileInfo?.select("a.element-link")
?.mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}
}.getOrNull().orEmpty(),
state = when {
node.selectFirst("div.tags")
?.selectFirst("span.mangaCompleted") != null -> MangaState.FINISHED
else -> null offset > 0 -> return emptyList()
}, else -> advancedSearch(domain, tags)
source = source, }.parseHtml().body()
) val root = (doc.getElementById("mangaBox") ?: doc.getElementById("mangaResults"))
} ?: doc.parseFailed("Cannot find root")
} val tiles = root.selectFirst("div.tiles.row") ?: if (
root.select(".alert").any { it.ownText() == NOTHING_FOUND }
) {
return emptyList()
} else {
doc.parseFailed("No tiles found")
}
val baseHost = root.baseUri().toHttpUrl().host
return tiles.select("div.tile").mapNotNull { node ->
val imgDiv = node.selectFirst("div.img") ?: return@mapNotNull null
val descDiv = node.selectFirst("div.desc") ?: return@mapNotNull null
if (descDiv.selectFirst("i.fa-user") != null) {
return@mapNotNull null // skip author
}
val href = imgDiv.selectFirst("a")?.attrAsAbsoluteUrlOrNull("href")
if (href == null || href.toHttpUrl().host != baseHost) {
return@mapNotNull null // skip external links
}
val title = descDiv.selectFirst("h3")?.selectFirst("a")?.text()
?: return@mapNotNull null
val tileInfo = descDiv.selectFirst("div.tile-info")
val relUrl = href.toRelativeUrl(baseHost)
Manga(
id = generateUid(relUrl),
url = relUrl,
publicUrl = href,
title = title,
altTitle = descDiv.selectFirst("h4")?.text(),
coverUrl = imgDiv.selectFirst("img.lazy")?.attr("data-original")?.replace("_p.", ".").orEmpty(),
rating = runCatching {
node.selectFirst(".compact-rate")
?.attr("title")
?.toFloatOrNull()
?.div(5f)
}.getOrNull() ?: RATING_UNKNOWN,
author = tileInfo?.selectFirst("a.person-link")?.text(),
isNsfw = false,
tags = runCatching {
tileInfo?.select("a.element-link")
?.mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}
}.getOrNull().orEmpty(),
state = when {
node.selectFirst("div.tags")
?.selectFirst("span.mangaCompleted") != null -> MangaState.FINISHED
override suspend fun getDetails(manga: Manga): Manga { else -> null
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml() },
val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent") source = source,
?: doc.parseFailed("Cannot find root") )
val dateFormat = SimpleDateFormat("dd.MM.yy", Locale.US) }
val coverImg = root.selectFirst("div.subject-cover")?.selectFirst("img") }
return manga.copy(
description = root.selectFirst("div.manga-description")?.html(),
largeCoverUrl = coverImg?.attr("data-full"),
coverUrl = coverImg?.attr("data-thumb") ?: manga.coverUrl,
tags = manga.tags + root.select("div.subject-meta").select("span.elem_genre ")
.mapNotNull {
val a = it.selectFirst("a.element-link") ?: return@mapNotNull null
MangaTag(
title = a.text().toTitleCase(),
key = a.attr("href").substringAfterLast('/'),
source = source,
)
},
author = root.selectFirst("a.person-link")?.text() ?: manga.author,
isNsfw = root.select(".alert-warning").any { it.ownText().contains(NSFW_ALERT) },
chapters = root.selectFirst("div.chapters-link")?.selectFirst("table")
?.select("tr:has(td > a)")?.asReversed()?.mapChapters { i, tr ->
val a = tr.selectFirst("a.chapter-link") ?: return@mapChapters null
val href = a.attrAsRelativeUrl("href")
var translators = ""
val translatorElement = a.attr("title")
if (!translatorElement.isNullOrBlank()) {
translators = translatorElement
.replace("(Переводчик),", "&")
.removeSuffix(" (Переводчик)")
}
MangaChapter(
id = generateUid(href),
name = tr.selectFirst("a")?.text().orEmpty().removePrefix(manga.title).trim(),
number = i + 1,
url = href,
uploadDate = dateFormat.tryParse(tr.selectFirst("td.date")?.text()),
scanlator = translators,
source = source,
branch = null,
)
},
)
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain) + "?mtr=1") val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml()
.checkAuthRequired() val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent")
.parseHtml() ?: doc.parseFailed("Cannot find root")
val scripts = doc.select("script") val dateFormat = SimpleDateFormat("dd.MM.yy", Locale.US)
for (script in scripts) { val coverImg = root.selectFirst("div.subject-cover")?.selectFirst("img")
val data = script.html() return manga.copy(
val pos = data.indexOf("rm_h.readerInit( 0,") description = root.selectFirst("div.manga-description")?.html(),
if (pos == -1) { largeCoverUrl = coverImg?.attr("data-full"),
continue coverUrl = coverImg?.attr("data-thumb") ?: manga.coverUrl,
} tags = manga.tags + root.select("div.subject-meta").select("span.elem_genre ")
val json = data.substring(pos) .mapNotNull {
.substringAfter('(') val a = it.selectFirst("a.element-link") ?: return@mapNotNull null
.substringBefore('\n') MangaTag(
.substringBeforeLast(')') title = a.text().toTitleCase(),
if (json.isEmpty()) { key = a.attr("href").substringAfterLast('/'),
continue source = source,
} )
val ja = JSONArray("[$json]") },
val pages = ja.getJSONArray(1) author = root.selectFirst("a.person-link")?.text() ?: manga.author,
val servers = ja.getJSONArray(3).mapJSON { it.getString("path") } isNsfw = root.select(".alert-warning").any { it.ownText().contains(NSFW_ALERT) },
val serversStr = servers.joinToString("|") chapters = root.selectFirst("div.chapters-link")?.selectFirst("table")
return (0 until pages.length()).map { i -> ?.select("tr:has(td > a)")?.asReversed()?.mapChapters { i, tr ->
val page = pages.getJSONArray(i) val a = tr.selectFirst("a.chapter-link") ?: return@mapChapters null
val primaryServer = page.getString(0) val href = a.attrAsRelativeUrl("href")
val url = page.getString(2) var translators = ""
MangaPage( val translatorElement = a.attr("title")
id = generateUid(url), if (!translatorElement.isNullOrBlank()) {
url = "$primaryServer|$serversStr|$url", translators = translatorElement
preview = null, .replace("(Переводчик),", "&")
referer = chapter.url, .removeSuffix(" (Переводчик)")
source = source, }
) MangaChapter(
} id = generateUid(href),
} name = tr.selectFirst("a")?.text().orEmpty().removePrefix(manga.title).trim(),
doc.parseFailed("Pages list not found at ${chapter.url}") number = i + 1,
} url = href,
uploadDate = dateFormat.tryParse(tr.selectFirst("td.date")?.text()),
scanlator = translators,
source = source,
branch = null,
)
},
)
}
override suspend fun getPageUrl(page: MangaPage): String { override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val parts = page.url.split('|') val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain) + "?mtr=1")
val path = parts.last() .checkAuthRequired()
val servers = parts.dropLast(1).toSet() .parseHtml()
for (server in servers) { val scripts = doc.select("script")
val url = server + path for (script in scripts) {
if (tryHead(url)) { val data = script.html()
return url val pos = data.indexOf("rm_h.readerInit( 0,")
} if (pos == -1) {
} continue
val fallbackServer = servers.firstOrNull() ?: throw ParseException("Cannot find any page url", page.url) }
return fallbackServer + path val json = data.substring(pos)
} .substringAfter('(')
.substringBefore('\n')
.substringBeforeLast(')')
if (json.isEmpty()) {
continue
}
val ja = JSONArray("[$json]")
val pages = ja.getJSONArray(1)
val servers = ja.getJSONArray(3).mapJSON { it.getString("path") }
val serversStr = servers.joinToString("|")
return (0 until pages.length()).map { i ->
val page = pages.getJSONArray(i)
val primaryServer = page.getString(0)
val url = page.getString(2)
MangaPage(
id = generateUid(url),
url = "$primaryServer|$serversStr|$url",
preview = null,
referer = chapter.url,
source = source,
)
}
}
doc.parseFailed("Pages list not found at ${chapter.url}")
}
override suspend fun getTags(): Set<MangaTag> { override suspend fun getPageUrl(page: MangaPage): String {
val doc = webClient.httpGet("https://${domain}/list/genres/sort_name").parseHtml() val parts = page.url.split('|')
val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent") val path = parts.last()
?.selectFirst("table.table") ?: doc.parseFailed("Cannot find root") val servers = parts.dropLast(1).toSet()
return root.select("a.element-link").mapToSet { a -> val cachedServer = cachedPagesServer
MangaTag( if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) {
title = a.text().toTitleCase(), return cachedServer + path
key = a.attr("href").substringAfterLast('/'), }
source = source, val server = coroutineScope {
) servers.map { server ->
} async {
} if (tryHead(server + path)) server else null
}
}.awaitFirst { it != null }
}
cachedPagesServer = server
return checkNotNull(server + path)
}
override suspend fun getUsername(): String { override suspend fun getTags(): Set<MangaTag> {
val root = webClient.httpGet("https://grouple.co/").parseHtml().body() val doc = webClient.httpGet("https://${domain}/list/genres/sort_name").parseHtml()
val element = root.selectFirst("img.user-avatar") ?: throw AuthRequiredException(source) val root = doc.body().getElementById("mangaBox")?.selectFirst("div.leftContent")
val res = element.parent()?.text() ?.selectFirst("table.table") ?: doc.parseFailed("Cannot find root")
return if (res.isNullOrEmpty()) { return root.select("a.element-link").mapToSet { a ->
root.parseFailed("Cannot find username") MangaTag(
} else res title = a.text().toTitleCase(),
} key = a.attr("href").substringAfterLast('/'),
source = source,
)
}
}
private fun getSortKey(sortOrder: SortOrder) = override suspend fun getUsername(): String {
when (sortOrder) { val root = webClient.httpGet("https://grouple.co/").parseHtml().body()
SortOrder.ALPHABETICAL -> "name" val element = root.selectFirst("img.user-avatar") ?: throw AuthRequiredException(source)
SortOrder.POPULARITY -> "rate" val res = element.parent()?.text()
SortOrder.UPDATED -> "updated" return if (res.isNullOrEmpty()) {
SortOrder.NEWEST -> "created" root.parseFailed("Cannot find username")
SortOrder.RATING -> "votes" } else res
} }
private suspend fun advancedSearch(domain: String, tags: Set<MangaTag>): Response { private fun getSortKey(sortOrder: SortOrder) =
val url = "https://$domain/search/advanced" when (sortOrder) {
// Step 1: map catalog genres names to advanced-search genres ids SortOrder.ALPHABETICAL -> "name"
val tagsIndex = webClient.httpGet(url).parseHtml() SortOrder.POPULARITY -> "rate"
.body().selectFirst("form.search-form") SortOrder.UPDATED -> "updated"
?.select("div.form-group") SortOrder.NEWEST -> "created"
?.get(1) ?: throw ParseException("Genres filter element not found", url) SortOrder.RATING -> "votes"
val tagNames = tags.map { it.title.lowercase() } }
val payload = HashMap<String, String>()
var foundGenres = 0
tagsIndex.select("li.property").forEach { li ->
val name = li.text().trim().lowercase()
val id = li.selectFirst("input")?.id()
?: li.parseFailed("Id for tag $name not found")
payload[id] = if (name in tagNames) {
foundGenres++
"in"
} else ""
}
if (foundGenres != tags.size) {
tagsIndex.parseFailed("Some genres are not found")
}
// Step 2: advanced search
payload["q"] = ""
payload["s_high_rate"] = ""
payload["s_single"] = ""
payload["s_mature"] = ""
payload["s_completed"] = ""
payload["s_translated"] = ""
payload["s_many_chapters"] = ""
payload["s_wait_upload"] = ""
payload["s_sale"] = ""
payload["years"] = "1900,2099"
payload["+"] = "Искать".urlEncoded()
return webClient.httpPost(url, payload)
}
private suspend fun tryHead(url: String): Boolean = runCatchingCancellable { private suspend fun advancedSearch(domain: String, tags: Set<MangaTag>): Response {
webClient.httpHead(url).isSuccessful val url = "https://$domain/search/advanced"
}.getOrDefault(false) // Step 1: map catalog genres names to advanced-search genres ids
val tagsIndex = webClient.httpGet(url).parseHtml()
.body().selectFirst("form.search-form")
?.select("div.form-group")
?.get(1) ?: throw ParseException("Genres filter element not found", url)
val tagNames = tags.map { it.title.lowercase() }
val payload = HashMap<String, String>()
var foundGenres = 0
tagsIndex.select("li.property").forEach { li ->
val name = li.text().trim().lowercase()
val id = li.selectFirst("input")?.id()
?: li.parseFailed("Id for tag $name not found")
payload[id] = if (name in tagNames) {
foundGenres++
"in"
} else ""
}
if (foundGenres != tags.size) {
tagsIndex.parseFailed("Some genres are not found")
}
// Step 2: advanced search
payload["q"] = ""
payload["s_high_rate"] = ""
payload["s_single"] = ""
payload["s_mature"] = ""
payload["s_completed"] = ""
payload["s_translated"] = ""
payload["s_many_chapters"] = ""
payload["s_wait_upload"] = ""
payload["s_sale"] = ""
payload["years"] = "1900,2099"
payload["+"] = "Искать".urlEncoded()
return webClient.httpPost(url, payload)
}
private fun Response.checkAuthRequired(): Response { private suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
val lastPathSegment = request.url.pathSegments.lastOrNull() ?: return this webClient.httpHead(url).isSuccessful
if (lastPathSegment == "login") { }.getOrDefault(false)
throw AuthRequiredException(source)
} private fun Response.checkAuthRequired(): Response {
return this val lastPathSegment = request.url.pathSegments.lastOrNull() ?: return this
} if (lastPathSegment == "login") {
throw AuthRequiredException(source)
}
return this
}
} }

@ -0,0 +1,40 @@
package org.koitharu.kotatsu.parsers.util
import kotlinx.coroutines.Deferred
import kotlinx.coroutines.Job
import kotlinx.coroutines.selects.select
import kotlin.coroutines.cancellation.CancellationException
/**
 * Cancels every [Job] in this iterable, in iteration order.
 *
 * @param cause optional cancellation cause passed to each [Job.cancel] call.
 */
fun Iterable<Job>.cancelAll(cause: CancellationException? = null) {
    for (job in this) {
        job.cancel(cause)
    }
}
/**
 * Suspends until any one of these deferred values completes and returns its result.
 *
 * After a value has been obtained, [cancelAll] is invoked on the receiver so the
 * remaining deferreds are cancelled.
 *
 * @return the value of the deferred that was selected.
 */
suspend fun <T> Iterable<Deferred<T>>.awaitFirst(): T {
    val deferreds = this
    val winner = select<T> {
        deferreds.forEach { deferred ->
            deferred.onAwait { value -> value }
        }
    }
    // Reached only when select returned normally, same as the original `.also { … }`.
    deferreds.cancelAll()
    return winner
}
/**
 * Suspends until one of these deferred values completes with a result accepted by
 * [condition], and returns that result.
 *
 * Each deferred is examined at most once: after its value has been checked it is
 * removed from the set of pending candidates. Without this, a deferred that has
 * already completed with a rejected value would be picked again by every following
 * `select` (it is immediately ready), exhausting the attempts before slower
 * deferreds had a chance to finish.
 *
 * When the loop ends, [cancelAll] is invoked on the whole receiver collection.
 *
 * @param condition predicate applied to each completed value; the first value for
 * which it returns `true` becomes the result.
 * @return the first completed value accepted by [condition].
 * @throws NoSuchElementException if the collection is empty or no completed value
 * satisfies [condition].
 */
suspend fun <T> Collection<Deferred<T>>.awaitFirst(condition: (T) -> Boolean): T {
    // Deferreds whose result has not been examined yet.
    val pending = toMutableList()
    var result: Any? = NULL
    while (result === NULL && pending.isNotEmpty()) {
        // Return the deferred together with its value so we know which one to drop.
        val (winner, candidate) = select<Pair<Deferred<T>, T>> {
            for (async in pending) {
                async.onAwait { async to it }
            }
        }
        pending.remove(winner)
        if (condition(candidate)) {
            result = candidate
        }
    }
    cancelAll()
    if (result === NULL) {
        throw NoSuchElementException("No deferred value matches the condition")
    }
    @Suppress("UNCHECKED_CAST")
    return result as T
}
// Unique marker object compared by identity (`===`): it stands for "no result chosen yet",
// which keeps that state distinguishable from a legitimately `null` result value.
private val NULL = Any()
Loading…
Cancel
Save