[Grouple] Fix pages url extraction

source/neox^2
Koitharu 3 years ago
parent 5b94badfc2
commit 306d46ea93
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -1,9 +1,11 @@
package org.koitharu.kotatsu.parsers.site.grouple package org.koitharu.kotatsu.parsers.site.grouple
import kotlinx.coroutines.async import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.flow.first
import kotlinx.coroutines.launch
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import okhttp3.Interceptor import okhttp3.Interceptor
import okhttp3.Response import okhttp3.Response
import okhttp3.internal.headersContentLength import okhttp3.internal.headersContentLength
@ -236,30 +238,42 @@ internal abstract class GroupleParser(
override suspend fun getPageUrl(page: MangaPage): String { override suspend fun getPageUrl(page: MangaPage): String {
val parts = page.url.split('|') val parts = page.url.split('|')
if (parts.size < 2) {
throw ParseException("No servers found for page", page.url)
}
val path = parts.last() val path = parts.last()
val servers = parts.dropLast(1).toSet() // fast path
val cachedServer = cachedPagesServer cachedPagesServer?.let { host ->
if (!cachedServer.isNullOrEmpty() && cachedServer in servers && tryHead(concatUrl(cachedServer, path))) { val url = concatUrl("https://$host/", path)
return concatUrl(cachedServer, path) if (tryHead(url)) {
return url
} else {
cachedPagesServer = null
}
} }
if (servers.isEmpty()) { // slow path
throw ParseException("No servers found for page", page.url) val candidates = HashSet<String>((parts.size - 1) * 2)
for (i in 0 until parts.size - 1) {
val server = parts[i].trim().ifEmpty { "https://$domain/" }
candidates.add(concatUrl(server, path))
candidates.add(concatUrl(server, path.substringBeforeLast('?')))
} }
val server = try { return try {
coroutineScope { channelFlow {
servers.map { server -> for (url in candidates) {
async { launch {
val host = server.trim().ifEmpty { "https://$domain/" } if (tryHead(url)) {
if (tryHead(concatUrl(host, path))) host else null send(url)
}
} }
}.awaitFirst { it != null } }
}.also { }.first().also {
cachedPagesServer = it cachedPagesServer = it.toHttpUrlOrNull()?.host
} }
} catch (e: NoSuchElementException) { } catch (e: NoSuchElementException) {
servers.random() assert(false) { e.toString() }
candidates.random()
} }
return concatUrl(checkNotNull(server).ifEmpty { "https://$domain/" }, path)
} }
override suspend fun getTags(): Set<MangaTag> { override suspend fun getTags(): Set<MangaTag> {

@ -2,39 +2,34 @@ package org.koitharu.kotatsu.parsers.util
import kotlinx.coroutines.Deferred import kotlinx.coroutines.Deferred
import kotlinx.coroutines.Job import kotlinx.coroutines.Job
import kotlinx.coroutines.selects.select import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.first
import kotlinx.coroutines.launch
import kotlin.coroutines.cancellation.CancellationException import kotlin.coroutines.cancellation.CancellationException
fun Iterable<Job>.cancelAll(cause: CancellationException? = null) { fun Iterable<Job>.cancelAll(cause: CancellationException? = null) {
forEach { it.cancel(cause) } forEach { it.cancel(cause) }
} }
suspend fun <T> Iterable<Deferred<T>>.awaitFirst(): T = select<T> { suspend fun <T> Iterable<Deferred<T>>.awaitFirst(): T {
for (async in this@awaitFirst) { return channelFlow {
async.onAwait { it } for (deferred in this@awaitFirst) {
} launch {
}.also { this@awaitFirst.cancelAll() } send(deferred.await())
}
}
}.first().also { this@awaitFirst.cancelAll() }
}
suspend fun <T> Collection<Deferred<T>>.awaitFirst(condition: (T) -> Boolean): T { suspend fun <T> Collection<Deferred<T>>.awaitFirst(condition: (T) -> Boolean): T {
var result: Any? = NULL return channelFlow {
var counter = size for (deferred in this@awaitFirst) {
while (result === NULL && counter > 0) { launch {
val candidate = select<T> { val result = deferred.await()
for (async in this@awaitFirst) { if (condition(result)) {
async.onAwait { it } send(result)
}
} }
} }
if (condition(candidate)) { }.first().also { this@awaitFirst.cancelAll() }
result = candidate
}
counter--
}
cancelAll()
if (result === NULL) {
throw NoSuchElementException()
}
@Suppress("UNCHECKED_CAST")
return result as T
} }
private val NULL = Any()

@ -114,20 +114,24 @@ internal class MangaParserTest {
@MangaSources @MangaSources
fun pages(source: MangaSource) = runTest { fun pages(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val list = parser.getList(0, sortOrder = SortOrder.POPULARITY, tags = null) val list = parser.getList(0, sortOrder = SortOrder.UPDATED, tags = null)
val manga = list.first() val manga = list.first()
val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null") val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null at ${manga.publicUrl}")
val pages = parser.getPages(chapter) val pages = parser.getPages(chapter)
assert(pages.isNotEmpty()) assert(pages.isNotEmpty())
assert(pages.isDistinctBy { it.id }) assert(pages.isDistinctBy { it.id })
assert(pages.all { it.source == source }) assert(pages.all { it.source == source })
val page = pages.medianOrNull() ?: error("No page") arrayOf(
val pageUrl = parser.getPageUrl(page) pages.first(),
assert(pageUrl.isNotEmpty()) pages.medianOrNull() ?: error("No page"),
assert(pageUrl.isUrlAbsolute()) ).forEach { page ->
checkImageRequest(pageUrl, page.source) val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.source)
}
} }
@ParameterizedTest(name = "{index}|favicon|{0}") @ParameterizedTest(name = "{index}|favicon|{0}")

Loading…
Cancel
Save