Fix grouple pages parsing #156

Koitharu 3 years ago
parent f4c47b5b84
commit eea87d8607
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -14,4 +14,8 @@ sealed class ConfigKey<T>(
/**
 * Boolean-valued [ConfigKey] constructed with the string "show_suspicious"
 * (presumably the name the setting is stored under; confirm against the
 * ConfigKey constructor, which is not visible here).
 *
 * @property defaultValue value supplied by whoever instantiates the key; it overrides
 * the base class property of the same name.
 */
class ShowSuspiciousContent(
override val defaultValue: Boolean,
) : ConfigKey<Boolean>("show_suspicious")
/**
 * String-valued [ConfigKey] constructed with the string "user_agent"
 * (presumably the name the setting is stored under; confirm against the
 * ConfigKey constructor, which is not visible here).
 *
 * @property defaultValue value supplied by whoever instantiates the key; it overrides
 * the base class property of the same name.
 */
class UserAgent(
override val defaultValue: String,
) : ConfigKey<String>("user_agent")
}

@ -5,10 +5,12 @@ import kotlinx.coroutines.coroutineScope
import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response
import okhttp3.internal.headersContentLength
import org.json.JSONArray
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaParserAuthProvider
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.AuthRequiredException
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.*
@ -21,6 +23,7 @@ private const val PAGE_SIZE = 70
private const val PAGE_SIZE_SEARCH = 50
private const val NSFW_ALERT = "сексуальные сцены"
private const val NOTHING_FOUND = "Ничего не найдено"
private const val MIN_IMAGE_SIZE = 1024L
internal abstract class GroupleParser(
context: MangaLoaderContext,
@ -31,11 +34,13 @@ internal abstract class GroupleParser(
@Volatile
private var cachedPagesServer: String? = null
override val headers = Headers.Builder()
.add(
"User-Agent",
private val userAgentKey = ConfigKey.UserAgent(
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
)
override val headers: Headers
get() = Headers.Builder()
.add("User-Agent", config[userAgentKey])
.build()
override val sortOrders: Set<SortOrder> = EnumSet.of(
@ -237,15 +242,23 @@ internal abstract class GroupleParser(
if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) {
return cachedServer + path
}
val server = coroutineScope {
if (servers.isEmpty()) {
throw ParseException("No servers found for page", page.url)
}
val server = try {
coroutineScope {
servers.map { server ->
async {
if (tryHead(server + path)) server else null
}
}.awaitFirst { it != null }
}.also {
cachedPagesServer = it
}
} catch (e: NoSuchElementException) {
servers.random()
}
cachedPagesServer = server
return checkNotNull(server + path)
return checkNotNull(server) + path
}
override suspend fun getTags(): Set<MangaTag> {
@ -316,8 +329,9 @@ internal abstract class GroupleParser(
return webClient.httpPost(url, payload)
}
private suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
webClient.httpHead(url).isSuccessful
suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
val response = webClient.httpHead(url)
response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE
}.getOrDefault(false)
private fun Response.checkAuthRequired(): Response {

@ -1,7 +1,6 @@
package org.koitharu.kotatsu.parsers
import com.koushikdutta.quack.QuackContext
import okhttp3.Headers
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.Response
@ -13,7 +12,7 @@ import java.util.concurrent.TimeUnit
internal object MangaLoaderContextMock : MangaLoaderContext() {
private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format(
/*BuildConfig.VERSION_NAME*/ "4.3",
/*BuildConfig.VERSION_NAME*/ "4.4",
/*Build.VERSION.RELEASE*/ "r",
/*Build.MODEL*/ "",
/*Build.BRAND*/ "",
@ -46,15 +45,12 @@ internal object MangaLoaderContextMock : MangaLoaderContext() {
return SourceConfigMock()
}
suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response {
suspend fun doRequest(url: String, source: MangaSource?): Response {
val request = Request.Builder()
.get()
.url(url)
if (extraHeaders != null) {
request.headers(extraHeaders)
}
if (referer != null) {
request.header("Referer", referer)
if (source != null) {
request.tag(MangaSource::class.java, source)
}
return httpClient.newCall(request.build()).await()
}

@ -11,10 +11,7 @@ import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct
import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates
import org.koitharu.kotatsu.test_util.*
@ExtendWith(AuthCheckExtension::class)
@ -105,9 +102,9 @@ internal class MangaParserTest {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
}
assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl)
checkImageRequest(coverUrl, source)
largeCoverUrl?.let {
checkImageRequest(it, publicUrl)
checkImageRequest(it, source)
}
}
}
@ -129,7 +126,7 @@ internal class MangaParserTest {
val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer)
checkImageRequest(pageUrl, page.source)
}
@ParameterizedTest(name = "{index}|favicon|{0}")
@ -145,7 +142,7 @@ internal class MangaParserTest {
}
val favicon = favicons.find(24)
checkNotNull(favicon)
checkImageRequest(favicon.url, favicons.referer)
checkImageRequest(favicon.url, source)
}
@ParameterizedTest(name = "{index}|domain|{0}")
@ -157,7 +154,7 @@ internal class MangaParserTest {
.host(defaultDomain)
.scheme("https")
.toString()
val response = context.doRequest(url, extraHeaders = parser.headers)
val response = context.doRequest(url, source)
val realUrl = response.request.url
val realDomain = realUrl.topPrivateDomain()
val realHost = realUrl.host
@ -189,11 +186,11 @@ internal class MangaParserTest {
assert(item.publicUrl.isUrlAbsolute())
}
val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl)
checkImageRequest(testItem.coverUrl, testItem.source)
}
private suspend fun checkImageRequest(url: String, referer: String?) {
context.doRequest(url, referer).use {
private suspend fun checkImageRequest(url: String, source: MangaSource) {
context.doRequest(url, source).use {
assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}"

Loading…
Cancel
Save