Fix grouple pages parsing #156

Koitharu 3 years ago
parent f4c47b5b84
commit eea87d8607
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -14,4 +14,8 @@ sealed class ConfigKey<T>(
class ShowSuspiciousContent( class ShowSuspiciousContent(
override val defaultValue: Boolean, override val defaultValue: Boolean,
) : ConfigKey<Boolean>("show_suspicious") ) : ConfigKey<Boolean>("show_suspicious")
/**
 * String-valued [ConfigKey] constructed with the name "user_agent".
 *
 * The Grouple parser reads this key from its config and sends the value as the
 * `User-Agent` request header.
 *
 * NOTE(review): the meaning of the "user_agent" constructor argument is defined by
 * [ConfigKey], whose declaration is not fully visible here — presumably it is the
 * storage key; confirm.
 *
 * @property defaultValue value supplied by the code that creates the key
 */
class UserAgent(
override val defaultValue: String,
) : ConfigKey<String>("user_agent")
} }

@ -5,10 +5,12 @@ import kotlinx.coroutines.coroutineScope
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response import okhttp3.Response
import okhttp3.internal.headersContentLength
import org.json.JSONArray import org.json.JSONArray
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaParserAuthProvider import org.koitharu.kotatsu.parsers.MangaParserAuthProvider
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.AuthRequiredException import org.koitharu.kotatsu.parsers.exception.AuthRequiredException
import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
@ -21,6 +23,7 @@ private const val PAGE_SIZE = 70
private const val PAGE_SIZE_SEARCH = 50 private const val PAGE_SIZE_SEARCH = 50
private const val NSFW_ALERT = "сексуальные сцены" private const val NSFW_ALERT = "сексуальные сцены"
private const val NOTHING_FOUND = "Ничего не найдено" private const val NOTHING_FOUND = "Ничего не найдено"
// Lower bound that tryHead compares a HEAD response's content length against:
// a server is only accepted when the reported length is >= this value
// (presumably bytes, i.e. 1 KiB, to filter out placeholder/error responses — confirm).
// NOTE(review): the length is read via headersContentLength(), which is imported from
// okhttp3.internal; confirm what it reports when the Content-Length header is absent,
// since such responses may be rejected by this check.
private const val MIN_IMAGE_SIZE = 1024L
internal abstract class GroupleParser( internal abstract class GroupleParser(
context: MangaLoaderContext, context: MangaLoaderContext,
@ -31,11 +34,13 @@ internal abstract class GroupleParser(
@Volatile @Volatile
private var cachedPagesServer: String? = null private var cachedPagesServer: String? = null
override val headers = Headers.Builder() private val userAgentKey = ConfigKey.UserAgent(
.add(
"User-Agent",
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0", "Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
) )
// Request headers for this parser. Declared with a getter, so a new Headers
// instance is built on every access and the "User-Agent" value is re-read
// from config[userAgentKey] each time rather than being fixed at construction.
override val headers: Headers
get() = Headers.Builder()
.add("User-Agent", config[userAgentKey])
.build() .build()
override val sortOrders: Set<SortOrder> = EnumSet.of( override val sortOrders: Set<SortOrder> = EnumSet.of(
@ -237,15 +242,23 @@ internal abstract class GroupleParser(
if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) { if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) {
return cachedServer + path return cachedServer + path
} }
val server = coroutineScope { if (servers.isEmpty()) {
throw ParseException("No servers found for page", page.url)
}
val server = try {
coroutineScope {
servers.map { server -> servers.map { server ->
async { async {
if (tryHead(server + path)) server else null if (tryHead(server + path)) server else null
} }
}.awaitFirst { it != null } }.awaitFirst { it != null }
}.also {
cachedPagesServer = it
}
} catch (e: NoSuchElementException) {
servers.random()
} }
cachedPagesServer = server return checkNotNull(server) + path
return checkNotNull(server + path)
} }
override suspend fun getTags(): Set<MangaTag> { override suspend fun getTags(): Set<MangaTag> {
@ -316,8 +329,9 @@ internal abstract class GroupleParser(
return webClient.httpPost(url, payload) return webClient.httpPost(url, payload)
} }
private suspend fun tryHead(url: String): Boolean = runCatchingCancellable { suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
webClient.httpHead(url).isSuccessful val response = webClient.httpHead(url)
response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE
}.getOrDefault(false) }.getOrDefault(false)
private fun Response.checkAuthRequired(): Response { private fun Response.checkAuthRequired(): Response {

@ -1,7 +1,6 @@
package org.koitharu.kotatsu.parsers package org.koitharu.kotatsu.parsers
import com.koushikdutta.quack.QuackContext import com.koushikdutta.quack.QuackContext
import okhttp3.Headers
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.Response import okhttp3.Response
@ -13,7 +12,7 @@ import java.util.concurrent.TimeUnit
internal object MangaLoaderContextMock : MangaLoaderContext() { internal object MangaLoaderContextMock : MangaLoaderContext() {
private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format( private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format(
/*BuildConfig.VERSION_NAME*/ "4.3", /*BuildConfig.VERSION_NAME*/ "4.4",
/*Build.VERSION.RELEASE*/ "r", /*Build.VERSION.RELEASE*/ "r",
/*Build.MODEL*/ "", /*Build.MODEL*/ "",
/*Build.BRAND*/ "", /*Build.BRAND*/ "",
@ -46,15 +45,12 @@ internal object MangaLoaderContextMock : MangaLoaderContext() {
return SourceConfigMock() return SourceConfigMock()
} }
suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response { suspend fun doRequest(url: String, source: MangaSource?): Response {
val request = Request.Builder() val request = Request.Builder()
.get() .get()
.url(url) .url(url)
if (extraHeaders != null) { if (source != null) {
request.headers(extraHeaders) request.tag(MangaSource::class.java, source)
}
if (referer != null) {
request.header("Referer", referer)
} }
return httpClient.newCall(request.build()).await() return httpClient.newCall(request.build()).await()
} }

@ -11,10 +11,7 @@ import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.domain import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.medianOrNull import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct import org.koitharu.kotatsu.test_util.*
import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates
@ExtendWith(AuthCheckExtension::class) @ExtendWith(AuthCheckExtension::class)
@ -105,9 +102,9 @@ internal class MangaParserTest {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
} }
assert(c.all { it.source == source }) assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl) checkImageRequest(coverUrl, source)
largeCoverUrl?.let { largeCoverUrl?.let {
checkImageRequest(it, publicUrl) checkImageRequest(it, source)
} }
} }
} }
@ -129,7 +126,7 @@ internal class MangaParserTest {
val pageUrl = parser.getPageUrl(page) val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty()) assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsolute()) assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer) checkImageRequest(pageUrl, page.source)
} }
@ParameterizedTest(name = "{index}|favicon|{0}") @ParameterizedTest(name = "{index}|favicon|{0}")
@ -145,7 +142,7 @@ internal class MangaParserTest {
} }
val favicon = favicons.find(24) val favicon = favicons.find(24)
checkNotNull(favicon) checkNotNull(favicon)
checkImageRequest(favicon.url, favicons.referer) checkImageRequest(favicon.url, source)
} }
@ParameterizedTest(name = "{index}|domain|{0}") @ParameterizedTest(name = "{index}|domain|{0}")
@ -157,7 +154,7 @@ internal class MangaParserTest {
.host(defaultDomain) .host(defaultDomain)
.scheme("https") .scheme("https")
.toString() .toString()
val response = context.doRequest(url, extraHeaders = parser.headers) val response = context.doRequest(url, source)
val realUrl = response.request.url val realUrl = response.request.url
val realDomain = realUrl.topPrivateDomain() val realDomain = realUrl.topPrivateDomain()
val realHost = realUrl.host val realHost = realUrl.host
@ -189,11 +186,11 @@ internal class MangaParserTest {
assert(item.publicUrl.isUrlAbsolute()) assert(item.publicUrl.isUrlAbsolute())
} }
val testItem = list.random() val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl) checkImageRequest(testItem.coverUrl, testItem.source)
} }
private suspend fun checkImageRequest(url: String, referer: String?) { private suspend fun checkImageRequest(url: String, source: MangaSource) {
context.doRequest(url, referer).use { context.doRequest(url, source).use {
assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" } assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) { assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}" "Wrong response mime type: ${it.mimeType}"

Loading…
Cancel
Save