Fix grouple pages parsing #156

Koitharu 3 years ago
parent f4c47b5b84
commit eea87d8607
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -14,4 +14,8 @@ sealed class ConfigKey<T>(
class ShowSuspiciousContent( class ShowSuspiciousContent(
override val defaultValue: Boolean, override val defaultValue: Boolean,
) : ConfigKey<Boolean>("show_suspicious") ) : ConfigKey<Boolean>("show_suspicious")
}
class UserAgent(
override val defaultValue: String,
) : ConfigKey<String>("user_agent")
}

@ -5,10 +5,12 @@ import kotlinx.coroutines.coroutineScope
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response import okhttp3.Response
import okhttp3.internal.headersContentLength
import org.json.JSONArray import org.json.JSONArray
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaParserAuthProvider import org.koitharu.kotatsu.parsers.MangaParserAuthProvider
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.AuthRequiredException import org.koitharu.kotatsu.parsers.exception.AuthRequiredException
import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.*
@ -21,6 +23,7 @@ private const val PAGE_SIZE = 70
private const val PAGE_SIZE_SEARCH = 50 private const val PAGE_SIZE_SEARCH = 50
private const val NSFW_ALERT = "сексуальные сцены" private const val NSFW_ALERT = "сексуальные сцены"
private const val NOTHING_FOUND = "Ничего не найдено" private const val NOTHING_FOUND = "Ничего не найдено"
private const val MIN_IMAGE_SIZE = 1024L
internal abstract class GroupleParser( internal abstract class GroupleParser(
context: MangaLoaderContext, context: MangaLoaderContext,
@ -31,12 +34,14 @@ internal abstract class GroupleParser(
@Volatile @Volatile
private var cachedPagesServer: String? = null private var cachedPagesServer: String? = null
override val headers = Headers.Builder() private val userAgentKey = ConfigKey.UserAgent(
.add( "Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
"User-Agent", )
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
) override val headers: Headers
.build() get() = Headers.Builder()
.add("User-Agent", config[userAgentKey])
.build()
override val sortOrders: Set<SortOrder> = EnumSet.of( override val sortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED, SortOrder.UPDATED,
@ -237,15 +242,23 @@ internal abstract class GroupleParser(
if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) { if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) {
return cachedServer + path return cachedServer + path
} }
val server = coroutineScope { if (servers.isEmpty()) {
servers.map { server -> throw ParseException("No servers found for page", page.url)
async { }
if (tryHead(server + path)) server else null val server = try {
} coroutineScope {
}.awaitFirst { it != null } servers.map { server ->
async {
if (tryHead(server + path)) server else null
}
}.awaitFirst { it != null }
}.also {
cachedPagesServer = it
}
} catch (e: NoSuchElementException) {
servers.random()
} }
cachedPagesServer = server return checkNotNull(server) + path
return checkNotNull(server + path)
} }
override suspend fun getTags(): Set<MangaTag> { override suspend fun getTags(): Set<MangaTag> {
@ -316,8 +329,9 @@ internal abstract class GroupleParser(
return webClient.httpPost(url, payload) return webClient.httpPost(url, payload)
} }
private suspend fun tryHead(url: String): Boolean = runCatchingCancellable { suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
webClient.httpHead(url).isSuccessful val response = webClient.httpHead(url)
response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE
}.getOrDefault(false) }.getOrDefault(false)
private fun Response.checkAuthRequired(): Response { private fun Response.checkAuthRequired(): Response {

@ -1,7 +1,6 @@
package org.koitharu.kotatsu.parsers package org.koitharu.kotatsu.parsers
import com.koushikdutta.quack.QuackContext import com.koushikdutta.quack.QuackContext
import okhttp3.Headers
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.Response import okhttp3.Response
@ -12,57 +11,54 @@ import java.util.concurrent.TimeUnit
internal object MangaLoaderContextMock : MangaLoaderContext() { internal object MangaLoaderContextMock : MangaLoaderContext() {
private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format( private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format(
/*BuildConfig.VERSION_NAME*/ "4.3", /*BuildConfig.VERSION_NAME*/ "4.4",
/*Build.VERSION.RELEASE*/ "r", /*Build.VERSION.RELEASE*/ "r",
/*Build.MODEL*/ "", /*Build.MODEL*/ "",
/*Build.BRAND*/ "", /*Build.BRAND*/ "",
/*Build.DEVICE*/ "", /*Build.DEVICE*/ "",
/*Locale.getDefault().language*/ "en", /*Locale.getDefault().language*/ "en",
) )
override val cookieJar = InMemoryCookieJar() override val cookieJar = InMemoryCookieJar()
override val httpClient: OkHttpClient = OkHttpClient.Builder() override val httpClient: OkHttpClient = OkHttpClient.Builder()
.cookieJar(cookieJar) .cookieJar(cookieJar)
.addInterceptor(CommonHeadersInterceptor(userAgent)) .addInterceptor(CommonHeadersInterceptor(userAgent))
.addInterceptor(CloudFlareInterceptor()) .addInterceptor(CloudFlareInterceptor())
.connectTimeout(20, TimeUnit.SECONDS) .connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS) .readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS) .writeTimeout(20, TimeUnit.SECONDS)
.build() .build()
init { init {
loadTestCookies() loadTestCookies()
} }
override suspend fun evaluateJs(script: String): String? { override suspend fun evaluateJs(script: String): String? {
return QuackContext.create().use { return QuackContext.create().use {
it.evaluate(script)?.toString() it.evaluate(script)?.toString()
} }
} }
override fun getConfig(source: MangaSource): MangaSourceConfig { override fun getConfig(source: MangaSource): MangaSourceConfig {
return SourceConfigMock() return SourceConfigMock()
} }
suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response { suspend fun doRequest(url: String, source: MangaSource?): Response {
val request = Request.Builder() val request = Request.Builder()
.get() .get()
.url(url) .url(url)
if (extraHeaders != null) { if (source != null) {
request.headers(extraHeaders) request.tag(MangaSource::class.java, source)
} }
if (referer != null) { return httpClient.newCall(request.build()).await()
request.header("Referer", referer) }
}
return httpClient.newCall(request.build()).await()
}
private fun loadTestCookies() { private fun loadTestCookies() {
// https://addons.mozilla.org/ru/firefox/addon/cookies-txt/ // https://addons.mozilla.org/ru/firefox/addon/cookies-txt/
javaClass.getResourceAsStream("/cookies.txt")?.use { javaClass.getResourceAsStream("/cookies.txt")?.use {
cookieJar.loadFromStream(it) cookieJar.loadFromStream(it)
} ?: println("No cookies loaded!") } ?: println("No cookies loaded!")
} }
} }

@ -11,193 +11,190 @@ import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.domain import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.medianOrNull import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct import org.koitharu.kotatsu.test_util.*
import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates
@ExtendWith(AuthCheckExtension::class) @ExtendWith(AuthCheckExtension::class)
internal class MangaParserTest { internal class MangaParserTest {
private val context = MangaLoaderContextMock private val context = MangaLoaderContextMock
@ParameterizedTest(name = "{index}|list|{0}") @ParameterizedTest(name = "{index}|list|{0}")
@MangaSources @MangaSources
fun list(source: MangaSource) = runTest { fun list(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
checkMangaList(list, "list") checkMangaList(list, "list")
assert(list.all { it.source == source }) assert(list.all { it.source == source })
} }
@ParameterizedTest(name = "{index}|pagination|{0}") @ParameterizedTest(name = "{index}|pagination|{0}")
@MangaSources @MangaSources
fun pagination(source: MangaSource) = runTest { fun pagination(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val page1 = parser.getList(0, sortOrder = null, tags = null) val page1 = parser.getList(0, sortOrder = null, tags = null)
val page2 = parser.getList(page1.size, sortOrder = null, tags = null) val page2 = parser.getList(page1.size, sortOrder = null, tags = null)
assert(page1.isNotEmpty()) { "Page 1 is empty" } assert(page1.isNotEmpty()) { "Page 1 is empty" }
assert(page2.isNotEmpty()) { "Page 2 is empty" } assert(page2.isNotEmpty()) { "Page 2 is empty" }
val intersection = page1.intersect(page2.toSet()) val intersection = page1.intersect(page2.toSet())
assert(intersection.isEmpty()) { assert(intersection.isEmpty()) {
"Pages are intersected by " + intersection.size "Pages are intersected by " + intersection.size
} }
} }
@ParameterizedTest(name = "{index}|search|{0}") @ParameterizedTest(name = "{index}|search|{0}")
@MangaSources @MangaSources
fun search(source: MangaSource) = runTest { fun search(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull { val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull {
it.title.length it.title.length
} ?: error("No manga found") } ?: error("No manga found")
val query = subject.title val query = subject.title
check(query.isNotBlank()) { "Manga title '$query' is blank" } check(query.isNotBlank()) { "Manga title '$query' is blank" }
val list = parser.getList(0, query) val list = parser.getList(0, query)
assert(list.isNotEmpty()) { "Empty search results by \"$query\"" } assert(list.isNotEmpty()) { "Empty search results by \"$query\"" }
assert(list.singleOrNull { it.url == subject.url && it.id == subject.id } != null) { assert(list.singleOrNull { it.url == subject.url && it.id == subject.id } != null) {
"Single subject '${subject.title} (${subject.publicUrl})' not found in search results" "Single subject '${subject.title} (${subject.publicUrl})' not found in search results"
} }
checkMangaList(list, "search('$query')") checkMangaList(list, "search('$query')")
assert(list.all { it.source == source }) assert(list.all { it.source == source })
} }
@ParameterizedTest(name = "{index}|tags|{0}") @ParameterizedTest(name = "{index}|tags|{0}")
@MangaSources @MangaSources
fun tags(source: MangaSource) = runTest { fun tags(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val tags = parser.getTags() val tags = parser.getTags()
assert(tags.isNotEmpty()) { "No tags found" } assert(tags.isNotEmpty()) { "No tags found" }
val keys = tags.map { it.key } val keys = tags.map { it.key }
assert(keys.isDistinct()) assert(keys.isDistinct())
assert("" !in keys) assert("" !in keys)
val titles = tags.map { it.title } val titles = tags.map { it.title }
// assert(titles.isDistinct()) // assert(titles.isDistinct())
assert("" !in titles) assert("" !in titles)
assert(tags.all { it.source == source }) assert(tags.all { it.source == source })
val tag = tags.last() val tag = tags.last()
val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null) val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null)
checkMangaList(list, "${tag.title} (${tag.key})") checkMangaList(list, "${tag.title} (${tag.key})")
assert(list.all { it.source == source }) assert(list.all { it.source == source })
} }
@ParameterizedTest(name = "{index}|details|{0}") @ParameterizedTest(name = "{index}|details|{0}")
@MangaSources @MangaSources
fun details(source: MangaSource) = runTest { fun details(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list[3] val manga = list[3]
parser.getDetails(manga).apply { parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" } assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }
assert(description != null) { "Detailed description is null: '$publicUrl'" } assert(description != null) { "Detailed description is null: '$publicUrl'" }
assert(title.startsWith(manga.title)) { assert(title.startsWith(manga.title)) {
"Titles are mismatch: '$title' and '${manga.title}' for $publicUrl" "Titles are mismatch: '$title' and '${manga.title}' for $publicUrl"
} }
assert(this.source == source) assert(this.source == source)
val c = checkNotNull(chapters) val c = checkNotNull(chapters)
assert(c.isDistinctBy { it.id }) { assert(c.isDistinctBy { it.id }) {
"Chapters are not distinct by id: ${c.maxDuplicates { it.id }} for $publicUrl" "Chapters are not distinct by id: ${c.maxDuplicates { it.id }} for $publicUrl"
} }
assert(c.isDistinctBy { it.number to it.branch }) { assert(c.isDistinctBy { it.number to it.branch }) {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
} }
assert(c.all { it.source == source }) assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl) checkImageRequest(coverUrl, source)
largeCoverUrl?.let { largeCoverUrl?.let {
checkImageRequest(it, publicUrl) checkImageRequest(it, source)
} }
} }
} }
@ParameterizedTest(name = "{index}|pages|{0}") @ParameterizedTest(name = "{index}|pages|{0}")
@MangaSources @MangaSources
fun pages(source: MangaSource) = runTest { fun pages(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list.first() val manga = list.first()
val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null") val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null")
val pages = parser.getPages(chapter) val pages = parser.getPages(chapter)
assert(pages.isNotEmpty()) assert(pages.isNotEmpty())
assert(pages.isDistinctBy { it.id }) assert(pages.isDistinctBy { it.id })
assert(pages.all { it.source == source }) assert(pages.all { it.source == source })
val page = pages.medianOrNull() ?: error("No page") val page = pages.medianOrNull() ?: error("No page")
val pageUrl = parser.getPageUrl(page) val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty()) assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsolute()) assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer) checkImageRequest(pageUrl, page.source)
} }
@ParameterizedTest(name = "{index}|favicon|{0}") @ParameterizedTest(name = "{index}|favicon|{0}")
@MangaSources @MangaSources
fun favicon(source: MangaSource) = runTest { fun favicon(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val favicons = parser.getFavicons() val favicons = parser.getFavicons()
val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg")
assert(favicons.isNotEmpty()) assert(favicons.isNotEmpty())
favicons.forEach { favicons.forEach {
assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" } assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" }
assert(it.type in types) { "Unknown icon type: ${it.type}" } assert(it.type in types) { "Unknown icon type: ${it.type}" }
} }
val favicon = favicons.find(24) val favicon = favicons.find(24)
checkNotNull(favicon) checkNotNull(favicon)
checkImageRequest(favicon.url, favicons.referer) checkImageRequest(favicon.url, source)
} }
@ParameterizedTest(name = "{index}|domain|{0}") @ParameterizedTest(name = "{index}|domain|{0}")
@MangaSources @MangaSources
fun domain(source: MangaSource) = runTest { fun domain(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
val defaultDomain = parser.domain val defaultDomain = parser.domain
val url = HttpUrl.Builder() val url = HttpUrl.Builder()
.host(defaultDomain) .host(defaultDomain)
.scheme("https") .scheme("https")
.toString() .toString()
val response = context.doRequest(url, extraHeaders = parser.headers) val response = context.doRequest(url, source)
val realUrl = response.request.url val realUrl = response.request.url
val realDomain = realUrl.topPrivateDomain() val realDomain = realUrl.topPrivateDomain()
val realHost = realUrl.host val realHost = realUrl.host
assert(defaultDomain == realHost || defaultDomain == realDomain) { assert(defaultDomain == realHost || defaultDomain == realDomain) {
"Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost" "Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost"
} }
} }
@ParameterizedTest(name = "{index}|authorization|{0}") @ParameterizedTest(name = "{index}|authorization|{0}")
@MangaSources @MangaSources
@Disabled @Disabled
fun authorization(source: MangaSource) = runTest { fun authorization(source: MangaSource) = runTest {
val parser = source.newParser(context) val parser = source.newParser(context)
if (parser is MangaParserAuthProvider) { if (parser is MangaParserAuthProvider) {
val username = parser.getUsername() val username = parser.getUsername()
assert(username.isNotBlank()) { "Username is blank" } assert(username.isNotBlank()) { "Username is blank" }
println("Signed in to ${source.name} as $username") println("Signed in to ${source.name} as $username")
} }
} }
private suspend fun checkMangaList(list: List<Manga>, cause: String) { private suspend fun checkMangaList(list: List<Manga>, cause: String) {
assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" } assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" }
assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" } assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" }
for (item in list) { for (item in list) {
assert(item.url.isNotEmpty()) { "Url is empty" } assert(item.url.isNotEmpty()) { "Url is empty" }
assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" } assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" }
assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" } assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" }
assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" } assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" }
assert(item.publicUrl.isUrlAbsolute()) assert(item.publicUrl.isUrlAbsolute())
} }
val testItem = list.random() val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl) checkImageRequest(testItem.coverUrl, testItem.source)
} }
private suspend fun checkImageRequest(url: String, referer: String?) { private suspend fun checkImageRequest(url: String, source: MangaSource) {
context.doRequest(url, referer).use { context.doRequest(url, source).use {
assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" } assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) { assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}" "Wrong response mime type: ${it.mimeType}"
} }
} }
} }
} }

Loading…
Cancel
Save