Fix grouple pages parsing #156

pull/168/head
Koitharu 3 years ago
parent f4c47b5b84
commit eea87d8607
No known key found for this signature in database
GPG Key ID: 8E861F8CE6E7CE27

@ -14,4 +14,8 @@ sealed class ConfigKey<T>(
/**
 * Boolean configuration key identified by the string "show_suspicious",
 * which is passed to the [ConfigKey] constructor.
 *
 * @property defaultValue default supplied by whoever declares the key.
 */
class ShowSuspiciousContent(
override val defaultValue: Boolean,
) : ConfigKey<Boolean>("show_suspicious")
}
/**
 * String configuration key identified by the string "user_agent",
 * which is passed to the [ConfigKey] constructor.
 *
 * @property defaultValue default user-agent string supplied by whoever declares the key.
 */
class UserAgent(
override val defaultValue: String,
) : ConfigKey<String>("user_agent")
}

@ -5,10 +5,12 @@ import kotlinx.coroutines.coroutineScope
import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.Response
import okhttp3.internal.headersContentLength
import org.json.JSONArray
import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaParser
import org.koitharu.kotatsu.parsers.MangaParserAuthProvider
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.AuthRequiredException
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.*
@ -21,6 +23,7 @@ private const val PAGE_SIZE = 70
// Page size constant for search requests (its usage is outside this excerpt).
private const val PAGE_SIZE_SEARCH = 50
// Russian for "sexual scenes"; presumably matched against page text to detect an NSFW warning — confirm at the usage site.
private const val NSFW_ALERT = "сексуальные сцены"
// Russian for "Nothing found"; presumably marks an empty result page — confirm at the usage site.
private const val NOTHING_FOUND = "Ничего не найдено"
// Minimum Content-Length that tryHead accepts; a HEAD response reporting less is treated as a failed probe.
private const val MIN_IMAGE_SIZE = 1024L
internal abstract class GroupleParser(
context: MangaLoaderContext,
@ -31,12 +34,14 @@ internal abstract class GroupleParser(
@Volatile
private var cachedPagesServer: String? = null
override val headers = Headers.Builder()
.add(
"User-Agent",
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
)
.build()
// Config key whose value is sent as the User-Agent header; the literal below is the key's default value.
private val userAgentKey = ConfigKey.UserAgent(
"Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0",
)
/**
 * Request headers for this parser.
 *
 * Built anew on every access, so the User-Agent always reflects the value
 * currently stored in [config] under [userAgentKey].
 */
override val headers: Headers
    get() {
        val userAgent = config[userAgentKey]
        return Headers.Builder().add("User-Agent", userAgent).build()
    }
override val sortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
@ -237,15 +242,23 @@ internal abstract class GroupleParser(
if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) {
return cachedServer + path
}
val server = coroutineScope {
servers.map { server ->
async {
if (tryHead(server + path)) server else null
}
}.awaitFirst { it != null }
if (servers.isEmpty()) {
throw ParseException("No servers found for page", page.url)
}
val server = try {
coroutineScope {
servers.map { server ->
async {
if (tryHead(server + path)) server else null
}
}.awaitFirst { it != null }
}.also {
cachedPagesServer = it
}
} catch (e: NoSuchElementException) {
servers.random()
}
cachedPagesServer = server
return checkNotNull(server + path)
return checkNotNull(server) + path
}
override suspend fun getTags(): Set<MangaTag> {
@ -316,8 +329,9 @@ internal abstract class GroupleParser(
return webClient.httpPost(url, payload)
}
private suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
webClient.httpHead(url).isSuccessful
/**
 * Probes [url] with a HEAD request to decide whether it serves a usable image.
 *
 * @param url absolute URL to probe.
 * @return `true` only when the response is successful and its Content-Length
 * is at least [MIN_IMAGE_SIZE]; `false` otherwise, including when the request
 * fails with an exception captured by [runCatchingCancellable].
 */
suspend fun tryHead(url: String): Boolean = runCatchingCancellable {
    // OkHttp responses must be closed; `use` releases the response on every path.
    webClient.httpHead(url).use { response ->
        response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE
    }
}.getOrDefault(false)
private fun Response.checkAuthRequired(): Response {

@ -1,7 +1,6 @@
package org.koitharu.kotatsu.parsers
import com.koushikdutta.quack.QuackContext
import okhttp3.Headers
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.Response
@ -12,57 +11,54 @@ import java.util.concurrent.TimeUnit
internal object MangaLoaderContextMock : MangaLoaderContext() {
private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format(
/*BuildConfig.VERSION_NAME*/ "4.3",
/*Build.VERSION.RELEASE*/ "r",
/*Build.MODEL*/ "",
/*Build.BRAND*/ "",
/*Build.DEVICE*/ "",
/*Locale.getDefault().language*/ "en",
)
// User-Agent string handed to CommonHeadersInterceptor; every argument is a fixed
// stand-in for the Android value named in the comment next to it.
private val userAgent = String.format(
    "Kotatsu/%s (Android %s; %s; %s %s; %s)",
    "4.4", // BuildConfig.VERSION_NAME
    "r", // Build.VERSION.RELEASE
    "", // Build.MODEL
    "", // Build.BRAND
    "", // Build.DEVICE
    "en", // Locale.getDefault().language
)
override val cookieJar = InMemoryCookieJar()
// In-memory cookie storage; installed into httpClient and filled by loadTestCookies().
override val cookieJar = InMemoryCookieJar()
override val httpClient: OkHttpClient = OkHttpClient.Builder()
.cookieJar(cookieJar)
.addInterceptor(CommonHeadersInterceptor(userAgent))
.addInterceptor(CloudFlareInterceptor())
.connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS)
.build()
// HTTP client used for all mock-context requests: backed by cookieJar, with the
// common-headers interceptor (carrying userAgent) registered before the CloudFlare
// interceptor, and 20 s connect/write plus 60 s read timeouts.
override val httpClient: OkHttpClient = OkHttpClient.Builder()
.cookieJar(cookieJar)
.addInterceptor(CommonHeadersInterceptor(userAgent))
.addInterceptor(CloudFlareInterceptor())
.connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS)
.build()
init {
loadTestCookies()
}
// Populate the cookie jar as soon as the object is created; declared after
// cookieJar so that the property is already initialised when this runs.
init {
loadTestCookies()
}
override suspend fun evaluateJs(script: String): String? {
return QuackContext.create().use {
it.evaluate(script)?.toString()
}
}
/**
 * Evaluates [script] in a freshly created [QuackContext], which is closed afterwards.
 *
 * @return the string form of the evaluation result, or `null` when the result is `null`.
 */
override suspend fun evaluateJs(script: String): String? =
    QuackContext.create().use { quack ->
        val result = quack.evaluate(script)
        result?.toString()
    }
override fun getConfig(source: MangaSource): MangaSourceConfig {
return SourceConfigMock()
}
/** Returns a new [SourceConfigMock] for every call; [source] is not consulted. */
override fun getConfig(source: MangaSource): MangaSourceConfig = SourceConfigMock()
suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response {
val request = Request.Builder()
.get()
.url(url)
if (extraHeaders != null) {
request.headers(extraHeaders)
}
if (referer != null) {
request.header("Referer", referer)
}
return httpClient.newCall(request.build()).await()
}
/**
 * Performs a GET request to [url] through [httpClient].
 *
 * @param source when non-null, attached to the request as a tag of type [MangaSource].
 * @return the response; the caller is responsible for closing it.
 */
suspend fun doRequest(url: String, source: MangaSource?): Response {
    val builder = Request.Builder().get().url(url)
    source?.let { builder.tag(MangaSource::class.java, it) }
    val call = httpClient.newCall(builder.build())
    return call.await()
}
private fun loadTestCookies() {
// https://addons.mozilla.org/ru/firefox/addon/cookies-txt/
javaClass.getResourceAsStream("/cookies.txt")?.use {
cookieJar.loadFromStream(it)
} ?: println("No cookies loaded!")
}
/**
 * Loads cookies from the `/cookies.txt` classpath resource into [cookieJar].
 * When the resource cannot be found, prints "No cookies loaded!" instead.
 */
private fun loadTestCookies() {
// Presumably the file is exported with this browser add-on — confirm:
// https://addons.mozilla.org/ru/firefox/addon/cookies-txt/
// NOTE(review): the elvis fallback would also fire if loadFromStream returned null — confirm its return type.
javaClass.getResourceAsStream("/cookies.txt")?.use {
cookieJar.loadFromStream(it)
} ?: println("No cookies loaded!")
}
}

@ -11,193 +11,190 @@ import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.medianOrNull
import org.koitharu.kotatsu.parsers.util.mimeType
import org.koitharu.kotatsu.test_util.isDistinct
import org.koitharu.kotatsu.test_util.isDistinctBy
import org.koitharu.kotatsu.test_util.isUrlAbsolute
import org.koitharu.kotatsu.test_util.maxDuplicates
import org.koitharu.kotatsu.test_util.*
@ExtendWith(AuthCheckExtension::class)
internal class MangaParserTest {
private val context = MangaLoaderContextMock
@ParameterizedTest(name = "{index}|list|{0}")
@MangaSources
fun list(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
checkMangaList(list, "list")
assert(list.all { it.source == source })
}
@ParameterizedTest(name = "{index}|pagination|{0}")
@MangaSources
fun pagination(source: MangaSource) = runTest {
val parser = source.newParser(context)
val page1 = parser.getList(0, sortOrder = null, tags = null)
val page2 = parser.getList(page1.size, sortOrder = null, tags = null)
assert(page1.isNotEmpty()) { "Page 1 is empty" }
assert(page2.isNotEmpty()) { "Page 2 is empty" }
val intersection = page1.intersect(page2.toSet())
assert(intersection.isEmpty()) {
"Pages are intersected by " + intersection.size
}
}
@ParameterizedTest(name = "{index}|search|{0}")
@MangaSources
fun search(source: MangaSource) = runTest {
val parser = source.newParser(context)
val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull {
it.title.length
} ?: error("No manga found")
val query = subject.title
check(query.isNotBlank()) { "Manga title '$query' is blank" }
val list = parser.getList(0, query)
assert(list.isNotEmpty()) { "Empty search results by \"$query\"" }
assert(list.singleOrNull { it.url == subject.url && it.id == subject.id } != null) {
"Single subject '${subject.title} (${subject.publicUrl})' not found in search results"
}
checkMangaList(list, "search('$query')")
assert(list.all { it.source == source })
}
@ParameterizedTest(name = "{index}|tags|{0}")
@MangaSources
fun tags(source: MangaSource) = runTest {
val parser = source.newParser(context)
val tags = parser.getTags()
assert(tags.isNotEmpty()) { "No tags found" }
val keys = tags.map { it.key }
assert(keys.isDistinct())
assert("" !in keys)
val titles = tags.map { it.title }
// Shared mock loader context used to create parsers and to issue the image/domain check requests.
private val context = MangaLoaderContextMock
/**
 * Requests a popularity-sorted list at offset 20 and validates it via
 * [checkMangaList]; every entry must belong to [source].
 */
@ParameterizedTest(name = "{index}|list|{0}")
@MangaSources
fun list(source: MangaSource) = runTest {
    val mangaList = source.newParser(context)
        .getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
    checkMangaList(mangaList, "list")
    assert(mangaList.none { it.source != source })
}
/**
 * Loads two consecutive pages (the second one offset by the size of the first)
 * and verifies that both are non-empty and share no items.
 */
@ParameterizedTest(name = "{index}|pagination|{0}")
@MangaSources
fun pagination(source: MangaSource) = runTest {
    val parser = source.newParser(context)
    val firstPage = parser.getList(0, sortOrder = null, tags = null)
    val secondPage = parser.getList(firstPage.size, sortOrder = null, tags = null)
    assert(firstPage.isNotEmpty()) { "Page 1 is empty" }
    assert(secondPage.isNotEmpty()) { "Page 2 is empty" }
    val shared = firstPage.intersect(secondPage.toSet())
    assert(shared.isEmpty()) { "Pages are intersected by " + shared.size }
}
/**
 * Picks the manga with the shortest title from a popularity-sorted list,
 * searches by that title and expects exactly one result with the same url and id.
 */
@ParameterizedTest(name = "{index}|search|{0}")
@MangaSources
fun search(source: MangaSource) = runTest {
    val parser = source.newParser(context)
    val candidates = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
    val subject = candidates.minByOrNull { it.title.length } ?: error("No manga found")
    val query = subject.title
    check(query.isNotBlank()) { "Manga title '$query' is blank" }
    val found = parser.getList(0, query)
    assert(found.isNotEmpty()) { "Empty search results by \"$query\"" }
    // Exactly one hit must match the subject by both url and id.
    val matchCount = found.count { it.url == subject.url && it.id == subject.id }
    assert(matchCount == 1) {
        "Single subject '${subject.title} (${subject.publicUrl})' not found in search results"
    }
    checkMangaList(found, "search('$query')")
    assert(found.none { it.source != source })
}
@ParameterizedTest(name = "{index}|tags|{0}")
@MangaSources
fun tags(source: MangaSource) = runTest {
val parser = source.newParser(context)
val tags = parser.getTags()
assert(tags.isNotEmpty()) { "No tags found" }
val keys = tags.map { it.key }
assert(keys.isDistinct())
assert("" !in keys)
val titles = tags.map { it.title }
// assert(titles.isDistinct())
assert("" !in titles)
assert(tags.all { it.source == source })
val tag = tags.last()
val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null)
checkMangaList(list, "${tag.title} (${tag.key})")
assert(list.all { it.source == source })
}
@ParameterizedTest(name = "{index}|details|{0}")
@MangaSources
fun details(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list[3]
parser.getDetails(manga).apply {
assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" }
assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }
assert(description != null) { "Detailed description is null: '$publicUrl'" }
assert(title.startsWith(manga.title)) {
"Titles are mismatch: '$title' and '${manga.title}' for $publicUrl"
}
assert(this.source == source)
val c = checkNotNull(chapters)
assert(c.isDistinctBy { it.id }) {
"Chapters are not distinct by id: ${c.maxDuplicates { it.id }} for $publicUrl"
}
assert(c.isDistinctBy { it.number to it.branch }) {
"Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl"
}
assert(c.all { it.source == source })
checkImageRequest(coverUrl, publicUrl)
largeCoverUrl?.let {
checkImageRequest(it, publicUrl)
}
}
}
@ParameterizedTest(name = "{index}|pages|{0}")
@MangaSources
fun pages(source: MangaSource) = runTest {
val parser = source.newParser(context)
val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)
val manga = list.first()
val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null")
val pages = parser.getPages(chapter)
assert(pages.isNotEmpty())
assert(pages.isDistinctBy { it.id })
assert(pages.all { it.source == source })
val page = pages.medianOrNull() ?: error("No page")
val pageUrl = parser.getPageUrl(page)
assert(pageUrl.isNotEmpty())
assert(pageUrl.isUrlAbsolute())
checkImageRequest(pageUrl, page.referer)
}
@ParameterizedTest(name = "{index}|favicon|{0}")
@MangaSources
fun favicon(source: MangaSource) = runTest {
val parser = source.newParser(context)
val favicons = parser.getFavicons()
val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg")
assert(favicons.isNotEmpty())
favicons.forEach {
assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" }
assert(it.type in types) { "Unknown icon type: ${it.type}" }
}
val favicon = favicons.find(24)
checkNotNull(favicon)
checkImageRequest(favicon.url, favicons.referer)
}
@ParameterizedTest(name = "{index}|domain|{0}")
@MangaSources
fun domain(source: MangaSource) = runTest {
val parser = source.newParser(context)
val defaultDomain = parser.domain
val url = HttpUrl.Builder()
.host(defaultDomain)
.scheme("https")
.toString()
val response = context.doRequest(url, extraHeaders = parser.headers)
val realUrl = response.request.url
val realDomain = realUrl.topPrivateDomain()
val realHost = realUrl.host
assert(defaultDomain == realHost || defaultDomain == realDomain) {
"Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost"
}
}
@ParameterizedTest(name = "{index}|authorization|{0}")
@MangaSources
@Disabled
fun authorization(source: MangaSource) = runTest {
val parser = source.newParser(context)
if (parser is MangaParserAuthProvider) {
val username = parser.getUsername()
assert(username.isNotBlank()) { "Username is blank" }
println("Signed in to ${source.name} as $username")
}
}
private suspend fun checkMangaList(list: List<Manga>, cause: String) {
assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" }
assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" }
for (item in list) {
assert(item.url.isNotEmpty()) { "Url is empty" }
assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" }
assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" }
assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" }
assert(item.publicUrl.isUrlAbsolute())
}
val testItem = list.random()
checkImageRequest(testItem.coverUrl, testItem.publicUrl)
}
private suspend fun checkImageRequest(url: String, referer: String?) {
context.doRequest(url, referer).use {
assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" }
assert(it.mimeType?.startsWith("image/") == true) {
"Wrong response mime type: ${it.mimeType}"
}
}
}
assert("" !in titles)
assert(tags.all { it.source == source })
val tag = tags.last()
val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null)
checkMangaList(list, "${tag.title} (${tag.key})")
assert(list.all { it.source == source })
}
/**
 * Loads details for the fourth manga of a popularity-sorted list and checks
 * chapters, urls, description, title, source and cover images.
 */
@ParameterizedTest(name = "{index}|details|{0}")
@MangaSources
fun details(source: MangaSource) = runTest {
    val parser = source.newParser(context)
    val manga = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null)[3]
    val details = parser.getDetails(manga)
    val publicUrl = details.publicUrl

    assert(!details.chapters.isNullOrEmpty()) { "Chapters are null or empty" }
    assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" }
    assert(details.description != null) { "Detailed description is null: '$publicUrl'" }
    assert(details.title.startsWith(manga.title)) {
        "Titles are mismatch: '${details.title}' and '${manga.title}' for $publicUrl"
    }
    assert(details.source == source)

    val chapterList = checkNotNull(details.chapters)
    assert(chapterList.isDistinctBy { it.id }) {
        "Chapters are not distinct by id: ${chapterList.maxDuplicates { it.id }} for $publicUrl"
    }
    assert(chapterList.isDistinctBy { it.number to it.branch }) {
        "Chapters are not distinct by number: ${chapterList.maxDuplicates { it.number to it.branch }} for $publicUrl"
    }
    assert(chapterList.none { it.source != source })

    checkImageRequest(details.coverUrl, source)
    val largeCover = details.largeCoverUrl
    if (largeCover != null) {
        checkImageRequest(largeCover, source)
    }
}
/**
 * Loads the pages of the first chapter of the first listed manga, checks that
 * they are non-empty, distinct by id and owned by [source], then resolves the
 * median page's url and requests it as an image.
 */
@ParameterizedTest(name = "{index}|pages|{0}")
@MangaSources
fun pages(source: MangaSource) = runTest {
    val parser = source.newParser(context)
    val manga = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).first()
    val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null")
    val chapterPages = parser.getPages(chapter)
    assert(chapterPages.isNotEmpty())
    assert(chapterPages.isDistinctBy { it.id })
    assert(chapterPages.none { it.source != source })
    val samplePage = chapterPages.medianOrNull() ?: error("No page")
    val resolvedUrl = parser.getPageUrl(samplePage)
    assert(resolvedUrl.isNotEmpty())
    assert(resolvedUrl.isUrlAbsolute())
    checkImageRequest(resolvedUrl, samplePage.source)
}
/**
 * Checks that the parser reports favicons with absolute urls and known types,
 * and that the icon selected by `find(24)` can be downloaded as an image.
 */
@ParameterizedTest(name = "{index}|favicon|{0}")
@MangaSources
fun favicon(source: MangaSource) = runTest {
    val favicons = source.newParser(context).getFavicons()
    val knownTypes = setOf("png", "svg", "ico", "gif", "jpg", "jpeg")
    assert(favicons.isNotEmpty())
    for (icon in favicons) {
        assert(icon.url.isUrlAbsolute()) { "Favicon url is not absolute: ${icon.url}" }
        assert(icon.type in knownTypes) { "Unknown icon type: ${icon.type}" }
    }
    val selected = checkNotNull(favicons.find(24))
    checkImageRequest(selected.url, source)
}
/**
 * Requests the parser's default domain over https and verifies that the
 * final request url still points at that domain, compared either by host or
 * by top private domain.
 */
@ParameterizedTest(name = "{index}|domain|{0}")
@MangaSources
fun domain(source: MangaSource) = runTest {
    val parser = source.newParser(context)
    val defaultDomain = parser.domain
    val url = HttpUrl.Builder()
        .host(defaultDomain)
        .scheme("https")
        .toString()
    // Close the response once inspected so its connection is not leaked.
    context.doRequest(url, source).use { response ->
        // response.request is the request that produced this response.
        val realUrl = response.request.url
        val realDomain = realUrl.topPrivateDomain()
        val realHost = realUrl.host
        assert(defaultDomain == realHost || defaultDomain == realDomain) {
            "Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost"
        }
    }
}
/**
 * Disabled check: for parsers that implement [MangaParserAuthProvider],
 * fetches the username, requires it to be non-blank and prints it.
 */
@ParameterizedTest(name = "{index}|authorization|{0}")
@MangaSources
@Disabled
fun authorization(source: MangaSource) = runTest {
    // Parsers without auth support have nothing to verify.
    val authProvider = source.newParser(context) as? MangaParserAuthProvider ?: return@runTest
    val username = authProvider.getUsername()
    assert(username.isNotBlank()) { "Username is blank" }
    println("Signed in to ${source.name} as $username")
}
/**
 * Validates a manga list: it must be non-empty with unique ids, and every
 * item needs a non-empty relative url, an absolute cover url, a non-empty
 * title and an absolute public url. The cover of one random item is then
 * requested as an image.
 *
 * @param cause label inserted into the failure messages.
 */
private suspend fun checkMangaList(list: List<Manga>, cause: String) {
    assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" }
    assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" }
    list.forEach { manga ->
        assert(manga.url.isNotEmpty()) { "Url is empty" }
        assert(!manga.url.isUrlAbsolute()) { "Url looks like absolute: ${manga.url}" }
        assert(manga.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${manga.coverUrl}" }
        assert(manga.title.isNotEmpty()) { "Title for ${manga.publicUrl} is empty" }
        assert(manga.publicUrl.isUrlAbsolute())
    }
    val sample = list.random()
    checkImageRequest(sample.coverUrl, sample.source)
}
/**
 * Requests [url] through the mock context, tagged with [source], and asserts
 * that the response is successful and has an `image/` mime type. The response
 * is closed afterwards.
 */
private suspend fun checkImageRequest(url: String, source: MangaSource) {
    val response = context.doRequest(url, source)
    response.use { res ->
        assert(res.isSuccessful) { "Request failed: ${res.code}(${res.message}): $url" }
        assert(res.mimeType?.startsWith("image/") == true) {
            "Wrong response mime type: ${res.mimeType}"
        }
    }
}
}

Loading…
Cancel
Save