diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/config/ConfigKey.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/config/ConfigKey.kt index 3ff6e276..8603ac62 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/config/ConfigKey.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/config/ConfigKey.kt @@ -14,4 +14,8 @@ sealed class ConfigKey( class ShowSuspiciousContent( override val defaultValue: Boolean, ) : ConfigKey("show_suspicious") -} \ No newline at end of file + + class UserAgent( + override val defaultValue: String, + ) : ConfigKey("user_agent") +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt index b98fd807..e99784af 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/grouple/GroupleParser.kt @@ -5,10 +5,12 @@ import kotlinx.coroutines.coroutineScope import okhttp3.Headers import okhttp3.HttpUrl.Companion.toHttpUrl import okhttp3.Response +import okhttp3.internal.headersContentLength import org.json.JSONArray import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaParser import org.koitharu.kotatsu.parsers.MangaParserAuthProvider +import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.exception.AuthRequiredException import org.koitharu.kotatsu.parsers.exception.ParseException import org.koitharu.kotatsu.parsers.model.* @@ -21,6 +23,7 @@ private const val PAGE_SIZE = 70 private const val PAGE_SIZE_SEARCH = 50 private const val NSFW_ALERT = "сексуальные сцены" private const val NOTHING_FOUND = "Ничего не найдено" +private const val MIN_IMAGE_SIZE = 1024L internal abstract class GroupleParser( context: MangaLoaderContext, @@ -31,12 +34,14 @@ internal abstract class GroupleParser( @Volatile private var cachedPagesServer: String? = null - override val headers = Headers.Builder() - .add( - "User-Agent", - "Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0", - ) - .build() + private val userAgentKey = ConfigKey.UserAgent( + "Mozilla/5.0 (X11; U; UNICOS lcLinux; en-US) Gecko/20140730 (KHTML, like Gecko, Safari/419.3) Arora/0.8.0", + ) + + override val headers: Headers + get() = Headers.Builder() + .add("User-Agent", config[userAgentKey]) + .build() override val sortOrders: Set = EnumSet.of( SortOrder.UPDATED, @@ -237,15 +242,23 @@ internal abstract class GroupleParser( if (cachedServer != null && cachedServer in servers && tryHead(cachedServer + path)) { return cachedServer + path } - val server = coroutineScope { - servers.map { server -> - async { - if (tryHead(server + path)) server else null - } - }.awaitFirst { it != null } + if (servers.isEmpty()) { + throw ParseException("No servers found for page", page.url) + } + val server = try { + coroutineScope { + servers.map { server -> + async { + if (tryHead(server + path)) server else null + } + }.awaitFirst { it != null } + }.also { + cachedPagesServer = it + } + } catch (e: NoSuchElementException) { + servers.random() } - cachedPagesServer = server - return checkNotNull(server + path) + return checkNotNull(server) + path } override suspend fun getTags(): Set { @@ -316,8 +329,9 @@ internal abstract class GroupleParser( return webClient.httpPost(url, payload) } - private suspend fun tryHead(url: String): Boolean = runCatchingCancellable { - webClient.httpHead(url).isSuccessful + suspend fun tryHead(url: String): Boolean = runCatchingCancellable { + val response = webClient.httpHead(url) + response.isSuccessful && response.headersContentLength() >= MIN_IMAGE_SIZE }.getOrDefault(false) private fun Response.checkAuthRequired(): Response { diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt index 2c80cb0f..6a01b4cc 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaLoaderContextMock.kt @@ -1,7 +1,6 @@ package org.koitharu.kotatsu.parsers import com.koushikdutta.quack.QuackContext -import okhttp3.Headers import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.Response @@ -12,57 +11,54 @@ import java.util.concurrent.TimeUnit internal object MangaLoaderContextMock : MangaLoaderContext() { - private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format( - /*BuildConfig.VERSION_NAME*/ "4.3", - /*Build.VERSION.RELEASE*/ "r", - /*Build.MODEL*/ "", - /*Build.BRAND*/ "", - /*Build.DEVICE*/ "", - /*Locale.getDefault().language*/ "en", - ) + private val userAgent = "Kotatsu/%s (Android %s; %s; %s %s; %s)".format( + /*BuildConfig.VERSION_NAME*/ "4.4", + /*Build.VERSION.RELEASE*/ "r", + /*Build.MODEL*/ "", + /*Build.BRAND*/ "", + /*Build.DEVICE*/ "", + /*Locale.getDefault().language*/ "en", + ) - override val cookieJar = InMemoryCookieJar() + override val cookieJar = InMemoryCookieJar() - override val httpClient: OkHttpClient = OkHttpClient.Builder() - .cookieJar(cookieJar) - .addInterceptor(CommonHeadersInterceptor(userAgent)) - .addInterceptor(CloudFlareInterceptor()) - .connectTimeout(20, TimeUnit.SECONDS) - .readTimeout(60, TimeUnit.SECONDS) - .writeTimeout(20, TimeUnit.SECONDS) - .build() + override val httpClient: OkHttpClient = OkHttpClient.Builder() + .cookieJar(cookieJar) + .addInterceptor(CommonHeadersInterceptor(userAgent)) + .addInterceptor(CloudFlareInterceptor()) + .connectTimeout(20, TimeUnit.SECONDS) + .readTimeout(60, TimeUnit.SECONDS) + .writeTimeout(20, TimeUnit.SECONDS) + .build() - init { - loadTestCookies() - } + init { + loadTestCookies() + } - override suspend fun evaluateJs(script: String): String? { - return QuackContext.create().use { - it.evaluate(script)?.toString() - } - } + override suspend fun evaluateJs(script: String): String? { + return QuackContext.create().use { + it.evaluate(script)?.toString() + } + } - override fun getConfig(source: MangaSource): MangaSourceConfig { - return SourceConfigMock() - } + override fun getConfig(source: MangaSource): MangaSourceConfig { + return SourceConfigMock() + } - suspend fun doRequest(url: String, referer: String? = null, extraHeaders: Headers? = null): Response { - val request = Request.Builder() - .get() - .url(url) - if (extraHeaders != null) { - request.headers(extraHeaders) - } - if (referer != null) { - request.header("Referer", referer) - } - return httpClient.newCall(request.build()).await() - } + suspend fun doRequest(url: String, source: MangaSource?): Response { + val request = Request.Builder() + .get() + .url(url) + if (source != null) { + request.tag(MangaSource::class.java, source) + } + return httpClient.newCall(request.build()).await() + } - private fun loadTestCookies() { - // https://addons.mozilla.org/ru/firefox/addon/cookies-txt/ - javaClass.getResourceAsStream("/cookies.txt")?.use { - cookieJar.loadFromStream(it) - } ?: println("No cookies loaded!") - } + private fun loadTestCookies() { + // https://addons.mozilla.org/ru/firefox/addon/cookies-txt/ + javaClass.getResourceAsStream("/cookies.txt")?.use { + cookieJar.loadFromStream(it) + } ?: println("No cookies loaded!") + } } diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt index ab450369..337b484e 100644 --- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt +++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaParserTest.kt @@ -11,193 +11,190 @@ import org.koitharu.kotatsu.parsers.model.SortOrder import org.koitharu.kotatsu.parsers.util.domain import org.koitharu.kotatsu.parsers.util.medianOrNull import org.koitharu.kotatsu.parsers.util.mimeType -import org.koitharu.kotatsu.test_util.isDistinct -import org.koitharu.kotatsu.test_util.isDistinctBy -import org.koitharu.kotatsu.test_util.isUrlAbsolute -import org.koitharu.kotatsu.test_util.maxDuplicates +import org.koitharu.kotatsu.test_util.* @ExtendWith(AuthCheckExtension::class) internal class MangaParserTest { - private val context = MangaLoaderContextMock - - @ParameterizedTest(name = "{index}|list|{0}") - @MangaSources - fun list(source: MangaSource) = runTest { - val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) - checkMangaList(list, "list") - assert(list.all { it.source == source }) - } - - @ParameterizedTest(name = "{index}|pagination|{0}") - @MangaSources - fun pagination(source: MangaSource) = runTest { - val parser = source.newParser(context) - val page1 = parser.getList(0, sortOrder = null, tags = null) - val page2 = parser.getList(page1.size, sortOrder = null, tags = null) - assert(page1.isNotEmpty()) { "Page 1 is empty" } - assert(page2.isNotEmpty()) { "Page 2 is empty" } - val intersection = page1.intersect(page2.toSet()) - assert(intersection.isEmpty()) { - "Pages are intersected by " + intersection.size - } - } - - @ParameterizedTest(name = "{index}|search|{0}") - @MangaSources - fun search(source: MangaSource) = runTest { - val parser = source.newParser(context) - val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull { - it.title.length - } ?: error("No manga found") - val query = subject.title - check(query.isNotBlank()) { "Manga title '$query' is blank" } - val list = parser.getList(0, query) - assert(list.isNotEmpty()) { "Empty search results by \"$query\"" } - assert(list.singleOrNull { it.url == subject.url && it.id == subject.id } != null) { - "Single subject '${subject.title} (${subject.publicUrl})' not found in search results" - } - checkMangaList(list, "search('$query')") - assert(list.all { it.source == source }) - } - - @ParameterizedTest(name = "{index}|tags|{0}") - @MangaSources - fun tags(source: MangaSource) = runTest { - val parser = source.newParser(context) - val tags = parser.getTags() - assert(tags.isNotEmpty()) { "No tags found" } - val keys = tags.map { it.key } - assert(keys.isDistinct()) - assert("" !in keys) - val titles = tags.map { it.title } + private val context = MangaLoaderContextMock + + @ParameterizedTest(name = "{index}|list|{0}") + @MangaSources + fun list(source: MangaSource) = runTest { + val parser = source.newParser(context) + val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + checkMangaList(list, "list") + assert(list.all { it.source == source }) + } + + @ParameterizedTest(name = "{index}|pagination|{0}") + @MangaSources + fun pagination(source: MangaSource) = runTest { + val parser = source.newParser(context) + val page1 = parser.getList(0, sortOrder = null, tags = null) + val page2 = parser.getList(page1.size, sortOrder = null, tags = null) + assert(page1.isNotEmpty()) { "Page 1 is empty" } + assert(page2.isNotEmpty()) { "Page 2 is empty" } + val intersection = page1.intersect(page2.toSet()) + assert(intersection.isEmpty()) { + "Pages are intersected by " + intersection.size + } + } + + @ParameterizedTest(name = "{index}|search|{0}") + @MangaSources + fun search(source: MangaSource) = runTest { + val parser = source.newParser(context) + val subject = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null).minByOrNull { + it.title.length + } ?: error("No manga found") + val query = subject.title + check(query.isNotBlank()) { "Manga title '$query' is blank" } + val list = parser.getList(0, query) + assert(list.isNotEmpty()) { "Empty search results by \"$query\"" } + assert(list.singleOrNull { it.url == subject.url && it.id == subject.id } != null) { + "Single subject '${subject.title} (${subject.publicUrl})' not found in search results" + } + checkMangaList(list, "search('$query')") + assert(list.all { it.source == source }) + } + + @ParameterizedTest(name = "{index}|tags|{0}") + @MangaSources + fun tags(source: MangaSource) = runTest { + val parser = source.newParser(context) + val tags = parser.getTags() + assert(tags.isNotEmpty()) { "No tags found" } + val keys = tags.map { it.key } + assert(keys.isDistinct()) + assert("" !in keys) + val titles = tags.map { it.title } // assert(titles.isDistinct()) - assert("" !in titles) - assert(tags.all { it.source == source }) - - val tag = tags.last() - val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null) - checkMangaList(list, "${tag.title} (${tag.key})") - assert(list.all { it.source == source }) - } - - @ParameterizedTest(name = "{index}|details|{0}") - @MangaSources - fun details(source: MangaSource) = runTest { - val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) - val manga = list[3] - parser.getDetails(manga).apply { - assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } - assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" } - assert(description != null) { "Detailed description is null: '$publicUrl'" } - assert(title.startsWith(manga.title)) { - "Titles are mismatch: '$title' and '${manga.title}' for $publicUrl" - } - assert(this.source == source) - val c = checkNotNull(chapters) - assert(c.isDistinctBy { it.id }) { - "Chapters are not distinct by id: ${c.maxDuplicates { it.id }} for $publicUrl" - } - assert(c.isDistinctBy { it.number to it.branch }) { - "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" - } - assert(c.all { it.source == source }) - checkImageRequest(coverUrl, publicUrl) - largeCoverUrl?.let { - checkImageRequest(it, publicUrl) - } - } - } - - @ParameterizedTest(name = "{index}|pages|{0}") - @MangaSources - fun pages(source: MangaSource) = runTest { - val parser = source.newParser(context) - val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) - val manga = list.first() - val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null") - val pages = parser.getPages(chapter) - - assert(pages.isNotEmpty()) - assert(pages.isDistinctBy { it.id }) - assert(pages.all { it.source == source }) - - val page = pages.medianOrNull() ?: error("No page") - val pageUrl = parser.getPageUrl(page) - assert(pageUrl.isNotEmpty()) - assert(pageUrl.isUrlAbsolute()) - checkImageRequest(pageUrl, page.referer) - } - - @ParameterizedTest(name = "{index}|favicon|{0}") - @MangaSources - fun favicon(source: MangaSource) = runTest { - val parser = source.newParser(context) - val favicons = parser.getFavicons() - val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") - assert(favicons.isNotEmpty()) - favicons.forEach { - assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" } - assert(it.type in types) { "Unknown icon type: ${it.type}" } - } - val favicon = favicons.find(24) - checkNotNull(favicon) - checkImageRequest(favicon.url, favicons.referer) - } - - @ParameterizedTest(name = "{index}|domain|{0}") - @MangaSources - fun domain(source: MangaSource) = runTest { - val parser = source.newParser(context) - val defaultDomain = parser.domain - val url = HttpUrl.Builder() - .host(defaultDomain) - .scheme("https") - .toString() - val response = context.doRequest(url, extraHeaders = parser.headers) - val realUrl = response.request.url - val realDomain = realUrl.topPrivateDomain() - val realHost = realUrl.host - assert(defaultDomain == realHost || defaultDomain == realDomain) { - "Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost" - } - } - - @ParameterizedTest(name = "{index}|authorization|{0}") - @MangaSources - @Disabled - fun authorization(source: MangaSource) = runTest { - val parser = source.newParser(context) - if (parser is MangaParserAuthProvider) { - val username = parser.getUsername() - assert(username.isNotBlank()) { "Username is blank" } - println("Signed in to ${source.name} as $username") - } - } - - private suspend fun checkMangaList(list: List, cause: String) { - assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" } - assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" } - for (item in list) { - assert(item.url.isNotEmpty()) { "Url is empty" } - assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" } - assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" } - assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" } - assert(item.publicUrl.isUrlAbsolute()) - } - val testItem = list.random() - checkImageRequest(testItem.coverUrl, testItem.publicUrl) - } - - private suspend fun checkImageRequest(url: String, referer: String?) { - context.doRequest(url, referer).use { - assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" } - assert(it.mimeType?.startsWith("image/") == true) { - "Wrong response mime type: ${it.mimeType}" - } - } - } + assert("" !in titles) + assert(tags.all { it.source == source }) + + val tag = tags.last() + val list = parser.getList(offset = 0, tags = setOf(tag), sortOrder = null) + checkMangaList(list, "${tag.title} (${tag.key})") + assert(list.all { it.source == source }) + } + + @ParameterizedTest(name = "{index}|details|{0}") + @MangaSources + fun details(source: MangaSource) = runTest { + val parser = source.newParser(context) + val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + val manga = list[3] + parser.getDetails(manga).apply { + assert(!chapters.isNullOrEmpty()) { "Chapters are null or empty" } + assert(publicUrl.isUrlAbsolute()) { "Manga public url is not absolute: '$publicUrl'" } + assert(description != null) { "Detailed description is null: '$publicUrl'" } + assert(title.startsWith(manga.title)) { + "Titles are mismatch: '$title' and '${manga.title}' for $publicUrl" + } + assert(this.source == source) + val c = checkNotNull(chapters) + assert(c.isDistinctBy { it.id }) { + "Chapters are not distinct by id: ${c.maxDuplicates { it.id }} for $publicUrl" + } + assert(c.isDistinctBy { it.number to it.branch }) { + "Chapters are not distinct by number: ${c.maxDuplicates { it.number to it.branch }} for $publicUrl" + } + assert(c.all { it.source == source }) + checkImageRequest(coverUrl, source) + largeCoverUrl?.let { + checkImageRequest(it, source) + } + } + } + + @ParameterizedTest(name = "{index}|pages|{0}") + @MangaSources + fun pages(source: MangaSource) = runTest { + val parser = source.newParser(context) + val list = parser.getList(20, sortOrder = SortOrder.POPULARITY, tags = null) + val manga = list.first() + val chapter = parser.getDetails(manga).chapters?.firstOrNull() ?: error("Chapter is null") + val pages = parser.getPages(chapter) + + assert(pages.isNotEmpty()) + assert(pages.isDistinctBy { it.id }) + assert(pages.all { it.source == source }) + + val page = pages.medianOrNull() ?: error("No page") + val pageUrl = parser.getPageUrl(page) + assert(pageUrl.isNotEmpty()) + assert(pageUrl.isUrlAbsolute()) + checkImageRequest(pageUrl, page.source) + } + + @ParameterizedTest(name = "{index}|favicon|{0}") + @MangaSources + fun favicon(source: MangaSource) = runTest { + val parser = source.newParser(context) + val favicons = parser.getFavicons() + val types = setOf("png", "svg", "ico", "gif", "jpg", "jpeg") + assert(favicons.isNotEmpty()) + favicons.forEach { + assert(it.url.isUrlAbsolute()) { "Favicon url is not absolute: ${it.url}" } + assert(it.type in types) { "Unknown icon type: ${it.type}" } + } + val favicon = favicons.find(24) + checkNotNull(favicon) + checkImageRequest(favicon.url, source) + } + + @ParameterizedTest(name = "{index}|domain|{0}") + @MangaSources + fun domain(source: MangaSource) = runTest { + val parser = source.newParser(context) + val defaultDomain = parser.domain + val url = HttpUrl.Builder() + .host(defaultDomain) + .scheme("https") + .toString() + val response = context.doRequest(url, source) + val realUrl = response.request.url + val realDomain = realUrl.topPrivateDomain() + val realHost = realUrl.host + assert(defaultDomain == realHost || defaultDomain == realDomain) { + "Domain mismatch:\nRequired:\t\t\t$defaultDomain\nActual:\t\t\t$realDomain\nHost:\t\t\t$realHost" + } + } + + @ParameterizedTest(name = "{index}|authorization|{0}") + @MangaSources + @Disabled + fun authorization(source: MangaSource) = runTest { + val parser = source.newParser(context) + if (parser is MangaParserAuthProvider) { + val username = parser.getUsername() + assert(username.isNotBlank()) { "Username is blank" } + println("Signed in to ${source.name} as $username") + } + } + + private suspend fun checkMangaList(list: List, cause: String) { + assert(list.isNotEmpty()) { "Manga list for '$cause' is empty" } + assert(list.isDistinctBy { it.id }) { "Manga list for '$cause' contains duplicated ids" } + for (item in list) { + assert(item.url.isNotEmpty()) { "Url is empty" } + assert(!item.url.isUrlAbsolute()) { "Url looks like absolute: ${item.url}" } + assert(item.coverUrl.isUrlAbsolute()) { "Cover url is not absolute: ${item.coverUrl}" } + assert(item.title.isNotEmpty()) { "Title for ${item.publicUrl} is empty" } + assert(item.publicUrl.isUrlAbsolute()) + } + val testItem = list.random() + checkImageRequest(testItem.coverUrl, testItem.source) + } + + private suspend fun checkImageRequest(url: String, source: MangaSource) { + context.doRequest(url, source).use { + assert(it.isSuccessful) { "Request failed: ${it.code}(${it.message}): $url" } + assert(it.mimeType?.startsWith("image/") == true) { + "Wrong response mime type: ${it.mimeType}" + } + } + } }