diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/network/RateLimitInterceptor.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/network/RateLimitInterceptor.kt
new file mode 100644
index 00000000..dfc126fb
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/network/RateLimitInterceptor.kt
@@ -0,0 +1,97 @@
+package org.koitharu.kotatsu.parsers.network
+
+import okhttp3.Interceptor
+import okhttp3.Response
+import okio.IOException
+import java.util.concurrent.Semaphore
+import java.util.concurrent.TimeUnit
+
+// TODO rewrite this
+/**
+ * OkHttp [Interceptor] that throttles outgoing requests with a sliding window:
+ * at most [permits] requests may start within any [rateLimitMillis] interval.
+ *
+ * Calling threads are blocked until a slot is free. Responses
+ * that did not use the network do not consume a slot.
+ */
+class RateLimitInterceptor : Interceptor {
+
+    /** Maximum number of requests allowed per window. */
+    private val permits = 10
+
+    /** Start timestamps (epoch millis) of the requests in the current window, oldest first. */
+    private val requestQueue = ArrayDeque<Long>(permits)
+    private val rateLimitMillis = TimeUnit.SECONDS.toMillis(60L)
+
+    /** Fair semaphore, so that waiting requests are admitted in arrival order. */
+    private val fairLock = Semaphore(1, true)
+
+    override fun intercept(chain: Interceptor.Chain): Response {
+        val call = chain.call()
+        val request = chain.request()
+
+        try {
+            fairLock.acquire()
+        } catch (e: InterruptedException) {
+            throw IOException(e)
+        }
+
+        val requestQueue = this.requestQueue
+        val timestamp: Long
+
+        try {
+            synchronized(requestQueue) {
+                while (requestQueue.size >= permits) {
+                    val periodStart = System.currentTimeMillis() - rateLimitMillis
+                    var hasRemovedExpired = false
+                    // Drop timestamps that have fallen out of the window.
+                    while (requestQueue.isNotEmpty() && requestQueue.first() <= periodStart) {
+                        requestQueue.removeFirst()
+                        hasRemovedExpired = true
+                    }
+                    if (call.isCanceled()) {
+                        throw IOException("Canceled")
+                    } else if (hasRemovedExpired) {
+                        break
+                    } else {
+                        try {
+                            // Sleep until the oldest entry expires or another thread releases a slot.
+                            requestQueue.wait(requestQueue.first() - periodStart)
+                        } catch (_: InterruptedException) {
+                            continue
+                        }
+                    }
+                }
+
+                timestamp = System.currentTimeMillis()
+                requestQueue.addLast(timestamp)
+            }
+        } finally {
+            fairLock.release()
+        }
+
+        val response = chain.proceed(request)
+        if (response.networkResponse == null) {
+            // The response did not use the network: give the slot back and wake up waiters.
+            synchronized(requestQueue) {
+                if (requestQueue.isEmpty() || timestamp < requestQueue.first()) return@synchronized
+                val iterator = requestQueue.iterator()
+                while (iterator.hasNext()) {
+                    if (iterator.next() == timestamp) {
+                        iterator.remove()
+                        break
+                    }
+                }
+                requestQueue.notifyAll()
+            }
+        }
+
+        return response
+    }
+
+    @Suppress("PLATFORM_CLASS_MAPPED_TO_KOTLIN", "NOTHING_TO_INLINE")
+    private inline fun Any.wait(timeout: Long) = (this as Object).wait(timeout)
+
+    @Suppress("PLATFORM_CLASS_MAPPED_TO_KOTLIN", "NOTHING_TO_INLINE")
+    private inline fun Any.notifyAll() = (this as Object).notifyAll()
+}
diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/TuMangaOnlineParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/TuMangaOnlineParser.kt
new file mode 100644
index 00000000..d8fe1a9a
--- /dev/null
+++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/TuMangaOnlineParser.kt
@@ -0,0 +1,247 @@
+package org.koitharu.kotatsu.parsers.site
+
+import okhttp3.Headers
+import okhttp3.HttpUrl.Companion.toHttpUrl
+import org.jsoup.nodes.Document
+import org.jsoup.nodes.Element
+import org.koitharu.kotatsu.parsers.MangaLoaderContext
+import org.koitharu.kotatsu.parsers.MangaSourceParser
+import org.koitharu.kotatsu.parsers.PagedMangaParser
+import org.koitharu.kotatsu.parsers.config.ConfigKey
+import org.koitharu.kotatsu.parsers.model.*
+import org.koitharu.kotatsu.parsers.util.*
+import java.net.URLEncoder
+import java.text.SimpleDateFormat
+import java.util.*
+
+/** Upper bound on chained script redirects followed when opening a chapter. */
+private const val MAX_REDIRECTS = 10
+
+/**
+ * Parser for the Spanish-language TuMangaOnline source (default domain `lectortmo.com`).
+ *
+ * Listings, details, chapters and pages are all scraped from the site's HTML.
+ */
+@MangaSourceParser("TUMANGAONLINE", "TuMangaOnline", "es")
+class TuMangaOnlineParser(context: MangaLoaderContext) : PagedMangaParser(
+    context,
+    source = MangaSource.TUMANGAONLINE,
+    pageSize = 24,
+) {
+
+    override val configKeyDomain = ConfigKey.Domain("lectortmo.com")
+
+    // NOTE(review): SimpleDateFormat is not thread-safe - confirm chapters are never parsed concurrently.
+    private val chapterDateFormat = SimpleDateFormat("yyyy-MM-dd", sourceLocale)
+
+    // Patterns for the inline redirect scripts, compiled once instead of on every call.
+    private val regexParams = """\{uniqid:'(.+)',cascade:(.+)}""".toRegex()
+    private val regexAction = """form\.action\s?=\s?'(.+)'""".toRegex()
+    private val regexRedirect = """window\.location\.replace\('(.+)'\)""".toRegex()
+
+    override val sortOrders = EnumSet.of(
+        SortOrder.NEWEST,
+        SortOrder.POPULARITY,
+    )
+
+    /**
+     * Loads one page of the library listing.
+     *
+     * With a non-empty [query] the library is searched by title and [tags]/[sortOrder] are not sent;
+     * otherwise it is filtered by [tags] and ordered by [sortOrder], descending.
+     */
+    override suspend fun getListPage(
+        page: Int,
+        query: String?,
+        tags: Set<MangaTag>?,
+        sortOrder: SortOrder,
+    ): List<Manga> {
+        val url = buildString {
+            append("/library")
+            if (query.isNullOrEmpty()) {
+                append("?order_item=")
+                when (sortOrder) {
+                    SortOrder.POPULARITY -> append("likes_count")
+                    SortOrder.NEWEST -> append("creation")
+                    else -> Unit
+                }
+                append("&order_dir=desc")
+                append("&filter_by=title")
+                tags?.forEach { tag ->
+                    append("&genders[]=${tag.key}")
+                }
+            } else {
+                // Encode the user-supplied text so characters such as '&' or '#' cannot break the URL.
+                append("?title=")
+                append(URLEncoder.encode(query, "UTF-8"))
+            }
+            append("&_pg=1")
+            append("&page=$page")
+        }.toAbsoluteUrl(domain)
+
+        val doc = webClient.httpGet(url, headers).parseHtml()
+        val items = doc.body().select("div.element")
+        return items.mapNotNull { item ->
+            val href =
+                item.selectFirst("a")?.attrAsRelativeUrlOrNull("href")?.substringAfter(' ') ?: return@mapNotNull null
+            Manga(
+                id = generateUid(href),
+                title = item.selectFirst("h4.text-truncate")?.text() ?: return@mapNotNull null,
+                // Taken from between "('" and "')" in the item's <style> element - presumably a CSS url('...').
+                coverUrl = item.select("style").toString().substringAfter("('").substringBeforeLast("')"),
+                altTitle = null,
+                author = null,
+                rating = item.selectFirst("span.score")?.text()?.toFloatOrNull()?.div(10F) ?: RATING_UNKNOWN,
+                url = href,
+                // NOTE(review): hasClass() receives three space-separated class names at once - confirm this
+                // matches the site's markup, or switch to a selector such as "i.fa-heartbeat".
+                isNsfw = item.select("i").hasClass("fas fa-heartbeat fa-2x"),
+                tags = emptySet(),
+                state = null,
+                publicUrl = href.toAbsoluteUrl(doc.host ?: domain),
+                source = source,
+            )
+        }
+    }
+
+    /** Loads the description, cover, status, author and chapter list of [manga]. */
+    override suspend fun getDetails(manga: Manga): Manga {
+        val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
+        val contents = doc.body().selectFirstOrThrow("section.element-header-content")
+        return manga.copy(
+            description = contents.selectFirst("p.element-description")?.html(),
+            largeCoverUrl = contents.selectFirst(".book-thumbnail")?.attrAsAbsoluteUrlOrNull("src"),
+            state = parseStatus(contents.select("span.book-status").text()),
+            author = contents.selectFirst("h5.card-title")?.attr("title")?.substringAfter(", "),
+            chapters = if (doc.select("div.chapters").isEmpty()) {
+                // No regular chapter list on the page: fall back to the one-shot layout.
+                doc.select(oneShotChapterListSelector()).mapChapters(reversed = true) { _, item ->
+                    oneShotChapterFromElement(item)
+                }
+            } else {
+                val chapters = mutableListOf<MangaChapter>()
+                doc.select(regularChapterListSelector()).reversed().forEachIndexed { i, item ->
+                    // NOTE(review): as written this replace() is a no-op; the first argument was probably a
+                    // non-breaking space (or "&nbsp;") that got lost in transit - confirm against upstream.
+                    val chaptername = item.select("div.col-10.text-truncate").text().replace(" ", " ").trim()
+                    val scanelement = item.select("ul.chapter-list > li")
+                    // One entry per <li> under the chapter; all of them share the chapter number.
+                    scanelement.forEach { chapters.add(regularChapterFromElement(it, chaptername, i)) }
+                }
+                chapters
+            },
+        )
+    }
+
+    private fun oneShotChapterListSelector() = "div.chapter-list-element > ul.list-group li.list-group-item"
+
+    /** Builds the chapter of a one-shot title from its list [element]. */
+    private fun oneShotChapterFromElement(element: Element): MangaChapter =
+        chapterFromElement(element, name = "One Shot", number = 1)
+
+    private fun regularChapterListSelector() = "div.chapters > ul.list-group li.p-0.list-group-item"
+
+    /** Builds one entry of a regular chapter; [number] is the zero-based chapter index. */
+    private fun regularChapterFromElement(element: Element, chName: String, number: Int): MangaChapter =
+        chapterFromElement(element, name = chName, number = number + 1)
+
+    /** Shared construction of a [MangaChapter] from a chapter list [element]. */
+    private fun chapterFromElement(element: Element, name: String, number: Int): MangaChapter {
+        val href = element.selectFirstOrThrow("div.row > .text-right > a").attrAsRelativeUrl("href")
+        return MangaChapter(
+            id = generateUid(href),
+            name = name,
+            number = number,
+            url = href,
+            scanlator = element.select("div.col-md-6.text-truncate").text(),
+            branch = null,
+            uploadDate = chapterDateFormat.tryParse(element.select("span.badge.badge-primary.p-2").first()?.text()),
+            source = source,
+        )
+    }
+
+    /**
+     * Resolves the reader page of [chapter] and returns its images, requesting the "cascade"
+     * variant of the reader URL when the resolved URL is not already one.
+     */
+    override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
+        val redirectDoc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain), headers).parseHtml()
+        val readerDoc = redirectToReadingPage(redirectDoc)
+        val currentUrl = readerDoc.location()
+        val doc = if ("cascade" in currentUrl) {
+            readerDoc
+        } else {
+            webClient.httpGet(currentUrl.substringBefore("paginated") + "cascade", headers).parseHtml()
+        }
+
+        return doc.select("div.viewer-container img:not(noscript img)").map { img ->
+            // Prefer data-src when present (presumably the lazily loaded address), else plain src.
+            val href = if (img.hasAttr("data-src")) img.attr("abs:data-src") else img.attr("abs:src")
+            MangaPage(
+                id = generateUid(href),
+                url = href,
+                preview = null,
+                source = source,
+            )
+        }
+    }
+
+    /**
+     * Follows the script-driven redirects between a chapter link and the reader page.
+     *
+     * Two inline-script forms are handled: one that posts a form with `uniqid`/`cascade` values and
+     * one that calls `window.location.replace(...)`. A [document] containing neither is returned as is.
+     *
+     * @throws IllegalStateException if a redirect script cannot be parsed or more than
+     * [MAX_REDIRECTS] redirects are chained.
+     */
+    private suspend fun redirectToReadingPage(document: Document, depth: Int = 0): Document {
+        check(depth <= MAX_REDIRECTS) { "Too many redirects while opening ${document.baseUri()}" }
+        val script1 = document.selectFirst("script:containsData(uniqid)")
+        val script2 = document.selectFirst("script:containsData(window.location.replace)")
+
+        val redirectHeaders = Headers.Builder().set("Referer", document.baseUri()).build()
+
+        if (script1 != null) {
+            val data = script1.data()
+            val params = checkNotNull(regexParams.find(data)) { "uniqid/cascade not found in redirect script" }
+            val actionMatch = checkNotNull(regexAction.find(data)) { "form action not found in redirect script" }
+            val action = actionMatch.groupValues[1].toHttpUrl()
+
+            val formBody = mapOf(
+                "uniqid" to params.groupValues[1],
+                "cascade" to params.groupValues[2],
+            )
+            val next = webClient.httpPost(action, formBody, redirectHeaders).parseHtml()
+            return redirectToReadingPage(next, depth + 1)
+        }
+
+        if (script2 != null) {
+            val match = checkNotNull(regexRedirect.find(script2.data())) { "redirect URL not found in script" }
+            val next = webClient.httpGet(match.groupValues[1], redirectHeaders).parseHtml()
+            return redirectToReadingPage(next, depth + 1)
+        }
+
+        return document
+    }
+
+    /** Loads the tags listed under `div#books-genders` on the library page. */
+    override suspend fun getTags(): Set<MangaTag> {
+        val doc = webClient.httpGet("https://$domain/library", headers).parseHtml()
+        val elements = doc.body().select("div#books-genders > div > div")
+        return elements.mapNotNullToSet { element ->
+            MangaTag(
+                title = element.select("label").text(),
+                key = element.select("input").attr("value"),
+                source = source,
+            )
+        }
+    }
+
+    /** Maps the site's status label to a [MangaState], or `null` if it is not recognised. */
+    private fun parseStatus(status: String) = when {
+        status.contains("Publicándose") -> MangaState.ONGOING
+        status.contains("Finalizado") -> MangaState.FINISHED
+        else -> null
+    }
+}
diff --git a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
index ef660faa..095f1682 100644
--- a/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
+++ b/src/test/kotlin/org/koitharu/kotatsu/parsers/MangaSources.kt
@@ -3,5 +3,5 @@ package org.koitharu.kotatsu.parsers
 import org.junit.jupiter.params.provider.EnumSource
 import org.koitharu.kotatsu.parsers.model.MangaSource
 
-@EnumSource(MangaSource::class, names = ["LOCAL", "DUMMY"], mode = EnumSource.Mode.EXCLUDE)
+@EnumSource(MangaSource::class, names = ["TUMANGAONLINE"], mode = EnumSource.Mode.INCLUDE)
 internal annotation class MangaSources