MyReadingManga: Update site structure (#2054)

master
Naga 9 months ago committed by GitHub
parent b5512e7574
commit 91ec95448c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,238 +1,354 @@
package org.koitharu.kotatsu.parsers.site.all package org.koitharu.kotatsu.parsers.site.all
import org.json.JSONObject
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.PagedMangaParser import org.koitharu.kotatsu.parsers.core.PagedMangaParser
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.ContentRating
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.model.ContentType
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaListFilter
import org.koitharu.kotatsu.parsers.model.MangaListFilterCapabilities
import org.koitharu.kotatsu.parsers.model.MangaListFilterOptions
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.model.MangaState
import org.koitharu.kotatsu.parsers.model.MangaTag
import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN
import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.mapToSet
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import org.koitharu.kotatsu.parsers.util.urlEncoded
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.EnumSet
import java.util.Locale
import java.util.regex.Pattern import java.util.regex.Pattern
import java.util.*
@MangaSourceParser("MYREADINGMANGA", "MyReadingManga") @MangaSourceParser("MYREADINGMANGA", "MyReadingManga", type = ContentType.HENTAI)
internal class MyReadingManga(context: MangaLoaderContext) : PagedMangaParser(context, MangaParserSource.MYREADINGMANGA, 20) { internal class MyReadingManga(context: MangaLoaderContext) :
PagedMangaParser(context, MangaParserSource.MYREADINGMANGA, 18) {
override val configKeyDomain = ConfigKey.Domain("myreadingmanga.info") override val configKeyDomain = ConfigKey.Domain("myreadingmanga.info")
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) { override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys) super.onCreateConfig(keys)
keys.add(userAgentKey) keys.add(userAgentKey)
} }
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isMultipleTagsSupported = true, isSearchSupported = true,
isTagsExclusionSupported = false, isOriginalLocaleSupported = true,
isSearchSupported = true, )
isOriginalLocaleSupported = true,
)
/** Sort orders advertised by this parser: updated, popularity, alphabetical and newest. */
override val availableSortOrders: Set<SortOrder> =
    EnumSet.of(SortOrder.UPDATED, SortOrder.POPULARITY, SortOrder.ALPHABETICAL, SortOrder.NEWEST)
/**
 * Builds the filter options exposed by this parser.
 *
 * Tags are loaded through [fetchTags]; states, content rating and locales are fixed sets.
 */
override suspend fun getFilterOptions(): MangaListFilterOptions {
    // Insertion order is kept by setOf, so the locales stay in the same order as before.
    val supportedLocales = setOf(
        Locale.ENGLISH,
        Locale.JAPANESE,
        Locale.CHINESE,
        Locale.GERMAN,
        Locale.ITALIAN,
        Locale("ru"),
        Locale("es"),
        Locale("pt", "BR"),
        Locale("tr"),
        Locale("vi"),
        Locale("ar"),
        Locale("id"),
        Locale("ko"),
    )
    return MangaListFilterOptions(
        availableTags = fetchTags(),
        availableStates = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED),
        availableContentRating = EnumSet.of(ContentRating.ADULT),
        availableLocales = supportedLocales,
    )
}
/**
 * Maps a filter locale to the language name appended after `lang_str:` in the search URL.
 *
 * Only [Locale.getLanguage] is inspected; country and variant are ignored.
 * Languages without a mapping fall back to `"English"`.
 *
 * @param locale locale chosen in the list filter
 * @return the language name to put into the facet query
 */
private fun getLanguageForFilter(locale: Locale): String = when (locale.language) {
    "en" -> "English"
    "ja" -> "Japanese"
    "zh" -> "Chinese"
    "de" -> "German"
    "it" -> "Italian"
    "ru" -> "Russian"
    "es" -> "Spanish"
    "pt" -> "Portuguese"
    "tr" -> "Turkish"
    "vi" -> "Vietnamese"
    "ar" -> "Arabic"
    // Android and JDKs before 17 rewrite "id" to the legacy code "in",
    // so both spellings must be accepted or Indonesian falls through to the default.
    "id", "in" -> "Indonesia"
    "ko" -> "Korean"
    else -> "English"
}
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { override val availableSortOrders: Set<SortOrder> = EnumSet.of(
val url = buildString { SortOrder.UPDATED,
append("https://") )
append(domain)
append("/search/?wpsolr_page=")
append(page)
when {
!filter.query.isNullOrEmpty() -> {
append("&wpsolr_q=")
append(filter.query.replace(' ', '+'))
}
else -> {
append("&wpsolr_sort=")
when (order) {
SortOrder.UPDATED -> append("sort_by_date_desc")
SortOrder.POPULARITY -> append("sort_by_relevancy_desc")
SortOrder.ALPHABETICAL -> append("sort_by_title_asc")
SortOrder.NEWEST -> append("sort_by_date_desc")
else -> append("sort_by_random")
}
}
}
var paramIndex = 0
filter.locale?.let {
append(
buildString {
append("&wpsolr_fq[")
append(paramIndex)
append("]=lang_str:")
}
)
append(getLanguageForFilter(it))
paramIndex++
}
if (filter.tags.isNotEmpty()) {
filter.tags.forEach { tag ->
append("&wpsolr_fq[$paramIndex]=")
append("genre_str:${tag.key}")
paramIndex++
}
}
filter.states.oneOrThrowIfMany()?.let {
append("&wpsolr_fq[$paramIndex]=status:")
append(
when (it) {
MangaState.ONGOING -> "Ongoing"
MangaState.FINISHED -> "Completed"
else -> "Ongoing"
},
)
paramIndex++
}
}
val doc = webClient.httpGet(url).parseHtml() override suspend fun getFilterOptions() = MangaListFilterOptions(
return parseMangaList(doc) availableTags = fetchTags(),
} availableStates = EnumSet.of(
MangaState.ONGOING,
MangaState.FINISHED,
),
availableContentRating = EnumSet.of(ContentRating.ADULT),
availableLocales = setOf(
Locale.ENGLISH,
Locale.FRENCH,
Locale.JAPANESE,
Locale.CHINESE,
Locale.GERMAN,
Locale.ITALIAN,
Locale.KOREAN,
Locale.TRADITIONAL_CHINESE,
Locale("es"), // Spanish
Locale("pt"), // Portuguese
Locale("ru"), // Russian
Locale("tr"), // Turkish
Locale("vi"), // Vietnamese
Locale("ar"), // Arabic
Locale("id"), // Indonesian (Bahasa)
Locale("th"), // Thai
Locale("pl"), // Polish
Locale("sv"), // Swedish
Locale("nl"), // Dutch (Flemish Dutch)
Locale("hu"), // Hungarian
Locale("hi"), // Hindi
Locale("he"), // Hebrew
Locale("el"), // Greek
Locale("fi"), // Finnish
Locale("fil"), // Filipino
Locale("da"), // Danish
Locale("cs"), // Czech
Locale("hr"), // Croatian
Locale("bg"), // Bulgarian
Locale("zh", "HK"), // Cantonese
Locale("fa"), // Persian
Locale("sk"), // Slovak
Locale("ro"), // Romanian
Locale("no"), // Norwegian
Locale("ms"), // Malay
Locale("lt"), // Lithuanian
),
)
private fun parseMangaList(doc: Document): List<Manga> { private fun getLanguageSlug(locale: Locale?): String? {
return doc.select("div.results-by-facets div[id*=res]").map { element -> return when {
val titleElement = element.selectFirst("a") ?: element.parseFailed("No title element found") locale?.language == "fr" -> "french"
val thumbnailElement = element.selectFirst("img") locale?.language == "ja" -> "jp"
locale?.language == "zh" && locale.country == "TW" -> "traditional-chinese"
Manga( locale?.language == "zh" && locale.country == "HK" -> "cantonese"
id = generateUid(titleElement.attr("href")), locale?.language == "zh" -> "chinese"
title = titleElement.text().replace(titleRegex.toRegex(), "").substringBeforeLast("(").trim(), locale?.language == "de" -> "german"
altTitles = emptySet(), locale?.language == "it" -> "italian"
url = titleElement.attrAsRelativeUrl("href"), locale?.language == "ko" -> "korean"
publicUrl = titleElement.absUrl("href"), locale?.language == "es" -> "spanish"
rating = RATING_UNKNOWN, locale?.language == "pt" -> "portuguese"
contentRating = ContentRating.ADULT, locale?.language == "ru" -> "russian"
coverUrl = findImageSrc(thumbnailElement), locale?.language == "tr" -> "turkish"
tags = emptySet(), locale?.language == "vi" -> "vietnamese"
state = null, locale?.language == "ar" -> "arabic"
authors = emptySet(), locale?.language == "id" -> "bahasa"
source = source, locale?.language == "th" -> "thai"
) locale?.language == "pl" -> "polish"
} locale?.language == "sv" -> "swedish"
} locale?.language == "nl" -> "flemish-dutch"
locale?.language == "hu" -> "hungarian"
locale?.language == "hi" -> "hindi"
locale?.language == "he" -> "hebrew"
locale?.language == "el" -> "greek"
locale?.language == "fi" -> "finnish"
locale?.language == "fil" -> "filipino"
locale?.language == "da" -> "danish"
locale?.language == "cs" -> "czech"
locale?.language == "hr" -> "croatian"
locale?.language == "bg" -> "bulgarian"
locale?.language == "fa" -> "persian"
locale?.language == "sk" -> "slovak"
locale?.language == "ro" -> "romanian"
locale?.language == "no" -> "norwegian-bokmal"
locale?.language == "ms" -> "malay"
locale?.language == "lt" -> "lithuanian"
else -> null //all
}
}
/**
 * Loads the manga page and returns a copy of [manga] with description, tags,
 * state, authors and chapters filled in.
 *
 * The author is the text of the first `[...]` group in the page title; a title
 * without `[` yields no author. The description is the title, an optional
 * "Scanlated by: ..." line and the selected content paragraphs, joined by newlines.
 *
 * @param manga list entry whose `url` points at the details page
 * @return [manga] enriched with the parsed details
 */
override suspend fun getDetails(manga: Manga): Manga {
    val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
    val title = doc.selectFirst("h1")?.text() ?: manga.title
    // Without the empty fallback, a title that has no "[" would be returned whole
    // and end up being reported as its own author.
    val author = title.substringAfter("[", missingDelimiterValue = "").substringBefore("]").trim()
    val genres = doc.select(".entry-header p a[href*=genre], [href*=tag], span.entry-categories a")
        .mapToSet {
            MangaTag(
                title = it.text().toTitleCase(),
                // Strip trailing slashes first: for an href ending in "/" the text after
                // the last slash is empty, which used to produce an empty key.
                key = it.attr("href").trimEnd('/').substringAfterLast("/"),
                source = source,
            )
        }
    val scanlatedBy = doc.select(".entry-terms:has(a[href*=group])")
        .firstOrNull()?.select("a[href*=group]")
        ?.joinToString(prefix = "Scanlated by: ") { it.text() }
    val extendedDescription = doc.select(".entry-content p:not(p:containsOwn(|)):not(.chapter-class + p)")
        .joinToString("\n") { it.text() }
    val description = listOfNotNull(title, scanlatedBy, extendedDescription).joinToString("\n").trim()
    // Any status text other than these two leaves the state unknown.
    val state = when (doc.select("a[href*=status]").firstOrNull()?.text()) {
        "Ongoing" -> MangaState.ONGOING
        "Completed" -> MangaState.FINISHED
        else -> null
    }
    val chapters = parseChapters(doc)
    return manga.copy(
        description = description,
        tags = genres,
        state = state,
        authors = setOfNotNull(author.takeIf { it.isNotEmpty() }),
        chapters = chapters,
    )
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() val url = buildString {
append("https://")
val images = (doc.select("div.entry-content img") + doc.select("div.separator img[data-src]")) append(domain)
.mapNotNull { findImageSrc(it) }
.distinct()
return images.mapIndexed { _, url ->
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
}
private suspend fun fetchTags(): Set<MangaTag> { // Add language path if specified
val doc = webClient.httpGet("https://${domain}/search-6/").parseHtml() val langSlug = getLanguageSlug(filter.locale)
if (langSlug != null) {
return doc.select("div.select_opt").mapNotNull { element -> append("/lang/")
val dataAttr = element.attr("data-wpsolr-facet-data") append(langSlug)
val itemValue = dataAttr }
.takeIf { it.isNotBlank() }
?.let { json -> when {
try { !filter.query.isNullOrEmpty() -> {
val js = JSONObject(json) // Search with language: /lang/french/page/2/?s=example
val id = js.getString("id") if (page > 1) {
if (id != "genre_str") return@mapNotNull null append("/page/")
else js.getString("item_value") append(page)
} catch (e: Exception) { }
null append("/?s=")
append(filter.query.urlEncoded())
}
filter.tags.isNotEmpty() -> {
// Genre filtering doesn't work with language, so we ignore language for genre
if (langSlug == null) {
append("/genre/")
append(filter.tags.first().key)
append("/page/")
append(page)
append("/")
} else {
// If both language and genre are selected, just use language
append("/page/")
append(page)
append("/")
} }
} }
itemValue?.let { value -> filter.states.isNotEmpty() -> {
// Status filtering doesn't work with language either
if (langSlug == null) {
append("/status/")
append(
when (filter.states.first()) {
MangaState.ONGOING -> "ongoing"
MangaState.FINISHED -> "completed"
else -> "ongoing"
},
)
append("/page/")
append(page)
append("/")
} else {
// If both language and status are selected, just use language
append("/page/")
append(page)
append("/")
}
}
else -> {
// Regular browsing with or without language
append("/page/")
append(page)
append("/")
}
}
}
val doc = webClient.httpGet(url).parseHtml()
return parseMangaList(doc)
}
/**
 * Converts a listing page into manga entries.
 *
 * Every `article.post` inside `div.content-archive` that is not marked
 * `category-video` is considered; articles without an `h2.entry-title a` link
 * are skipped. The displayed title has all `[...]` groups removed and is cut
 * before its last `(`.
 *
 * @param doc parsed listing page
 * @return one [Manga] per article that has a title link
 */
private fun parseMangaList(doc: Document): List<Manga> {
    // Convert the shared pattern once instead of once per article.
    val bracketGroups = titleRegex.toRegex()
    return doc.select("div.content-archive article.post:not(.category-video)").mapNotNull { article ->
        val link = article.selectFirst("h2.entry-title a") ?: return@mapNotNull null
        val cover = article.selectFirst("a.entry-image-link img")
        Manga(
            id = generateUid(link.attr("href")),
            title = link.text().replace(bracketGroups, "").substringBeforeLast("(").trim(),
            altTitles = emptySet(),
            url = link.attrAsRelativeUrl("href"),
            publicUrl = link.absUrl("href"),
            rating = RATING_UNKNOWN,
            contentRating = ContentRating.ADULT,
            coverUrl = findImageSrc(cover),
            tags = emptySet(),
            state = null,
            authors = emptySet(),
            source = source,
        )
    }
}
override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val title = doc.selectFirst("h1.entry-title")?.text() ?: manga.title
val altTitles = mutableSetOf<String>()
val altTitleElement = doc.selectFirst("p.alt-title-class")
if (altTitleElement != null) {
var nextElement = altTitleElement.nextElementSibling()
while (nextElement != null && nextElement.tagName() == "p" &&
!nextElement.hasClass("info-class") && !nextElement.hasClass("chapter-class")
) {
val altTitle = nextElement.text().trim()
if (altTitle.isNotEmpty()) {
altTitles.add(altTitle)
}
nextElement = nextElement.nextElementSibling()
}
}
var description = ""
val descriptionElement = doc.selectFirst("p.info-class")
if (descriptionElement != null) {
var nextElement = descriptionElement.nextElementSibling()
val descParts = mutableListOf<String>()
while (nextElement != null && nextElement.tagName() == "p" &&
!nextElement.hasClass("chapter-class") && !nextElement.hasClass("alt-title-class")
) {
val text = nextElement.text()
if (text.isNotEmpty()) {
descParts.add(text)
}
nextElement = nextElement.nextElementSibling()
}
description = descParts.joinToString("\n\n")
}
if (description.isEmpty()) {
description = doc.select("div.entry-content p strong")
.joinToString("\n") { it.text() }
.trim()
.ifEmpty { title }
}
val authorFromTitle = title.substringAfter("[").substringBefore("]").trim()
val authorFromTag = doc.select("span.entry-tags a[href*='/tag/']")
.firstOrNull { it.text().contains("(") && it.text().contains(")") }
?.text()?.trim()
val author = authorFromTag ?: authorFromTitle
val genres = mutableSetOf<MangaTag>()
doc.select("span.entry-terms:has(span:contains(Genres)) a").forEach {
genres.add(
MangaTag( MangaTag(
title = value, title = it.text(),
key = value, key = it.attr("href").substringAfterLast("/genre/").substringBefore("/"),
source = source,
),
)
}
val state = when (doc.select("a[href*=status]").firstOrNull()?.text()) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
else -> null
}
val chapters = parseChapters(doc)
return manga.copy(
altTitles = altTitles,
description = description,
tags = genres,
state = state,
authors = setOfNotNull(author.takeIf { it.isNotEmpty() && it != title }),
chapters = chapters,
)
}
/**
 * Loads a chapter page and returns its images as reader pages.
 *
 * Candidate images are `img.img-myreadingmanga` and `div > img` inside
 * `div.entry-content`. Images whose resolved source is missing or contains one
 * of the skipped fragments are dropped, and duplicate URLs are removed.
 *
 * @param chapter chapter whose `url` points at the page to load
 * @return pages in document order, one per distinct image URL
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
    val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
    // "/wp-content/uploads/202" matches every uploads folder for a year starting with 202;
    // the original note flags those images as possible ads.
    val skippedFragments = listOf("GH-", "nucarnival", "/wp-content/uploads/202")
    return doc.select("div.entry-content img.img-myreadingmanga, div.entry-content div > img")
        // Resolve each source once, then filter on the resolved value.
        .mapNotNull { findImageSrc(it) }
        .filter { src -> skippedFragments.none { fragment -> src.contains(fragment) } }
        .distinct()
        .map { url ->
            MangaPage(
                id = generateUid(url),
                url = url,
                preview = null,
                source = source,
            )
        }
}
private suspend fun fetchTags(): Set<MangaTag> {
val doc = webClient.httpGet("https://$domain/").parseHtml()
return doc.select("h4.widget-title.widgettitle:contains(Genres) + .tagcloud a")
.mapToSet { element ->
MangaTag(
title = element.text().substringBefore(" ("),
key = element.attr("href").trimEnd('/').substringAfterLast('/'),
source = source, source = source,
) )
} }
}.toSet()
} }
private val titleRegex = Pattern.compile("""\[[^]]*]""") private val titleRegex = Pattern.compile("""\[[^]]*]""")
@ -254,44 +370,82 @@ internal class MyReadingManga(context: MangaLoaderContext) : PagedMangaParser(co
} }
} }
private fun parseChapters(document: Document): List<MangaChapter> { private fun parseChapters(document: Document): List<MangaChapter> {
val chapters = mutableListOf<MangaChapter>() val chapters = mutableListOf<MangaChapter>()
val mangaUrl = document.baseUri() val mangaUrl = document.baseUri().removeSuffix("/")
val date = parseDate(document.select(".entry-time").text()) val date = parseDate(document.select("time.entry-time").text())
val chFirstName = document.select(".chapter-class a[href*=$mangaUrl]").firstOrNull()?.text()
?.ifEmpty { "Ch. 1" }?.replaceFirstChar { it.uppercase() } ?: "Ch. 1"
chapters.add(importChapter("1", mangaUrl, date, chFirstName))
val lastChapterNumber = document.select("a[class=page-numbers]").lastOrNull()?.text()?.toIntOrNull()
if (lastChapterNumber != null && lastChapterNumber > 1) {
for (i in 2..lastChapterNumber) {
chapters.add(importChapter(i.toString(), mangaUrl, date, "Ch. $i"))
}
}
return chapters
}
private fun parseDate(date: String): Long { // Look for chapter information
return try { val chapterClass = document.selectFirst("div.chapter-class")
SimpleDateFormat("MMM dd, yyyy", Locale.US).parse(date)?.time ?: 0
} catch (e: Exception) {
0
}
}
private fun importChapter(pageNumber: String, mangaUrl: String, date: Long, chapterName: String): MangaChapter { // Check if there's a chapter title after the chapter-class div
return MangaChapter( var chapterTitle: String? = null
id = generateUid("$mangaUrl/$pageNumber"), if (chapterClass != null) {
title = chapterName, var nextElement = chapterClass.nextElementSibling()
number = pageNumber.toFloatOrNull() ?: 0f, while (nextElement != null && nextElement.tagName() != "div") {
url = "$mangaUrl/$pageNumber", if (nextElement.tagName() == "p" && nextElement.text().contains("Chapter", ignoreCase = true)) {
uploadDate = date, chapterTitle = nextElement.text().trim()
source = source, break
scanlator = null, }
branch = null, nextElement = nextElement.nextElementSibling()
volume = 0, }
) }
}
// Check for pagination
val paginationInContent =
document.select("div.entry-pagination a.page-numbers, div.chapter-class .entry-pagination a.page-numbers")
.mapNotNull { it.text().toIntOrNull() }
.maxOrNull()
if (paginationInContent != null && paginationInContent > 1) {
// Multi-page manga with chapters
for (i in 1..paginationInContent) {
val title = when {
chapterTitle != null && i == 1 -> chapterTitle
chapterTitle != null -> chapterTitle.replace("1", i.toString())
else -> "Chapter $i"
}
chapters.add(
MangaChapter(
id = generateUid("$mangaUrl/$i"),
title = title,
number = i.toFloat(),
url = if (i == 1) mangaUrl else "$mangaUrl/$i/",
uploadDate = date,
source = source,
scanlator = null,
branch = null,
volume = 0,
),
)
}
} else {
// Single page manga or no pagination found
chapters.add(
MangaChapter(
id = generateUid(mangaUrl),
title = chapterTitle ?: "Complete",
number = 1f,
url = mangaUrl,
uploadDate = date,
source = source,
scanlator = null,
branch = null,
volume = 0,
),
)
}
return chapters
}
/**
 * Parses a date written as `MMMM dd, yyyy` in US English.
 *
 * @param date text to parse
 * @return the parsed time in epoch milliseconds, or `0` when the text cannot be parsed
 */
private fun parseDate(date: String): Long {
    val parsed = try {
        SimpleDateFormat("MMMM dd, yyyy", Locale.US).parse(date)
    } catch (_: Exception) {
        null
    }
    return parsed?.time ?: 0L
}
} }

Loading…
Cancel
Save