MyReadingManga: Update site structure (#2054)

master
Naga 9 months ago committed by GitHub
parent b5512e7574
commit 91ec95448c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,238 +1,354 @@
package org.koitharu.kotatsu.parsers.site.all package org.koitharu.kotatsu.parsers.site.all
import org.json.JSONObject
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.core.PagedMangaParser import org.koitharu.kotatsu.parsers.core.PagedMangaParser
import org.koitharu.kotatsu.parsers.model.* import org.koitharu.kotatsu.parsers.model.ContentRating
import org.koitharu.kotatsu.parsers.util.* import org.koitharu.kotatsu.parsers.model.ContentType
import org.koitharu.kotatsu.parsers.model.Manga
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaListFilter
import org.koitharu.kotatsu.parsers.model.MangaListFilterCapabilities
import org.koitharu.kotatsu.parsers.model.MangaListFilterOptions
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.model.MangaState
import org.koitharu.kotatsu.parsers.model.MangaTag
import org.koitharu.kotatsu.parsers.model.RATING_UNKNOWN
import org.koitharu.kotatsu.parsers.model.SortOrder
import org.koitharu.kotatsu.parsers.util.attrAsRelativeUrl
import org.koitharu.kotatsu.parsers.util.generateUid
import org.koitharu.kotatsu.parsers.util.mapToSet
import org.koitharu.kotatsu.parsers.util.parseHtml
import org.koitharu.kotatsu.parsers.util.toAbsoluteUrl
import org.koitharu.kotatsu.parsers.util.urlEncoded
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.EnumSet
import java.util.Locale
import java.util.regex.Pattern import java.util.regex.Pattern
import java.util.*
@MangaSourceParser("MYREADINGMANGA", "MyReadingManga") @MangaSourceParser("MYREADINGMANGA", "MyReadingManga", type = ContentType.HENTAI)
internal class MyReadingManga(context: MangaLoaderContext) : PagedMangaParser(context, MangaParserSource.MYREADINGMANGA, 20) { internal class MyReadingManga(context: MangaLoaderContext) :
PagedMangaParser(context, MangaParserSource.MYREADINGMANGA, 18) {
override val configKeyDomain = ConfigKey.Domain("myreadingmanga.info") override val configKeyDomain = ConfigKey.Domain("myreadingmanga.info")
override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) { override fun onCreateConfig(keys: MutableCollection<ConfigKey<*>>) {
super.onCreateConfig(keys) super.onCreateConfig(keys)
keys.add(userAgentKey) keys.add(userAgentKey)
} }
override val filterCapabilities: MangaListFilterCapabilities override val filterCapabilities: MangaListFilterCapabilities
get() = MangaListFilterCapabilities( get() = MangaListFilterCapabilities(
isMultipleTagsSupported = true, isSearchSupported = true,
isTagsExclusionSupported = false, isOriginalLocaleSupported = true,
isSearchSupported = true, )
isOriginalLocaleSupported = true,
) override val availableSortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.UPDATED,
override val availableSortOrders: Set<SortOrder> = EnumSet.of( )
SortOrder.UPDATED,
SortOrder.POPULARITY, override suspend fun getFilterOptions() = MangaListFilterOptions(
SortOrder.ALPHABETICAL, availableTags = fetchTags(),
SortOrder.NEWEST, availableStates = EnumSet.of(
) MangaState.ONGOING,
MangaState.FINISHED,
override suspend fun getFilterOptions() = MangaListFilterOptions( ),
availableTags = fetchTags(), availableContentRating = EnumSet.of(ContentRating.ADULT),
availableStates = EnumSet.of(MangaState.ONGOING, MangaState.FINISHED), availableLocales = setOf(
availableContentRating = EnumSet.of(ContentRating.ADULT), Locale.ENGLISH,
availableLocales = setOf( Locale.FRENCH,
Locale.ENGLISH, Locale.JAPANESE, Locale.CHINESE, Locale.GERMAN, Locale.ITALIAN, Locale.JAPANESE,
Locale("ru"), Locale("es"), Locale("pt", "BR"), Locale("tr"), Locale("vi"), Locale.CHINESE,
Locale("ar"), Locale("id"), Locale("ko"), Locale.GERMAN,
), Locale.ITALIAN,
) Locale.KOREAN,
Locale.TRADITIONAL_CHINESE,
private fun getLanguageForFilter(locale: Locale): String { Locale("es"), // Spanish
return when (locale.language) { Locale("pt"), // Portuguese
"en" -> "English" Locale("ru"), // Russian
"ja" -> "Japanese" Locale("tr"), // Turkish
"zh" -> "Chinese" Locale("vi"), // Vietnamese
"de" -> "German" Locale("ar"), // Arabic
"it" -> "Italian" Locale("id"), // Indonesian (Bahasa)
"ru" -> "Russian" Locale("th"), // Thai
"es" -> "Spanish" Locale("pl"), // Polish
"pt" -> "Portuguese" Locale("sv"), // Swedish
"tr" -> "Turkish" Locale("nl"), // Dutch (Flemish Dutch)
"vi" -> "Vietnamese" Locale("hu"), // Hungarian
"ar" -> "Arabic" Locale("hi"), // Hindi
"id" -> "Indonesia" Locale("he"), // Hebrew
"ko" -> "Korean" Locale("el"), // Greek
else -> "English" Locale("fi"), // Finnish
} Locale("fil"), // Filipino
} Locale("da"), // Danish
Locale("cs"), // Czech
Locale("hr"), // Croatian
Locale("bg"), // Bulgarian
Locale("zh", "HK"), // Cantonese
Locale("fa"), // Persian
Locale("sk"), // Slovak
Locale("ro"), // Romanian
Locale("no"), // Norwegian
Locale("ms"), // Malay
Locale("lt"), // Lithuanian
),
)
/**
 * Maps a filter locale to the language slug appended after `/lang/` in list URLs.
 *
 * Chinese is disambiguated by country: `TW` is Traditional Chinese, `HK` is Cantonese,
 * and any other (or no) country falls back to the generic Chinese slug.
 *
 * @param locale the locale chosen in the list filter, or `null` when none is chosen
 * @return the language slug, or `null` when [locale] is `null` or has no mapping here
 *         (meaning "all languages")
 */
private fun getLanguageSlug(locale: Locale?): String? {
    if (locale == null) return null
    return when (locale.language) {
        "fr" -> "french"
        "ja" -> "jp"
        "zh" -> when (locale.country) {
            "TW" -> "traditional-chinese"
            "HK" -> "cantonese"
            else -> "chinese"
        }
        "de" -> "german"
        "it" -> "italian"
        "ko" -> "korean"
        "es" -> "spanish"
        "pt" -> "portuguese"
        "ru" -> "russian"
        "tr" -> "turkish"
        "vi" -> "vietnamese"
        "ar" -> "arabic"
        // Depending on the runtime, java.util.Locale reports Indonesian as the legacy
        // code "in" instead of "id", so both spellings must be accepted.
        "id", "in" -> "bahasa"
        "th" -> "thai"
        "pl" -> "polish"
        "sv" -> "swedish"
        "nl" -> "flemish-dutch"
        "hu" -> "hungarian"
        "hi" -> "hindi"
        // Same legacy-code caveat as Indonesian: Hebrew may be reported as "iw".
        "he", "iw" -> "hebrew"
        "el" -> "greek"
        "fi" -> "finnish"
        "fil" -> "filipino"
        "da" -> "danish"
        "cs" -> "czech"
        "hr" -> "croatian"
        "bg" -> "bulgarian"
        "fa" -> "persian"
        "sk" -> "slovak"
        "ro" -> "romanian"
        "no" -> "norwegian-bokmal"
        "ms" -> "malay"
        "lt" -> "lithuanian"
        else -> null // all languages
    }
}
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> {
val url = buildString {
append("https://")
append(domain)
// Add language path if specified
val langSlug = getLanguageSlug(filter.locale)
if (langSlug != null) {
append("/lang/")
append(langSlug)
}
override suspend fun getListPage(page: Int, order: SortOrder, filter: MangaListFilter): List<Manga> { when {
val url = buildString { !filter.query.isNullOrEmpty() -> {
append("https://") // Search with language: /lang/french/page/2/?s=example
append(domain) if (page > 1) {
append("/search/?wpsolr_page=") append("/page/")
append(page) append(page)
when {
!filter.query.isNullOrEmpty() -> {
append("&wpsolr_q=")
append(filter.query.replace(' ', '+'))
}
else -> {
append("&wpsolr_sort=")
when (order) {
SortOrder.UPDATED -> append("sort_by_date_desc")
SortOrder.POPULARITY -> append("sort_by_relevancy_desc")
SortOrder.ALPHABETICAL -> append("sort_by_title_asc")
SortOrder.NEWEST -> append("sort_by_date_desc")
else -> append("sort_by_random")
}
}
}
var paramIndex = 0
filter.locale?.let {
append(
buildString {
append("&wpsolr_fq[")
append(paramIndex)
append("]=lang_str:")
} }
) append("/?s=")
append(getLanguageForFilter(it)) append(filter.query.urlEncoded())
paramIndex++ }
}
if (filter.tags.isNotEmpty()) {
filter.tags.forEach { tag ->
append("&wpsolr_fq[$paramIndex]=")
append("genre_str:${tag.key}")
paramIndex++
}
}
filter.states.oneOrThrowIfMany()?.let {
append("&wpsolr_fq[$paramIndex]=status:")
append(
when (it) {
MangaState.ONGOING -> "Ongoing"
MangaState.FINISHED -> "Completed"
else -> "Ongoing"
},
)
paramIndex++
}
}
val doc = webClient.httpGet(url).parseHtml() filter.tags.isNotEmpty() -> {
return parseMangaList(doc) // Genre filtering doesn't work with language, so we ignore language for genre
} if (langSlug == null) {
append("/genre/")
append(filter.tags.first().key)
append("/page/")
append(page)
append("/")
} else {
// If both language and genre are selected, just use language
append("/page/")
append(page)
append("/")
}
}
private fun parseMangaList(doc: Document): List<Manga> { filter.states.isNotEmpty() -> {
return doc.select("div.results-by-facets div[id*=res]").map { element -> // Status filtering doesn't work with language either
val titleElement = element.selectFirst("a") ?: element.parseFailed("No title element found") if (langSlug == null) {
val thumbnailElement = element.selectFirst("img") append("/status/")
append(
Manga( when (filter.states.first()) {
id = generateUid(titleElement.attr("href")), MangaState.ONGOING -> "ongoing"
title = titleElement.text().replace(titleRegex.toRegex(), "").substringBeforeLast("(").trim(), MangaState.FINISHED -> "completed"
altTitles = emptySet(), else -> "ongoing"
url = titleElement.attrAsRelativeUrl("href"), },
publicUrl = titleElement.absUrl("href"), )
rating = RATING_UNKNOWN, append("/page/")
contentRating = ContentRating.ADULT, append(page)
coverUrl = findImageSrc(thumbnailElement), append("/")
tags = emptySet(), } else {
state = null, // If both language and status are selected, just use language
authors = emptySet(), append("/page/")
source = source, append(page)
) append("/")
} }
} }
override suspend fun getDetails(manga: Manga): Manga { else -> {
val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml() // Regular browsing with or without language
val title = doc.selectFirst("h1")?.text() ?: manga.title append("/page/")
val author = title.substringAfter("[").substringBefore("]").trim() append(page)
val genres = doc.select(".entry-header p a[href*=genre], [href*=tag], span.entry-categories a") append("/")
.mapToSet { }
MangaTag( }
title = it.text().toTitleCase(), }
key = it.attr("href").substringAfterLast("/").substringBefore("/"),
source = source,
)
}
val scanlatedBy = doc.select(".entry-terms:has(a[href*=group])")
.firstOrNull()?.select("a[href*=group]")
?.joinToString(prefix = "Scanlated by: ") { it.text() }
val extendedDescription = doc.select(".entry-content p:not(p:containsOwn(|)):not(.chapter-class + p)")
.joinToString("\n") { it.text() }
val description = listOfNotNull(title, scanlatedBy, extendedDescription).joinToString("\n").trim()
val state = when (doc.select("a[href*=status]").firstOrNull()?.text()) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
else -> null
}
val chapters = parseChapters(doc) val doc = webClient.httpGet(url).parseHtml()
return parseMangaList(doc)
}
return manga.copy( private fun parseMangaList(doc: Document): List<Manga> {
description = description, return doc.select("div.content-archive article.post:not(.category-video)").mapNotNull { element ->
tags = genres, val titleElement = element.selectFirst("h2.entry-title a") ?: return@mapNotNull null
state = state, val thumbnailElement = element.selectFirst("a.entry-image-link img")
authors = setOfNotNull(author.takeIf { it.isNotEmpty() }),
chapters = chapters, Manga(
) id = generateUid(titleElement.attr("href")),
} title = titleElement.text().replace(titleRegex.toRegex(), "").substringBeforeLast("(").trim(),
altTitles = emptySet(),
url = titleElement.attrAsRelativeUrl("href"),
publicUrl = titleElement.absUrl("href"),
rating = RATING_UNKNOWN,
contentRating = ContentRating.ADULT,
coverUrl = findImageSrc(thumbnailElement),
tags = emptySet(),
state = null,
authors = emptySet(),
source = source,
)
}
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> { override suspend fun getDetails(manga: Manga): Manga {
val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml() val doc = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml()
val title = doc.selectFirst("h1.entry-title")?.text() ?: manga.title
val altTitles = mutableSetOf<String>()
val altTitleElement = doc.selectFirst("p.alt-title-class")
if (altTitleElement != null) {
var nextElement = altTitleElement.nextElementSibling()
while (nextElement != null && nextElement.tagName() == "p" &&
!nextElement.hasClass("info-class") && !nextElement.hasClass("chapter-class")
) {
val altTitle = nextElement.text().trim()
if (altTitle.isNotEmpty()) {
altTitles.add(altTitle)
}
nextElement = nextElement.nextElementSibling()
}
}
var description = ""
val descriptionElement = doc.selectFirst("p.info-class")
if (descriptionElement != null) {
var nextElement = descriptionElement.nextElementSibling()
val descParts = mutableListOf<String>()
while (nextElement != null && nextElement.tagName() == "p" &&
!nextElement.hasClass("chapter-class") && !nextElement.hasClass("alt-title-class")
) {
val text = nextElement.text()
if (text.isNotEmpty()) {
descParts.add(text)
}
nextElement = nextElement.nextElementSibling()
}
description = descParts.joinToString("\n\n")
}
if (description.isEmpty()) {
description = doc.select("div.entry-content p strong")
.joinToString("\n") { it.text() }
.trim()
.ifEmpty { title }
}
val authorFromTitle = title.substringAfter("[").substringBefore("]").trim()
val authorFromTag = doc.select("span.entry-tags a[href*='/tag/']")
.firstOrNull { it.text().contains("(") && it.text().contains(")") }
?.text()?.trim()
val author = authorFromTag ?: authorFromTitle
val genres = mutableSetOf<MangaTag>()
doc.select("span.entry-terms:has(span:contains(Genres)) a").forEach {
genres.add(
MangaTag(
title = it.text(),
key = it.attr("href").substringAfterLast("/genre/").substringBefore("/"),
source = source,
),
)
}
val state = when (doc.select("a[href*=status]").firstOrNull()?.text()) {
"Ongoing" -> MangaState.ONGOING
"Completed" -> MangaState.FINISHED
else -> null
}
val chapters = parseChapters(doc)
return manga.copy(
altTitles = altTitles,
description = description,
tags = genres,
state = state,
authors = setOfNotNull(author.takeIf { it.isNotEmpty() && it != title }),
chapters = chapters,
)
}
val images = (doc.select("div.entry-content img") + doc.select("div.separator img[data-src]"))
.mapNotNull { findImageSrc(it) }
.distinct()
return images.mapIndexed { _, url -> override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
MangaPage( val doc = webClient.httpGet(chapter.url.toAbsoluteUrl(domain)).parseHtml()
id = generateUid(url),
url = url, val images = doc.select("div.entry-content img.img-myreadingmanga, div.entry-content div > img")
preview = null, .filter { element ->
source = source, val src = findImageSrc(element)
) src != null && !src.contains("GH-") && !src.contains("nucarnival") &&
} !src.contains("/wp-content/uploads/202") // Exclude old uploads that might be ads
} }
.mapNotNull { findImageSrc(it) }
.distinct()
return images.mapIndexed { index, url ->
MangaPage(
id = generateUid(url),
url = url,
preview = null,
source = source,
)
}
}
private suspend fun fetchTags(): Set<MangaTag> { private suspend fun fetchTags(): Set<MangaTag> {
val doc = webClient.httpGet("https://${domain}/search-6/").parseHtml() val doc = webClient.httpGet("https://$domain/").parseHtml()
return doc.select("h4.widget-title.widgettitle:contains(Genres) + .tagcloud a")
return doc.select("div.select_opt").mapNotNull { element -> .mapToSet { element ->
val dataAttr = element.attr("data-wpsolr-facet-data")
val itemValue = dataAttr
.takeIf { it.isNotBlank() }
?.let { json ->
try {
val js = JSONObject(json)
val id = js.getString("id")
if (id != "genre_str") return@mapNotNull null
else js.getString("item_value")
} catch (e: Exception) {
null
}
}
itemValue?.let { value ->
MangaTag( MangaTag(
title = value, title = element.text().substringBefore(" ("),
key = value, key = element.attr("href").trimEnd('/').substringAfterLast('/'),
source = source, source = source,
) )
} }
}.toSet()
} }
private val titleRegex = Pattern.compile("""\[[^]]*]""") private val titleRegex = Pattern.compile("""\[[^]]*]""")
@ -254,44 +370,82 @@ internal class MyReadingManga(context: MangaLoaderContext) : PagedMangaParser(co
} }
} }
private fun parseChapters(document: Document): List<MangaChapter> { private fun parseChapters(document: Document): List<MangaChapter> {
val chapters = mutableListOf<MangaChapter>() val chapters = mutableListOf<MangaChapter>()
val mangaUrl = document.baseUri() val mangaUrl = document.baseUri().removeSuffix("/")
val date = parseDate(document.select(".entry-time").text()) val date = parseDate(document.select("time.entry-time").text())
val chFirstName = document.select(".chapter-class a[href*=$mangaUrl]").firstOrNull()?.text()
?.ifEmpty { "Ch. 1" }?.replaceFirstChar { it.uppercase() } ?: "Ch. 1" // Look for chapter information
val chapterClass = document.selectFirst("div.chapter-class")
chapters.add(importChapter("1", mangaUrl, date, chFirstName))
// Check if there's a chapter title after the chapter-class div
val lastChapterNumber = document.select("a[class=page-numbers]").lastOrNull()?.text()?.toIntOrNull() var chapterTitle: String? = null
if (lastChapterNumber != null && lastChapterNumber > 1) { if (chapterClass != null) {
for (i in 2..lastChapterNumber) { var nextElement = chapterClass.nextElementSibling()
chapters.add(importChapter(i.toString(), mangaUrl, date, "Ch. $i")) while (nextElement != null && nextElement.tagName() != "div") {
} if (nextElement.tagName() == "p" && nextElement.text().contains("Chapter", ignoreCase = true)) {
} chapterTitle = nextElement.text().trim()
break
return chapters }
} nextElement = nextElement.nextElementSibling()
}
}
// Check for pagination
val paginationInContent =
document.select("div.entry-pagination a.page-numbers, div.chapter-class .entry-pagination a.page-numbers")
.mapNotNull { it.text().toIntOrNull() }
.maxOrNull()
if (paginationInContent != null && paginationInContent > 1) {
// Multi-page manga with chapters
for (i in 1..paginationInContent) {
val title = when {
chapterTitle != null && i == 1 -> chapterTitle
chapterTitle != null -> chapterTitle.replace("1", i.toString())
else -> "Chapter $i"
}
private fun parseDate(date: String): Long { chapters.add(
return try { MangaChapter(
SimpleDateFormat("MMM dd, yyyy", Locale.US).parse(date)?.time ?: 0 id = generateUid("$mangaUrl/$i"),
} catch (e: Exception) { title = title,
0 number = i.toFloat(),
} url = if (i == 1) mangaUrl else "$mangaUrl/$i/",
} uploadDate = date,
source = source,
scanlator = null,
branch = null,
volume = 0,
),
)
}
} else {
// Single page manga or no pagination found
chapters.add(
MangaChapter(
id = generateUid(mangaUrl),
title = chapterTitle ?: "Complete",
number = 1f,
url = mangaUrl,
uploadDate = date,
source = source,
scanlator = null,
branch = null,
volume = 0,
),
)
}
return chapters
}
private fun importChapter(pageNumber: String, mangaUrl: String, date: Long, chapterName: String): MangaChapter { private fun parseDate(date: String): Long {
return MangaChapter( return try {
id = generateUid("$mangaUrl/$pageNumber"), SimpleDateFormat("MMMM dd, yyyy", Locale.US).parse(date)?.time ?: 0
title = chapterName, } catch (_: Exception) {
number = pageNumber.toFloatOrNull() ?: 0f, 0L
url = "$mangaUrl/$pageNumber", }
uploadDate = date, }
source = source,
scanlator = null,
branch = null,
volume = 0,
)
}
} }

Loading…
Cancel
Save