[NudeMoon] Migrate to mobile website parsing

pull/238/head
Koitharu 3 years ago
parent 407ef5b655
commit 270890f841
Signed by: Koitharu
GPG Key ID: 676DEE768C17A9D7

@ -42,7 +42,7 @@ internal class NudeMoonParser(
context.cookieJar.insertCookies(
domain,
"NMfYa=1;",
"nm_mobile=0;",
"nm_mobile=1;",
)
}
@ -65,33 +65,24 @@ internal class NudeMoonParser(
else -> "https://$domain/all_manga?${getSortKey(sortOrder)}&rowstart=$offset"
}
val doc = webClient.httpGet(url).parseHtml()
val root = doc.body().run {
selectFirst("td.main-bg") ?: selectFirst("td.main-body")
} ?: doc.parseFailed("Cannot find root")
return root.select("table.news_pic2").mapNotNull { row ->
val a = row.selectFirst("td.bg_style1")?.selectFirst("a")
?: return@mapNotNull null
return doc.body().select("table.news_pic2").mapNotNull { row ->
val a = row.selectFirstOrThrow("a")
val href = a.attrAsRelativeUrl("href")
val title = a.selectFirst("h2")?.text().orEmpty()
val info = row.selectFirst("td[width=100%]") ?: return@mapNotNull null
val title = a.attr("title")
Manga(
id = generateUid(href),
url = href,
title = title.substringAfter(" / "),
altTitle = title.substringBefore(" / ", "")
.takeUnless { it.isBlank() },
author = info.getElementsContainingOwnText("Автор:").firstOrNull()
?.nextElementSibling()?.ownText(),
coverUrl = row.selectFirst("img.news_pic2")?.absUrl("data-src")
.orEmpty(),
tags = row.selectFirst("span.tag-links")?.select("a")
?.mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}.orEmpty(),
altTitle = title.substringBefore(" / ", "").takeUnless { it.isBlank() },
author = row.getElementsByAttributeValueContaining("href", "/mangaka/").firstOrNull()?.textOrNull(),
coverUrl = row.selectFirst("img")?.absUrl("src").orEmpty(),
tags = row.selectFirst(".tag-links")?.select("a")?.mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}.orEmpty(),
source = source,
publicUrl = a.absUrl("href"),
rating = RATING_UNKNOWN,
@ -104,34 +95,32 @@ internal class NudeMoonParser(
override suspend fun getDetails(manga: Manga): Manga {
val body = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).parseHtml().body()
val root = body.selectFirst("table.shoutbox")
?: body.parseFailed("Cannot find root")
val info = root.select("div.tbl2")
val lastInfo = info.last()
val root = body.selectFirstOrThrow("table.news_pic2")
val dateFormat = SimpleDateFormat("dd/MM/yyyy")
return manga.copy(
largeCoverUrl = body.selectFirst("img.news_pic2")?.absUrl("src"),
description = info.select("div.blockquote").lastOrNull()?.html() ?: manga.description,
tags = info.select("span.tag-links").firstOrNull()?.select("a")?.mapToSet {
largeCoverUrl = body.selectFirstOrThrow("img[data-src]").attrAsAbsoluteUrl("data-src"),
description = root.selectFirst(".description")?.html() ?: manga.description,
tags = root.getElementsByAttributeValueContaining("href", "/tag/").mapToSet {
MangaTag(
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/'),
source = source,
)
}?.plus(manga.tags) ?: manga.tags,
author = lastInfo?.getElementsByAttributeValueContaining("href", "mangaka/")?.text()
} + manga.tags,
author = root.getElementsByAttributeValueContaining("href", "/mangaka/").firstOrNull()?.text()
?: manga.author,
chapters = listOf(
MangaChapter(
id = manga.id,
url = getReadLink(manga.url),
url = manga.url,
source = source,
number = 1,
name = manga.title,
scanlator = lastInfo?.getElementsByAttributeValueContaining("href", "perevod/")?.text(),
uploadDate = lastInfo?.getElementsContainingOwnText("Дата:")
?.firstOrNull()
?.html()
?.parseDate() ?: 0L,
scanlator = root.getElementsByAttributeValueContaining("href", "/perevod/").firstOrNull()
?.textOrNull(),
uploadDate = dateFormat.tryParse(
root.getElementsMatchingOwnText("\\d{1,2}/\\d{2}/\\d{4}").firstOrNull()?.text(),
),
branch = null,
),
),
@ -141,60 +130,40 @@ internal class NudeMoonParser(
/**
 * Downloads the document at [chapter]'s absolute URL and produces the list of [MangaPage]s for it.
 *
 * NOTE(review): this span appears to interleave two variants of the body — one that extracts
 * page URLs from an inline script with a regex, and one that reads `img[data-src]` elements —
 * as in a diff rendered without +/- markers. Confirm which lines are current before editing.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.toAbsoluteUrl(domain)
val doc = webClient.httpGet(fullUrl).parseHtml()
// Last path segment of the chapter URL, cut at the first '-', parsed as Int (null if not numeric).
val mangaId = chapter.url.substringAfterLast('/').substringBefore('-').toIntOrNull()
// Script variant: take the first <script> whose content contains " images = new ".
val script = doc.select("script").firstNotNullOfOrNull {
it.html().takeIf { x -> x.contains(" images = new ") }
} ?: if (isAuthorized) {
// No such script while authorized: report a parse failure.
doc.parseFailed("Cannot find pages list")
} else {
// No such script while not authorized: ask for authorization instead.
throw AuthRequiredException(source)
}
// Captures the index (group 1) and the quoted URL (group 2) of each `images[<n>].src = '<url>'` assignment.
val pagesRegex = Regex("images\\[(\\d+)].src\\s*=\\s*'([^']+)'", RegexOption.MULTILINE)
return pagesRegex.findAll(script).map { match ->
val i = match.groupValues[1].toInt()
val url = match.groupValues[2]
// Image variant: every <img> carrying a data-src attribute is treated as one page.
val pages = doc.select("img[data-src]")
return pages.map { img ->
val url = img.attrAsRelativeUrl("data-src")
MangaPage(
id = generateUid(url),
url = url,
// Script variant: for indices up to MAX_THUMB_INDEX with a known manga id, the preview is
// "<url minus its last two path segments>/thumb/<mangaId>/thumb_<file name>"; otherwise null.
preview = if (i <= MAX_THUMB_INDEX && mangaId != null) {
val part2 = url.substringBeforeLast('/')
val part3 = url.substringAfterLast('/')
val part1 = part2.substringBeforeLast('/')
"$part1/thumb/$mangaId/thumb_$part3"
} else {
null
},
// Image variant: no preview is provided.
preview = null,
source = source,
)
}.toList()
}
/**
 * Returns the URL to load for [page]: the page's stored URL passed through `toAbsoluteUrl`
 * with the host `img.<domain>`.
 */
override suspend fun getPageUrl(page: MangaPage): String {
    val imageHost = "img.$domain"
    return page.url.toAbsoluteUrl(imageHost)
}
/**
 * Fetches a page from the site and converts the tag entries found on it into a set of [MangaTag]s.
 *
 * NOTE(review): this span appears to interleave two variants of the body — one that reads
 * anchors from `/all_manga`, and one that reads `input` elements from `/tags` — as in a diff
 * rendered without +/- markers. Confirm which lines are current before editing.
 */
override suspend fun getTags(): Set<MangaTag> {
// Read the `domain` property once into a local for the rest of the function.
val domain = domain
// Anchor variant: locate the element whose own text contains the tag-search caption, climb to
// its enclosing <tbody>, then descend to `td.textbox` -> `td.small`; fail if any step is missing.
val doc = webClient.httpGet("https://$domain/all_manga").parseHtml()
val root = doc.body().getElementsContainingOwnText("Поиск манги по тегам")
.firstOrNull()?.parents()?.find { it.tag().normalName() == "tbody" }
?.selectFirst("td.textbox")?.selectFirst("td.small")
?: doc.parseFailed("Tags root not found")
return root.select("a").mapToSet {
// Input variant: use the first element whose `name` attribute equals "multitags"; fail if absent.
val doc = webClient.httpGet("https://$domain/tags").parseHtml()
val root = doc.body().getElementsByAttributeValue("name", "multitags").first()
?: doc.parseFailed("Tags form not found")
return root.select("input").mapToSet {
val value = it.attr("value").trim()
MangaTag(
// Anchor variant: title from the link text, key from the last path segment of href
// with a trailing "+" removed.
title = it.text().toTitleCase(),
key = it.attr("href").substringAfterLast('/')
.removeSuffix("+"),
// Input variant: title from the trimmed `value` attribute, key is the same value with
// spaces replaced by underscores.
title = value.toTitleCase(sourceLocale),
key = value.replace(' ', '_'),
source = source,
)
}
}
override suspend fun getUsername(): String {
val body = webClient.httpGet("https://${domain}/").parseHtml()
.body()
return body
.getElementsContainingOwnText("Профиль")
.firstOrNull()
?.attr("href")
?.substringAfterLast('/')
val body = webClient.httpGet("https://${domain}/").parseHtml().body()
return body.getElementsContainingOwnText("Профиль").firstOrNull()?.attr("href")?.substringAfterLast('/')
?: run {
throw if (body.selectFirst("form[name=\"loginform\"]") != null) {
AuthRequiredException(source)
@ -204,23 +173,10 @@ internal class NudeMoonParser(
}
}
/**
 * Maps [sortOrder] to the key inserted into the listing URL's query string.
 *
 * Popularity yields "views" and newest yields "date"; every other order,
 * including rating, yields "like".
 */
private fun getSortKey(sortOrder: SortOrder): String {
    if (sortOrder == SortOrder.POPULARITY) return "views"
    if (sortOrder == SortOrder.NEWEST) return "date"
    return "like"
}
/**
 * Extracts the text between "Дата:" and the next "<" in the receiver and parses it
 * with the pattern `d MMMM yyyy` in the Russian locale.
 *
 * @return the value produced by `tryParse` for the extracted text, or 0 when the
 * receiver contains no such delimited text.
 */
private fun String.parseDate(): Long {
    val rawDate = substringBetweenFirst("Дата:", "<")?.trim()
    if (rawDate == null) {
        return 0L
    }
    return SimpleDateFormat("d MMMM yyyy", Locale("ru")).tryParse(rawDate)
}
/**
 * Builds a reader link from [url] by inserting "-online-" at the first '-'.
 *
 * The prefix is the text before the first '-' (empty when [url] has no '-'); the suffix is
 * the text after the first '-' with any further leading '-' characters removed (the whole
 * [url] when it has no '-').
 *
 * NOTE(review): the closing brace of this function is not distinguishable in this view —
 * the next definition follows directly; confirm the block boundary before editing.
 */
private fun getReadLink(url: String): String {
val prefix = url.substringBefore('-', "")
val suffix = url.substringAfter('-').trimStart('-')
return "$prefix-online-$suffix"
/**
 * Returns the query key for [sortOrder] that is placed into the listing URL:
 * "date" for newest, "views" for popularity, and "like" for everything else.
 */
private fun getSortKey(sortOrder: SortOrder): String {
    return when (sortOrder) {
        SortOrder.NEWEST -> "date"
        SortOrder.POPULARITY -> "views"
        else -> "like" // covers SortOrder.RATING as well as any remaining order
    }
}
}

Loading…
Cancel
Save