From 143303f7df882e856f97e8bbaddfd0ee1bfdb413 Mon Sep 17 00:00:00 2001 From: len Date: Mon, 20 Jun 2016 00:57:29 +0200 Subject: [PATCH] Parser improvements --- .../data/source/online/YamlOnlineSource.kt | 61 ++++++++++++++----- .../source/online/YamlOnlineSourceMappings.kt | 9 ++- .../kanade/tachiyomi/util/JsoupExtensions.kt | 4 ++ 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSource.kt b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSource.kt index 090166006d..21f8c59195 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSource.kt +++ b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSource.kt @@ -9,8 +9,10 @@ import eu.kanade.tachiyomi.data.source.getLanguages import eu.kanade.tachiyomi.data.source.model.MangasPage import eu.kanade.tachiyomi.data.source.model.Page import eu.kanade.tachiyomi.util.asJsoup +import eu.kanade.tachiyomi.util.attrOrText import okhttp3.Request import okhttp3.Response +import org.jsoup.Jsoup import org.jsoup.nodes.Element import java.text.SimpleDateFormat import java.util.* @@ -127,28 +129,59 @@ class YamlOnlineSource(context: Context, mappings: Map<*, *>) : OnlineSource(con } override fun pageListParse(response: Response, pages: MutableList) { - val document = response.asJsoup() + val body = response.body().string() + val url = response.request().url().toString() + + // TODO lazy initialization in Kotlin 1.1 + val document = Jsoup.parse(body, url) + with(map.pages) { - val url = response.request().url().toString() - pages_css?.let { - for (element in document.select(it)) { - val value = element.attr(pages_attr) - val pageUrl = replace?.let { url.replace(it.toRegex(), replacement!!.replace("\$value", value)) } ?: value - pages.add(Page(pages.size, pageUrl)) - } + // Capture a list of values where page urls will be resolved. + val capturedPages = if (pages_regex != null) + pages_regex!!.toRegex().findAll(body).map { it.value }.toList() + else if (pages_css != null) + document.select(pages_css).map { it.attrOrText(pages_attr!!) } + else + null + + // For each captured value, obtain the url and create a new page. + capturedPages?.forEach { value -> + // If the captured value isn't an url, we have to use replaces with the chapter url. + val pageUrl = if (replace != null && replacement != null) + url.replace(replace!!.toRegex(), replacement!!.replace("\$value", value)) + else + value + + pages.add(Page(pages.size, pageUrl)) } - for ((i, element) in document.select(image_css).withIndex()) { - pages.getOrNull(i)?.imageUrl = element.absUrl(image_attr) + // Capture a list of images. + val capturedImages = if (image_regex != null) + image_regex!!.toRegex().findAll(body).map { it.groups[1]?.value }.toList() + else if (image_css != null) + document.select(image_css).map { it.absUrl(image_attr) } + else + null + + // Assign the image url to each page + capturedImages?.forEachIndexed { i, url -> + val page = pages.getOrElse(i) { Page(i, "").apply { pages.add(this) } } + page.imageUrl = url } } - } override fun imageUrlParse(response: Response): String { - val document = response.asJsoup() - return with(map.pages) { - document.select(image_css).first().absUrl(image_attr) + val body = response.body().string() + val url = response.request().url().toString() + + with(map.pages) { + return if (image_regex != null) + image_regex!!.toRegex().find(body)!!.groups[1]!!.value + else if (image_css != null) + Jsoup.parse(body, url).select(image_css).first().absUrl(image_attr) + else + throw Exception("image_regex and image_css are null") } } diff --git a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSourceMappings.kt b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSourceMappings.kt index e819dfd53a..970534de82 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSourceMappings.kt +++ b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/YamlOnlineSourceMappings.kt @@ -194,6 +194,9 @@ class DateNode(private val map: Map) : SelectableNode(map) { class PagesNode(private val map: Map) { + val pages_regex: String? + get() = map["pages_regex"] as? String + val pages_css: String? get() = map["pages_css"] as? String @@ -206,7 +209,11 @@ class PagesNode(private val map: Map) { val replacement: String? get() = map["url_replacement"] as? String - val image_css: String by map + val image_regex: String? + get() = map["image_regex"] as? String + + val image_css: String? + get() = map["image_css"] as? String val image_attr: String get() = map["image_attr"] as? String ?: "src" diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/JsoupExtensions.kt b/app/src/main/java/eu/kanade/tachiyomi/util/JsoupExtensions.kt index 86a5b18ac8..7b5609f6f8 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/JsoupExtensions.kt +++ b/app/src/main/java/eu/kanade/tachiyomi/util/JsoupExtensions.kt @@ -13,6 +13,10 @@ fun Element.selectInt(css: String, defaultValue: Int = 0): Int { return select(css).first()?.text()?.toInt() ?: defaultValue } +fun Element.attrOrText(css: String): String { + return if (css != "text") attr(css) else text() +} + /** * Returns a Jsoup document for this response. * @param html the body of the response. Use only if the body was read before calling this method.