Parser improvements

This commit is contained in:
len 2016-06-20 00:57:29 +02:00
parent 585f7ec17d
commit 143303f7df
3 changed files with 59 additions and 15 deletions

View File

@ -9,8 +9,10 @@ import eu.kanade.tachiyomi.data.source.getLanguages
import eu.kanade.tachiyomi.data.source.model.MangasPage import eu.kanade.tachiyomi.data.source.model.MangasPage
import eu.kanade.tachiyomi.data.source.model.Page import eu.kanade.tachiyomi.data.source.model.Page
import eu.kanade.tachiyomi.util.asJsoup import eu.kanade.tachiyomi.util.asJsoup
import eu.kanade.tachiyomi.util.attrOrText
import okhttp3.Request import okhttp3.Request
import okhttp3.Response import okhttp3.Response
import org.jsoup.Jsoup
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.* import java.util.*
@ -127,28 +129,59 @@ class YamlOnlineSource(context: Context, mappings: Map<*, *>) : OnlineSource(con
} }
override fun pageListParse(response: Response, pages: MutableList<Page>) { override fun pageListParse(response: Response, pages: MutableList<Page>) {
val document = response.asJsoup() val body = response.body().string()
val url = response.request().url().toString()
// TODO lazy initialization in Kotlin 1.1
val document = Jsoup.parse(body, url)
with(map.pages) { with(map.pages) {
val url = response.request().url().toString() // Capture a list of values where page urls will be resolved.
pages_css?.let { val capturedPages = if (pages_regex != null)
for (element in document.select(it)) { pages_regex!!.toRegex().findAll(body).map { it.value }.toList()
val value = element.attr(pages_attr) else if (pages_css != null)
val pageUrl = replace?.let { url.replace(it.toRegex(), replacement!!.replace("\$value", value)) } ?: value document.select(pages_css).map { it.attrOrText(pages_attr!!) }
pages.add(Page(pages.size, pageUrl)) else
} null
// For each captured value, obtain the url and create a new page.
capturedPages?.forEach { value ->
// If the captured value isn't an url, we have to use replaces with the chapter url.
val pageUrl = if (replace != null && replacement != null)
url.replace(replace!!.toRegex(), replacement!!.replace("\$value", value))
else
value
pages.add(Page(pages.size, pageUrl))
} }
for ((i, element) in document.select(image_css).withIndex()) { // Capture a list of images.
pages.getOrNull(i)?.imageUrl = element.absUrl(image_attr) val capturedImages = if (image_regex != null)
image_regex!!.toRegex().findAll(body).map { it.groups[1]?.value }.toList()
else if (image_css != null)
document.select(image_css).map { it.absUrl(image_attr) }
else
null
// Assign the image url to each page
capturedImages?.forEachIndexed { i, url ->
val page = pages.getOrElse(i) { Page(i, "").apply { pages.add(this) } }
page.imageUrl = url
} }
} }
} }
override fun imageUrlParse(response: Response): String { override fun imageUrlParse(response: Response): String {
val document = response.asJsoup() val body = response.body().string()
return with(map.pages) { val url = response.request().url().toString()
document.select(image_css).first().absUrl(image_attr)
with(map.pages) {
return if (image_regex != null)
image_regex!!.toRegex().find(body)!!.groups[1]!!.value
else if (image_css != null)
Jsoup.parse(body, url).select(image_css).first().absUrl(image_attr)
else
throw Exception("image_regex and image_css are null")
} }
} }

View File

@ -194,6 +194,9 @@ class DateNode(private val map: Map<String, Any?>) : SelectableNode(map) {
class PagesNode(private val map: Map<String, Any?>) { class PagesNode(private val map: Map<String, Any?>) {
/**
 * Optional value stored under the "pages_regex" key of the backing map.
 * Evaluates to null when the key is absent or its value is not a [String].
 */
val pages_regex: String?
get() = map["pages_regex"] as? String
val pages_css: String? val pages_css: String?
get() = map["pages_css"] as? String get() = map["pages_css"] as? String
@ -206,7 +209,11 @@ class PagesNode(private val map: Map<String, Any?>) {
val replacement: String? val replacement: String?
get() = map["url_replacement"] as? String get() = map["url_replacement"] as? String
val image_css: String by map val image_regex: String?
get() = map["image_regex"] as? String
/**
 * Optional value stored under the "image_css" key of the backing map.
 * Evaluates to null when the key is absent or its value is not a [String].
 */
val image_css: String?
get() = map["image_css"] as? String
val image_attr: String val image_attr: String
get() = map["image_attr"] as? String ?: "src" get() = map["image_attr"] as? String ?: "src"

View File

@ -13,6 +13,10 @@ fun Element.selectInt(css: String, defaultValue: Int = 0): Int {
return select(css).first()?.text()?.toInt() ?: defaultValue return select(css).first()?.text()?.toInt() ?: defaultValue
} }
/**
 * Reads a value from this element: its text content when [css] is exactly
 * the string "text", otherwise the attribute named by [css].
 *
 * @param css despite its name, this is passed to [Element.attr] as an attribute
 * name (or is the keyword "text"); it is not evaluated as a CSS selector here.
 * @return the result of [Element.text] or of [Element.attr] for the given name.
 */
fun Element.attrOrText(css: String): String = when (css) {
    "text" -> text()
    else -> attr(css)
}
/** /**
* Returns a Jsoup document for this response. * Returns a Jsoup document for this response.
* @param html the body of the response. Use only if the body was read before calling this method. * @param html the body of the response. Use only if the body was read before calling this method.