From 2c2f8f5853b68e7d9f6def70d18c56835a0f236b Mon Sep 17 00:00:00 2001 From: Steven Smith Date: Sat, 25 Apr 2020 19:27:43 -0700 Subject: [PATCH] Improvements to EPUB support. (#2409) * Fix EPUBs containing relative file paths and/or alternate path separators. * Support calibre-generated EPUB covers. * Store EPUB pathSeparator in a field. * Process both types of image tags in EPUBs. * Process all EPUB image tags in order. --- .../kanade/tachiyomi/util/storage/EpubFile.kt | 84 ++++++++++++++----- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/app/src/main/java/eu/kanade/tachiyomi/util/storage/EpubFile.kt b/app/src/main/java/eu/kanade/tachiyomi/util/storage/EpubFile.kt index 0763abb2e2..e9e8dd8749 100644 --- a/app/src/main/java/eu/kanade/tachiyomi/util/storage/EpubFile.kt +++ b/app/src/main/java/eu/kanade/tachiyomi/util/storage/EpubFile.kt @@ -18,6 +18,11 @@ class EpubFile(file: File) : Closeable { */ private val zip = ZipFile(file) + /** + * Path separator used by this epub. + */ + private val pathSeparator = getPathSeparator() + /** * Closes the underlying zip file. */ @@ -43,19 +48,17 @@ class EpubFile(file: File) : Closeable { * Returns the path of all the images found in the epub file. */ fun getImagesFromPages(): List { - val allEntries = zip.entries().toList() val ref = getPackageHref() val doc = getPackageDocument(ref) val pages = getPagesFromDocument(doc) - val hrefs = getHrefMap(ref, allEntries.map { it.name }) - return getImagesFromPages(pages, hrefs) + return getImagesFromPages(pages, ref) } /** * Returns the path to the package document. */ private fun getPackageHref(): String { - val meta = zip.getEntry("META-INF/container.xml") + val meta = zip.getEntry(resolveZipPath("META-INF", "container.xml")) if (meta != null) { val metaDoc = zip.getInputStream(meta).use { Jsoup.parse(it, null, "") } val path = metaDoc.getElementsByTag("rootfile").first()?.attr("full-path") @@ -63,7 +66,7 @@ class EpubFile(file: File) : Closeable { return path } } - return "OEBPS/content.opf" + return resolveZipPath("OEBPS", "content.opf") } /** @@ -89,28 +92,67 @@ class EpubFile(file: File) : Closeable { /** * Returns all the images contained in every page from the epub. */ - private fun getImagesFromPages(pages: List, hrefs: Map): List { - return pages.map { page -> - val entry = zip.getEntry(hrefs[page]) + private fun getImagesFromPages(pages: List, packageHref: String): List { + val result = ArrayList() + val basePath = getParentDirectory(packageHref) + pages.forEach { page -> + val entryPath = resolveZipPath(basePath, page) + val entry = zip.getEntry(entryPath) val document = zip.getInputStream(entry).use { Jsoup.parse(it, null, "") } - document.getElementsByTag("img").mapNotNull { hrefs[it.attr("src")] } - }.flatten() + val imageBasePath = getParentDirectory(entryPath) + + document.allElements.forEach { + if (it.tagName() == "img") { + result.add(resolveZipPath(imageBasePath, it.attr("src"))) + } else if (it.tagName() == "image") { + result.add(resolveZipPath(imageBasePath, it.attr("xlink:href"))) + } + } + } + + return result } /** - * Returns a map with a relative url as key and abolute url as path. + * Returns the path separator used by the epub file. */ - private fun getHrefMap(packageHref: String, entries: List): Map { - val lastSlashPos = packageHref.lastIndexOf('/') - if (lastSlashPos < 0) { - return entries.associateBy { it } + private fun getPathSeparator(): String { + val meta = zip.getEntry("META-INF\\container.xml") + if (meta != null) { + return "\\" + } else { + return "/" } - return entries.associateBy { entry -> - if (entry.isNotBlank() && entry.length > lastSlashPos) { - entry.substring(lastSlashPos + 1) - } else { - entry - } + } + + /** + * Resolves a zip path from base and relative components and a path separator. + */ + private fun resolveZipPath(basePath: String, relativePath: String): String { + if (relativePath.startsWith(pathSeparator)) { + // Path is absolute, so return as-is. + return relativePath + } + + var fixedBasePath = basePath.replace(pathSeparator, File.separator) + if (!fixedBasePath.startsWith(File.separator)) { + fixedBasePath = "${File.separator}$fixedBasePath" + } + + val fixedRelativePath = relativePath.replace(pathSeparator, File.separator) + val resolvedPath = File(fixedBasePath, fixedRelativePath).canonicalPath + return resolvedPath.replace(File.separator, pathSeparator).substring(1) + } + + /** + * Gets the parent directory of a path. + */ + private fun getParentDirectory(path: String): String { + val separatorIndex = path.lastIndexOf(pathSeparator) + if (separatorIndex >= 0) { + return path.substring(0, separatorIndex) + } else { + return "" } } }