Skip to content

Commit

Permalink
epub-parser: Fix line break issue in some cases (#154)
Browse files Browse the repository at this point in the history
* Fix line-breaking issues when parsing an EPUB based on fragment and next-fragment logic.
* Add support for .htm files when parsing based on the spine.
--------------------------------------------------------------------
Signed-off-by: starry-shivam <[email protected]>
  • Loading branch information
starry-shivam authored Apr 19, 2024
1 parent 51a4475 commit 29661b9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
2 changes: 1 addition & 1 deletion app/src/main/java/com/starry/myne/epub/EpubParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ class EpubParser {
files: Map<String, EpubFile>,
): List<EpubChapter> {
var chapterIndex = 0
val chapterExtensions = listOf("xhtml", "xml", "html").map { ".$it" }
val chapterExtensions = listOf("xhtml", "xml", "html", "htm").map { ".$it" }
return spine
.selectChildTag("itemref")
.mapNotNull { manifestItems[it.getAttribute("idref")] }
Expand Down
33 changes: 19 additions & 14 deletions app/src/main/java/com/starry/myne/epub/EpubXMLFileParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class EpubXMLFileParser(
fragmentElement?.selectFirst("h1, h2, h3, h4, h5, h6")?.remove()

while (currentNode != null && currentNode != nextFragmentIdElement) {
bodyBuilder.append(getNodeStructuredText(currentNode) + "\n\n")
bodyBuilder.append(getNodeStructuredText(currentNode, true) + "\n\n")
currentNode = getNextSibling(currentNode)
}
bodyContent = bodyBuilder.toString()
Expand Down Expand Up @@ -215,21 +215,26 @@ class EpubXMLFileParser(
}
}

private fun getNodeStructuredText(node: Node): String {
val children = node.childNodes()
if (children.isEmpty())
return ""
private fun getNodeStructuredText(node: Node, singleNode: Boolean = false): String {
val nodeActions = mapOf(
"p" to { n: Node -> getPTraverse(n) },
"br" to { "\n" },
"hr" to { "\n\n" },
"img" to ::declareImgEntry,
"image" to ::declareImgEntry
)

return children.joinToString("") { child ->
when {
child.nodeName() == "p" -> getPTraverse(child)
child.nodeName() == "br" -> "\n"
child.nodeName() == "hr" -> "\n\n"
child.nodeName() == "img" -> declareImgEntry(child)
child.nodeName() == "image" -> declareImgEntry(child)
child is TextNode -> child.text().trim()
else -> getNodeTextTraverse(child)
val action: (Node) -> String = { n: Node ->
if (n is TextNode) {
n.text().trim()
} else {
getNodeTextTraverse(n)
}
}

val children = if (singleNode) listOf(node) else node.childNodes()
return children.joinToString("") { child ->
nodeActions[child.nodeName()]?.invoke(child) ?: action(child)
}
}
}

0 comments on commit 29661b9

Please sign in to comment.