diff --git a/quotequail/__init__.py b/quotequail/__init__.py
index 21e0ab8..a316961 100644
--- a/quotequail/__init__.py
+++ b/quotequail/__init__.py
@@ -146,9 +146,9 @@ def unwrap_html(html):
"type": typ,
}
- top_range = _html.trim_slice(lines, top_range)
- main_range = _html.trim_slice(lines, main_range)
- bottom_range = _html.trim_slice(lines, bottom_range)
+ top_range = _html.trim_slice(lines, top_range, start_refs, end_refs)
+ main_range = _html.trim_slice(lines, main_range, start_refs, end_refs)
+ bottom_range = _html.trim_slice(lines, bottom_range, start_refs, end_refs)
if top_range:
top_tree = _html.slice_tree(
diff --git a/quotequail/_html.py b/quotequail/_html.py
index 69ffca5..5e95b5a 100644
--- a/quotequail/_html.py
+++ b/quotequail/_html.py
@@ -22,7 +22,9 @@
"td",
"th",
]
-
+# Tags for replaced elements: their content is binary data rather than text,
+# so they must be preserved in the HTML regardless of the text around them.
+REPLACED_TAGS = ["img"]
BEGIN = "begin"
END = "end"
@@ -71,7 +73,11 @@ def trim_tree_before(element, include_element=True, keep_head=True):
el = parent_el
-def trim_slice(lines, slice_tuple):
+def is_replaced(el):
+ return isinstance(el.tag, string_class) and el.tag.lower() in REPLACED_TAGS
+
+
+def trim_slice(lines, slice_tuple, start_refs, end_refs):
"""
Trim a slice tuple (begin, end) so it starts at the first non-empty line
(obtained via indented_tree_line_generator / get_line_info) and ends at the
@@ -92,11 +98,19 @@ def _empty(line):
slice_end = len(lines)
# Trim from beginning
- while slice_start < slice_end and _empty(lines[slice_start]):
+ while (
+ slice_start < slice_end
+ and _empty(lines[slice_start])
+ and not is_replaced(start_refs[slice_start][0])
+ ):
slice_start += 1
# Trim from end
- while slice_end > slice_start and _empty(lines[slice_end - 1]):
+ while (
+ slice_end > slice_start
+ and _empty(lines[slice_end - 1])
+ and not is_replaced(end_refs[slice_end - 1][0])
+ ):
slice_end -= 1
return (slice_start, slice_end)
@@ -173,9 +187,9 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
new_tree = tree
if start_ref:
- include_start = start_ref[1] == BEGIN
+ include_start = start_ref[1] == BEGIN or is_replaced(start_ref[0])
if end_ref:
- include_end = end_ref[1] == END
+ include_end = end_ref[1] == END or is_replaced(end_ref[0])
# If start_ref is the same as end_ref, and we don't include the element,
# we are removing the entire tree. We need to handle this separately,
diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py
index d2de31a..8dcd513 100644
--- a/tests/test_quotequail.py
+++ b/tests/test_quotequail.py
@@ -949,6 +949,19 @@ def test_gmail_reply(self):
},
)
+ def test_reply_with_image(self):
+ html = 'Test 2.
On Jun 05, 2018, at 09:56 AM, John Doe <john@example.com> wrote:
' + self.assertEqual( + unwrap_html(html), + { + "date": "Jun 05, 2018, at 09:56 AM", + "from": "John Doe
Some text 1.
Bart