From b929ea67203a182ded17bb9fcfd641687924a670 Mon Sep 17 00:00:00 2001 From: Andrei Petre <p31andrei@gmail.com> Date: Thu, 5 Jul 2018 15:41:37 -0700 Subject: [PATCH] Add img to replaced tags which get preserved in HTML from slicing. --- quotequail/__init__.py | 6 +++--- quotequail/_html.py | 23 ++++++++++++++++++----- tests/test_quotequail.py | 10 ++++++++++ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/quotequail/__init__.py b/quotequail/__init__.py index df80bfd..efae521 100644 --- a/quotequail/__init__.py +++ b/quotequail/__init__.py @@ -136,9 +136,9 @@ def unwrap_html(html): 'type': typ, } - top_range = _html.trim_slice(lines, top_range) - main_range = _html.trim_slice(lines, main_range) - bottom_range = _html.trim_slice(lines, bottom_range) + top_range = _html.trim_slice(lines, top_range, start_refs, end_refs) + main_range = _html.trim_slice(lines, main_range, start_refs, end_refs) + bottom_range = _html.trim_slice(lines, bottom_range, start_refs, end_refs) if top_range: top_tree = _html.slice_tree(tree, start_refs, end_refs, top_range, diff --git a/quotequail/_html.py b/quotequail/_html.py index f40a595..f4caf03 100644 --- a/quotequail/_html.py +++ b/quotequail/_html.py @@ -7,6 +7,9 @@ INLINE_TAGS = ['a', 'b', 'em', 'i', 'strong', 'span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', 'center', 'td', 'th'] +# Tags of elements that are replaced by binary data (e.g. images); they should +# be preserved in the HTML no matter the text around them. 
+REPLACED_TAGS = ['img'] BEGIN = 'begin' END = 'end' @@ -53,7 +56,13 @@ def trim_tree_before(element, include_element=True, keep_head=True): parent_el.remove(remove_el) el = parent_el -def trim_slice(lines, slice_tuple): +def is_replaced(el): + return ( + isinstance(el.tag, string_class) and + el.tag.lower() in REPLACED_TAGS + ) + +def trim_slice(lines, slice_tuple, start_refs, end_refs): """ Trim a slice tuple (begin, end) so it starts at the first non-empty line (obtained via indented_tree_line_generator / get_line_info) and ends at the @@ -73,11 +82,15 @@ def _empty(line): slice_end = len(lines) # Trim from beginning - while slice_start < slice_end and _empty(lines[slice_start]): + while (slice_start < slice_end and + _empty(lines[slice_start]) and + not is_replaced(start_refs[slice_start][0])): slice_start += 1 # Trim from end - while slice_end > slice_start and _empty(lines[slice_end-1]): + while (slice_end > slice_start and + _empty(lines[slice_end-1]) and + not is_replaced(end_refs[slice_end-1][0])): slice_end -= 1 return (slice_start, slice_end) @@ -151,9 +164,9 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None): new_tree = tree if start_ref: - include_start = (start_ref[1] == BEGIN) + include_start = (start_ref[1] == BEGIN or is_replaced(start_ref[0])) if end_ref: - include_end = (end_ref[1] == END) + include_end = (end_ref[1] == END or is_replaced(end_ref[0])) # If start_ref is the same as end_ref, and we don't include the element, # we are removing the entire tree. 
We need to handle this separately, diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py index a4d72e1..269d76d 100644 --- a/tests/test_quotequail.py +++ b/tests/test_quotequail.py @@ -733,6 +733,16 @@ def test_gmail_reply(self): 'html_bottom': '<html><head></head><body><div class="gmail_extra">-- <br><div class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><b>John Doe</b></div><div dir="ltr"><b>Senior Director</b><div>Some Company</div></div></div></div></div>\n</div>\n</body></html>', }) + def test_reply_with_image(self): + html = "Test 2.<br><br>On Jun 05, 2018, at 09:56 AM, John Doe <john@example.com> wrote:<br><blockquote><img src=\"https://example.com\" class=\"fr-fic fr-dib\"><br>Some text 1.<br><br>Bart</blockquote>" + self.assertEqual(unwrap_html(html), { + 'date': 'Jun 05, 2018, at 09:56 AM', + 'from': 'John Doe <john@example.com>', + 'html': u'<div><img src=\"https://example.com\" class=\"fr-fic fr-dib\"><br>Some text 1.<br><br>Bart</div>', + 'html_top': u'Test 2.', + 'type': 'reply' + }) + def test_outlook_forward(self): data = self.read_file('outlook_forward.html') result = unwrap_html(data)