From b929ea67203a182ded17bb9fcfd641687924a670 Mon Sep 17 00:00:00 2001
From: Andrei Petre <p31andrei@gmail.com>
Date: Thu, 5 Jul 2018 15:41:37 -0700
Subject: [PATCH] Add img to replaced tags, which are preserved when slicing
 HTML.

---
 quotequail/__init__.py   |  6 +++---
 quotequail/_html.py      | 23 ++++++++++++++++++-----
 tests/test_quotequail.py | 10 ++++++++++
 3 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/quotequail/__init__.py b/quotequail/__init__.py
index df80bfd..efae521 100644
--- a/quotequail/__init__.py
+++ b/quotequail/__init__.py
@@ -136,9 +136,9 @@ def unwrap_html(html):
             'type': typ,
         }
 
-        top_range = _html.trim_slice(lines, top_range)
-        main_range = _html.trim_slice(lines, main_range)
-        bottom_range = _html.trim_slice(lines, bottom_range)
+        top_range = _html.trim_slice(lines, top_range, start_refs, end_refs)
+        main_range = _html.trim_slice(lines, main_range, start_refs, end_refs)
+        bottom_range = _html.trim_slice(lines, bottom_range, start_refs, end_refs)
 
         if top_range:
             top_tree = _html.slice_tree(tree, start_refs, end_refs, top_range,
diff --git a/quotequail/_html.py b/quotequail/_html.py
index f40a595..f4caf03 100644
--- a/quotequail/_html.py
+++ b/quotequail/_html.py
@@ -7,6 +7,9 @@
 
 INLINE_TAGS = ['a', 'b', 'em', 'i', 'strong', 'span', 'font', 'q',
                'object', 'bdo', 'sub', 'sup', 'center', 'td', 'th']
+# Replaced elements: their content is binary data rather than text, so they
+# must be preserved in the HTML regardless of the text around them.
+REPLACED_TAGS = ['img']
 
 BEGIN = 'begin'
 END = 'end'
@@ -53,7 +56,13 @@ def trim_tree_before(element, include_element=True, keep_head=True):
                 parent_el.remove(remove_el)
         el = parent_el
 
-def trim_slice(lines, slice_tuple):
+def is_replaced(el):
+    """Return whether el's tag is a string listed in REPLACED_TAGS."""
+    tag = el.tag
+    return (isinstance(tag, string_class) and
+            tag.lower() in REPLACED_TAGS)
+
+def trim_slice(lines, slice_tuple, start_refs, end_refs):
     """
     Trim a slice tuple (begin, end) so it starts at the first non-empty line
     (obtained via indented_tree_line_generator / get_line_info) and ends at the
@@ -73,11 +82,15 @@ def _empty(line):
         slice_end = len(lines)
 
     # Trim from beginning
-    while slice_start < slice_end and _empty(lines[slice_start]):
+    while (slice_start < slice_end and
+           _empty(lines[slice_start]) and
+           not is_replaced(start_refs[slice_start][0])):
         slice_start += 1
 
     # Trim from end
-    while slice_end > slice_start and _empty(lines[slice_end-1]):
+    while (slice_end > slice_start and
+           _empty(lines[slice_end-1]) and
+           not is_replaced(end_refs[slice_end-1][0])):
         slice_end -= 1
 
     return (slice_start, slice_end)
@@ -151,9 +164,9 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
         new_tree = tree
 
     if start_ref:
-        include_start = (start_ref[1] == BEGIN)
+        include_start = (start_ref[1] == BEGIN or is_replaced(start_ref[0]))
     if end_ref:
-        include_end = (end_ref[1] == END)
+        include_end = (end_ref[1] == END or is_replaced(end_ref[0]))
 
     # If start_ref is the same as end_ref, and we don't include the element,
     # we are removing the entire tree. We need to handle this separately,
diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py
index a4d72e1..269d76d 100644
--- a/tests/test_quotequail.py
+++ b/tests/test_quotequail.py
@@ -733,6 +733,16 @@ def test_gmail_reply(self):
             'html_bottom': '<html><head></head><body><div class="gmail_extra">-- <br><div class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><b>John Doe</b></div><div dir="ltr"><b>Senior Director</b><div>Some Company</div></div></div></div></div>\n</div>\n</body></html>',
         })
 
+    def test_reply_with_image(self):
+        # An <img> opening the quoted reply must survive slice trimming.
+        result = unwrap_html("Test 2.<br><br>On Jun 05, 2018, at 09:56 AM, John Doe &lt;john@example.com&gt; wrote:<br><blockquote><img src=\"https://example.com\" class=\"fr-fic fr-dib\"><br>Some text 1.<br><br>Bart</blockquote>")
+        self.assertEqual(result, {
+            'date': 'Jun 05, 2018, at 09:56 AM',
+            'from': 'John Doe <john@example.com>',
+            'html': u'<div><img src=\"https://example.com\" class=\"fr-fic fr-dib\"><br>Some text 1.<br><br>Bart</div>',
+            'html_top': u'Test 2.',
+            'type': 'reply'})
+
     def test_outlook_forward(self):
         data = self.read_file('outlook_forward.html')
         result = unwrap_html(data)