Skip to content

Commit

Permalink
Store HTML part of email if present
Browse files Browse the repository at this point in the history
Use a python library to strip signatures.
  • Loading branch information
nikolai-b committed Dec 14, 2018
1 parent 693aa64 commit a43bddb
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 9 deletions.
19 changes: 11 additions & 8 deletions app/models/message_thread.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,22 +206,25 @@ def add_subscriber(user)
def add_messages_from_email!(mail, in_reply_to)
from_address = mail.message.header[:from].addresses.first
from_name = mail.message.header[:from].display_names.first
h = ActionController::Base.helpers

user = User.find_or_invite(from_address, from_name)
fail "Invalid user: #{from_address.inspect} #{from_name.inspect}" if user.nil?

# For multipart messages we pull out the text/plain content
text = if mail.message.multipart?
mail.message.text_part.decoded
text = if mail.message.html_part
# For multipart messages we pull out the html part content and use python to remove the signature
body = %x(./lib/sig_strip.py #{Shellwords.escape(mail.message.html_part.decoded)})
body.gsub(%r{(</?html>|</?body>|</?head>)},"")
else
mail.message.decoded
# When there is no HTML we get the text part or just the message and use EmailReplyParser to remove the signature
body = (mail.message.text_part || mail.message).decoded
parsed = EmailReplyParser.read(body)
stripped = parsed.fragments.select { |f| !f.hidden? }.join("\n")
h.auto_link h.simple_format(stripped)
end

parsed = EmailReplyParser.read(text)
stripped = parsed.fragments.select { |f| !f.hidden? }.join("\n")

open_by!(user) if closed
messages.create!(body: stripped, created_by: user, in_reply_to: in_reply_to).tap { |mes| mes.skip_mod_queue! }
messages.create!(body: text, created_by: user, in_reply_to: in_reply_to).tap { |mes| mes.skip_mod_queue! }

# Attachments
mail.message.attachments.each do |attachment|
Expand Down
10 changes: 10 additions & 0 deletions lib/sig_strip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python3
"""Print the reply portion of an HTML email body.

Usage:
    sig_strip.py HTML       # HTML given as the first command-line argument
    sig_strip.py < file     # HTML read from standard input

The HTML is handed to talon's ``quotations.extract_from_html`` and the
result is written to stdout followed by a newline.
"""

import sys


def read_html(argv, stdin):
    """Return the HTML to process.

    Args:
        argv: the process argument list (``argv[0]`` is the script name).
        stdin: a readable text stream used when no argument was supplied.

    Returns:
        ``argv[1]`` when present (even if it is an empty string), otherwise
        everything read from ``stdin``.
    """
    # Existing callers pass the HTML as argv[1]. Falling back to stdin avoids
    # an IndexError when the argument is missing and lets callers pipe bodies
    # that are too large for the OS command-line length limit.
    if len(argv) > 1:
        return argv[1]
    return stdin.read()


def main(argv):
    """Extract the reply from the supplied HTML and print it."""
    html = read_html(argv, sys.stdin)

    # Imported here so the argument handling above stays usable without
    # loading talon.
    import talon
    from talon import quotations

    talon.init()
    print(quotations.extract_from_html(html))


if __name__ == "__main__":
    main(sys.argv)
4 changes: 4 additions & 0 deletions spec/factories/inbound_mails_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,9 @@
trait :with_pgp_sig do
raw_message { File.read(raw_email_path('pgp_sig')) }
end

trait :with_html do
raw_message { File.read(raw_email_path('html')) }
end
end
end
11 changes: 10 additions & 1 deletion spec/models/message_thread_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,16 @@
it 'should remove double-dash signatures' do
allow(mail.message).to receive(:decoded).and_return("Normal text here\n\n--\nSignature")
thread.add_messages_from_email!(mail, nil)
expect(messages[-1].body).to eq("Normal text here\n")
expect(messages[-1].body).to eq("<p>Normal text here\n</p>")
end
end

context 'with HTML' do
let(:mail) { create(:inbound_mail, :with_html) }

it 'should remove HTML signatures' do
thread.add_messages_from_email!(mail, nil)
expect(messages[-1].body).to eq("<p>\n This email has an HTML message body.\n</p>\n<br>\n<p>\nNikolai\n</p>\n<br>\n\n")
end
end

Expand Down
52 changes: 52 additions & 0 deletions spec/support/text/html_email.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Return-path: <[email protected]>
Envelope-to: [email protected]
Delivery-date: Thu, 18 Oct 2012 18:05:49 +0100
Received: from host86-178-142-72.range86-178.btcentralplus.com ([86.178.142.72]:60845 helo=[192.168.1.67])
by hera.krystal.co.uk with esmtpsa (TLSv1:DHE-RSA-CAMELLIA256-SHA:256)
(Exim 4.80)
(envelope-from <[email protected]>)
id 1TOtXZ-002C4J-1a
for [email protected]; Thu, 18 Oct 2012 18:05:49 +0100
Message-ID: <[email protected]>
Date: Thu, 18 Oct 2012 18:05:51 +0100
From: Andy Allan <[email protected]>
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120912 Thunderbird/15.0.1
MIME-Version: 1.0
To: [email protected]
Subject: Email with attached image
Content-Type: multipart/mixed;
boundary="------------090907060808010805080606"

This is a multi-part message in MIME format.
--------------090907060808010805080606
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit

This email has a plain message body.

Nikolai


--------------090907060808010805080606
Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

<p>
This email has an HTML message body.
</p>
<br>
<p>
Nikolai
</p>
<br>
<div class=3D"gmail_quote">
<div dir=3D"ltr">
On Mon, 10 Dec 2018 at 09:10, Martin wrote:
<br>
</div>
<blockquote class=3D"gmail_quote" style=3D"margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div dir=3D"ltr">Hi Nikolai,</div>
<br>
<div>Here is the a message I&#39;d like a reply too.</div>
</blockquote>
</div>

0 comments on commit a43bddb

Please sign in to comment.