From 3fc6b07430fe02db66c2b75651caf11cbb8a8c92 Mon Sep 17 00:00:00 2001 From: Nitish Kansal Date: Mon, 15 Feb 2021 22:56:04 +0530 Subject: [PATCH 1/9] Enhancements (#84) * Use Default python email module function to retrieve filename instead of getting it from content id or content disposition as some of the emails are breaking because they send body with content-id * Regex update, Failing String: from ::1 port=44088 helo=mail.domain.com by mail.domain.com with esmtp envelope-from id 1jt7Nz-0000Da-by for xyz@domain.com; Wed, 08 Jul 2020 10:33:11 +0000 * Regex update, Failing String: from 0.0.0.0 port=35756 helo=domain.co.id by mail.domain.com with esmtps TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256 envelope-from id 1jr0oT-0006e6-Mx for local@domain.com; Thu, 02 Jul 2020 15:07:51 +0000 * Regex update, Failing String: from kk-worker4-prod unknown 172.16.0.56 by smtpd.kaskus.co.id Postfix with ESMTP id 8C02C2E063E for ; Wed, 8 Jul 2020 18:40:03 +0700 WIB * Regex update, Failing String: from 0.0.0.0 1.1.1.1 :56905 by domain.com with XMail 1.2 password ESMTP Server id for from ; Mon, 6 Jul 2020 01:09:35 +0900 * commented error logging in parsing received header as exception is caught and handled already and we dont even need to parse this header, just stopping some noise in our error logger * handle decoding of payload separately which are not truly decoded by get_payload * handle special case of 8bit and 7bit instead of just trying to decode all encoding which are not decoded properly by python * content-id cant define if its really an attachment * make a distinction between attachments and body and reduce number of failing emails and cover most of the cases * Typo Fixed: Needed to get the Content-Transfer-Encoding but we were picking Content-Type * If CTE is not available then also we should decode the payload which python encoded to keep encodings intact --- mailparser/const.py | 6 +++--- mailparser/mailparser.py | 37 ++++++++++++++++++++++++++++++------- mailparser/utils.py | 10 +++++++++- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/mailparser/const.py b/mailparser/const.py index 2ac985a..1dc1237 100644 --- a/mailparser/const.py +++ b/mailparser/const.py @@ -33,7 +33,7 @@ # need the beginning or space to differentiate from envelope-from ( - r'(?:(?:^|\s)from\s+(?P.+?)(?:\s*[(]?' + r'(?:(?:^)from\s+(?P.+?)(?:\s*[(]?' r'envelope-from|\s*[(]?envelope-sender|\s+' r'by|\s+with(?! cipher)|\s+id|\s+for|\s+via|;))' ), @@ -42,7 +42,7 @@ # envelope-from and -sender seem to optionally have space and/or # ( before them other clauses must have whitespace before ( - r'(?:by\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' + r'(?:\sby\s(?P.+?)(?:\s*[(]?envelope-from|\s*' r'[(]?envelope-sender|\s+from|\s+with' r'(?! cipher)|\s+id|\s+for|\s+via|;))' ), @@ -51,7 +51,7 @@ r'envelope-sender|\s+from|\s+by|\s+id|\s+for|\s+via|;))' ), ( - r'[^\w](?:id\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' + r'(?:\s+id\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' r'[(]?envelope-sender|\s+from|\s+by|\s+with' r'(?! cipher)|\s+for|\s+via|;))' ), diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 0509d0d..f9aed5a 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -353,14 +353,25 @@ def parse(self): charset = p.get_content_charset('utf-8') charset_raw = p.get_content_charset() log.debug("Charset {!r} part {!r}".format(charset, i)) + content_disposition = ported_string(p.get('content-disposition')) content_id = ported_string(p.get('content-id')) - log.debug("content-id {!r} part {!r}".format( - content_id, i)) - filename = decode_header_part( - p.get_filename("{}".format(content_id))) + log.debug("content-disposition {!r} part {!r}".format( + content_disposition, i)) + filename = p.get_filename() + + # Check if there is a filename present then its an attachment + # Check if there is no filename but content id is present then + # check again if content sub type is not html or plain to make + # sure it can be treated as attachment + is_attachment = False + if filename: + is_attachment = True + else: + if content_id and p.get_content_subtype() not in ['html', 'plain']: + is_attachment = True # this is an attachment - if filename: + if is_attachment: log.debug("Email part {!r} is an attachment".format(i)) log.debug("Filename {!r} part {!r}".format(filename, i)) binary = False @@ -412,8 +423,20 @@ def parse(self): # this isn't an attachments else: log.debug("Email part {!r} is not an attachment".format(i)) - payload = ported_string( - p.get_payload(decode=True), encoding=charset) + + # Get the payload using get_payload method with decode=True + # As Python truly decodes only 'base64', 'quoted-printable', 'x-uuencode', 'uuencode', 'uue', 'x-uue' + # And for other encodings it breaks the characters so we need to decode them with encoding python is appying + # To maintain the characters + payload = p.get_payload(decode=True) + cte = p.get('Content-Transfer-Encoding') + if cte: + cte = cte.lower() + if not cte or cte in ['7bit', '8bit']: + payload = payload.decode('raw-unicode-escape') + else: + payload = ported_string(payload, encoding=charset) + if payload: if p.get_content_subtype() == 'html': self._text_html.append(payload) diff --git a/mailparser/utils.py b/mailparser/utils.py index 428e520..16cb839 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -290,7 +290,15 @@ def parse_received(received): if len(values_by_clause) == 0: # we weren't able to match anything... msg = "Unable to match any clauses in %s" % (received) - log.error(msg) + + # Modification #1: Commenting the following log as this raised exception is caught above and then raw header is updated in response + # We dont want to get so many errors in our error logger as we are not even trying to parse the received headers + # Wanted to make it configurable via settiings, but this package does not depend on django and making configurable setting + # will make it django dependent, so better to keep it working with only python dependent and on any framework of python + # commenting it just for our use + + # log.error(msg) + raise MailParserReceivedParsingError(msg) return values_by_clause From ece9b7fb1db8474d0090e66e4be4be3d4a7d97e4 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 19 Feb 2021 22:50:20 +0100 Subject: [PATCH 2/9] Fixed issue #83. Fixed PR #84 (received regex and filename attachs) --- mailparser/const.py | 14 ++++++++------ mailparser/mailparser.py | 14 ++++++++++---- mailparser/utils.py | 31 +++++++++++++++++++------------ tests/test_mail_parser.py | 23 ++++++++++------------- 4 files changed, 47 insertions(+), 35 deletions(-) diff --git a/mailparser/const.py b/mailparser/const.py index 1dc1237..89efa3c 100644 --- a/mailparser/const.py +++ b/mailparser/const.py @@ -33,7 +33,7 @@ # need the beginning or space to differentiate from envelope-from ( - r'(?:(?:^)from\s+(?P.+?)(?:\s*[(]?' + r'(?:(?:^|\s)from\s+(?P.+?)(?:\s*[(]?' r'envelope-from|\s*[(]?envelope-sender|\s+' r'by|\s+with(?! cipher)|\s+id|\s+for|\s+via|;))' ), @@ -42,7 +42,7 @@ # envelope-from and -sender seem to optionally have space and/or # ( before them other clauses must have whitespace before ( - r'(?:\sby\s(?P.+?)(?:\s*[(]?envelope-from|\s*' + r'(?:by\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' r'[(]?envelope-sender|\s+from|\s+with' r'(?! cipher)|\s+id|\s+for|\s+via|;))' ), @@ -51,7 +51,7 @@ r'envelope-sender|\s+from|\s+by|\s+id|\s+for|\s+via|;))' ), ( - r'(?:\s+id\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' + r'[^\w](?:id\s+(?P.+?)(?:\s*[(]?envelope-from|\s*' r'[(]?envelope-sender|\s+from|\s+by|\s+with' r'(?! cipher)|\s+for|\s+via|;))' ), @@ -65,16 +65,18 @@ r'envelope-from|\s*[(]?envelope-sender|\s+' r'from|\s+by|\s+id|\s+for|\s+with(?! cipher)|;))' ), - # assumes emails are always inside <> r'(?:envelope-from\s+<(?P.+?)>)', r'(?:envelope-sender\s+<(?P.+?)>)', # datetime comes after ; at the end r';\s*(?P.*)', - + # sendgrid datetime - r'(?P\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{9} \+0000 UTC) m=\+\d+\.\d+' + ( + r'(?P\d{4}-\d{2}-\d{2} \d{2}:\d{2}:' + r'\d{2}\.\d{9} \+0000 UTC) m=\+\d+\.\d+' + ) ] RECEIVED_COMPILED_LIST = [ diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index f9aed5a..f23cc89 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -353,7 +353,8 @@ def parse(self): charset = p.get_content_charset('utf-8') charset_raw = p.get_content_charset() log.debug("Charset {!r} part {!r}".format(charset, i)) - content_disposition = ported_string(p.get('content-disposition')) + content_disposition = ported_string( + p.get('content-disposition')) content_id = ported_string(p.get('content-id')) log.debug("content-disposition {!r} part {!r}".format( content_disposition, i)) @@ -367,8 +368,10 @@ def parse(self): if filename: is_attachment = True else: - if content_id and p.get_content_subtype() not in ['html', 'plain']: + if content_id and \ + p.get_content_subtype() not in ['html', 'plain']: is_attachment = True + filename = content_id # this is an attachment if is_attachment: @@ -425,8 +428,11 @@ def parse(self): log.debug("Email part {!r} is not an attachment".format(i)) # Get the payload using get_payload method with decode=True - # As Python truly decodes only 'base64', 'quoted-printable', 'x-uuencode', 'uuencode', 'uue', 'x-uue' - # And for other encodings it breaks the characters so we need to decode them with encoding python is appying + # As Python truly decodes only 'base64', + # 'quoted-printable', 'x-uuencode', + # 'uuencode', 'uue', 'x-uue' + # And for other encodings it breaks the characters so + # we need to decode them with encoding python is appying # To maintain the characters payload = p.get_payload(decode=True) cte = p.get('Content-Transfer-Encoding') diff --git a/mailparser/utils.py b/mailparser/utils.py index 16cb839..3fd4a3c 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -101,19 +101,19 @@ def ported_string(raw_data, encoding='utf-8', errors='ignore'): return six.text_type() if isinstance(raw_data, six.text_type): - return raw_data.strip() + return raw_data if six.PY2: try: - return six.text_type(raw_data, encoding, errors).strip() + return six.text_type(raw_data, encoding, errors) except LookupError: - return six.text_type(raw_data, "utf-8", errors).strip() + return six.text_type(raw_data, "utf-8", errors) if six.PY3: try: - return six.text_type(raw_data, encoding).strip() + return six.text_type(raw_data, encoding) except (LookupError, UnicodeDecodeError): - return six.text_type(raw_data, "utf-8", errors).strip() + return six.text_type(raw_data, "utf-8", errors) def decode_header_part(header): @@ -141,7 +141,7 @@ def decode_header_part(header): log.error("Failed decoding header part: {}".format(header)) output += header - return output + return output.strip() def ported_open(file_): @@ -291,10 +291,18 @@ def parse_received(received): # we weren't able to match anything... msg = "Unable to match any clauses in %s" % (received) - # Modification #1: Commenting the following log as this raised exception is caught above and then raw header is updated in response - # We dont want to get so many errors in our error logger as we are not even trying to parse the received headers - # Wanted to make it configurable via settiings, but this package does not depend on django and making configurable setting - # will make it django dependent, so better to keep it working with only python dependent and on any framework of python + # Modification #1: Commenting the following log as + # this raised exception is caught above and then + # raw header is updated in response + # We dont want to get so many errors in our error + # logger as we are not even trying to parse the + # received headers + # Wanted to make it configurable via settiings, + # but this package does not depend on django and + # making configurable setting + # will make it django dependent, + # so better to keep it working with only python + # dependent and on any framework of python # commenting it just for our use # log.error(msg) @@ -476,7 +484,7 @@ def get_header(message, name): headers = [decode_header_part(i) for i in headers] if len(headers) == 1: # in this case return a string - return headers[0] + return headers[0].strip() # in this case return a list return headers return six.text_type() @@ -559,7 +567,6 @@ def write_sample(binary, payload, path, filename): # pragma: no cover """ if not os.path.exists(path): os.makedirs(path) - sample = os.path.join(path, filename) if binary: diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index f1740ea..543d1a7 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -200,14 +200,14 @@ def test_fingerprints_body(self): mail = mailparser.parse_from_file(mail_test_1) md5, sha1, sha256, sha512 = fingerprints( mail.body.encode("utf-8")) - self.assertEqual(md5, "1bbdb7dcf511113bbc0c1b214aeac392") - self.assertEqual(sha1, "ce9e62b50fa4e2168278880b14460b905b24eb4b") - self.assertEqual(sha256, ("1e9b96e3f1bc74702f9703391e8ba0715b849" - "7127a7ff857013ab33385898574")) - self.assertEqual(sha512, ("ad858f7b5ec5549e55650fd13df7683e403489" - "77522995851fb6b625ac54744cf3a4bf652784" - "dba971ef99afeec4e6caf2fdd10be72eabb730" - "c312ffbe1c4de3")) + self.assertEqual(md5, "55852a2efe95e7249887c92cc02123f8") + self.assertEqual(sha1, "62fef1e38327ed09363624c3aff8ea11723ee05f") + self.assertEqual(sha256, ("cd4af1017f2e623f6d38f691048b6" + "a28d8b1f44a0478137b4337eac6de78f71a")) + self.assertEqual(sha512, ("4a573c7929b078f2a2c1c0f869d418b0c020d4" + "d37196bd6dcc209f9ccb29ca67355aa5e47b97" + "c8bf90377204f59efde7ba1fc071b6f250a665" + "72f63b997e92e8")) def test_fingerprints_unicodeencodeerror(self): mail = mailparser.parse_from_file(mail_test_7) @@ -564,11 +564,7 @@ def test_ported_string(self): s = ported_string(raw_data) self.assertEqual(s, six.text_type()) - raw_data = "test " - s = ported_string(raw_data) - self.assertEqual(s, "test") - - raw_data = u"test " + raw_data = u"test" s = ported_string(raw_data) self.assertEqual(s, "test") @@ -671,5 +667,6 @@ def test_write_uuencode_attachment(self): shutil.rmtree(temp_dir) self.assertEqual(md5.hexdigest(), '4f2cf891e7cfb349fca812091f184ecc') + if __name__ == '__main__': unittest.main(verbosity=2) From 9e4e8fe548f29982bc7838e931c5cff39d4180a8 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 21 Feb 2021 23:09:14 +0100 Subject: [PATCH 3/9] Fixed Python 2.7. Test for Python 3.7/8/9 --- .travis.yml | 6 +++--- mailparser/mailparser.py | 2 +- tox.ini | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index ce3328c..4db0527 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,9 +5,9 @@ language: python python: - "2.7" - - "3.4" - - "3.5" - - "3.6" + - "3.7" + - "3.8" + - "3.9" before_install: - sudo apt-get -qq update diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index f23cc89..aba6eec 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -358,7 +358,7 @@ def parse(self): content_id = ported_string(p.get('content-id')) log.debug("content-disposition {!r} part {!r}".format( content_disposition, i)) - filename = p.get_filename() + filename = decode_header_part(p.get_filename()) # Check if there is a filename present then its an attachment # Check if there is no filename but content id is present then diff --git a/tox.ini b/tox.ini index eba8ffb..9d99fdd 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = begin, py27, py37, end +envlist = begin, py27, py39, end [testenv:begin] commands = coverage erase From 300be8b66d6404759f443899a7cfc4289d8cb3c1 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 22 Feb 2021 00:31:32 +0100 Subject: [PATCH 4/9] Refactoring README --- README.md | 45 ++++++++++++++++++++-------------------- docs/bitcoin-qrcode.png | Bin 0 -> 477 bytes 2 files changed, 22 insertions(+), 23 deletions(-) create mode 100644 docs/bitcoin-qrcode.png diff --git a/README.md b/README.md index 333c1ec..a9dd1cf 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,6 @@ # mail-parser -## Overview - mail-parser is not only a wrapper for [email](https://docs.python.org/2/library/email.message.html) Python Standard Library. It give you an easy way to pass from raw mail to Python object that you can use in your code. It's the key module of [SpamScope](https://github.com/SpamScope/spamscope). @@ -28,15 +26,25 @@ $ apt-cache show libemail-outlook-message-perl mail-parser supports Python 3. -## mail-parser on Web + +## Apache 2 Open Source License +mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license. + +If you want support the project: + + +[![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2) + + +# mail-parser on Web - [Splunk app](https://splunkbase.splunk.com/app/4129/) - [FreeBSD port](https://www.freshports.org/mail/py-mail-parser/) - [Arch User Repository](https://aur.archlinux.org/packages/mailparser/) -## Description +# Description -mail-parser takes as input a raw email and generates a parsed object. The properties of this object are the same name of +mail-parser takes as input a raw email and generates a parsed object. The properties of this object are the same name of [RFC headers](https://www.iana.org/assignments/message-headers/message-headers.xhtml): - bcc @@ -107,27 +115,18 @@ $ mail.to_raw (raw header) The command line tool use the JSON format. -### Defects +## Defects These defects can be used to evade the antispam filter. An example are the mails with a malformed boundary that can hide a not legitimate epilogue (often malware). This library can take these epilogues. -### Apache 2 Open Source License -mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license. - -If you want support the project: - - -[![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2) - - -## Authors +# Authors -### Main Author +## Main Author **Fedele Mantuano**: [LinkedIn](https://www.linkedin.com/in/fmantuano/) -## Installation +# Installation Clone repository @@ -149,7 +148,7 @@ or use `pip`: $ pip install mail-parser ``` -## Usage in a project +# Usage in a project Import `mailparser` module: @@ -196,7 +195,7 @@ It's possible to write the attachments on disk with the method: mail.write_attachments(base_path) ``` -## Usage from command-line +# Usage from command-line If you installed mailparser with `pip` or `setup.py` you can use it with command-line. @@ -216,7 +215,7 @@ optional arguments: -s STRING, --string STRING Raw email string (default: None) -k, --stdin Enable parsing from stdin (default: False) - -l {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET}, --log-level {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET} + -l {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET}, --log-level {CRITICAL,ERROR,WARNING,INFO,DEBUG,NOTSET} Set log level (default: WARNING) -j, --json Show the JSON of parsed mail (default: False) -b, --body Print the body of mail (default: False) @@ -253,11 +252,11 @@ $ mailparser -f example_mail -j This example will show you the tokenized mail in a JSON pretty format. -From [raw mail](https://gist.github.com/fedelemantuano/5dd702004c25a46b2bd60de21e67458e) to +From [raw mail](https://gist.github.com/fedelemantuano/5dd702004c25a46b2bd60de21e67458e) to [parsed mail](https://gist.github.com/fedelemantuano/e958aa2813c898db9d2d09469db8e6f6). -## Exceptions +# Exceptions Exceptions hierarchy of mail-parser: diff --git a/docs/bitcoin-qrcode.png b/docs/bitcoin-qrcode.png new file mode 100644 index 0000000000000000000000000000000000000000..e50dd044424267674783f68db5fa57e20ce7c9d4 GIT binary patch literal 477 zcmV<30V4j1P)W&uDT4<36ipK|P>$^+Wj^p&E5KB|KUkM9R{iH1~p zCIos))YuhXfK`Hv6v?wJ2OQF=Y7mtP*jJf-9H}D@D38sIr=t7JgJ1cd`xAHrblO>p Thxi|H00000NkvXXu0mjfB-zB~ literal 0 HcmV?d00001 From 1d2f77444b477ac819b8493d52399afde0d6d4ca Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 22 Feb 2021 00:35:40 +0100 Subject: [PATCH 5/9] Added bitcoin donate --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a9dd1cf..a748bb6 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,10 @@ If you want support the project: [![Donate](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif "Donate")](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2) +![Bitcoin Donate](https://i.stack.imgur.com/MnQ6V.png) + +![](https://github.com/SpamScope/mail-parser/raw/develop/docs/bitcoin-qrcode.png) + # mail-parser on Web - [Splunk app](https://splunkbase.splunk.com/app/4129/) From eebb8f44297d85f168161876fb7bee49f4ec49e0 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 22 Feb 2021 00:37:11 +0100 Subject: [PATCH 6/9] Minor change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a748bb6..dbe4b96 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ $ apt-cache show libemail-outlook-message-perl mail-parser supports Python 3. -## Apache 2 Open Source License +# Apache 2 Open Source License mail-parser can be downloaded, used, and modified free of charge. It is available under the Apache 2 license. If you want support the project: From c598b2b5305c48f33218ad7b81d95665d273d9e9 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Mon, 22 Feb 2021 23:49:30 +0100 Subject: [PATCH 7/9] Issue Email content 'rtf' not handled #68 --- mailparser/mailparser.py | 18 +++++++++++------- tests/test_mail_parser.py | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index aba6eec..4e14bc8 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -42,6 +42,7 @@ msgconvert, ported_open, ported_string, + random_string, receiveds_parsing, write_attachments, ) @@ -355,23 +356,26 @@ def parse(self): log.debug("Charset {!r} part {!r}".format(charset, i)) content_disposition = ported_string( p.get('content-disposition')) - content_id = ported_string(p.get('content-id')) log.debug("content-disposition {!r} part {!r}".format( content_disposition, i)) + content_id = ported_string(p.get('content-id')) + log.debug("content-id {!r} part {!r}".format( + content_id, i)) + content_subtype = ported_string(p.get_content_subtype()) + log.debug("content subtype {!r} part {!r}".format( + content_subtype, i)) filename = decode_header_part(p.get_filename()) - # Check if there is a filename present then its an attachment - # Check if there is no filename but content id is present then - # check again if content sub type is not html or plain to make - # sure it can be treated as attachment is_attachment = False if filename: is_attachment = True else: - if content_id and \ - p.get_content_subtype() not in ['html', 'plain']: + if content_id and content_subtype not in ('html', 'plain'): is_attachment = True filename = content_id + elif content_subtype in ('rtf'): + is_attachment = True + filename = "{}.rtf".format(random_string()) # this is an attachment if is_attachment: diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 543d1a7..400dff3 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -456,7 +456,7 @@ def test_parse_from_file_msg(self): m = mailparser.parse_from_file_msg(mail_outlook_1) email = m.mail self.assertIn("attachments", email) - self.assertEqual(len(email["attachments"]), 5) + self.assertEqual(len(email["attachments"]), 6) self.assertIn("from", email) self.assertEqual(email["from"][0][1], "NueblingV@w-vwa.de") self.assertIn("subject", email) From 0d4adad62dbce3aa669f13aeef6c0838a1cd39c0 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 26 Feb 2021 18:26:48 +0100 Subject: [PATCH 8/9] Added new managed version of Python. --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 959bd43..909da61 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,8 @@ "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], install_requires=requires, entry_points={'console_scripts': [ From 0abd896c0b5b3d33df8a31158bb4eacd9ce29e76 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 26 Feb 2021 18:29:43 +0100 Subject: [PATCH 9/9] New version --- mailparser/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailparser/version.py b/mailparser/version.py index dc0e3bc..41eeab1 100644 --- a/mailparser/version.py +++ b/mailparser/version.py @@ -17,7 +17,7 @@ limitations under the License. """ -__version__ = "3.14.0" +__version__ = "3.15.0" if __name__ == "__main__": print(__version__)