From 1044333918a469d1892fd61021bf2798ed3c1210 Mon Sep 17 00:00:00 2001 From: pataquets Date: Wed, 15 Nov 2017 17:47:52 +0100 Subject: [PATCH 01/22] Add Docker workflow support for building and testing. --- Dockerfile | 5 +++++ docker-compose.yml | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8bad59b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,5 @@ +FROM python:onbuild + +RUN python setup.py install + +ENTRYPOINT [ "mailparser" ] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ae392d3 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +# Docker Compose build manifest. +# Usage: +# $ docker-compose up --build + +mailparser: + build: . + command: --json -f /mails/mail_test_1 + volumes: + - ./tests/mails/:/mails/:ro From dbbcaace6b02c6c982694ff56aed3ec541ef669c Mon Sep 17 00:00:00 2001 From: pataquets Date: Wed, 15 Nov 2017 18:17:24 +0100 Subject: [PATCH 02/22] Add Docker workflow to build docs. --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 2369f42..a00dbfd 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,23 @@ or use `pip`: $ pip install mail-parser ``` +### Building with Docker +Complete working Docker workflow is possible allowing you to start hacking and building without any other requirements or dependencies. All the required libs and build tools are handled by Docker build process. +Using the provided Dockerfile you can build a complete working image with all the required dependencies. If you're not familiar with Docker, better use Docker Compose to both build and run your source easy and effortlessly. + +From the ```docker-compose.yml``` directory, run: +``` +$ docker-compose up --build +``` +Skip the ```--build``` switch to launch the last built container image without rebuilding again. + +The provided ```docker-compose.yml``` file is configured to: + +* Mount your host's ```tests/mails/``` dir from your source tree inside the container at ```/data/``` (read-only). +* A command line test example. + +See the ```docker-compose.yml``` to view and tweak the launch parameters. + ## Usage in a project Import `mailparser` module: From 090ac85c86c5e6e784a3de84365b43234c692277 Mon Sep 17 00:00:00 2001 From: pataquets Date: Wed, 15 Nov 2017 17:50:57 +0100 Subject: [PATCH 03/22] Parse To: header into a list of recipients instead of a string. --- mailparser/mailparser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 9268561..4de5989 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -285,6 +285,7 @@ def _append_defects(self, part, part_content_type): def _reset(self): """Reset the state of object. """ + self._to = list() self._attachments = list() self._text_plain = list() self._defects = list() @@ -305,7 +306,7 @@ def _make_mail(self): "headers": self.headers, "message_id": self.message_id, "subject": self.subject, - "to": self.to_, + "to": email.utils.getaddresses([self.to_]), "receiveds": self.receiveds_obj, "has_defects": self.has_defects, "has_anomalies": self.has_anomalies} From 856850b86bc0b380ea5d2962f5289e833a3e8b1a Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 16 Nov 2017 19:05:44 +0100 Subject: [PATCH 04/22] Fixed to_ property to parse header `to` as list Added logging in WARNING for cli tool --- mailparser/__main__.py | 12 +++++++++++- mailparser/mailparser.py | 7 ++++--- tests/test_mail_parser.py | 12 +++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/mailparser/__main__.py b/mailparser/__main__.py index 1af686c..785f8fb 100644 --- a/mailparser/__main__.py +++ b/mailparser/__main__.py @@ -18,6 +18,7 @@ """ import argparse +import logging import os import runpy import sys @@ -32,6 +33,15 @@ __version__ = runpy.run_path( os.path.join(current, "version.py"))["__version__"] +# Logging +log = logging.getLogger() +log.setLevel(logging.WARNING) +ch = logging.StreamHandler(sys.stdout) +formatter = logging.Formatter( + "%(asctime)s | %(name)s | %(levelname)s | %(message)s") +ch.setFormatter(formatter) +log.addHandler(ch) + def get_args(): parser = argparse.ArgumentParser( @@ -221,7 +231,7 @@ def main(): safe_print(parser.headers) if args.to: - safe_print(parser.to_) + safe_print(json.dumps(parser.to_)) if args.from_: safe_print(parser.from_) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 4de5989..a8c12c6 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -306,7 +306,7 @@ def _make_mail(self): "headers": self.headers, "message_id": self.message_id, "subject": self.subject, - "to": email.utils.getaddresses([self.to_]), + "to": self.to_, "receiveds": self.receiveds_obj, "has_defects": self.has_defects, "has_anomalies": self.has_anomalies} @@ -504,8 +504,9 @@ def message_id(self): @property def to_(self): """Return the receiver of message. """ - return decode_header_part( - self.message.get('to', self.message.get('delivered-to', ''))) + to_ = decode_header_part(self.message.get( + 'to', self.message.get('delivered-to', ''))) + return email.utils.parseaddr([to_]), @property def from_(self): diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 7586dee..b33dab8 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -121,7 +121,7 @@ def test_parsing_know_values(self): raw = "mporcile@server_mail.it" result = mail.to_ - self.assertEqual(raw, result) + self.assertEqual(raw, result[0][1]) raw = "" result = mail.from_ @@ -174,7 +174,10 @@ def test_types(self): self.assertIsInstance(result, six.text_type) result = mail.to_ - self.assertIsInstance(result, six.text_type) + self.assertIsInstance(result, tuple) + self.assertEquals(len(result), 1) + self.assertIsInstance(result[0], tuple) + self.assertEquals(len(result[0]), 2) result = mail.subject self.assertIsInstance(result, six.text_type) @@ -364,7 +367,10 @@ def test_from_file_obj(self): self.assertIsInstance(result, six.text_type) result = mail.to_ - self.assertIsInstance(result, six.text_type) + self.assertIsInstance(result, tuple) + self.assertEquals(len(result), 1) + self.assertIsInstance(result[0], tuple) + self.assertEquals(len(result[0]), 2) result = mail.subject self.assertIsInstance(result, six.text_type) From 94e34c3163b3ed0f5e4687ad611ef492d3103442 Mon Sep 17 00:00:00 2001 From: Alfonso Montero Date: Sat, 18 Nov 2017 09:22:48 +0100 Subject: [PATCH 05/22] Improve 'To' and 'Reply-To' headers handling (#16) * Add headers parsing into a list in addition to a string. Also, use it for JSON output. * Remove unneded property resetting. * Allow 'To' field to have multiple addresses. * Add 'delivered-to' field as a stand-alone property. * Add multi-recipient 'To:' header to test email. --- README | 8 +++++--- README.md | 5 ++++- mailparser/__main__.py | 10 ++++++++++ mailparser/mailparser.py | 30 ++++++++++++++++++++++++------ tests/mails/mail_test_2 | 1 + 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/README b/README index d502075..cde57c5 100644 --- a/README +++ b/README @@ -23,8 +23,8 @@ Description mail-parser takes as input a raw mail and generates a parsed object. This object is a tokenized email with some indicator: -- body - headers - subject - from - to - attachments - message id - date -- charset mail - sender IP address - receiveds +- body - headers - subject - from - to - delivered_to - attachments - message id +- date - charset mail - sender IP address - receiveds We have also two types of indicator: - anomalies: mail without message id or date - `defects`_: mail with some not compliance RFC part @@ -96,6 +96,7 @@ Then you can get all parts mail.headers mail.message_id mail.to_ + mail.delivered_to_ mail.from_ mail.subject mail.text_plain_list: only text plain mail parts in a list @@ -125,7 +126,7 @@ These are all swithes: :: - usage: mailparser [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-m] + usage: mailparser [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-dt] [-m] [-u] [-c] [-d] [-n] [-i Trust mail server string] [-p] [-z] [-v] @@ -143,6 +144,7 @@ These are all swithes: -a, --attachments Print the attachments of mail (default: False) -r, --headers Print the headers of mail (default: False) -t, --to Print the to of mail (default: False) + -dt, --delivered-to Print the delivered-to of mail (default: False) -m, --from Print the from of mail (default: False) -u, --subject Print the subject of mail (default: False) -c, --receiveds Print all receiveds of mail (default: False) diff --git a/README.md b/README.md index a00dbfd..285152f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ mail-parser takes as input a raw email and generates a parsed object. This objec - subject - from - to + - delivered_to - attachments - message id - date @@ -118,6 +119,7 @@ mail.body mail.headers mail.message_id mail.to_ +mail.delivered_to_ mail.from_ mail.subject mail.text_plain_list: only text plain mail parts in a list @@ -141,7 +143,7 @@ If you installed mailparser with `pip` or `setup.py` you can use it with command These are all swithes: ``` -usage: mailparser.py [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-m] +usage: mailparser.py [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-dt] [-m] [-u] [-c] [-d] [-n] [-i Trust mail server string] [-p] [-z] [-v] @@ -158,6 +160,7 @@ optional arguments: -a, --attachments Print the attachments of mail (default: False) -r, --headers Print the headers of mail (default: False) -t, --to Print the to of mail (default: False) + -dt, --delivered-to Print the delivered-to of mail (default: False) -m, --from Print the from of mail (default: False) -u, --subject Print the subject of mail (default: False) -c, --receiveds Print all receiveds of mail (default: False) diff --git a/mailparser/__main__.py b/mailparser/__main__.py index 785f8fb..fc2773c 100644 --- a/mailparser/__main__.py +++ b/mailparser/__main__.py @@ -102,6 +102,13 @@ def get_args(): action="store_true", help="Print the to of mail") + parser.add_argument( + "-dt", + "--delivered-to", + dest="delivered_to", + action="store_true", + help="Print the delivered-to of mail") + parser.add_argument( "-m", "--from", @@ -233,6 +240,9 @@ def main(): if args.to: safe_print(json.dumps(parser.to_)) + if args.delivered_to: + safe_print(json.dumps(parser.delivered_to_)) + if args.from_: safe_print(parser.from_) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index a8c12c6..de7021c 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -20,6 +20,7 @@ from __future__ import unicode_literals import datetime import email +from email.header import decode_header import logging import os import re @@ -285,7 +286,6 @@ def _append_defects(self, part, part_content_type): def _reset(self): """Reset the state of object. """ - self._to = list() self._attachments = list() self._text_plain = list() self._defects = list() @@ -303,10 +303,11 @@ def _make_mail(self): "body": self.body, "date": self.date_mail, "from": self.from_, - "headers": self.headers, + "headers": self.headers_obj, "message_id": self.message_id, "subject": self.subject, "to": self.to_, + "delivered_to": self.delivered_to_, "receiveds": self.receiveds_obj, "has_defects": self.has_defects, "has_anomalies": self.has_anomalies} @@ -479,14 +480,24 @@ def message_as_string(self): @property def body(self): - """Return the only the body. """ + """Return only the body. """ return "\n".join(self.text_plain_list) + @property + def headers_obj(self): + """Return all headers as object + + Return: + list of headers + """ + + return self.message.items() + @property def headers(self): - """Return the only the headers. """ + """Return only the headers. """ s = "" - for k, v in self.message.items(): + for k, v in self.headers_obj: v_u = re.sub(" +", " ", decode_header_part(v)) s += k + ": " + v_u + "\n" return s @@ -506,7 +517,14 @@ def to_(self): """Return the receiver of message. """ to_ = decode_header_part(self.message.get( 'to', self.message.get('delivered-to', ''))) - return email.utils.parseaddr([to_]), + return email.utils.getaddresses([to_]) + + @property + def delivered_to_(self): + """Return the receiver of message. """ + delivered_to_ = decode_header_part( + self.message.get('delivered-to', '')) + return email.utils.getaddresses([delivered_to_]) @property def from_(self): diff --git a/tests/mails/mail_test_2 b/tests/mails/mail_test_2 index 3ca36ce..fce7f33 100644 --- a/tests/mails/mail_test_2 +++ b/tests/mails/mail_test_2 @@ -1,4 +1,5 @@ Return-Path: +To: echo@tu-berlin.de, "Porcile, M." Delivered-To: mporcile@server_mail.it Received: (qmail 21858 invoked from network); 29 Nov 2015 08:46:02 -0000 Received: from smtp2.regione.vda.it (217.76.210.112) From c0d262096b49523adbb4e9bce446fad43b067504 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 23 Nov 2017 00:03:50 +0100 Subject: [PATCH 06/22] Changed many properties. Added dynamic properties for addresses headers, converted json and raw header. This version is more object oriented and it's not back compatibility --- README.md | 2 +- mailparser/mailparser.py | 381 +++++++++++++++----------------------- tests/test_mail_parser.py | 28 +++ 3 files changed, 181 insertions(+), 230 deletions(-) diff --git a/README.md b/README.md index 285152f..51deb01 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ mail.body mail.headers mail.message_id mail.to_ -mail.delivered_to_ +mail.delivered_to mail.from_ mail.subject mail.text_plain_list: only text plain mail parts in a list diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index de7021c..f1a85b2 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -20,7 +20,6 @@ from __future__ import unicode_literals import datetime import email -from email.header import decode_header import logging import os import re @@ -38,10 +37,14 @@ REGXIP = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}") EPILOGUE_DEFECTS = {"StartBoundaryNotFoundDefect"} +ADDRESSES_HEADERS = ("bcc", "cc", "delivered_to", "from", "reply_to", "to") +MAIN_HEADERS = ("attachments", "body", "date", "headers" + "message_id", "received", "subject") def parse_from_file_obj(fp): - """Parsing email from a file-like object. + """ + Parsing email from a file-like object. Args: fp (file-like object): file-like object of raw email @@ -53,7 +56,8 @@ def parse_from_file_obj(fp): def parse_from_file(fp): - """Parsing email from file. + """ + Parsing email from file. Args: fp (string): file path of raw email @@ -65,7 +69,8 @@ def parse_from_file(fp): def parse_from_file_msg(fp): - """Parsing email from file Outlook msg. + """ + Parsing email from file Outlook msg. Args: fp (string): file path of raw Outlook email @@ -77,7 +82,8 @@ def parse_from_file_msg(fp): def parse_from_string(s): - """Parsing email from string. + """ + Parsing email from string. Args: s (string): raw email @@ -89,7 +95,8 @@ def parse_from_string(s): def parse_from_bytes(bt): - """Parsing email from bytes. Only for Python 3 + """ + Parsing email from bytes. Only for Python 3 Args: bt (bytes-like object): raw email as bytes-like object @@ -105,15 +112,21 @@ class MailParser(object): MailParser package provides a standard parser that understands most email document structures like official email package. MailParser handles the enconding of email and split the raw email for you. + + Headers: + https://www.iana.org/assignments/message-headers/message-headers.xhtml """ def __init__(self, message=None): - """Init a new object from a message object structure. """ + """ + Init a new object from a message object structure. + """ self._message = message @classmethod def from_file_obj(cls, fp): - """Init a new object from a file-like object. + """ + Init a new object from a file-like object. Not for Outlook msg. Args: @@ -129,7 +142,8 @@ def from_file_obj(cls, fp): @classmethod def from_file(cls, fp, is_outlook=False): - """Init a new object from a file path. + """ + Init a new object from a file path. Args: fp (string): file path of raw email @@ -164,7 +178,8 @@ def from_file_msg(cls, fp): @classmethod def from_string(cls, s): - """Init a new object from a string. + """ + Init a new object from a string. Args: s (string): raw email @@ -178,7 +193,8 @@ def from_string(cls, s): @classmethod def from_bytes(cls, bt): - """Init a new object from bytes. + """ + Init a new object from bytes. Args: bt (bytes-like object): raw email as bytes-like object @@ -193,79 +209,22 @@ def from_bytes(cls, bt): message = email.message_from_bytes(bt) return cls(message) - def parse_from_file_obj(self, fp): - """Parse the raw email from a file path. - - Args: - fp (file-like object): file-like object of raw email - - Returns: - Instance of MailParser - """ - - self._message = email.message_from_file(fp) - return self.parse() - - def parse_from_file(self, fp): - """Parse the raw email from a file path. - - Args: - fp (string): file path of raw email - - Returns: - Instance of MailParser - """ - - with ported_open(fp) as f: - self._message = email.message_from_file(f) - return self.parse() - - def parse_from_file_msg(self, fp): - """Parse the raw email from a file path Outlook. - - Args: - fp (string): file path of raw email - - Returns: - Instance of MailParser + def _reset(self): """ - t, _ = msgconvert(fp) - with ported_open(t) as f: - self._message = email.message_from_file(f) - os.remove(t) - return self.parse() - - def parse_from_string(self, s): - """Parse the raw email from a string. - - Args: - s (string): raw email - - Returns: - Instance of MailParser + Reset the state of mail object. """ - self._message = email.message_from_string(s) - return self.parse() - - def parse_from_bytes(self, bt): - """Parse the raw mail from bytes. - - Args: - bt (bytes-like object): raw email as bytes-like object + self._attachments = [] + self._text_plain = [] + self._defects = [] + self._defects_categories = set() + self._has_defects = False - Returns: - Instance of MailParser + def _append_defects(self, part, part_content_type): """ - if six.PY2: - raise EnvironmentError( - "Parsing from bytes is valid only for Python 3.x version") - - self._message = email.message_from_bytes(bt) - return self.parse() + Add new defects and defects categories to object attributes. - def _append_defects(self, part, part_content_type): - """The defects attribute contains a list of all the problems found + The defects are a list of all the problems found when parsing this message. """ @@ -273,7 +232,7 @@ def _append_defects(self, part, part_content_type): for e in part.defects: defects = "{}: {}".format(e.__class__.__name__, e.__doc__) - self._defects_category.add(e.__class__.__name__) + self._defects_categories.add(e.__class__.__name__) part_defects.setdefault(part_content_type, []).append(defects) # Tag mail with defect @@ -283,55 +242,36 @@ def _append_defects(self, part, part_content_type): # Save all defects self._defects.append(part_defects) - def _reset(self): - """Reset the state of object. """ + def _make_mail(self): + """ + This method assigns the right values to all tokens of email. + """ + self._mail = {} - self._attachments = list() - self._text_plain = list() - self._defects = list() - self._defects_category = set() - self._has_defects = False - self._has_anomalies = False - self._anomalies = list() + for i in MAIN_HEADERS + ADDRESSES_HEADERS: + if getattr(self, i): + self._mail[i] = getattr(self, i) + + # add defects + self._mail["has_defects"] = self.has_defects - def _make_mail(self): - """This method assigns the right values to all tokens of email. """ - - # mail object - self._mail = { - "attachments": self.attachments_list, - "body": self.body, - "date": self.date_mail, - "from": self.from_, - "headers": self.headers_obj, - "message_id": self.message_id, - "subject": self.subject, - "to": self.to_, - "delivered_to": self.delivered_to_, - "receiveds": self.receiveds_obj, - "has_defects": self.has_defects, - "has_anomalies": self.has_anomalies} - - # Add defects if self.has_defects: self._mail["defects"] = self.defects - self._mail["defects_category"] = list(self._defects_category) - - # Add anomalies - if self.has_anomalies: - self._mail["anomalies"] = self.anomalies + self._mail["defects_categories"] = list(self.defects_categories) def parse(self): - """This method parses the raw email and makes the tokens. + """ + This method parses the raw email and makes the tokens. Returns: Instance of MailParser with raw email parsed """ + # check if a valid mail if not self.message.keys(): raise ValueError("This email doesn't have headers") - # Reset for new mail + # reset and start parsing self._reset() parts = [] # Normal parts plus defects @@ -342,7 +282,7 @@ def parse(self): parts.append(p) # If defects are in epilogue defects get epilogue - if self.defects_category & EPILOGUE_DEFECTS: + if self.defects_categories & EPILOGUE_DEFECTS: epilogue = find_between( self.message.epilogue, "{}".format("--" + self.message.get_boundary()), @@ -396,7 +336,8 @@ def parse(self): return self def get_server_ipaddress(self, trust): - """Return the ip address of sender + """ + Return the ip address of sender Extract a reliable sender IP address heuristically for each message. Although the message format dictates a chain of relaying IP @@ -440,123 +381,89 @@ def get_server_ipaddress(self, trust): if not ip.is_private: return six.text_type(check[-1]) - @property - def receiveds_obj(self): - """Return all headers receiveds as object - - Return: - list of receiveds - """ + def __getattr__(self, name): + name = name.strip("_").lower() - output = [] - receiveds = self.message.get_all("received", []) + # object headers + if name in ADDRESSES_HEADERS: + h = decode_header_part(self.message.get(name, six.text_type())) + return email.utils.getaddresses([h]) - for i in receiveds: - output.append(decode_header_part(i)) + # json headers + elif name.endswith("_json"): + name = name[:-5] + return json.dumps(getattr(self, name), ensure_ascii=False) - return output + # raw headers + elif name.endswith("_raw"): + name = name[:-4] + return self.message.get(name) @property - def receiveds(self): - """Return all headers receiveds as json - - Return: - string of all receiveds + def subject(self): """ - s = "" - for i in self.receiveds_obj: - s += "Received: " + i + "\n" - return s.strip() - - @property - def message(self): - """email.message.Message class. """ - return self._message - - @property - def message_as_string(self): - """Return the entire message flattened as a string. """ - return self.message.as_string() - - @property - def body(self): - """Return only the body. """ - return "\n".join(self.text_plain_list) + Return subject text + """ + return decode_header_part(self.message.get( + 'subject', six.text_type())) @property - def headers_obj(self): - """Return all headers as object - - Return: - list of headers + def attachments(self): """ - - return self.message.items() + Return a list of all attachments in the mail + """ + return self._attachments @property - def headers(self): - """Return only the headers. """ - s = "" - for k, v in self.headers_obj: - v_u = re.sub(" +", " ", decode_header_part(v)) - s += k + ": " + v_u + "\n" - return s + def received(self): + """ + Return a list of all received headers + """ + output = [] + for i in self.message.get_all("received", []): + output.append(decode_header_part(i)) + return output @property def message_id(self): - """Return the message id. """ + """ + Return the message id. + """ message_id = self.message.get('message-id', None) - if not message_id: - self._anomalies.append('mail_without_message-id') - return None - else: - return ported_string(message_id) - - @property - def to_(self): - """Return the receiver of message. """ - to_ = decode_header_part(self.message.get( - 'to', self.message.get('delivered-to', ''))) - return email.utils.getaddresses([to_]) - - @property - def delivered_to_(self): - """Return the receiver of message. """ - delivered_to_ = decode_header_part( - self.message.get('delivered-to', '')) - return email.utils.getaddresses([delivered_to_]) + return ported_string(message_id) @property - def from_(self): - """Return the sender of message. """ - return decode_header_part( - self.message.get('from', '')) + def body(self): + """ + Return all text plain parts of mail delimited from string + "--- mail_boundary ---" + """ + return "\n--- mail_boundary ---\n".join(self.text_plain) @property - def subject(self): - """Return the subject of message. """ - return decode_header_part( - self.message.get('subject', '')) + def headers(self): + """ + Return only the headers as Python object + """ + d = {} + for k, v in self.message.items(): + d[k] = decode_header_part(v) + return d @property - def text_plain_list(self): - """Return a list of all text plain part of email. """ + def text_plain(self): + """ + Return a list of all text plain parts of email. + """ return self._text_plain @property - def attachments_list(self): - """Return the attachments list of email. """ - return self._attachments - - @property - def date_mail(self): - """Return the date of email as datetime.datetime. """ + def date(self): + """ + Return the mail date in datetime.datetime format and UTC. + """ date_ = self.message.get('date') - if not date_: - self._anomalies.append('mail_without_date') - return None - try: d = email.utils.parsedate_tz(date_) t = email.utils.mktime_tz(d) @@ -565,45 +472,61 @@ def date_mail(self): return None @property - def parsed_mail_obj(self): - """Return an Python object with all tokens of email. """ + def mail(self): + """ + Return the Python object of mail parsed + """ return self._mail @property - def parsed_mail_json(self): - """Return a json with all tokens of email. """ - self._mail["date"] = self.date_mail.isoformat() \ - if self.date_mail else "" - return json.dumps( - self._mail, ensure_ascii=False, indent=None) + def mail_json(self): + """ + Return the JSON of mail parsed + """ + if self.mail.get("date"): + self._mail["date"] = self.date.isoformat() + return json.dumps(self.mail, ensure_ascii=False) + + @property + def date_json(self): + """ + Return the JSON of date + """ + if self.date: + return json.dumps(self.date.isoformat()) @property def defects(self): - """The defects property contains a list of + """ + The defects property contains a list of all the problems found when parsing this message. """ return self._defects @property - def defects_category(self): - """Return a list with only defects categories. """ - return self._defects_category + def defects_categories(self): + """ + Return a set with only defects categories. + """ + return self._defects_categories @property def has_defects(self): - """Return a boolean: True if mail has defects. """ + """ + Return a boolean: True if mail has defects. + """ return self._has_defects @property - def anomalies(self): - """The anomalies property contains a list of - all anomalies in mail: - - mail_without_date - - mail_without_message-id + def message(self): """ - return self._anomalies + email.message.Message class. + """ + return self._message @property - def has_anomalies(self): - """Return a boolean: True if mail has anomalies. """ - return True if self.anomalies else False + def message_as_string(self): + """ + Return the entire message flattened as a string. + """ + return self.message.as_string() diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index b33dab8..79acba1 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -44,6 +44,7 @@ class TestMailParser(unittest.TestCase): + @unittest.skip("skip") def test_ipaddress(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" @@ -60,6 +61,7 @@ def test_ipaddress(self): result = mail.get_server_ipaddress(trust) self.assertEqual(result, None) + @unittest.skip("skip") def test_fingerprints_body(self): mail = mailparser.parse_from_file(mail_test_1) md5, sha1, sha256, sha512 = fingerprints( @@ -73,31 +75,37 @@ def test_fingerprints_body(self): "dba971ef99afeec4e6caf2fdd10be72eabb730" "c312ffbe1c4de3")) + @unittest.skip("skip") def test_fingerprints_unicodeencodeerror(self): mail = mailparser.parse_from_file(mail_test_7) for i in mail.attachments_list: fingerprints(i["payload"]) + @unittest.skip("skip") def test_malformed_mail(self): mail = mailparser.parse_from_file(mail_malformed_3) defects_category = mail.defects_category self.assertIn("StartBoundaryNotFoundDefect", defects_category) self.assertIn("MultipartInvariantViolationDefect", defects_category) + @unittest.skip("skip") def test_type_error(self): mail = mailparser.parse_from_file(mail_test_5) self.assertEqual(len(mail.attachments_list), 5) for i in mail.attachments_list: self.assertIsInstance(i["filename"], six.text_type) + @unittest.skip("skip") def test_valid_mail(self): with self.assertRaises(ValueError): mailparser.parse_from_string("fake mail") + @unittest.skip("skip") def test_valid_date_mail(self): mail = mailparser.parse_from_file(mail_test_1) self.assertIn("mail_without_date", mail.anomalies) + @unittest.skip("skip") def test_receiveds(self): mail = mailparser.parse_from_file(mail_test_1) self.assertIsInstance(mail.receiveds_obj, list) @@ -105,6 +113,7 @@ def test_receiveds(self): self.assertIsInstance(mail.receiveds, six.text_type) self.assertIn("Received:", mail.receiveds) + @unittest.skip("skip") def test_parsing_know_values(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" @@ -142,6 +151,7 @@ def test_parsing_know_values(self): result = mail.date_mail.isoformat() self.assertEqual(raw_utc, result) + @unittest.skip("skip") def test_types(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" @@ -197,6 +207,7 @@ def test_types(self): result = mail.anomalies self.assertIsInstance(result, list) + @unittest.skip("skip") def test_defects_anomalies(self): mail = mailparser.parse_from_file(mail_malformed_1) @@ -228,6 +239,7 @@ def test_defects_anomalies(self): self.assertIn("anomalies", mail.parsed_mail_obj) self.assertIn("has_anomalies", mail.parsed_mail_obj) + @unittest.skip("skip") def test_defects_bug(self): mail = mailparser.parse_from_file(mail_malformed_2) @@ -242,6 +254,7 @@ def test_defects_bug(self): result = len(mail.attachments_list) self.assertEqual(0, result) + @unittest.skip("skip") def test_add_content_type(self): mail = mailparser.parse_from_file(mail_test_3) @@ -259,11 +272,13 @@ def test_add_content_type(self): result["attachments"][0]["content_transfer_encoding"], "quoted-printable") + @unittest.skip("skip") def test_from_bytes(self): if six.PY2: with self.assertRaises(EnvironmentError): mailparser.MailParser.from_bytes(b"") + @unittest.skip("skip") def test_classmethods(self): # MailParser.from_file m = mailparser.MailParser.from_file(mail_test_3) @@ -277,6 +292,7 @@ def test_classmethods(self): result = m.parsed_mail_obj self.assertEqual(len(result["attachments"]), 1) + @unittest.skip("skip") def test_parser_methods(self): m = mailparser.MailParser() self.assertIsNone(m.message) @@ -294,11 +310,13 @@ def test_parser_methods(self): o.parse_from_file_obj(fp) self.assertEqual(len(result["attachments"]), 1) + @unittest.skip("skip") def test_bug_UnicodeDecodeError(self): m = mailparser.parse_from_file(mail_test_6) self.assertIsInstance(m.parsed_mail_obj, dict) self.assertIsInstance(m.parsed_mail_json, six.text_type) + @unittest.skip("skip") def test_parse_from_file_msg(self): """ Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0 @@ -320,6 +338,7 @@ def test_parse_from_file_msg(self): m = m.parse_from_file_msg(mail_outlook_1) self.assertEqual(email["body"], m.body) + @unittest.skip("skip") def test_msgconvert(self): """ Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0 @@ -334,6 +353,7 @@ def test_msgconvert(self): m = mailparser.parse_from_file(f) self.assertEqual(m.from_, "") + @unittest.skip("skip") def test_from_file_obj(self): with ported_open(mail_test_2) as fp: mail = mailparser.parse_from_file_obj(fp) @@ -390,6 +410,14 @@ def test_from_file_obj(self): result = mail.anomalies self.assertIsInstance(result, list) + def test_getattr(self): + mail = mailparser.parse_from_file(mail_test_2) + print("\n\n") + print mail.date_json + print("\n\n") + print mail.date_raw + print("\n\n") + if __name__ == '__main__': unittest.main(verbosity=2) From 95668267176a7814648c66bd14e692d05ab9fb92 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 00:06:29 +0100 Subject: [PATCH 07/22] Fixed some unittest --- mailparser/mailparser.py | 31 ++++++---- tests/test_mail_parser.py | 126 +++++++++++++++----------------------- 2 files changed, 70 insertions(+), 87 deletions(-) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index f1a85b2..f71d2ff 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -292,8 +292,7 @@ def parse(self): p = email.message_from_string(epilogue) parts.append(p) except TypeError: - log.warning( - "Failed to get epilogue part. Probably malformed.") + pass except: log.error( "Failed to get epilogue part. Should check raw mail.") @@ -397,7 +396,8 @@ def __getattr__(self, name): # raw headers elif name.endswith("_raw"): name = name[:-4] - return self.message.get(name) + raw = self.message.get_all(name) + return json.dumps(raw, ensure_ascii=False) @property def subject(self): @@ -450,6 +450,13 @@ def headers(self): d[k] = decode_header_part(v) return d + @property + def headers_json(self): + """ + Return the JSON of headers + """ + return json.dumps(self.headers, ensure_ascii=False, indent=2) + @property def text_plain(self): """ @@ -471,6 +478,14 @@ def date(self): except: return None + @property + def date_json(self): + """ + Return the JSON of date + """ + if self.date: + return json.dumps(self.date.isoformat(), ensure_ascii=False) + @property def mail(self): """ @@ -485,15 +500,7 @@ def mail_json(self): """ if self.mail.get("date"): self._mail["date"] = self.date.isoformat() - return json.dumps(self.mail, ensure_ascii=False) - - @property - def date_json(self): - """ - Return the JSON of date - """ - if self.date: - return json.dumps(self.date.isoformat()) + return json.dumps(self.mail, ensure_ascii=False, indent=2) @property def defects(self): diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 79acba1..546db7c 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -44,7 +44,6 @@ class TestMailParser(unittest.TestCase): - @unittest.skip("skip") def test_ipaddress(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" @@ -61,7 +60,6 @@ def test_ipaddress(self): result = mail.get_server_ipaddress(trust) self.assertEqual(result, None) - @unittest.skip("skip") def test_fingerprints_body(self): mail = mailparser.parse_from_file(mail_test_1) md5, sha1, sha256, sha512 = fingerprints( @@ -75,45 +73,34 @@ def test_fingerprints_body(self): "dba971ef99afeec4e6caf2fdd10be72eabb730" "c312ffbe1c4de3")) - @unittest.skip("skip") def test_fingerprints_unicodeencodeerror(self): mail = mailparser.parse_from_file(mail_test_7) - for i in mail.attachments_list: + for i in mail.attachments: fingerprints(i["payload"]) - @unittest.skip("skip") def test_malformed_mail(self): mail = mailparser.parse_from_file(mail_malformed_3) - defects_category = mail.defects_category - self.assertIn("StartBoundaryNotFoundDefect", defects_category) - self.assertIn("MultipartInvariantViolationDefect", defects_category) + defects_categories = mail.defects_categories + self.assertIn("StartBoundaryNotFoundDefect", defects_categories) + self.assertIn("MultipartInvariantViolationDefect", defects_categories) - @unittest.skip("skip") def test_type_error(self): mail = mailparser.parse_from_file(mail_test_5) - self.assertEqual(len(mail.attachments_list), 5) - for i in mail.attachments_list: + self.assertEqual(len(mail.attachments), 5) + for i in mail.attachments: self.assertIsInstance(i["filename"], six.text_type) - @unittest.skip("skip") def test_valid_mail(self): with self.assertRaises(ValueError): mailparser.parse_from_string("fake mail") - @unittest.skip("skip") - def test_valid_date_mail(self): - mail = mailparser.parse_from_file(mail_test_1) - self.assertIn("mail_without_date", mail.anomalies) - - @unittest.skip("skip") def test_receiveds(self): mail = mailparser.parse_from_file(mail_test_1) - self.assertIsInstance(mail.receiveds_obj, list) - self.assertEqual(len(mail.receiveds_obj), 4) - self.assertIsInstance(mail.receiveds, six.text_type) - self.assertIn("Received:", mail.receiveds) + self.assertEqual(len(mail.received), 4) + self.assertIsInstance(mail.received, list) + self.assertIsInstance(mail.received_json, six.text_type) + self.assertIsInstance(mail.received_raw, six.text_type) - @unittest.skip("skip") def test_parsing_know_values(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" @@ -128,13 +115,18 @@ def test_parsing_know_values(self): result = mail.message_id self.assertEqual(raw, result) - raw = "mporcile@server_mail.it" - result = mail.to_ + raw = "echo@tu-berlin.de" + result = mail.to + self.assertEqual(len(result), 2) + self.assertIsInstance(result, list) + self.assertIsInstance(result[0], tuple) + self.assertIsInstance(mail.to_json, six.text_type) + self.assertIsInstance(mail.to_raw, six.text_type) self.assertEqual(raw, result[0][1]) - raw = "" + raw = "meteo@regione.vda.it" result = mail.from_ - self.assertEqual(raw, result) + self.assertEqual(raw, result[0][1]) raw = "Bollettino Meteorologico del 29/11/2015" result = mail.subject @@ -143,49 +135,51 @@ def test_parsing_know_values(self): result = mail.has_defects self.assertEqual(False, result) - result = len(mail.attachments_list) + result = len(mail.attachments) self.assertEqual(3, result) - raw = "Sun, 29 Nov 2015 09:45:18 +0100" + # raw = "Sun, 29 Nov 2015 09:45:18 +0100" + self.assertIsInstance(mail.date_raw, six.text_type) + self.assertIsInstance(mail.date_json, six.text_type) raw_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat() - result = mail.date_mail.isoformat() + result = mail.date.isoformat() self.assertEqual(raw_utc, result) - @unittest.skip("skip") def test_types(self): mail = mailparser.parse_from_file(mail_test_2) trust = "smtp.customers.net" self.assertEqual(False, mail.has_defects) - result = mail.parsed_mail_obj + result = mail.mail self.assertIsInstance(result, dict) self.assertNotIn("defects", result) - self.assertNotIn("anomalies", result) self.assertIn("has_defects", result) - self.assertIn("has_anomalies", result) result = mail.get_server_ipaddress(trust) self.assertIsInstance(result, six.text_type) - result = mail.parsed_mail_json + result = mail.mail_json self.assertIsInstance(result, six.text_type) - result = mail.headers + result = mail.headers_json self.assertIsInstance(result, six.text_type) + result = mail.headers + self.assertIsInstance(result, dict) + result = mail.body self.assertIsInstance(result, six.text_type) - result = mail.date_mail + result = mail.date self.assertIsInstance(result, datetime.datetime) result = mail.from_ - self.assertIsInstance(result, six.text_type) + self.assertIsInstance(result, list) - result = mail.to_ - self.assertIsInstance(result, tuple) - self.assertEquals(len(result), 1) + result = mail.to + self.assertIsInstance(result, list) + self.assertEquals(len(result), 2) self.assertIsInstance(result[0], tuple) self.assertEquals(len(result[0]), 2) @@ -195,63 +189,53 @@ def test_types(self): result = mail.message_id self.assertIsInstance(result, six.text_type) - result = mail.attachments_list + result = mail.attachments self.assertIsInstance(result, list) - result = mail.date_mail + result = mail.date self.assertIsInstance(result, datetime.datetime) result = mail.defects self.assertIsInstance(result, list) - result = mail.anomalies - self.assertIsInstance(result, list) - - @unittest.skip("skip") - def test_defects_anomalies(self): + def test_defects(self): mail = mailparser.parse_from_file(mail_malformed_1) self.assertEqual(True, mail.has_defects) self.assertEqual(1, len(mail.defects)) - self.assertEqual(1, len(mail.defects_category)) - self.assertIn("defects", mail.parsed_mail_obj) + self.assertEqual(1, len(mail.defects_categories)) + self.assertIn("defects", mail.mail) self.assertIn("StartBoundaryNotFoundDefect", - mail.defects_category) - self.assertIsInstance(mail.parsed_mail_json, six.text_type) + mail.defects_categories) + self.assertIsInstance(mail.mail_json, six.text_type) - result = len(mail.attachments_list) + result = len(mail.attachments) self.assertEqual(1, result) mail = mailparser.parse_from_file(mail_test_1) if six.PY2: self.assertEqual(False, mail.has_defects) - self.assertNotIn("defects", mail.parsed_mail_obj) + self.assertNotIn("defects", mail.mail) elif six.PY3: self.assertEqual(True, mail.has_defects) self.assertEqual(1, len(mail.defects)) - self.assertEqual(1, len(mail.defects_category)) - self.assertIn("defects", mail.parsed_mail_obj) + self.assertEqual(1, len(mail.defects_categories)) + self.assertIn("defects", mail.mail) self.assertIn( - "CloseBoundaryNotFoundDefect", mail.defects_category) - - self.assertEqual(True, mail.has_anomalies) - self.assertEqual(2, len(mail.anomalies)) - self.assertIn("anomalies", mail.parsed_mail_obj) - self.assertIn("has_anomalies", mail.parsed_mail_obj) + "CloseBoundaryNotFoundDefect", mail.defects_categories) - @unittest.skip("skip") def test_defects_bug(self): mail = mailparser.parse_from_file(mail_malformed_2) self.assertEqual(True, mail.has_defects) self.assertEqual(1, len(mail.defects)) - self.assertEqual(1, len(mail.defects_category)) - self.assertIn("defects", mail.parsed_mail_obj) + self.assertEqual(1, len(mail.defects_categories)) + self.assertIn("defects", mail.mail) self.assertIn("StartBoundaryNotFoundDefect", - mail.defects_category) + mail.defects_categories) self.assertIsInstance(mail.parsed_mail_json, six.text_type) - result = len(mail.attachments_list) + result = len(mail.attachments) self.assertEqual(0, result) @unittest.skip("skip") @@ -410,14 +394,6 @@ def test_from_file_obj(self): result = mail.anomalies self.assertIsInstance(result, list) - def test_getattr(self): - mail = mailparser.parse_from_file(mail_test_2) - print("\n\n") - print mail.date_json - print("\n\n") - print mail.date_raw - print("\n\n") - if __name__ == '__main__': unittest.main(verbosity=2) From 3c92a01eb93b8a1994f75d2ab0d41dbf81c056ea Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 00:34:06 +0100 Subject: [PATCH 08/22] Fixed main --- mailparser/__main__.py | 29 ++++++++--------------------- mailparser/mailparser.py | 7 +++++++ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/mailparser/__main__.py b/mailparser/__main__.py index fc2773c..53c2233 100644 --- a/mailparser/__main__.py +++ b/mailparser/__main__.py @@ -137,13 +137,6 @@ def get_args(): action="store_true", help="Print the defects of mail") - parser.add_argument( - "-n", - "--anomalies", - dest="anomalies", - action="store_true", - help="Print the anomalies of mail") - parser.add_argument( "-o", "--outlook", @@ -227,37 +220,31 @@ def main(): parser = mailparser.parse_from_file_obj(sys.stdin) if args.json: - j = json.loads(parser.parsed_mail_json) - safe_print(json.dumps(j, ensure_ascii=False, indent=4)) + safe_print(parser.mail_json) if args.body: - # safe_print(parser.body) safe_print(parser.body) if args.headers: - safe_print(parser.headers) + safe_print(parser.headers_json) if args.to: - safe_print(json.dumps(parser.to_)) + safe_print(parser.to_json) if args.delivered_to: - safe_print(json.dumps(parser.delivered_to_)) + safe_print(parser.delivered_to_json) if args.from_: - safe_print(parser.from_) + safe_print(parser.from_json) if args.subject: safe_print(parser.subject) if args.receiveds: - safe_print(parser.receiveds) + safe_print(parser.received_json) if args.defects: - for i in parser.defects_category: - safe_print(i) - - if args.anomalies: - for i in parser.anomalies: + for i in parser.defects_categories: safe_print(i) if args.senderip: @@ -268,7 +255,7 @@ def main(): safe_print("Not Found") if args.attachments or args.attachments_hash: - print_attachments(parser.attachments_list, args.attachments_hash) + print_attachments(parser.attachments, args.attachments_hash) if args.mail_hash: print_mail_fingerprints(parser.body.encode("utf-8")) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index f71d2ff..77b8f92 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -424,6 +424,13 @@ def received(self): output.append(decode_header_part(i)) return output + @property + def received_json(self): + """ + Return a JSON of all received headers + """ + return json.dumps(self.received, ensure_ascii=False, indent=2) + @property def message_id(self): """ From ae4b4c53b44d4d07ee07a272119bbacc35fcc4bb Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 00:46:40 +0100 Subject: [PATCH 09/22] Fixed parsing headers like reply-to --- mailparser/mailparser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 77b8f92..fe8b58a 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -385,7 +385,8 @@ def __getattr__(self, name): # object headers if name in ADDRESSES_HEADERS: - h = decode_header_part(self.message.get(name, six.text_type())) + h = decode_header_part( + self.message.get(name.replace("_", "-"), six.text_type())) return email.utils.getaddresses([h]) # json headers From b71c1209b1b98803b1ffe590c7bd054d048c3605 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 10:38:31 +0100 Subject: [PATCH 10/22] Removed README rst format and added markdown2rst to convert markdown in rst. Moved docker file in docker folder. Added new requirement to convert markdown file. Fixed tests --- README | 181 ------------------ Dockerfile => docker/Dockerfile | 0 .../docker-compose.yml | 0 mailparser/utils.py | 8 + requirements.txt | 1 + setup.py | 5 +- tests/test_mail_parser.py | 9 +- tests/test_main.py | 3 - 8 files changed, 20 insertions(+), 187 deletions(-) delete mode 100644 README rename Dockerfile => docker/Dockerfile (100%) rename docker-compose.yml => docker/docker-compose.yml (100%) diff --git a/README b/README deleted file mode 100644 index cde57c5..0000000 --- a/README +++ /dev/null @@ -1,181 +0,0 @@ -|PyPI version| |Build Status| |Coverage Status| |BCH compliance| - -mail-parser -=========== - -Overview --------- - -mail-parser is a wrapper for `email`_ Python Standard Library. It’s the -key module of `SpamScope`_. - -mail-parser can parse Outlook email format (.msg). To use this feature, you need to install ``libemail-outlook-message-perl`` package. For Debian based systems: - -:: - - $ apt-get install libemail-outlook-message-perl - $ apt-cache show libemail-outlook-message-perl - -mail-parser supports Python 3. - -Description ------------ - -mail-parser takes as input a raw mail and generates a parsed object. -This object is a tokenized email with some indicator: -- body - headers - subject - from - to - delivered_to - attachments - message id -- date - charset mail - sender IP address - receiveds - -We have also two types of indicator: - anomalies: mail without message id or date -- `defects`_: mail with some not compliance RFC part - -Defects -~~~~~~~ - -These defects can be used to evade the antispam filter. An example are -the mails with a malformed boundary that can hide a not legitimate -epilogue (often malware). This library can take these epilogues. - -Apache 2 Open Source License -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -mail-parser can be downloaded, used, and modified free of charge. It is -available under the Apache 2 license. - -Authors -------- - -Main Author -~~~~~~~~~~~ - -Fedele Mantuano (**Twitter**: -[@fedelemantuano](https://twitter.com/fedelemantuano)) - -Installation ------------- - -Clone repository - -:: - - git clone https://github.com/SpamScope/mail-parser.git - -and install mail-parser with ``setup.py``: - -:: - - cd mail-parser - - python setup.py install - -or use ``pip``: - -:: - - pip install mail-parser - -Usage in a project -------------------- - -Import ``mailparser`` module: - -:: - - import mailparser - - mail = mailparser.parse_from_file(f) - mail = mailparser.parse_from_file_obj(fp) - mail = mailparser.parse_from_string(raw_mail) - mail = mailparser.parse_from_bytes(byte_mail) - -Then you can get all parts - -:: - - mail.body - mail.headers - mail.message_id - mail.to_ - mail.delivered_to_ - mail.from_ - mail.subject - mail.text_plain_list: only text plain mail parts in a list - mail.attachments_list: list of all attachments - mail.date_mail - mail.parsed_mail_obj: tokenized mail in a object - mail.parsed_mail_json: tokenized mail in a JSON - mail.defects: defect RFC not compliance - mail.defects_category: only defects categories - mail.has_defects - mail.anomalies - mail.has_anomalies - mail.get_server_ipaddress(trust="my_server_mail_trust") - mail.receiveds - -.. _email: https://docs.python.org/2/library/email.message.html -.. _SpamScope: https://github.com/SpamScope/spamscope -.. _defects: https://docs.python.org/2/library/email.message.html#email.message.Message.defects - -Usage from command-line ------------------------ - -If you installed mailparser with ``pip`` or ``setup.py`` you can use it with -command-line. - -These are all swithes: - -:: - - usage: mailparser [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-dt] [-m] - [-u] [-c] [-d] [-n] [-i Trust mail server string] [-p] [-z] - [-v] - - Wrapper for email Python Standard Library - - optional arguments: - -h, --help show this help message and exit - -f FILE_, --file FILE_ - Raw email file (default: None) - -s STRING_, --string STRING_ - Raw email string (default: None) - -k, --stdin Enable parsing from stdin (default: False) - -j, --json Show the JSON of parsed mail (default: False) - -b, --body Print the body of mail (default: False) - -a, --attachments Print the attachments of mail (default: False) - -r, --headers Print the headers of mail (default: False) - -t, --to Print the to of mail (default: False) - -dt, --delivered-to Print the delivered-to of mail (default: False) - -m, --from Print the from of mail (default: False) - -u, --subject Print the subject of mail (default: False) - -c, --receiveds Print all receiveds of mail (default: False) - -d, --defects Print the defects of mail (default: False) - -n, --anomalies Print the anomalies of mail (default: False) - -o, --outlook Analyze Outlook msg (default: False) - -i Trust mail server string, --senderip Trust mail server string - Extract a reliable sender IP address heuristically - (default: None) - -p, --mail-hash Print mail fingerprints without headers (default: - False) - -z, --attachments-hash - Print attachments with fingerprints (default: False) - -v, --version show program's version number and exit - - It takes as input a raw mail and generates a parsed object. - -Example: - -.. code:: shell - - $ mailparser -f example_mail -j - -This example will show you the tokenized mail in a JSON pretty format. - - -.. |PyPI version| image:: https://badge.fury.io/py/mail-parser.svg - :target: https://badge.fury.io/py/mail-parser -.. |Build Status| image:: https://travis-ci.org/SpamScope/mail-parser.svg?branch=develop - :target: https://travis-ci.org/SpamScope/mail-parser -.. |Coverage Status| image:: https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop - :target: https://coveralls.io/github/SpamScope/mail-parser?branch=develop -.. |BCH compliance| image:: https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=devel - :target: https://bettercodehub.com/ diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/docker-compose.yml b/docker/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to docker/docker-compose.yml diff --git a/mailparser/utils.py b/mailparser/utils.py index db682c6..a00f0cb 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -184,3 +184,11 @@ def msgconvert(email): else: stdoutdata, _ = out.communicate() return temp, stdoutdata.decode("utf-8").strip() + + +def markdown2rst(file_path): + import pandoc + doc = pandoc.Document() + with open(file_path) as f: + doc.markdown = f.read() + return doc.rst diff --git a/requirements.txt b/requirements.txt index ded39e4..e4e9a04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ ipaddress==1.0.17 +pyandoc==0.2.0 simplejson==3.10.0 six==1.10.0 diff --git a/setup.py b/setup.py index 9945d66..93468d5 100644 --- a/setup.py +++ b/setup.py @@ -21,11 +21,12 @@ import runpy from setuptools import setup +from mailparser.utils import markdown2rst current = os.path.realpath(os.path.dirname(__file__)) +readme_file = os.path.join(current, 'README.md') -with open(os.path.join(current, 'README')) as f: - long_description = f.read().strip() +long_description = markdown2rst(readme_file) with open(os.path.join(current, 'requirements.txt')) as f: requires = f.read().splitlines() diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 546db7c..3ac9ba6 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -39,7 +39,8 @@ sys.path.append(root) import mailparser -from mailparser.utils import fingerprints, msgconvert, ported_open +from mailparser.utils import ( + fingerprints, msgconvert, ported_open, markdown2rst) class TestMailParser(unittest.TestCase): @@ -394,6 +395,12 @@ def test_from_file_obj(self): result = mail.anomalies self.assertIsInstance(result, list) + def test_markdown2rst(self): + current = os.path.realpath(os.path.dirname(__file__)) + readme = os.path.join(current, "..", "README.md") + rst = markdown2rst(readme) + self.assertIsInstance(rst, str) + if __name__ == '__main__': unittest.main(verbosity=2) diff --git a/tests/test_main.py b/tests/test_main.py index 6b9d025..ea531b8 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -78,9 +78,6 @@ def test_options(self): parsed = self.parser.parse_args(["--file", "mail.eml", "-d"]) self.assertTrue(parsed.defects) - parsed = self.parser.parse_args(["--file", "mail.eml", "--anomalies"]) - self.assertTrue(parsed.anomalies) - parsed = self.parser.parse_args([ "--file", "mail.eml", "--senderip", "trust"]) self.assertTrue(parsed.senderip) From 9f87a12a1f6bc2b4256f16590186a49343f475db Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 10:51:25 +0100 Subject: [PATCH 11/22] Fixed travis to install pandoc --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 471c753..c4e2570 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: - sudo apt-get -qq update # Install msgconvert - - sudo apt-get install -y libemail-outlook-message-perl + - sudo apt-get install -y libemail-outlook-message-perl pandoc # command to install dependencies install: From fb3cd22442f12e0a272ef8fdf76bf55ac7677239 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 10:52:41 +0100 Subject: [PATCH 12/22] Fixed badges to point to master --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 51deb01..15a893b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![PyPI version](https://badge.fury.io/py/mail-parser.svg)](https://badge.fury.io/py/mail-parser) -[![Build Status](https://travis-ci.org/SpamScope/mail-parser.svg?branch=develop)](https://travis-ci.org/SpamScope/mail-parser) -[![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) -[![BCH compliance](https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=develop)](https://bettercodehub.com/) +[![Build Status](https://travis-ci.org/SpamScope/mail-parser.svg?branch=master)](https://travis-ci.org/SpamScope/mail-parser) +[![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=master)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) +[![BCH compliance](https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=master)](https://bettercodehub.com/) # mail-parser From ae9dac2b88383a5d36567a85d73dfb22163df3d7 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 11:46:19 +0100 Subject: [PATCH 13/22] Changed python pandoc library --- mailparser/utils.py | 8 +++----- requirements.txt | 2 +- tests/test_mail_parser.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mailparser/utils.py b/mailparser/utils.py index a00f0cb..a44bd28 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -187,8 +187,6 @@ def msgconvert(email): def markdown2rst(file_path): - import pandoc - doc = pandoc.Document() - with open(file_path) as f: - doc.markdown = f.read() - return doc.rst + import pypandoc + output = pypandoc.convert_file(file_path, 'rst') + return output diff --git a/requirements.txt b/requirements.txt index e4e9a04..7a9fc7f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ ipaddress==1.0.17 -pyandoc==0.2.0 +pypandoc==1.4 simplejson==3.10.0 six==1.10.0 diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 3ac9ba6..784fcbd 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -399,7 +399,7 @@ def test_markdown2rst(self): current = os.path.realpath(os.path.dirname(__file__)) readme = os.path.join(current, "..", "README.md") rst = markdown2rst(readme) - self.assertIsInstance(rst, str) + self.assertIsInstance(rst, six.text_type) if __name__ == '__main__': From f88123e5dc7a37d14aede207b96d6e01c3514280 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 13:07:26 +0100 Subject: [PATCH 14/22] Fixed all tests --- tests/test_mail_parser.py | 70 +++++++++++---------------------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 784fcbd..bc330be 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -239,13 +239,12 @@ def test_defects_bug(self): result = len(mail.attachments) self.assertEqual(0, result) - @unittest.skip("skip") def test_add_content_type(self): mail = mailparser.parse_from_file(mail_test_3) self.assertEqual(False, mail.has_defects) - result = mail.parsed_mail_obj + result = mail.mail self.assertEqual(len(result["attachments"]), 1) self.assertIsInstance( @@ -257,51 +256,29 @@ def test_add_content_type(self): result["attachments"][0]["content_transfer_encoding"], "quoted-printable") - @unittest.skip("skip") def test_from_bytes(self): if six.PY2: with self.assertRaises(EnvironmentError): mailparser.MailParser.from_bytes(b"") - @unittest.skip("skip") def test_classmethods(self): # MailParser.from_file m = mailparser.MailParser.from_file(mail_test_3) m.parse() - result = m.parsed_mail_obj + result = m.mail self.assertEqual(len(result["attachments"]), 1) # MailParser.from_string m = mailparser.MailParser.from_string(m.message_as_string) m.parse() - result = m.parsed_mail_obj - self.assertEqual(len(result["attachments"]), 1) - - @unittest.skip("skip") - def test_parser_methods(self): - m = mailparser.MailParser() - self.assertIsNone(m.message) - - m.parse_from_file(mail_test_3) - result = m.parsed_mail_obj - self.assertEqual(len(result["attachments"]), 1) - - n = mailparser.MailParser() - n.parse_from_string(m.message_as_string) + result = m.mail self.assertEqual(len(result["attachments"]), 1) - o = mailparser.MailParser() - with open(mail_test_3) as fp: - o.parse_from_file_obj(fp) - self.assertEqual(len(result["attachments"]), 1) - - @unittest.skip("skip") def test_bug_UnicodeDecodeError(self): m = mailparser.parse_from_file(mail_test_6) - self.assertIsInstance(m.parsed_mail_obj, dict) - self.assertIsInstance(m.parsed_mail_json, six.text_type) + self.assertIsInstance(m.mail, dict) + self.assertIsInstance(m.mail_json, six.text_type) - @unittest.skip("skip") def test_parse_from_file_msg(self): """ Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0 @@ -312,18 +289,13 @@ def test_parse_from_file_msg(self): """ m = mailparser.parse_from_file_msg(mail_outlook_1) - email = m.parsed_mail_obj + email = m.mail self.assertIn("attachments", email) self.assertEqual(len(email["attachments"]), 5) self.assertIn("from", email) - self.assertEqual(email["from"], "") + self.assertEqual(email["from"][0][1], "NueblingV@w-vwa.de") self.assertIn("subject", email) - m = mailparser.MailParser() - m = m.parse_from_file_msg(mail_outlook_1) - self.assertEqual(email["body"], m.body) - - @unittest.skip("skip") def test_msgconvert(self): """ Tested mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0 @@ -336,9 +308,8 @@ def test_msgconvert(self): f, _ = msgconvert(mail_outlook_1) self.assertTrue(os.path.exists(f)) m = mailparser.parse_from_file(f) - self.assertEqual(m.from_, "") + self.assertEqual(m.from_[0][1], "NueblingV@w-vwa.de") - @unittest.skip("skip") def test_from_file_obj(self): with ported_open(mail_test_2) as fp: mail = mailparser.parse_from_file_obj(fp) @@ -346,34 +317,36 @@ def test_from_file_obj(self): self.assertEqual(False, mail.has_defects) - result = mail.parsed_mail_obj + result = mail.mail self.assertIsInstance(result, dict) self.assertNotIn("defects", result) self.assertNotIn("anomalies", result) self.assertIn("has_defects", result) - self.assertIn("has_anomalies", result) result = mail.get_server_ipaddress(trust) self.assertIsInstance(result, six.text_type) - result = mail.parsed_mail_json + result = mail.mail_json self.assertIsInstance(result, six.text_type) result = mail.headers + self.assertIsInstance(result, dict) + + result = mail.headers_json self.assertIsInstance(result, six.text_type) result = mail.body self.assertIsInstance(result, six.text_type) - result = mail.date_mail + result = mail.date self.assertIsInstance(result, datetime.datetime) result = mail.from_ - self.assertIsInstance(result, six.text_type) + self.assertIsInstance(result, list) - result = mail.to_ - self.assertIsInstance(result, tuple) - self.assertEquals(len(result), 1) + result = mail.to + self.assertIsInstance(result, list) + self.assertEquals(len(result), 2) self.assertIsInstance(result[0], tuple) self.assertEquals(len(result[0]), 2) @@ -383,18 +356,15 @@ def test_from_file_obj(self): result = mail.message_id self.assertIsInstance(result, six.text_type) - result = mail.attachments_list + result = mail.attachments self.assertIsInstance(result, list) - result = mail.date_mail + result = mail.date self.assertIsInstance(result, datetime.datetime) result = mail.defects self.assertIsInstance(result, list) - result = mail.anomalies - self.assertIsInstance(result, list) - def test_markdown2rst(self): current = os.path.realpath(os.path.dirname(__file__)) readme = os.path.join(current, "..", "README.md") From 31f6ff4b9061855e159e5334a52762b0effb8dca Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 16:48:14 +0100 Subject: [PATCH 15/22] Remove pypandoc requirement. README.rst will make offline. New Dockerfile --- README.rst | 216 ++++++++++++++++++++++++++++++++++++++ docker/Dockerfile | 12 ++- requirements.txt | 1 - setup.py | 5 +- tests/test_mail_parser.py | 9 +- 5 files changed, 226 insertions(+), 17 deletions(-) create mode 100644 README.rst diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..1d1c7a2 --- /dev/null +++ b/README.rst @@ -0,0 +1,216 @@ +|PyPI version| |Build Status| |Coverage Status| |BCH compliance| + +mail-parser +=========== + +Overview +-------- + +mail-parser is a wrapper for +`email `__ Python +Standard Library. It's the key module of +`SpamScope `__. + +mail-parser can parse Outlook email format (.msg). To use this feature, +you need to install ``libemail-outlook-message-perl`` package. For +Debian based systems: + +:: + + $ apt-get install libemail-outlook-message-perl + +For more details: + +:: + + $ apt-cache show libemail-outlook-message-perl + +mail-parser supports Python 3. + +Description +----------- + +mail-parser takes as input a raw email and generates a parsed object. +This object is a tokenized email with some indicator: - body - headers - +subject - from - to - delivered\_to - attachments - message id - date - +charset mail - sender IP address - receiveds + +We have also two types of indicator: - anomalies: mail without message +id or date - +`defects `__: +mail with some not compliance RFC part + +Defects +~~~~~~~ + +These defects can be used to evade the antispam filter. An example are +the mails with a malformed boundary that can hide a not legitimate +epilogue (often malware). This library can take these epilogues. + +Apache 2 Open Source License +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +mail-parser can be downloaded, used, and modified free of charge. It is +available under the Apache 2 license. |Donate| + +Authors +------- + +Main Author +~~~~~~~~~~~ + +Fedele Mantuano (**Twitter**: +[@fedelemantuano](https://twitter.com/fedelemantuano)) + +Installation +------------ + +Clone repository + +:: + + git clone https://github.com/SpamScope/mail-parser.git + +and install mail-parser with ``setup.py``: + +:: + + $ cd mail-parser + + $ python setup.py install + +or use ``pip``: + +:: + + $ pip install mail-parser + +Building with Docker +~~~~~~~~~~~~~~~~~~~~ + +Complete working Docker workflow is possible allowing you to start +hacking and building without any other requirements or dependencies. All +the required libs and build tools are handled by Docker build process. +Using the provided Dockerfile you can build a complete working image +with all the required dependencies. If you're not familiar with Docker, +better use Docker Compose to both build and run your source easy and +effortlessly. + +From the ``docker-compose.yml`` directory, run: + +:: + + $ docker-compose up --build + +Skip the ``--build`` switch to launch the last built container image +without rebuilding again. + +The provided ``docker-compose.yml`` file is configured to: + +- Mount your host's ``tests/mails/`` dir from your source tree inside + the container at ``/data/`` (read-only). +- A command line test example. + +See the ``docker-compose.yml`` to view and tweak the launch parameters. + +Usage in a project +------------------ + +Import ``mailparser`` module: + +:: + + import mailparser + + mail = mailparser.parse_from_file(f) + mail = mailparser.parse_from_file_obj(fp) + mail = mailparser.parse_from_string(raw_mail) + mail = mailparser.parse_from_bytes(byte_mail) + +Then you can get all parts + +:: + + mail.body + mail.headers + mail.message_id + mail.to_ + mail.delivered_to + mail.from_ + mail.subject + mail.text_plain_list: only text plain mail parts in a list + mail.attachments_list: list of all attachments + mail.date_mail + mail.parsed_mail_obj: tokenized mail in a object + mail.parsed_mail_json: tokenized mail in a JSON + mail.defects: defect RFC not compliance + mail.defects_category: only defects categories + mail.has_defects + mail.anomalies + mail.has_anomalies + mail.get_server_ipaddress(trust="my_server_mail_trust") + mail.receiveds + +Usage from command-line +----------------------- + +If you installed mailparser with ``pip`` or ``setup.py`` you can use it +with command-line. + +These are all swithes: + +:: + + usage: mailparser.py [-h] (-f FILE | -s STRING | -k) [-j] [-b] [-a] [-r] [-t] [-dt] [-m] + [-u] [-c] [-d] [-n] [-i Trust mail server string] [-p] [-z] + [-v] + + Wrapper for email Python Standard Library + + optional arguments: + -h, --help show this help message and exit + -f FILE, --file FILE Raw email file (default: None) + -s STRING, --string STRING + Raw email string (default: None) + -k, --stdin Enable parsing from stdin (default: False) + -j, --json Show the JSON of parsed mail (default: False) + -b, --body Print the body of mail (default: False) + -a, --attachments Print the attachments of mail (default: False) + -r, --headers Print the headers of mail (default: False) + -t, --to Print the to of mail (default: False) + -dt, --delivered-to Print the delivered-to of mail (default: False) + -m, --from Print the from of mail (default: False) + -u, --subject Print the subject of mail (default: False) + -c, --receiveds Print all receiveds of mail (default: False) + -d, --defects Print the defects of mail (default: False) + -n, --anomalies Print the anomalies of mail (default: False) + -o, --outlook Analyze Outlook msg (default: False) + -i Trust mail server string, --senderip Trust mail server string + Extract a reliable sender IP address heuristically + (default: None) + -p, --mail-hash Print mail fingerprints without headers (default: + False) + -z, --attachments-hash + Print attachments with fingerprints (default: False) + -v, --version show program's version number and exit + + It takes as input a raw mail and generates a parsed object. + +Example: + +.. code:: shell + + $ mailparser -f example_mail -j + +This example will show you the tokenized mail in a JSON pretty format. + +.. |PyPI version| image:: https://badge.fury.io/py/mail-parser.svg + :target: https://badge.fury.io/py/mail-parser +.. |Build Status| image:: https://travis-ci.org/SpamScope/mail-parser.svg?branch=master + :target: https://travis-ci.org/SpamScope/mail-parser +.. |Coverage Status| image:: https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=master + :target: https://coveralls.io/github/SpamScope/mail-parser?branch=develop +.. |BCH compliance| image:: https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=master + :target: https://bettercodehub.com/ +.. |Donate| image:: https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif + :target: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2 diff --git a/docker/Dockerfile b/docker/Dockerfile index 8bad59b..d612ab0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,7 @@ -FROM python:onbuild - -RUN python setup.py install - -ENTRYPOINT [ "mailparser" ] +FROM python +ENV MAIL_PARSER_PATH=/tmp/mailparser +ARG BRANCH=develop +RUN git clone -b $BRANCH --single-branch https://github.com/SpamScope/mail-parser.git $MAIL_PARSER_PATH && \ + cd $MAIL_PARSER_PATH && python setup.py install +ENTRYPOINT ["mailparser"] +CMD ["-h"] diff --git a/requirements.txt b/requirements.txt index 7a9fc7f..ded39e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ ipaddress==1.0.17 -pypandoc==1.4 simplejson==3.10.0 six==1.10.0 diff --git a/setup.py b/setup.py index 93468d5..774bf2f 100644 --- a/setup.py +++ b/setup.py @@ -21,12 +21,11 @@ import runpy from setuptools import setup -from mailparser.utils import markdown2rst current = os.path.realpath(os.path.dirname(__file__)) -readme_file = os.path.join(current, 'README.md') -long_description = markdown2rst(readme_file) +with open(os.path.join(current, 'README.rst')) as f: + long_description = f.read() with open(os.path.join(current, 'requirements.txt')) as f: requires = f.read().splitlines() diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index bc330be..8a04d27 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -39,8 +39,7 @@ sys.path.append(root) import mailparser -from mailparser.utils import ( - fingerprints, msgconvert, ported_open, markdown2rst) +from mailparser.utils import fingerprints, msgconvert, ported_open class TestMailParser(unittest.TestCase): @@ -365,12 +364,6 @@ def test_from_file_obj(self): result = mail.defects self.assertIsInstance(result, list) - def test_markdown2rst(self): - current = os.path.realpath(os.path.dirname(__file__)) - readme = os.path.join(current, "..", "README.md") - rst = markdown2rst(readme) - self.assertIsInstance(rst, six.text_type) - if __name__ == '__main__': unittest.main(verbosity=2) From 66274b69b86dcfb6b9326e542878a08402c7dc93 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 24 Nov 2017 17:48:45 +0100 Subject: [PATCH 16/22] New Dockerfile image --- README.md | 17 ----------------- docker/README.md | 28 ++++++++++++++++++++++++++++ docker/docker-compose.yml | 17 +++++++++-------- 3 files changed, 37 insertions(+), 25 deletions(-) create mode 100644 docker/README.md diff --git a/README.md b/README.md index 15a893b..7e40049 100644 --- a/README.md +++ b/README.md @@ -82,23 +82,6 @@ or use `pip`: $ pip install mail-parser ``` -### Building with Docker -Complete working Docker workflow is possible allowing you to start hacking and building without any other requirements or dependencies. All the required libs and build tools are handled by Docker build process. -Using the provided Dockerfile you can build a complete working image with all the required dependencies. If you're not familiar with Docker, better use Docker Compose to both build and run your source easy and effortlessly. - -From the ```docker-compose.yml``` directory, run: -``` -$ docker-compose up --build -``` -Skip the ```--build``` switch to launch the last built container image without rebuilding again. - -The provided ```docker-compose.yml``` file is configured to: - -* Mount your host's ```tests/mails/``` dir from your source tree inside the container at ```/data/``` (read-only). -* A command line test example. - -See the ```docker-compose.yml``` to view and tweak the launch parameters. - ## Usage in a project Import `mailparser` module: diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..70525c1 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,28 @@ +# fmantuano/spamscope-mail-parser + +This Dockerfile represents a Docker image that encapsulates mail-parser. The [official image](https://hub.docker.com/r/fmantuano/spamscope-mail-parser/) is on Docker Hub. + +To run this image after installing Docker, use a command like this: + +``` +sudo docker run -i -t --rm -v ~/mails:/mails fmantuano/spamscope-mail-parser +``` + +This command runs mail-parser help as default, but you can use all others options. + +To share the "mails" directory between your host and the container, create a "mails" directory on your host. Then run the tool like this. + +There also is an example of `docker-compose` + +From the `docker-compose.yml` directory, run: + +``` +$ sudo docker-compose up +``` + +The provided ```docker-compose.yml``` file is configured to: + + - Mount your host's `~/mails/` folder from your source tree inside the container at `/mails/` (read-only). + - A command line test example. + +See the ```docker-compose.yml``` to view and tweak the launch parameters. diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index ae392d3..a7f0548 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,9 +1,10 @@ -# Docker Compose build manifest. -# Usage: -# $ docker-compose up --build +version: '2.1' -mailparser: - build: . - command: --json -f /mails/mail_test_1 - volumes: - - ./tests/mails/:/mails/:ro +services: + + mailparser: + image: fmantuano/spamscope-mail-parser:develop + command: --json -f /mails/mail_test_1 + container_name: mailparser + volumes: + - ~/mails/:/mails/:ro From 4ea4ff4433b8f4bd004681cd457475dc053b73e4 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 25 Nov 2017 07:59:29 +0100 Subject: [PATCH 17/22] Added docker build in travis Update README --- .travis.yml | 27 +++++++++++++ README.md | 81 ++++++++++++++++++++++++--------------- README.rst | 106 ++++++++++++++++++++++++++-------------------------- 3 files changed, 130 insertions(+), 84 deletions(-) diff --git a/.travis.yml b/.travis.yml index c4e2570..0af46f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,23 @@ before_install: # Install msgconvert - sudo apt-get install -y libemail-outlook-message-perl pandoc + + # Build latest images spamscope-root, spamscope-elasticsearch + + # make images + - if [ "$TRAVIS_BRANCH" == "master" ]; then + cd docker && + docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && + docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser && + cd -; + fi + + - if [ "$TRAVIS_BRANCH" == "develop" ]; then + cd docker && + docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && + docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH && + cd -; + fi # command to install dependencies install: @@ -37,6 +54,16 @@ script: after_success: coveralls + - if [ "$TRAVIS_BRANCH" == "master" ]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; + docker push $DOCKER_USERNAME/spamscope-mail-parser; + fi + + - if [ "$TRAVIS_BRANCH" == "develop" ]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; + docker push $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH; + fi + notifications: email: false slack: diff --git a/README.md b/README.md index 7e40049..f8bac01 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,14 @@ [![PyPI version](https://badge.fury.io/py/mail-parser.svg)](https://badge.fury.io/py/mail-parser) -[![Build Status](https://travis-ci.org/SpamScope/mail-parser.svg?branch=master)](https://travis-ci.org/SpamScope/mail-parser) -[![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=master)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) -[![BCH compliance](https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=master)](https://bettercodehub.com/) +[![Build Status](https://travis-ci.org/SpamScope/mail-parser.svg?branch=develop)](https://travis-ci.org/SpamScope/mail-parser) +[![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) +[![BCH compliance](https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=develop)](https://bettercodehub.com/) # mail-parser ## Overview -mail-parser is a wrapper for [email](https://docs.python.org/2/library/email.message.html) Python Standard Library. +mail-parser is not only a wrapper for [email](https://docs.python.org/2/library/email.message.html) Python Standard Library. +It give you an easy way to pass from raw mail to Python object that you can use in your code. It's the key module of [SpamScope](https://github.com/SpamScope/spamscope). mail-parser can parse Outlook email format (.msg). To use this feature, you need to install `libemail-outlook-message-perl` package. For Debian based systems: @@ -24,26 +25,46 @@ $ apt-cache show libemail-outlook-message-perl mail-parser supports Python 3. + ## Description -mail-parser takes as input a raw email and generates a parsed object. This object is a tokenized email with some indicator: - - body - - headers +mail-parser takes as input a raw email and generates a parsed object. The properties of this object have the same name of +[RFC headers](https://www.iana.org/assignments/message-headers/message-headers.xhtml): + + - bcc + - cc + - date + - delivered_to + - from\_ (not `from` because is a keyword of Python) + - message_id + - received + - reply_to - subject - - from - to - - delivered_to + +There are other properties to get: + - body + - headers - attachments - - message id - - date - - charset mail - sender IP address - - receiveds -We have also two types of indicator: - - anomalies: mail without message id or date +mail-parser can detect defect in mail: - [defects](https://docs.python.org/2/library/email.message.html#email.message.Message.defects): mail with some not compliance RFC part +All properties have a JSON and raw property that you can get with: + - name_json + - name_raw + +Example: + +``` +$ mail.to (Python object) +$ mail.to_json (JSON) +$ mail.to_raw (raw header) +``` + +The command line tool use the JSON format. + ### Defects These defects can be used to evade the antispam filter. An example are the mails with a malformed boundary that can hide a not legitimate epilogue (often malware). This library can take these epilogues. @@ -98,25 +119,25 @@ mail = mailparser.parse_from_bytes(byte_mail) Then you can get all parts ``` +mail.attachments: list of all attachments mail.body -mail.headers -mail.message_id -mail.to_ +mail.date: datetime object in UTC +mail.defects: defect RFC not compliance +mail.defects_categories: only defects categories mail.delivered_to mail.from_ -mail.subject -mail.text_plain_list: only text plain mail parts in a list -mail.attachments_list: list of all attachments -mail.date_mail -mail.parsed_mail_obj: tokenized mail in a object -mail.parsed_mail_json: tokenized mail in a JSON -mail.defects: defect RFC not compliance -mail.defects_category: only defects categories -mail.has_defects -mail.anomalies -mail.has_anomalies mail.get_server_ipaddress(trust="my_server_mail_trust") -mail.receiveds +mail.has_defects +mail.headers +mail.headers +mail.mail: tokenized mail in a object +mail.message: email.message.Message object +mail.message_as_string: message as string +mail.message_id +mail.received +mail.subject +mail.text_plain: only text plain mail parts in a list +mail.to ``` ## Usage from command-line diff --git a/README.rst b/README.rst index 1d1c7a2..86f8091 100644 --- a/README.rst +++ b/README.rst @@ -6,9 +6,10 @@ mail-parser Overview -------- -mail-parser is a wrapper for +mail-parser is not only a wrapper for `email `__ Python -Standard Library. It's the key module of +Standard Library. It give you an easy way to pass from raw mail to +Python object that you can use in your code. It's the key module of `SpamScope `__. mail-parser can parse Outlook email format (.msg). To use this feature, @@ -31,15 +32,40 @@ Description ----------- mail-parser takes as input a raw email and generates a parsed object. -This object is a tokenized email with some indicator: - body - headers - -subject - from - to - delivered\_to - attachments - message id - date - -charset mail - sender IP address - receiveds - -We have also two types of indicator: - anomalies: mail without message -id or date - +The properties of this object have the same name of `RFC +headers `__: + +- bcc +- cc +- date +- delivered\_to +- from\_ (not ``from`` because is a keyword of Python) +- message\_id +- received +- reply\_to +- subject +- to + +There are other properties to get: - body - headers - attachments - +sender IP address + +mail-parser can detect defect in mail: - `defects `__: mail with some not compliance RFC part +All properties have a JSON and raw property that you can get with: - +name\_json - name\_raw + +Example: + +:: + + $ mail.to (Python object) + $ mail.to_json (JSON) + $ mail.to_raw (raw header) + +The command line tool use the JSON format. + Defects ~~~~~~~ @@ -85,34 +111,6 @@ or use ``pip``: $ pip install mail-parser -Building with Docker -~~~~~~~~~~~~~~~~~~~~ - -Complete working Docker workflow is possible allowing you to start -hacking and building without any other requirements or dependencies. All -the required libs and build tools are handled by Docker build process. -Using the provided Dockerfile you can build a complete working image -with all the required dependencies. If you're not familiar with Docker, -better use Docker Compose to both build and run your source easy and -effortlessly. - -From the ``docker-compose.yml`` directory, run: - -:: - - $ docker-compose up --build - -Skip the ``--build`` switch to launch the last built container image -without rebuilding again. - -The provided ``docker-compose.yml`` file is configured to: - -- Mount your host's ``tests/mails/`` dir from your source tree inside - the container at ``/data/`` (read-only). -- A command line test example. - -See the ``docker-compose.yml`` to view and tweak the launch parameters. - Usage in a project ------------------ @@ -131,25 +129,25 @@ Then you can get all parts :: + mail.attachments: list of all attachments mail.body - mail.headers - mail.message_id - mail.to_ + mail.date: datetime object in UTC + mail.defects: defect RFC not compliance + mail.defects_categories: only defects categories mail.delivered_to mail.from_ - mail.subject - mail.text_plain_list: only text plain mail parts in a list - mail.attachments_list: list of all attachments - mail.date_mail - mail.parsed_mail_obj: tokenized mail in a object - mail.parsed_mail_json: tokenized mail in a JSON - mail.defects: defect RFC not compliance - mail.defects_category: only defects categories - mail.has_defects - mail.anomalies - mail.has_anomalies mail.get_server_ipaddress(trust="my_server_mail_trust") - mail.receiveds + mail.has_defects + mail.headers + mail.headers + mail.mail: tokenized mail in a object + mail.message: email.message.Message object + mail.message_as_string: message as string + mail.message_id + mail.received + mail.subject + mail.text_plain: only text plain mail parts in a list + mail.to Usage from command-line ----------------------- @@ -206,11 +204,11 @@ This example will show you the tokenized mail in a JSON pretty format. .. |PyPI version| image:: https://badge.fury.io/py/mail-parser.svg :target: https://badge.fury.io/py/mail-parser -.. |Build Status| image:: https://travis-ci.org/SpamScope/mail-parser.svg?branch=master +.. |Build Status| image:: https://travis-ci.org/SpamScope/mail-parser.svg?branch=develop :target: https://travis-ci.org/SpamScope/mail-parser -.. |Coverage Status| image:: https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=master +.. |Coverage Status| image:: https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop :target: https://coveralls.io/github/SpamScope/mail-parser?branch=develop -.. |BCH compliance| image:: https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=master +.. |BCH compliance| image:: https://bettercodehub.com/edge/badge/SpamScope/mail-parser?branch=develop :target: https://bettercodehub.com/ .. |Donate| image:: https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif :target: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=VEPXYP745KJF2 From fc6bc2ff83331e50e4bffb905792b03d117e753b Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 25 Nov 2017 08:10:05 +0100 Subject: [PATCH 18/22] Fixed travis yml to make only one image --- .travis.yml | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0af46f2..ac88025 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,18 +18,21 @@ before_install: # Build latest images spamscope-root, spamscope-elasticsearch # make images - - if [ "$TRAVIS_BRANCH" == "master" ]; then - cd docker && - docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && - docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser && - cd -; - fi - - - if [ "$TRAVIS_BRANCH" == "develop" ]; then - cd docker && - docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && - docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH && - cd -; + - if [ "$TRAVIS_PYTHON_VERSION" == "3.6" ]; then + + if [ "$TRAVIS_BRANCH" == "master" ]; then + cd docker && + docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && + docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser && + cd -; + fi + + if [ "$TRAVIS_BRANCH" == "develop" ]; then + cd docker && + docker build --build-arg BRANCH=$TRAVIS_BRANCH -t $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH . && + docker run -i -t --rm $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH && + cd -; + fi fi # command to install dependencies @@ -52,16 +55,19 @@ script: - python -m mailparser -f tests/mails/mail_test_6 -j after_success: - coveralls + - coveralls - - if [ "$TRAVIS_BRANCH" == "master" ]; then - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; - docker push $DOCKER_USERNAME/spamscope-mail-parser; - fi + - if [ "$TRAVIS_PYTHON_VERSION" == "3.6" ]; then + + if [ "$TRAVIS_BRANCH" == "master" ]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; + docker push $DOCKER_USERNAME/spamscope-mail-parser; + fi - - if [ "$TRAVIS_BRANCH" == "develop" ]; then - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; - docker push $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH; + if [ "$TRAVIS_BRANCH" == "develop" ]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; + docker push $DOCKER_USERNAME/spamscope-mail-parser:$TRAVIS_BRANCH; + fi fi notifications: From d4c5333e48e3280e930585e6d8d5813ae293039a Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 25 Nov 2017 08:23:55 +0100 Subject: [PATCH 19/22] Added requirement to Dockerfile --- .travis.yml | 4 ++-- docker/Dockerfile | 8 ++++++-- docker/README.md | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index ac88025..10d5af2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ before_install: # Build latest images spamscope-root, spamscope-elasticsearch # make images - - if [ "$TRAVIS_PYTHON_VERSION" == "3.6" ]; then + - if [ "$TRAVIS_PYTHON_VERSION" == "2.7" ]; then if [ "$TRAVIS_BRANCH" == "master" ]; then cd docker && @@ -57,7 +57,7 @@ script: after_success: - coveralls - - if [ "$TRAVIS_PYTHON_VERSION" == "3.6" ]; then + - if [ "$TRAVIS_PYTHON_VERSION" == "2.7" ]; then if [ "$TRAVIS_BRANCH" == "master" ]; then docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"; diff --git a/docker/Dockerfile b/docker/Dockerfile index d612ab0..c21a3c2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,11 @@ FROM python ENV MAIL_PARSER_PATH=/tmp/mailparser ARG BRANCH=develop -RUN git clone -b $BRANCH --single-branch https://github.com/SpamScope/mail-parser.git $MAIL_PARSER_PATH && \ - cd $MAIL_PARSER_PATH && python setup.py install +RUN apt-get -yqq update \ + && apt-get -yqq --no-install-recommends install libemail-outlook-message-perl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && git clone -b $BRANCH --single-branch https://github.com/SpamScope/mail-parser.git $MAIL_PARSER_PATH \ + && cd $MAIL_PARSER_PATH && python setup.py install ENTRYPOINT ["mailparser"] CMD ["-h"] diff --git a/docker/README.md b/docker/README.md index 70525c1..d30697e 100644 --- a/docker/README.md +++ b/docker/README.md @@ -10,7 +10,7 @@ sudo docker run -i -t --rm -v ~/mails:/mails fmantuano/spamscope-mail-parser This command runs mail-parser help as default, but you can use all others options. -To share the "mails" directory between your host and the container, create a "mails" directory on your host. Then run the tool like this. +To share the "mails" directory between your host and the container, create a "mails" directory on your host. There also is an example of `docker-compose` From d6e33175fa706dfd840d730e419dc52861c6b745 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sat, 2 Dec 2017 19:42:11 +0100 Subject: [PATCH 20/22] Parsing in __init__ so the object has all data --- mailparser/mailparser.py | 22 ++++++++++++++-------- tests/test_mail_parser.py | 4 ++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index fe8b58a..db496c2 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -52,7 +52,7 @@ def parse_from_file_obj(fp): Returns: Instance of MailParser with raw email parsed """ - return MailParser.from_file_obj(fp).parse() + return MailParser.from_file_obj(fp) def parse_from_file(fp): @@ -65,7 +65,7 @@ def parse_from_file(fp): Returns: Instance of MailParser with raw email parsed """ - return MailParser.from_file(fp).parse() + return MailParser.from_file(fp) def parse_from_file_msg(fp): @@ -78,7 +78,7 @@ def parse_from_file_msg(fp): Returns: Instance of MailParser with raw email parsed """ - return MailParser.from_file_msg(fp).parse() + return MailParser.from_file_msg(fp) def parse_from_string(s): @@ -91,7 +91,7 @@ def parse_from_string(s): Returns: Instance of MailParser with raw email parsed """ - return MailParser.from_string(s).parse() + return MailParser.from_string(s) def parse_from_bytes(bt): @@ -104,7 +104,7 @@ def parse_from_bytes(bt): Returns: Instance of MailParser with raw email parsed """ - return MailParser.from_bytes(bt).parse() + return MailParser.from_bytes(bt) class MailParser(object): @@ -122,6 +122,13 @@ def __init__(self, message=None): Init a new object from a message object structure. """ self._message = message + self.parse() + + def __str__(self): + if self.message: + return self.subject + else: + return six.text_type() @classmethod def from_file_obj(cls, fp): @@ -267,9 +274,8 @@ def parse(self): Instance of MailParser with raw email parsed """ - # check if a valid mail - if not self.message.keys(): - raise ValueError("This email doesn't have headers") + if not self.message: + return self # reset and start parsing self._reset() diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 8a04d27..d200a7f 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -91,8 +91,8 @@ def test_type_error(self): self.assertIsInstance(i["filename"], six.text_type) def test_valid_mail(self): - with self.assertRaises(ValueError): - mailparser.parse_from_string("fake mail") + m = mailparser.parse_from_string("fake mail") + self.assertFalse(m.message) def test_receiveds(self): mail = mailparser.parse_from_file(mail_test_1) From d1f0ab256bb19e3621a09e73df71430266050244 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 3 Dec 2017 12:45:29 +0100 Subject: [PATCH 21/22] Update release --- mailparser/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailparser/version.py b/mailparser/version.py index 5987648..0bc022c 100644 --- a/mailparser/version.py +++ b/mailparser/version.py @@ -17,7 +17,7 @@ limitations under the License. """ -__version__ = "2.1.0" +__version__ = "3.0.0" if __name__ == "__main__": print(__version__) From 2c3eb7ba67461b61992912e0848dc5644d12a5f9 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 3 Dec 2017 12:50:46 +0100 Subject: [PATCH 22/22] Update social info --- README.md | 2 +- README.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f8bac01..da14d49 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ mail-parser can be downloaded, used, and modified free of charge. It is availabl ## Authors ### Main Author -Fedele Mantuano (**Twitter**: [@fedelemantuano](https://twitter.com/fedelemantuano)) +**Fedele Mantuano**: [LinkedIn](https://www.linkedin.com/in/fmantuano/) ## Installation diff --git a/README.rst b/README.rst index 86f8091..2949ade 100644 --- a/README.rst +++ b/README.rst @@ -85,8 +85,8 @@ Authors Main Author ~~~~~~~~~~~ -Fedele Mantuano (**Twitter**: -[@fedelemantuano](https://twitter.com/fedelemantuano)) +**Fedele Mantuano**: +`LinkedIn `__ Installation ------------