From e26f8f57210fc9a923aa207f8ce4f9514d64c6e2 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Wed, 3 Oct 2018 23:39:30 +0200 Subject: [PATCH 1/5] Added getting timezone --- mailparser/const.py | 4 +++- mailparser/mailparser.py | 15 ++++++++++++++- mailparser/utils.py | 9 +++++++-- tests/test_mail_parser.py | 16 +++++++++++++++- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/mailparser/const.py b/mailparser/const.py index 0b98477..b37d739 100644 --- a/mailparser/const.py +++ b/mailparser/const.py @@ -84,4 +84,6 @@ "body", "date", "received", - "to_domains"]) + "timezone", + "to_domains", +]) diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 68d3fb6..2cddc40 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -547,10 +547,23 @@ def date(self): conv = None try: - conv = convert_mail_date(date) + conv, _ = convert_mail_date(date) finally: return conv + @property + def timezone(self): + """ + Return timezone. Offset from UTC. + """ + date = self.message.get('date') + timezone = 0 + + try: + _, timezone = convert_mail_date(date) + finally: + return timezone + @property def date_json(self): """ diff --git a/mailparser/utils.py b/mailparser/utils.py index b944a21..e4980d3 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -282,9 +282,14 @@ def receiveds_parsing(receiveds): def convert_mail_date(date): + log.debug("Date to parse: {!r}".format(date)) d = email.utils.parsedate_tz(date) + log.debug("Date parsed: {!r}".format(d)) t = email.utils.mktime_tz(d) - return datetime.datetime.utcfromtimestamp(t) + log.debug("Date parsed in timestamp: {!r}".format(t)) + date_utc = datetime.datetime.utcfromtimestamp(t) + timezone = d[9] / 3600 if d[9] else 0 + return date_utc, timezone def receiveds_not_parsed(receiveds): @@ -342,7 +347,7 @@ def receiveds_format(receiveds): # "for ; Tue, 7 Mar 2017 14:29:24 -0800", i["date"] = i["date"].split(";")[-1] try: - j["date_utc"] = convert_mail_date(i["date"]) + j["date_utc"], _ = convert_mail_date(i["date"]) except TypeError: j["date_utc"] = None diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index ad6ef5c..9a52a5f 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -29,12 +29,14 @@ import mailparser from mailparser.utils import ( + convert_mail_date, fingerprints, get_header, get_to_domains, msgconvert, ported_open, - receiveds_parsing) + receiveds_parsing, +) from mailparser.exceptions import MailParserEnvironmentError @@ -463,6 +465,9 @@ def test_from_file_obj(self): result = mail.defects self.assertIsInstance(result, list) + result = mail.timezone + self.assertEquals(result, 1) + def test_get_to_domains(self): m = mailparser.parse_from_file(mail_test_6) @@ -477,6 +482,15 @@ def test_get_to_domains(self): self.assertIsInstance(m.to_domains_json, six.text_type) + def test_convert_mail_date(self): + s = "Mon, 20 Mar 2017 05:12:54 +0600" + d, t = convert_mail_date(s) + self.assertEquals(t, 6) + self.assertEquals(str(d), "2017-03-19 23:12:54") + s = "Mon, 20 Mar 2017 05:12:54 -0600" + d, t = convert_mail_date(s) + self.assertEquals(t, -6) + if __name__ == '__main__': unittest.main(verbosity=2) From 2613c192cb6dabcec836bf92a3cdbe9dd240e58e Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 4 Oct 2018 22:19:46 +0200 Subject: [PATCH 2/5] Fixed format timezone --- .coveragerc | 13 +++++++++++++ README.md | 2 ++ README.rst | 3 ++- mailparser/utils.py | 5 +++-- tests/test_mail_parser.py | 18 ++++++++++++++++-- 5 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..9323f5e --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +source = src/modules/ + +[report] +omit = mailparser/version.py + +exclude_lines = + pragma: no cover + except OSError + def __repr__ + raise AssertionError + raise NotImplementedError + if __name__ == .__main__.: diff --git a/README.md b/README.md index 4b40441..3113960 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ There are other properties to get: - attachments - sender IP address - to domains + - timezone To get custom headers you should replace "-" with "\_". Example for header `X-MSMail-Priority`: @@ -172,6 +173,7 @@ mail.text_plain: only text plain mail parts in a list mail.text_html: only text html mail parts in a list mail.to mail.to_domains +mail.timezone: returns the timezone, offset from UTC ``` ## Usage from command-line diff --git a/README.rst b/README.rst index 8ae8d34..b8b7db4 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,7 @@ headers - to There are other properties to get: - body - body html - body plain - -headers - attachments - sender IP address - to domains +headers - attachments - sender IP address - to domains - timezone To get custom headers you should replace “-” with “\_”. Example for header ``X-MSMail-Priority``: @@ -178,6 +178,7 @@ Then you can get all parts mail.text_html: only text html mail parts in a list mail.to mail.to_domains + mail.timezone: returns the timezone, offset from UTC Usage from command-line ----------------------- diff --git a/mailparser/utils.py b/mailparser/utils.py index e4980d3..348f8a4 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -289,6 +289,7 @@ def convert_mail_date(date): log.debug("Date parsed in timestamp: {!r}".format(t)) date_utc = datetime.datetime.utcfromtimestamp(t) timezone = d[9] / 3600 if d[9] else 0 + timezone = "{:+.0f}".format(timezone) return date_utc, timezone @@ -430,7 +431,7 @@ def safe_print(data): print(data.encode('utf-8')) -def print_mail_fingerprints(data): +def print_mail_fingerprints(data): # pragma: no cover md5, sha1, sha256, sha512 = fingerprints(data) print("md5:\t{}".format(md5)) print("sha1:\t{}".format(sha1)) @@ -438,7 +439,7 @@ def print_mail_fingerprints(data): print("sha512:\t{}".format(sha512)) -def print_attachments(attachments, flag_hash): +def print_attachments(attachments, flag_hash): # pragma: no cover if flag_hash: for i in attachments: if i.get("content_transfer_encoding") == "base64": diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 9a52a5f..34873e7 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -35,6 +35,7 @@ get_to_domains, msgconvert, ported_open, + ported_string, receiveds_parsing, ) @@ -485,11 +486,24 @@ def test_get_to_domains(self): def test_convert_mail_date(self): s = "Mon, 20 Mar 2017 05:12:54 +0600" d, t = convert_mail_date(s) - self.assertEquals(t, 6) + self.assertEquals(t, "+6") self.assertEquals(str(d), "2017-03-19 23:12:54") s = "Mon, 20 Mar 2017 05:12:54 -0600" d, t = convert_mail_date(s) - self.assertEquals(t, -6) + self.assertEquals(t, "-6") + + def test_ported_string(self): + raw_data = "" + s = ported_string(raw_data) + self.assertEquals(s, six.text_type()) + + raw_data = "test " + s = ported_string(raw_data) + self.assertEquals(s, "test") + + raw_data = u"test " + s = ported_string(raw_data) + self.assertEquals(s, "test") if __name__ == '__main__': From 233bbd2c1a906a45488a335cc1a52577f69b706d Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 4 Oct 2018 22:28:05 +0200 Subject: [PATCH 3/5] Fixed test --- tests/test_mail_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 34873e7..55827ac 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -467,7 +467,7 @@ def test_from_file_obj(self): self.assertIsInstance(result, list) result = mail.timezone - self.assertEquals(result, 1) + self.assertEquals(result, "+1") def test_get_to_domains(self): m = mailparser.parse_from_file(mail_test_6) From e239fdc841587f882366b4b8313d0b495bbceff8 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 4 Oct 2018 22:37:35 +0200 Subject: [PATCH 4/5] Removed function from test --- mailparser/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mailparser/utils.py b/mailparser/utils.py index 348f8a4..01acbd1 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -424,7 +424,7 @@ def get_mail_keys(message): return all_parts -def safe_print(data): +def safe_print(data): # pragma: no cover try: print(data) except UnicodeEncodeError: From d37f9b88a834d6794e78fe3dd9bf76eb9ea3599f Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Thu, 4 Oct 2018 22:39:16 +0200 Subject: [PATCH 5/5] Update version --- mailparser/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mailparser/version.py b/mailparser/version.py index 1df5ad2..c972064 100644 --- a/mailparser/version.py +++ b/mailparser/version.py @@ -17,7 +17,7 @@ limitations under the License. """ -__version__ = "3.5.1" # pragma: no cover +__version__ = "3.6.0" -if __name__ == "__main__": # pragma: no cover +if __name__ == "__main__": print(__version__)