From 7096fd9b2a0cf51f2ee50c57bb243a0a32eb0fef Mon Sep 17 00:00:00 2001 From: Destiny Peterson Date: Thu, 16 Jun 2022 01:04:03 -0700 Subject: [PATCH] Fix issue deanmalmgren#342 Clarification, _getStringStream *should* return `unicode` in Python 2, `str` in Python 3, IF the stream requested exists. If it does not exist, it returns `None`, which cannot be added to bytes. This commit adds a check for None, returning an empty bytes string if matched. --- textract/parsers/msg_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/textract/parsers/msg_parser.py b/textract/parsers/msg_parser.py index a1b7e17a..e521b481 100644 --- a/textract/parsers/msg_parser.py +++ b/textract/parsers/msg_parser.py @@ -8,13 +8,15 @@ def ensure_bytes(string): """Normalize string to bytes. - `ExtractMsg.Message._getStringStream` can return unicode or bytes depending + `extract_msg.Message._getStringStream` can return unicode or bytes depending on what is originally stored in message file. This helper functon makes sure, that bytes type is returned. """ if isinstance(string, six.string_types): return string.encode('utf-8') + if string is None: + return b'' return string