From 81db8da3a96fd3b26a2d97658e1f17aaebbf20b5 Mon Sep 17 00:00:00 2001
From: Joe Prosser
Date: Wed, 25 Oct 2023 22:52:53 +0100
Subject: [PATCH] remove other problematic headers

---
 cli/src/commands/parse/msgs.rs | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/cli/src/commands/parse/msgs.rs b/cli/src/commands/parse/msgs.rs
index 8ebdf961..b8e02192 100644
--- a/cli/src/commands/parse/msgs.rs
+++ b/cli/src/commands/parse/msgs.rs
@@ -31,7 +31,9 @@ const MSG_NAME_USER_PROPERTY_NAME: &str = "MSG NAME ID";
 const STREAM_PATH_ATTACHMENT_STORE_PREFIX: &str = "__attach_version1.0_#";
 
 static CONTENT_TYPE_MIME_HEADER_RX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"Content-Type:((\s)+.+\n)*").unwrap());
+    Lazy::new(|| Regex::new(r"Content-Type:((\s)+.+\n)+").unwrap());
+static CONTENT_TRANSFER_ENCODING_MIME_HEADER_RX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"Content-Transfer-Encoding:((\s)+.+\n)+").unwrap());
 static STREAM_PATH_MESSAGE_BODY_PLAIN: Lazy<PathBuf> =
     Lazy::new(|| PathBuf::from("__substg1.0_1000001F"));
 static STREAM_PATH_MESSAGE_HEADER: Lazy<PathBuf> =
@@ -178,11 +180,20 @@ fn read_attachment(
     })
 }
 
-fn remove_content_type_header(headers_string: String) -> Result<String> {
-    Ok(CONTENT_TYPE_MIME_HEADER_RX
+fn remove_content_headers(headers_string: String) -> Result<String> {
+    let mut clean_headers_string: String;
+
+    clean_headers_string = CONTENT_TYPE_MIME_HEADER_RX
         .clone()
         .replace(&headers_string, "")
-        .to_string())
+        .to_string();
+
+    clean_headers_string = CONTENT_TRANSFER_ENCODING_MIME_HEADER_RX
+        .clone()
+        .replace(&clean_headers_string, "")
+        .to_string();
+
+    Ok(clean_headers_string)
 }
 
 fn read_msg_to_document(path: &PathBuf) -> Result<Document> {
@@ -197,7 +208,7 @@ fn read_msg_to_document(path: &PathBuf) -> Result<Document> {
         read_unicode_stream_to_string(STREAM_PATH_MESSAGE_HEADER.clone(), &mut compound_file)?;
 
     // As the content type won't match the parsed value from the body in the msg
-    let headers_string_no_content_type = remove_content_type_header(headers_string)?;
+    let headers_string_no_content_headers = remove_content_headers(headers_string)?;
 
     let plain_body_string =
         read_unicode_stream_to_string(STREAM_PATH_MESSAGE_BODY_PLAIN.clone(), &mut compound_file)?;
@@ -230,7 +241,7 @@ fn read_msg_to_document(path: &PathBuf) -> Result<Document> {
     Ok(Document {
         raw_email: RawEmail {
             body: RawEmailBody::Plain(plain_body_string),
-            headers: RawEmailHeaders::Raw(headers_string_no_content_type),
+            headers: RawEmailHeaders::Raw(headers_string_no_content_headers),
             attachments,
         },
         user_properties,