Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Convert serialized posts to eml #5152

Merged
merged 4 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions src/internal/converters/eml/eml.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/str"
"github.com/alcionai/corso/src/internal/converters/ics"
"github.com/alcionai/corso/src/internal/m365/collection/groups/metadata"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/services/m365/api"
)
Expand Down Expand Up @@ -303,3 +304,137 @@ func FromJSON(ctx context.Context, body []byte) (string, error) {

return email.GetMessage(), nil
}

//-------------------------------------------------------------
// Postable -> EML
//-------------------------------------------------------------

// FromJSONPostToEML converts a postable (as json) to .eml format.
//
// body is the serialized post. postMetadata supplies the recipients and the
// subject (conversation topic) of the generated email, since those are not
// read from the post itself. The returned string is the rendered .eml
// message.
//
// An error is returned if the body cannot be deserialized into a postable,
// if attachment data cannot be read from the backing store or is not a byte
// slice, or if the mail builder reports an error.
//
// TODO(pandeyabs): This is a stripped down copy of messageable to
// eml conversion, it can be folded into one function by having a post
// to messageable converter.
func FromJSONPostToEML(
	ctx context.Context,
	body []byte,
	postMetadata metadata.ConversationPostMetadata,
) (string, error) {
	ctx = clues.Add(ctx, "body_len", len(body))

	data, err := api.BytesToPostable(body)
	if err != nil {
		return "", clues.WrapWC(ctx, err, "converting to postable")
	}

	ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))

	email := mail.NewMSG()
	email.Encoding = mail.EncodingBase64 // Doing it to be safe against newline issues
	email.AllowDuplicateAddress = true   // More "correct" conversion
	email.AddBccToHeader = true          // Don't ignore Bcc
	email.AllowEmptyAttachments = true   // Don't error on empty attachments
	email.UseProvidedAddress = true      // Don't try to parse the email address

	if from := data.GetFrom(); from != nil {
		email.SetFrom(formatAddress(from.GetEmailAddress()))
	}

	// We don't have the To, Cc, Bcc recipient information for posts due to a graph
	// limitation. All posts carry the group email address as the only recipient
	// for now.
	email.AddTo(postMetadata.Recipients...)
	email.SetSubject(postMetadata.Topic)

	// Reply-To email address is not available for posts. Note that this is different
	// from inReplyTo field.

	if created := data.GetCreatedDateTime(); created != nil {
		email.SetDate(ptr.Val(created).Format(dateFormat))
	}

	// The body is only set when the post declares a content type.
	if postBody := data.GetBody(); postBody != nil && postBody.GetContentType() != nil {
		var contentType mail.ContentType

		switch bodyType := postBody.GetContentType().String(); bodyType {
		case "html":
			contentType = mail.TextHTML
		case "text":
			contentType = mail.TextPlain
		default:
			// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
			// This should not be possible according to the documentation
			logger.Ctx(ctx).
				With("body_type", bodyType).
				Info("unknown body content type")

			contentType = mail.TextPlain
		}

		email.SetBody(contentType, ptr.Val(postBody.GetContent()))
	}

	// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
	for _, attachment := range data.GetAttachments() {
		kind := ptr.Val(attachment.GetContentType())

		content, err := attachment.GetBackingStore().Get("contentBytes")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "failed to get attachment bytes").
				With("kind", kind)
		}

		if content == nil {
			// TODO(meain): Handle non file attachments
			// https://github.com/alcionai/corso/issues/4772
			//
			// TODO(pandeyabs): Above issue is for messages.
			// This is not a problem for posts but leaving it here for safety.
			logger.Ctx(ctx).
				With("attachment_id", ptr.Val(attachment.GetId()),
					"attachment_type", ptr.Val(attachment.GetOdataType())).
				Info("no contentBytes for attachment")

			continue
		}

		bts, ok := content.([]byte)
		if !ok {
			// err is nil at this point, so there is nothing to wrap; create a
			// new error instead.
			return "", clues.NewWC(ctx, "invalid content bytes").
				With("kind", kind).
				With("interface_type", fmt.Sprintf("%T", content))
		}

		name := ptr.Val(attachment.GetName())

		contentID, err := attachment.GetBackingStore().Get("contentId")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "getting content id for attachment").
				With("kind", kind)
		}

		if contentID != nil {
			// A conversion failure leaves cids empty, in which case the
			// attachment name is kept.
			cids, _ := str.AnyToString(contentID)
			if len(cids) > 0 {
				name = cids
			}
		}

		email.Attach(&mail.File{
			// cannot use filename as inline attachment will not get mapped properly
			Name:     name,
			MimeType: kind,
			Data:     bts,
			Inline:   ptr.Val(attachment.GetIsInline()),
		})
	}

	// Note: Posts cannot be of type EventMessageResponse, EventMessage or
	// CalendarSharingMessage. So we don't need to handle those cases here.
	if err := email.GetError(); err != nil {
		return "", clues.WrapWC(ctx, err, "converting to eml")
	}

	return email.GetMessage(), nil
}
73 changes: 73 additions & 0 deletions src/internal/converters/eml/eml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/converters/eml/testdata"
"github.com/alcionai/corso/src/internal/converters/ics"
"github.com/alcionai/corso/src/internal/m365/collection/groups/metadata"
stub "github.com/alcionai/corso/src/internal/m365/service/groups/mock"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/services/m365/api"
)
Expand Down Expand Up @@ -325,3 +327,74 @@ func (suite *EMLUnitSuite) TestConvert_eml_ics_from_event_obj() {
assert.NotEqual(t, ptr.Val(msg.GetSubject()), event.GetProperty(ical.ComponentPropertySummary).Value)
assert.Equal(t, ptr.Val(evt.GetSubject()), event.GetProperty(ical.ComponentPropertySummary).Value)
}

//-------------------------------------------------------------
// Postable -> EML tests
//-------------------------------------------------------------

// TestConvert_postable_to_eml converts the PostWithAttachments fixture to eml
// and checks that the headers, attachments and html body of the parsed result
// match the source post and the supplied post metadata.
func (suite *EMLUnitSuite) TestConvert_postable_to_eml() {
	t := suite.T()

	ctx, flush := tester.NewContext(t)
	defer flush()

	body := []byte(stub.PostWithAttachments)

	postMetadata := metadata.ConversationPostMetadata{
		Recipients: []string{"[email protected]"},
		Topic:      "test subject",
	}

	// Everything below depends on a successful conversion, so stop on failure.
	out, err := FromJSONPostToEML(ctx, body, postMetadata)
	require.NoError(t, err, "converting to eml")

	post, err := api.BytesToPostable(body)
	require.NoError(t, err, "creating post")

	eml, err := enmime.ReadEnvelope(strings.NewReader(out))
	require.NoError(t, err, "reading created eml")

	assert.Equal(t, postMetadata.Topic, eml.GetHeader("Subject"))
	assert.Equal(t, post.GetCreatedDateTime().Format(time.RFC1123Z), eml.GetHeader("Date"))

	assert.Equal(t, formatAddress(post.GetFrom().GetEmailAddress()), eml.GetHeader("From"))

	// Test recipients. The post metadata should contain the group email address.

	tos := strings.Split(eml.GetHeader("To"), ", ")
	for _, sourceTo := range postMetadata.Recipients {
		assert.Contains(t, tos, sourceTo)
	}

	// Assert cc, bcc to be empty since they are not supported for posts right now.
	assert.Equal(t, "", eml.GetHeader("Cc"))
	assert.Equal(t, "", eml.GetHeader("Bcc"))

	// Test attachments using PostWithAttachments data as a reference.
	// This data has 1 direct attachment and 1 inline attachment.
	// The loop below indexes the first element of each slice, so these
	// counts must hold before continuing.
	require.Len(t, eml.Attachments, 1, "direct attachment count")
	require.Len(t, eml.Inlines, 1, "inline attachment count")

	for _, sourceAttachment := range post.GetAttachments() {
		targetContent := eml.Attachments[0].Content
		if ptr.Val(sourceAttachment.GetIsInline()) {
			targetContent = eml.Inlines[0].Content
		}

		sourceContent, err := sourceAttachment.GetBackingStore().Get("contentBytes")
		assert.NoError(t, err, "getting source attachment content")

		assert.Equal(t, sourceContent, targetContent)
	}

	// Test body. Newlines are stripped from both sides before comparing.
	got := strings.ReplaceAll(eml.HTML, "\n", "")
	want := strings.ReplaceAll(ptr.Val(post.GetBody().GetContent()), "\n", "")

	// replace the cid with a constant value to make the comparison
	re := regexp.MustCompile(`(?:src|originalSrc)="cid:[^"]*"`)
	got = re.ReplaceAllString(got, `src="cid:replaced"`)
	want = re.ReplaceAllString(want, `src="cid:replaced"`)

	assert.Equal(t, want, got)
}
85 changes: 85 additions & 0 deletions src/internal/m365/service/groups/mock/post-with-attachments.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"@odata.context": "https://graph.microsoft.com/v1.0/$metadata#groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/conversations('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQAQAHUiDz4vCHZNqyz90GJoN54%3D')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng%3D%3D')/posts(*,attachments())/$entity",
"@odata.etag": "W/\"CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9\"",
"id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAA=",
"createdDateTime": "2024-01-29T02:22:18Z",
"lastModifiedDateTime": "2024-01-29T02:22:19Z",
"changeKey": "CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9",
"categories": [],
"receivedDateTime": "2024-01-29T02:22:19Z",
"hasAttachments": true,
"conversationThreadId": "AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==",
"conversationId": null,
"body": {
"contentType": "html",
"content": "<html><body><div>\r\n<div dir=\"ltr\"><table id=\"x_x_x_x_x_x_content\" style=\"text-align:left;background-color:white;border-spacing:0;border-collapse:collapse;margin:0;box-sizing:border-box;\">\r\n<tr>\r\n<td style=\"text-align:left;vertical-align:top;box-sizing:border-box;\">\r\n<div style=\"text-align:left;margin:0;\"><span style=\"color:black;font-size:15px;font-family:Segoe UI,Tahoma,Microsoft Sans Serif,Verdana,sans-serif;background-color:white;\"><a href=\"mailto:[email protected]\" id=\"OWAf1f38008-d513-0f95-4824-1d46e7a2841b\" data-linkindex=\"0\" style=\"text-align:left;background-color:white;text-decoration:none;margin:0;\"><img data-imagetype=\"AttachmentByCid\" originalSrc=\"cid:7fa9ea6b-8e03-473c-8b34-cae13eaa33aa\" explicitlogon=\"[email protected]\" src=\"cid:7fa9ea6b-8e03-473c-8b34-cae13eaa33aa\" id=\"x_x_image_0\" data-outlook-trace=\"F:5|T:5\" size=\"447\" style=\"vertical-align:top;display:block;width:64px;height:64px;max-width:1001px;margin:0;min-height:auto;min-width:auto;\"></a></span></div></td></tr></table>\r\n<div style=\"color:black;font-size:12pt;font-family:Aptos,Aptos_EmbeddedFont,Aptos_MSFontService,Calibri,Helvetica,sans-serif;text-align:left;background-color:white;margin:0;\"><span style=\"color:black;font-family:Aptos,Aptos_EmbeddedFont,Aptos_MSFontService,Calibri,Helvetica,sans-serif;\"><br>\r\n</span></div>\r\n<div style=\"color:black;font-size:12pt;font-family:Aptos,Aptos_EmbeddedFont,Aptos_MSFontService,Calibri,Helvetica,sans-serif;text-align:left;background-color:white;margin:0;\"><span style=\"color:black;font-family:Aptos,Aptos_EmbeddedFont,Aptos_MSFontService,Calibri,Helvetica,sans-serif;\">Embedded + direct attachments.</span></div></div></div>\r\n</body></html>"
},
"from": {
"emailAddress": {
"name": "Dustin Corners",
"address": "[email protected]"
}
},
"sender": {
"emailAddress": {
"name": "Dustin Corners",
"address": "[email protected]"
}
},
"newParticipants": [],
"attachments": [
{
"@odata.type": "#microsoft.graph.fileAttachment",
"@odata.mediaContentType": "image/png",
"id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAABEgAQAJALn6ReFnlAuFpgf3BBdwM=",
"lastModifiedDateTime": "2024-01-29T02:22:18Z",
"name": "image.png",
"contentType": "image/png",
"size": 690,
"isInline": true,
"contentId": "7fa9ea6b-8e03-473c-8b34-cae13eaa33aa",
"contentLocation": null,
"contentBytes": "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAAXlJREFUeF7tmcFtwkAQRWcjhIKogQLSQ5pIIbnkEkqAC23QQ670EArIKeckQuICWsAGrayFsdhRtPO4YuOdN//NYjuI809wXr8AgAQ4J4ACzgPAEEQBFHBOAAWcB4BdAAVQwDkBFHAeAHYBFEAB5wRQwHkA2AVQQKbr3b/WYPZUtEkBAE0CCpNWp8xoXecEAKCsayRASwAFTrtTYTWZAe02WJi01gCrdZEAK9IkQEuAXYBd4HiTVng4MwQZgkbDRjsDrRqDAlakSYCWgJGaKIACRlHTGmDVGBRQk158jeRtslF3VHuCUTLz7wXS/+Gx+O+fDwnhWVtP7+PN7gW6Vnh58UPxv3MJ8tq7mD4nFgfQtag0fmnn46KsVOgDTXFO93u3FMD756qNfUXFR055AH+DRxlvlyIPL+29eSWdb0KSB7CRrYxkaPFgQpHaux6aB9BcqvAgumtFyh+7DqAy51M+14dgZc7fDqDyzueHYPy28s7nATgpPkLYA1p04EEgvdAkAAAAAElFTkSuQmCC"
},
{
"@odata.type": "#microsoft.graph.fileAttachment",
"@odata.mediaContentType": "application/octet-stream",
"id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAABEgAQAO6vI6h5OXZDlVIaM2DTB_I=",
"lastModifiedDateTime": "2024-01-29T02:22:18Z",
"name": "file_100bytes",
"contentType": "application/octet-stream",
"size": 250,
"isInline": false,
"contentId": null,
"contentLocation": null,
"contentBytes": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="
}
],
"[email protected]": "https://graph.microsoft.com/v1.0/groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==')/posts('AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=')/$ref",
"[email protected]": "https://graph.microsoft.com/v1.0/groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==')/posts('AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=')",
"inReplyTo": {
"@odata.etag": "W/\"CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9\"",
"id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=",
"createdDateTime": "2024-01-29T02:21:18Z",
"lastModifiedDateTime": "2024-01-29T02:21:19Z",
"changeKey": "CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9",
"categories": [],
"receivedDateTime": "2024-01-29T02:21:19Z",
"hasAttachments": true,
"body": {
"contentType": "html",
"content": "<html><body><div>\r\n<div dir=\"ltr\">\r\n<div style=\"color:black;font-size:12pt;font-family:Aptos,Aptos_EmbeddedFont,Aptos_MSFontService,Calibri,Helvetica,sans-serif;\">Test Reply</div></div></div>\r\n</body></html>" },
"from": {
"emailAddress": {
"name": "Dustin Corners",
"address": "[email protected]"
}
},
"sender": {
"emailAddress": {
"name": "Dustin Corners",
"address": "[email protected]"
}
}
}
}
6 changes: 6 additions & 0 deletions src/internal/m365/service/groups/mock/testdata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package stub

import _ "embed"

// PostWithAttachments holds the contents of post-with-attachments.json,
// embedded into the binary at build time.
//
//go:embed post-with-attachments.json
var PostWithAttachments string
29 changes: 29 additions & 0 deletions src/pkg/services/m365/api/conversations.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ package api

import (
"context"
"strings"

"github.com/alcionai/clues"
"github.com/jaytaylor/html2text"
"github.com/microsoft/kiota-abstractions-go/serialization"
"github.com/microsoftgraph/msgraph-sdk-go/groups"
"github.com/microsoftgraph/msgraph-sdk-go/models"

"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/internal/common/str"
"github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/logger"
Expand Down Expand Up @@ -190,3 +193,29 @@ func (c Conversations) getAttachments(

return result, totalSize, nil
}

// bytesToPostable deserializes body into a parsable using the post
// discriminator. If the first attempt fails with an invalid-JSON error, the
// body is sanitized and deserialization is retried once; any other error is
// returned immediately.
func bytesToPostable(body []byte) (serialization.Parsable, error) {
	v, err := CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
	if err != nil {
		if !strings.Contains(err.Error(), invalidJSON) {
			return nil, clues.Wrap(err, "deserializing bytes to post")
		}

		// If the JSON was invalid try sanitizing and deserializing again.
		// Sanitizing should transform characters < 0x20 according to the spec where
		// possible. The resulting JSON may still be invalid though.
		body = sanitize.JSONBytes(body)
		v, err = CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
	}

	// err is either nil (success) or the error from the retry.
	return v, clues.Stack(err).OrNil()
}

// BytesToPostable deserializes body into a models.Postable.
//
// An error is returned if deserialization fails or if the deserialized value
// does not implement models.Postable.
func BytesToPostable(body []byte) (models.Postable, error) {
	v, err := bytesToPostable(body)
	if err != nil {
		return nil, clues.Stack(err)
	}

	// Use a checked assertion so an unexpected (or nil) value surfaces as an
	// error instead of a panic.
	post, ok := v.(models.Postable)
	if !ok {
		return nil, clues.New("deserialized value is not a postable")
	}

	return post, nil
}
Loading
Loading