diff --git a/pkg/message/build.go b/pkg/message/build.go index 176e1563..c9c82ced 100644 --- a/pkg/message/build.go +++ b/pkg/message/build.go @@ -369,30 +369,30 @@ func getMessageHeader(msg proton.Message, opts JobOptions) message.Header { // SetText will RFC2047-encode. if msg.Subject != "" { - hdr.SetText("Subject", msg.Subject) + setUTF8EncodedHeaderIfNeeded(&hdr, "Subject", msg.Subject) } // mail.Address.String() will RFC2047-encode if necessary. if !addressEmpty(msg.Sender) { - hdr.Set("From", msg.Sender.String()) + setHeaderIfNeeded(&hdr, "From", msg.Sender.String()) } if len(msg.ReplyTos) > 0 && !msg.IsDraft() { if !(len(msg.ReplyTos) == 1 && addressEmpty(msg.ReplyTos[0])) { - hdr.Set("Reply-To", toAddressList(msg.ReplyTos)) + setHeaderIfNeeded(&hdr, "Reply-To", toAddressList(msg.ReplyTos)) } } if len(msg.ToList) > 0 { - hdr.Set("To", toAddressList(msg.ToList)) + setHeaderIfNeeded(&hdr, "To", toAddressList(msg.ToList)) } if len(msg.CCList) > 0 { - hdr.Set("Cc", toAddressList(msg.CCList)) + setHeaderIfNeeded(&hdr, "Cc", toAddressList(msg.CCList)) } if len(msg.BCCList) > 0 { - hdr.Set("Bcc", toAddressList(msg.BCCList)) + setHeaderIfNeeded(&hdr, "Bcc", toAddressList(msg.BCCList)) } setMessageIDIfNeeded(msg, &hdr) @@ -401,7 +401,7 @@ func getMessageHeader(msg proton.Message, opts JobOptions) message.Header { if opts.SanitizeDate { if date, err := rfc5322.ParseDateTime(hdr.Get("Date")); err != nil || date.Before(time.Unix(0, 0)) { msgDate := SanitizeMessageDate(msg.Time) - hdr.Set("Date", msgDate.In(time.UTC).Format(time.RFC1123Z)) + setHeaderIfNeeded(&hdr, "Date", msgDate.In(time.UTC).Format(time.RFC1123Z)) // We clobbered the date so we save it under X-Original-Date only if no such value exists. if !hdr.Has("X-Original-Date") { hdr.Set("X-Original-Date", date.In(time.UTC).Format(time.RFC1123Z)) @@ -412,7 +412,7 @@ func getMessageHeader(msg proton.Message, opts JobOptions) message.Header { // Set our internal ID if requested. 
// This is important for us to detect whether APPENDed things are actually "move like outlook". if opts.AddInternalID { - hdr.Set("X-Pm-Internal-Id", msg.ID) + setHeaderIfNeeded(&hdr, "X-Pm-Internal-Id", msg.ID) } // Set our external ID if requested. @@ -426,7 +426,7 @@ func getMessageHeader(msg proton.Message, opts JobOptions) message.Header { // Set our server date if requested. // Can be useful to see how long it took for a message to arrive. if opts.AddMessageDate { - hdr.Set("X-Pm-Date", time.Unix(msg.Time, 0).In(time.UTC).Format(time.RFC1123Z)) + setHeaderIfNeeded(&hdr, "X-Pm-Date", time.Unix(msg.Time, 0).In(time.UTC).Format(time.RFC1123Z)) } // Include the message ID in the references (supposedly this somehow improves outlook support...). @@ -463,6 +463,25 @@ func setMessageIDIfNeeded(msg proton.Message, hdr *message.Header) { } } +// setUTF8EncodedHeaderIfNeeded sets a text (UTF-8, Q-encoded) header entry if it does not exist or if its value has changed. +// Not systematically overwriting the value prevents an unchanged entry from being moved to the top (Del + Add). +func setUTF8EncodedHeaderIfNeeded(header *message.Header, k, v string) { + encoded := mime.QEncoding.Encode("utf-8", v) + if header.Has(k) && (header.Get(k) == encoded) { + return + } + header.Set(k, encoded) +} + +// setHeaderIfNeeded sets a header entry if it does not exist or if its value has changed. +// Not systematically overwriting the value prevents an unchanged entry from being moved to the top (Del + Add). 
+func setHeaderIfNeeded(header *message.Header, key, value string) { + if header.Has(key) && (header.Get(key) == value) { + return + } + header.Set(key, value) +} + func getTextPartHeader(hdr message.Header, body []byte, mimeType rfc822.MIMEType) message.Header { params := make(map[string]string) @@ -509,8 +528,9 @@ func getAttachmentPartHeader(att proton.Attachment) message.Header { func toMessageHeader(hdr proton.Headers) message.Header { var res message.Header - - for _, key := range hdr.Order { + // go-message's message.Header stores its fields in reverse order (fields are meant to be added at the top, so reverse storage is faster), hence the backward iteration. + for i := len(hdr.Order) - 1; i >= 0; i-- { + key := hdr.Order[i] for _, val := range hdr.Values[key] { // Using AddRaw instead of Add to save key-value pair as byte buffer within Header. // This buffer is used latter on in message writer to construct message and avoid crash diff --git a/pkg/message/build_framework_test.go b/pkg/message/build_framework_test.go index e878260f..18cb77f8 100644 --- a/pkg/message/build_framework_test.go +++ b/pkg/message/build_framework_test.go @@ -21,6 +21,7 @@ import ( "bufio" "bytes" "encoding/base64" + "net/mail" "strings" "testing" "time" @@ -92,6 +93,67 @@ func newRawTestMessageWithHeaders(messageID, addressID, mimeType, body string, d } } +func newTestMessageFromRFC822(t *testing.T, literal []byte) proton.Message { + // Note: attachments are not supported. 
+ p := rfc822.Parse(literal) + h, err := p.ParseHeader() + require.NoError(t, err) + var parsedHeaders proton.Headers + parsedHeaders.Values = make(map[string][]string) + h.Entries(func(key, val string) { + parsedHeaders.Values[key] = []string{val} + parsedHeaders.Order = append(parsedHeaders.Order, key) + }) + var mailHeaders = mail.Header(parsedHeaders.Values) + require.True(t, h.Has("Content-Type")) + mime, _, err := rfc822.ParseMIMEType(h.Get("Content-Type")) + require.NoError(t, err) + date, err := mailHeaders.Date() + require.NoError(t, err) + sender, err := mail.ParseAddress(parsedHeaders.Values["From"][0]) + require.NoError(t, err) + + return proton.Message{ + MessageMetadata: proton.MessageMetadata{ + ID: "messageID", + AddressID: "addressID", + LabelIDs: []string{}, + ExternalID: "", + Subject: parsedHeaders.Values["Subject"][0], + Sender: sender, + ToList: parseAddressList(t, mailHeaders, "To"), + CCList: parseAddressList(t, mailHeaders, "Cc"), + BCCList: parseAddressList(t, mailHeaders, "Bcc"), + ReplyTos: parseAddressList(t, mailHeaders, "Reply-To"), + Flags: 0, + Time: date.Unix(), + Size: 0, + Unread: false, + IsReplied: false, + IsRepliedAll: false, + IsForwarded: false, + NumAttachments: 0, + }, + Header: string(h.Raw()), + ParsedHeaders: parsedHeaders, + Body: string(p.Body()), + MIMEType: mime, + Attachments: nil, + } +} + +func parseAddressList(t *testing.T, header mail.Header, key string) []*mail.Address { + var result []*mail.Address + if len(header.Get(key)) == 0 { + return nil + } + + result, err := header.AddressList(key) + require.NoError(t, err) + + return result +} + func addTestAttachment( t *testing.T, kr *crypto.KeyRing, diff --git a/pkg/message/header_test.go b/pkg/message/header_test.go index 48e590db..3d916557 100644 --- a/pkg/message/header_test.go +++ b/pkg/message/header_test.go @@ -18,9 +18,14 @@ package message import ( + "bytes" + "regexp" + "strings" "testing" + gomessage "github.com/emersion/go-message" 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestHeaderLines(t *testing.T) { @@ -130,3 +135,69 @@ func FuzzReadHeaderBody(f *testing.F) { _, _, _ = readHeaderBody(b) }) } + +func TestHeaderOrder(t *testing.T) { + literal := []byte(`X-Pm-Content-Encryption: end-to-end +X-Pm-Origin: internal +Subject: header test +To: Test Proton +From: Dummy Recipient +Date: Tue, 15 Oct 2024 07:54:39 +0000 +Mime-Version: 1.0 +Content-Type: multipart/mixed;boundary=---------------------a136fc3851075ca3f022f5c3ec6bf8f5 +Message-Id: <1rYR51zNVZdyCXVvAZ8C9N8OaBg4wO_wg6VlSoLK_Mv-2AaiF5UL-vE_tIZ6FdYP8ylsuV3fpaKUpVwuUcnQ6ql_83aEgZvfC5QcZbind1k=@proton.me> +X-Pm-Spamscore: 0 +Received: from mail.protonmail.ch by mail.protonmail.ch; Tue, 15 Oct 2024 07:54:43 +0000 +X-Original-To: test@proton.me +Return-Path: +Delivered-To: test@proton.me + +lorem`) + + // build a proton message + message := newTestMessageFromRFC822(t, literal) + options := JobOptions{ + IgnoreDecryptionErrors: true, + SanitizeDate: true, + AddInternalID: true, + AddExternalID: true, + AddMessageDate: true, + AddMessageIDReference: true, + SanitizeMBOXHeaderLine: true, + } + + // Rebuild the headers using bridge's algorithm, sanitizing fields. 
+ hdr := getTextPartHeader(getMessageHeader(message, options), []byte(message.Body), message.MIMEType) + var b bytes.Buffer + w, err := gomessage.CreateWriter(&b, hdr) + require.NoError(t, err) + _ = w.Close() + + // split the header + str := string(regexp.MustCompile(`\r\n(\s+)`).ReplaceAll(b.Bytes(), nil)) // join folded (multi-line) header values + lines := strings.Split(str, "\r\n") + + // Check we have the expected number of lines + require.Len(t, lines, 17) + + // The fields added or modified are at the top + require.True(t, strings.HasPrefix(lines[0], "Content-Type: multipart/mixed;boundary=")) // we changed the boundary + require.True(t, strings.HasPrefix(lines[1], "References: ")) // References was added + require.True(t, strings.HasPrefix(lines[2], "X-Pm-Date: ")) // X-Pm-Date was added + require.True(t, strings.HasPrefix(lines[3], "X-Pm-Internal-Id: ")) // X-Pm-Internal-Id was added + require.Equal(t, `To: "Test Proton" `, lines[4]) // Name was double quoted + require.Equal(t, `From: "Dummy Recipient" `, lines[5]) // Name was double quoted + + // all other fields appear in their original order + require.Equal(t, `X-Pm-Content-Encryption: end-to-end`, lines[6]) + require.Equal(t, `X-Pm-Origin: internal`, lines[7]) + require.Equal(t, `Subject: header test`, lines[8]) + require.Equal(t, `Date: Tue, 15 Oct 2024 07:54:39 +0000`, lines[9]) + require.Equal(t, `Mime-Version: 1.0`, lines[10]) + require.Equal(t, `Message-Id: <1rYR51zNVZdyCXVvAZ8C9N8OaBg4wO_wg6VlSoLK_Mv-2AaiF5UL-vE_tIZ6FdYP8ylsuV3fpaKUpVwuUcnQ6ql_83aEgZvfC5QcZbind1k=@proton.me>`, lines[11]) + require.Equal(t, `X-Pm-Spamscore: 0`, lines[12]) + require.Equal(t, `Received: from mail.protonmail.ch by mail.protonmail.ch; Tue, 15 Oct 2024 07:54:43 +0000`, lines[13]) + require.Equal(t, `X-Original-To: test@proton.me`, lines[14]) + require.Equal(t, `Return-Path: `, lines[15]) + require.Equal(t, `Delivered-To: test@proton.me`, lines[16]) +}