diff --git a/internal/services/imapservice/sync_build.go b/internal/services/imapservice/sync_build.go index 80f1336f..c696ecfa 100644 --- a/internal/services/imapservice/sync_build.go +++ b/internal/services/imapservice/sync_build.go @@ -46,6 +46,7 @@ func defaultMessageJobOpts() message.JobOptions { AddExternalID: true, // Whether to include ExternalID as X-Pm-External-Id. AddMessageDate: true, // Whether to include message time as X-Pm-Date. AddMessageIDReference: true, // Whether to include the MessageID in References. + SanitizeMBOXHeaderLine: true, // Whether to ignore header line representing MBOX delimiter } } diff --git a/pkg/message/build.go b/pkg/message/build.go index a75b5ebb..176e1563 100644 --- a/pkg/message/build.go +++ b/pkg/message/build.go @@ -19,6 +19,7 @@ package message import ( "bytes" + "fmt" "mime" "net/mail" "strings" @@ -46,6 +47,12 @@ var ( const InternalIDDomain = `protonmail.internalid` func BuildRFC822Into(kr *crypto.KeyRing, decrypted *DecryptedMessage, opts JobOptions, buf *bytes.Buffer) error { + if opts.SanitizeMBOXHeaderLine { + if err := sanitizeMBOXHeaderLine(decrypted); err != nil { + return fmt.Errorf("failed to sanitize MBOX header: %w", err) + } + } + switch { case len(decrypted.Msg.Attachments) > 0: return buildMultipartRFC822(decrypted, opts, buf) @@ -560,3 +567,80 @@ func (bw *boundary) gen() string { bw.val = algo.HashHexSHA256(bw.val) return bw.val } + +func mboxFrom() []byte { + return []byte("From ") +} + +func mboxGtFrom() []byte { + return []byte(">From ") +} + +func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error { + if decrypted == nil { + return nil + } + + if decrypted.Body.Len() == 0 { + return nil + } + + i := indexMBOXHeaderLine(decrypted) + for i >= 0 { + var buf bytes.Buffer + + // copy until mbox line + if i > 0 { + if _, err := buf.Write(decrypted.Body.Next(i)); err != nil { + return fmt.Errorf("cannot copy first lines: %w", err) + } + } + + // dump mbox line + eol := bytes.IndexRune(decrypted.Body.Bytes(), '\n') + if eol == 0 || eol == -1 { + return errors.New("cannot find end of mbox line") + } + + _ = decrypted.Body.Next(eol + 1) + + // copy rest + if _, err := buf.Write(decrypted.Body.Bytes()); err != nil { + return fmt.Errorf("cannot rest of message: %w", err) + } + + decrypted.Body = buf + i = indexMBOXHeaderLine(decrypted) + } + + return nil +} + +func indexMBOXHeaderLine(decrypted *DecryptedMessage) int { + b := decrypted.Body.Bytes() + + headerEnd := bytes.Index(b, []byte("\n\n")) + if headerEnd < 0 { + headerEnd = bytes.Index(b, []byte("\r\n\r\n")) + } + if headerEnd < 0 { + headerEnd = len(b) + } + + for i := 0; i < headerEnd; i++ { + if i != 0 && b[i] != '\n' { + continue + } + + j := 0 + if i != 0 { + j = i + 1 + } + + if bytes.HasPrefix(b[j:], mboxFrom()) || bytes.HasPrefix(b[j:], mboxGtFrom()) { + return j + } + } + + return -1 +} diff --git a/pkg/message/build_test.go b/pkg/message/build_test.go index d80aef7e..4fa4ee96 100644 --- a/pkg/message/build_test.go +++ b/pkg/message/build_test.go @@ -18,6 +18,7 @@ package message import ( + "bytes" "net/mail" "os" "path/filepath" @@ -1298,3 +1299,96 @@ func TestBuildComplexMIMEType(t *testing.T) { expectContentTypeParam(`name`, is(`Cat_August_2010-4.jpeg`)). expectContentDispositionParam(`filename`, is(`Cat_August_2010-4.jpeg`)) } + +func TestHasMBOXHeaderLine(t *testing.T) { + cases := map[string]struct { + index, indexCRLF int + }{ + "From: ok\nTo: Ok": {-1, -1}, + "From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, + "From: ok\nTo: Ok\n\n>From - 123": {-1, -1}, + ">From: ok\nTo: Ok": {-1, -1}, + ">From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, + ">From: ok\nTo: Ok\n\n>From - 123": {-1, -1}, + + "From - 123\nFrom: ok\nTo: Ok": {0, 0}, + "From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, + "From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, + + "From: ok\nFrom - 123\nTo: Ok": {9, 10}, + "From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": {9, 10}, + "From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": {9, 10}, + + ">From - 123\nFrom: ok\nTo: Ok": {0, 0}, + ">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, + ">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, + + "From: ok\n>From - 123\nTo: Ok": {9, 10}, + "From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": {9, 10}, + "From: ok\n>From - 123\nTo: Ok\n\n>From - 123": {9, 10}, + } + + test := func(t *testing.T, wantIndex int, given string, useCRLF bool) { + decrypted := &DecryptedMessage{} + + if useCRLF { + decrypted.Body = *bytes.NewBufferString(strings.ReplaceAll(given, "\n", "\r\n")) + } else { + decrypted.Body = *bytes.NewBufferString(given) + } + + require.Equal(t, wantIndex, indexMBOXHeaderLine(decrypted)) + } + + for given, want := range cases { + t.Run("LF-"+given, func(t *testing.T) { test(t, want.index, given, false) }) + t.Run("CRLF-"+given, func(t *testing.T) { test(t, want.indexCRLF, given, true) }) + } +} + +func TestSanitizeMBOXHeaderLine(t *testing.T) { + cases := map[string]string{ + "From: ok\nTo: Ok": "From: ok\nTo: Ok", + "From: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", + "From: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + + ">From: ok\nTo: Ok": ">From: ok\nTo: Ok", + ">From: ok\nTo: Ok\n\nFrom - 123": ">From: ok\nTo: Ok\n\nFrom - 123", + ">From: ok\nTo: Ok\n\n>From - 123": ">From: ok\nTo: Ok\n\n>From - 123", + + "From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", + "From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", + "From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + + "From: ok\nFrom - 123\nTo: Ok": "From: ok\nTo: Ok", + "From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", + "From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + + ">From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", + ">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", + ">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + + "From: ok\n>From - 123\nTo: Ok": "From: ok\nTo: Ok", + "From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", + "From: ok\n>From - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + } + + test := func(t *testing.T, given, want string, useCRLF bool) { + decrypted := &DecryptedMessage{} + + if useCRLF { + decrypted.Body = *bytes.NewBufferString(strings.ReplaceAll(given, "\n", "\r\n")) + want = strings.ReplaceAll(want, "\n", "\r\n") + } else { + decrypted.Body = *bytes.NewBufferString(given) + } + + require.NoError(t, sanitizeMBOXHeaderLine(decrypted)) + require.Equal(t, []byte(want), decrypted.Body.Bytes()) + } + + for given, want := range cases { + t.Run("LF"+given, func(t *testing.T) { test(t, given, want, false) }) + t.Run("CRLF"+given, func(t *testing.T) { test(t, given, want, true) }) + } +} diff --git a/pkg/message/options.go b/pkg/message/options.go index 1950bafb..151dad02 100644 --- a/pkg/message/options.go +++ b/pkg/message/options.go @@ -24,4 +24,5 @@ type JobOptions struct { AddExternalID bool // Whether to include ExternalID as X-Pm-External-Id. AddMessageDate bool // Whether to include message time as X-Pm-Date. AddMessageIDReference bool // Whether to include the MessageID in References. + SanitizeMBOXHeaderLine bool // Whether to ignore header line representing MBOX delimiter }