From 20183bf984b8ce43c8d629f3cd5268edccc7cf40 Mon Sep 17 00:00:00 2001 From: Atanas Janeshliev Date: Tue, 1 Jul 2025 15:41:01 +0200 Subject: [PATCH] fix(BRIDGE-374): Tweaked MBOX header sanitization logic, ensures that RFC822 headers are present. --- pkg/message/build.go | 55 ++++++++++++++---- pkg/message/build_test.go | 116 +++++++++++++++++++++++++++----------- 2 files changed, 126 insertions(+), 45 deletions(-) diff --git a/pkg/message/build.go b/pkg/message/build.go index 71b89fb9..c25991f2 100644 --- a/pkg/message/build.go +++ b/pkg/message/build.go @@ -36,6 +36,7 @@ import ( "github.com/emersion/go-message/textproto" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "golang.org/x/exp/maps" ) var ( @@ -606,7 +607,7 @@ func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error { return nil } - i := indexMBOXHeaderLine(decrypted) + i := indexMBOXHeaderLine(decrypted.Body.Bytes()) for i >= 0 { var buf bytes.Buffer @@ -627,29 +628,61 @@ func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error { // copy rest if _, err := buf.Write(decrypted.Body.Bytes()); err != nil { - return fmt.Errorf("cannot rest of message: %w", err) + return fmt.Errorf("cannot copy rest of message: %w", err) } decrypted.Body = buf - i = indexMBOXHeaderLine(decrypted) + i = indexMBOXHeaderLine(decrypted.Body.Bytes()) } return nil } -func indexMBOXHeaderLine(decrypted *DecryptedMessage) int { - b := decrypted.Body.Bytes() +func extractHeaderPartEnd(body []byte) int { + headerEnd := bytes.Index(body, []byte("\n\n")) + if headerEnd < 0 { + headerEnd = bytes.Index(body, []byte("\r\n\r\n")) + } - headerEnd := bytes.Index(b, []byte("\n\n")) if headerEnd < 0 { - headerEnd = bytes.Index(b, []byte("\r\n\r\n")) + return -1 } - if headerEnd < 0 { - headerEnd = len(b) + + requiredHeaders := map[string]bool{ + "from": false, + "to": false, + "date": false, } + headerSection := body[:headerEnd] + lines := bytes.Split(headerSection, []byte{'\n'}) + for _, line := range lines { + colonIdx := bytes.IndexByte(line, byte(':')) + if colonIdx <= 0 { + continue + } + + fieldName := bytes.ToLower(line[:colonIdx]) + switch string(fieldName) { + case "from", "to", "date": + requiredHeaders[string(fieldName)] = true + } + } + + for _, val := range maps.Values(requiredHeaders) { + if !val { + return -1 + } + } + + return headerEnd +} + +func indexMBOXHeaderLine(body []byte) int { + headerEnd := extractHeaderPartEnd(body) + for i := 0; i < headerEnd; i++ { - if i != 0 && b[i] != '\n' { + if i != 0 && body[i] != '\n' { continue } @@ -658,7 +691,7 @@ func indexMBOXHeaderLine(decrypted *DecryptedMessage) int { j = i + 1 } - if bytes.HasPrefix(b[j:], mboxFrom()) || bytes.HasPrefix(b[j:], mboxGtFrom()) { + if bytes.HasPrefix(body[j:], mboxFrom()) || bytes.HasPrefix(body[j:], mboxGtFrom()) { return j } } diff --git a/pkg/message/build_test.go b/pkg/message/build_test.go index 0a3d60da..3f1b3ba6 100644 --- a/pkg/message/build_test.go +++ b/pkg/message/build_test.go @@ -1304,28 +1304,55 @@ func TestHasMBOXHeaderLine(t *testing.T) { cases := map[string]struct { index, indexCRLF int }{ - "From: ok\nTo: Ok": {-1, -1}, - "From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, - "From: ok\nTo: Ok\n\n>From - 123": {-1, -1}, - ">From: ok\nTo: Ok": {-1, -1}, - ">From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, - ">From: ok\nTo: Ok\n\n>From - 123": {-1, -1}, + // No MBOX line and missing header. + "From: ok\nTo: Ok": {-1, -1}, - "From - 123\nFrom: ok\nTo: Ok": {0, 0}, - "From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, - "From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, + // MBOX line in body, not header. + "From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, + "From: ok\nTo: Ok\n\n>From - 123": {-1, -1}, - "From: ok\nFrom - 123\nTo: Ok": {9, 10}, - "From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": {9, 10}, - "From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": {9, 10}, + // MBOX lines without proper header ending. + "From ok\nFrom: ok\nTo: Ok\nDate: ok": {-1, -1}, + ">From ok\nFrom: ok\nTo: Ok\nDate: ok": {-1, -1}, - ">From - 123\nFrom: ok\nTo: Ok": {0, 0}, - ">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, - ">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, + // MBOX lines with proper header ending. + "From ok\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0}, + ">From ok\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0}, - "From: ok\n>From - 123\nTo: Ok": {9, 10}, - "From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": {9, 10}, - "From: ok\n>From - 123\nTo: Ok\n\n>From - 123": {9, 10}, + // MBOX lines in middle of headers. + "From: ok\nFrom middle\nTo: Ok\nDate: ok\n\n": {9, 10}, + "From: ok\n>From middle\nTo: Ok\nDate: ok\n\n": {9, 10}, + + // Multiple MBOX lines, should return first one. + "From first\nFrom second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0}, + "From first\n>From second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0}, + ">From first\nFrom second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0}, + + // MBOX lines at various positions. + "From: sender\nTo: recipient\nFrom mbox\nDate: today\n\n": {27, 29}, + "From: sender\nTo: recipient\n>From mbox\nDate: today\n\n": {27, 29}, + "From: sender\nTo: recipient\nDate: today\nFrom mbox\n\n": {39, 42}, + "From: sender\nTo: recipient\nDate: today\n>From mbox\n\n": {39, 42}, + + // MBOX line at end of header section. + "From: ok\nTo: Ok\nDate: ok\nFrom mbox\n\n": {25, 28}, + "From: ok\nTo: Ok\nDate: ok\n>From mbox\n\n": {25, 28}, + + // Headers with missing required fields. + "From mbox\nFrom: ok\nTo: Ok\n\n": {-1, -1}, + "From mbox\nFrom: ok\nDate: ok\n\n": {-1, -1}, + "From mbox\nTo: Ok\nDate: ok\n\n": {-1, -1}, + + // Valid headers but no MBOX lines. + "From: sender\nTo: recipient\nDate: today\n\n": {-1, -1}, + "From: sender\nTo: recipient\nDate: today\nSubject: test\n\n": {-1, -1}, + + // Headers that are like-MBOX but not. + "From: sender\n>From: other\nTo: recipient\nDate: today\n\n": {-1, -1}, + "From: sender\nFrom: other\nTo: recipient\nDate: today\n\n": {-1, -1}, + + "From mbox\nFrom: sender\n>From: header\nTo: recipient\nDate: today\n\n": {0, 0}, + "From: sender\nFrom mbox\n>From: header\nTo: recipient\nDate: today\n\n": {13, 14}, } test := func(t *testing.T, wantIndex int, given string, useCRLF bool) { @@ -1337,7 +1364,19 @@ func TestHasMBOXHeaderLine(t *testing.T) { decrypted.Body = *bytes.NewBufferString(given) } - require.Equal(t, wantIndex, indexMBOXHeaderLine(decrypted)) + headerIdx := indexMBOXHeaderLine(decrypted.Body.Bytes()) + require.Equal(t, wantIndex, headerIdx) + + if headerIdx == -1 { + return + } + + partFromOne := decrypted.Body.Bytes()[headerIdx : headerIdx+5] + partFromTwo := decrypted.Body.Bytes()[headerIdx : headerIdx+6] + hasMboxHeader := strings.HasPrefix(string(partFromOne), "From ") || strings.HasPrefix(string(partFromOne), ">From ") || + strings.HasPrefix(string(partFromTwo), "From ") || strings.HasPrefix(string(partFromTwo), ">From ") + + require.True(t, hasMboxHeader) } for given, want := range cases { @@ -1348,29 +1387,35 @@ func TestHasMBOXHeaderLine(t *testing.T) { func TestSanitizeMBOXHeaderLine(t *testing.T) { cases := map[string]string{ + // Unchanged - no MBOX headers in header section "From: ok\nTo: Ok": "From: ok\nTo: Ok", "From: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", - ">From: ok\nTo: Ok": ">From: ok\nTo: Ok", - ">From: ok\nTo: Ok\n\nFrom - 123": ">From: ok\nTo: Ok\n\nFrom - 123", - ">From: ok\nTo: Ok\n\n>From - 123": ">From: ok\nTo: Ok\n\n>From - 123", + // Unchanged - no MBOX headers. + "From: ok\nTo: ok\nDate: ok": "From: ok\nTo: ok\nDate: ok", + "From: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n", - "From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", - "From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", - "From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + // MBOX headers should be removed + "From ok\nFrom: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n", + ">From ok\nFrom: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n", - "From: ok\nFrom - 123\nTo: Ok": "From: ok\nTo: Ok", - "From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", - "From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + // MBOX header mixed in-between. + "From: sender\nFrom line\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n", + "From: sender\n>From line\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n", + "From line\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n", - ">From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", - ">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", - ">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + // Multiple MBOX headers. + "From line1\nFrom line2\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n", + "From line1\n>From line2\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n", - "From: ok\n>From - 123\nTo: Ok": "From: ok\nTo: Ok", - "From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", - "From: ok\n>From - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", + // Incomplete required headers - should not process. + "From line\nFrom: sender\nTo: recipient\n\n": "From line\nFrom: sender\nTo: recipient\n\n", + "From line\nFrom: sender\nDate: today\n\n": "From line\nFrom: sender\nDate: today\n\n", + + // MBOX and complete required headers - No separation between header part - should not be processed. + "From ok\nFrom: ok\nTo: ok\nDate: ok": "From ok\nFrom: ok\nTo: ok\nDate: ok", + ">From ok\nFrom: ok\nTo: ok\nDate: ok": ">From ok\nFrom: ok\nTo: ok\nDate: ok", } test := func(t *testing.T, given, want string, useCRLF bool) { @@ -1385,6 +1430,9 @@ func TestSanitizeMBOXHeaderLine(t *testing.T) { require.NoError(t, sanitizeMBOXHeaderLine(decrypted)) require.Equal(t, []byte(want), decrypted.Body.Bytes()) + + require.NoError(t, sanitizeMBOXHeaderLine(decrypted)) + require.Equal(t, []byte(want), decrypted.Body.Bytes()) } for given, want := range cases {