fix(BRIDGE-374): Tweaked MBOX header sanitization logic, ensures that RFC822 headers are present.

This commit is contained in:
Atanas Janeshliev
2025-07-01 15:41:01 +02:00
parent 47316a1843
commit 20183bf984
2 changed files with 126 additions and 45 deletions

View File

@ -36,6 +36,7 @@ import (
"github.com/emersion/go-message/textproto" "github.com/emersion/go-message/textproto"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"golang.org/x/exp/maps"
) )
var ( var (
@ -606,7 +607,7 @@ func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error {
return nil return nil
} }
i := indexMBOXHeaderLine(decrypted) i := indexMBOXHeaderLine(decrypted.Body.Bytes())
for i >= 0 { for i >= 0 {
var buf bytes.Buffer var buf bytes.Buffer
@ -627,29 +628,61 @@ func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error {
// copy rest // copy rest
if _, err := buf.Write(decrypted.Body.Bytes()); err != nil { if _, err := buf.Write(decrypted.Body.Bytes()); err != nil {
return fmt.Errorf("cannot rest of message: %w", err) return fmt.Errorf("cannot copy rest of message: %w", err)
} }
decrypted.Body = buf decrypted.Body = buf
i = indexMBOXHeaderLine(decrypted) i = indexMBOXHeaderLine(decrypted.Body.Bytes())
} }
return nil return nil
} }
func indexMBOXHeaderLine(decrypted *DecryptedMessage) int { func extractHeaderPartEnd(body []byte) int {
b := decrypted.Body.Bytes() headerEnd := bytes.Index(body, []byte("\n\n"))
if headerEnd < 0 {
headerEnd = bytes.Index(body, []byte("\r\n\r\n"))
}
headerEnd := bytes.Index(b, []byte("\n\n"))
if headerEnd < 0 { if headerEnd < 0 {
headerEnd = bytes.Index(b, []byte("\r\n\r\n")) return -1
} }
if headerEnd < 0 {
headerEnd = len(b) requiredHeaders := map[string]bool{
"from": false,
"to": false,
"date": false,
} }
headerSection := body[:headerEnd]
lines := bytes.Split(headerSection, []byte{'\n'})
for _, line := range lines {
colonIdx := bytes.IndexByte(line, byte(':'))
if colonIdx <= 0 {
continue
}
fieldName := bytes.ToLower(line[:colonIdx])
switch string(fieldName) {
case "from", "to", "date":
requiredHeaders[string(fieldName)] = true
}
}
for _, val := range maps.Values(requiredHeaders) {
if !val {
return -1
}
}
return headerEnd
}
func indexMBOXHeaderLine(body []byte) int {
headerEnd := extractHeaderPartEnd(body)
for i := 0; i < headerEnd; i++ { for i := 0; i < headerEnd; i++ {
if i != 0 && b[i] != '\n' { if i != 0 && body[i] != '\n' {
continue continue
} }
@ -658,7 +691,7 @@ func indexMBOXHeaderLine(decrypted *DecryptedMessage) int {
j = i + 1 j = i + 1
} }
if bytes.HasPrefix(b[j:], mboxFrom()) || bytes.HasPrefix(b[j:], mboxGtFrom()) { if bytes.HasPrefix(body[j:], mboxFrom()) || bytes.HasPrefix(body[j:], mboxGtFrom()) {
return j return j
} }
} }

View File

@ -1304,28 +1304,55 @@ func TestHasMBOXHeaderLine(t *testing.T) {
cases := map[string]struct { cases := map[string]struct {
index, indexCRLF int index, indexCRLF int
}{ }{
"From: ok\nTo: Ok": {-1, -1}, // No MBOX line and missing header.
"From: ok\nTo: Ok\n\nFrom - 123": {-1, -1}, "From: ok\nTo: Ok": {-1, -1},
"From: ok\nTo: Ok\n\n>From - 123": {-1, -1},
">From: ok\nTo: Ok": {-1, -1},
">From: ok\nTo: Ok\n\nFrom - 123": {-1, -1},
">From: ok\nTo: Ok\n\n>From - 123": {-1, -1},
"From - 123\nFrom: ok\nTo: Ok": {0, 0}, // MBOX line in body, not header.
"From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, "From: ok\nTo: Ok\n\nFrom - 123": {-1, -1},
"From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, "From: ok\nTo: Ok\n\n>From - 123": {-1, -1},
"From: ok\nFrom - 123\nTo: Ok": {9, 10}, // MBOX lines without proper header ending.
"From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": {9, 10}, "From ok\nFrom: ok\nTo: Ok\nDate: ok": {-1, -1},
"From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": {9, 10}, ">From ok\nFrom: ok\nTo: Ok\nDate: ok": {-1, -1},
">From - 123\nFrom: ok\nTo: Ok": {0, 0}, // MBOX lines with proper header ending.
">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": {0, 0}, "From ok\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0},
">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0}, ">From ok\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0},
"From: ok\n>From - 123\nTo: Ok": {9, 10}, // MBOX lines in middle of headers.
"From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": {9, 10}, "From: ok\nFrom middle\nTo: Ok\nDate: ok\n\n": {9, 10},
"From: ok\n>From - 123\nTo: Ok\n\n>From - 123": {9, 10}, "From: ok\n>From middle\nTo: Ok\nDate: ok\n\n": {9, 10},
// Multiple MBOX lines, should return first one.
"From first\nFrom second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0},
"From first\n>From second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0},
">From first\nFrom second\nFrom: ok\nTo: Ok\nDate: ok\n\n": {0, 0},
// MBOX lines at various positions.
"From: sender\nTo: recipient\nFrom mbox\nDate: today\n\n": {27, 29},
"From: sender\nTo: recipient\n>From mbox\nDate: today\n\n": {27, 29},
"From: sender\nTo: recipient\nDate: today\nFrom mbox\n\n": {39, 42},
"From: sender\nTo: recipient\nDate: today\n>From mbox\n\n": {39, 42},
// MBOX line at end of header section.
"From: ok\nTo: Ok\nDate: ok\nFrom mbox\n\n": {25, 28},
"From: ok\nTo: Ok\nDate: ok\n>From mbox\n\n": {25, 28},
// Headers with missing required fields.
"From mbox\nFrom: ok\nTo: Ok\n\n": {-1, -1},
"From mbox\nFrom: ok\nDate: ok\n\n": {-1, -1},
"From mbox\nTo: Ok\nDate: ok\n\n": {-1, -1},
// Valid headers but no MBOX lines.
"From: sender\nTo: recipient\nDate: today\n\n": {-1, -1},
"From: sender\nTo: recipient\nDate: today\nSubject: test\n\n": {-1, -1},
// Headers that are like-MBOX but not.
"From: sender\n>From: other\nTo: recipient\nDate: today\n\n": {-1, -1},
"From: sender\nFrom: other\nTo: recipient\nDate: today\n\n": {-1, -1},
"From mbox\nFrom: sender\n>From: header\nTo: recipient\nDate: today\n\n": {0, 0},
"From: sender\nFrom mbox\n>From: header\nTo: recipient\nDate: today\n\n": {13, 14},
} }
test := func(t *testing.T, wantIndex int, given string, useCRLF bool) { test := func(t *testing.T, wantIndex int, given string, useCRLF bool) {
@ -1337,7 +1364,19 @@ func TestHasMBOXHeaderLine(t *testing.T) {
decrypted.Body = *bytes.NewBufferString(given) decrypted.Body = *bytes.NewBufferString(given)
} }
require.Equal(t, wantIndex, indexMBOXHeaderLine(decrypted)) headerIdx := indexMBOXHeaderLine(decrypted.Body.Bytes())
require.Equal(t, wantIndex, headerIdx)
if headerIdx == -1 {
return
}
partFromOne := decrypted.Body.Bytes()[headerIdx : headerIdx+5]
partFromTwo := decrypted.Body.Bytes()[headerIdx : headerIdx+6]
hasMboxHeader := strings.HasPrefix(string(partFromOne), "From ") || strings.HasPrefix(string(partFromOne), ">From ") ||
strings.HasPrefix(string(partFromTwo), "From ") || strings.HasPrefix(string(partFromTwo), ">From ")
require.True(t, hasMboxHeader)
} }
for given, want := range cases { for given, want := range cases {
@ -1348,29 +1387,35 @@ func TestHasMBOXHeaderLine(t *testing.T) {
func TestSanitizeMBOXHeaderLine(t *testing.T) { func TestSanitizeMBOXHeaderLine(t *testing.T) {
cases := map[string]string{ cases := map[string]string{
// Unchanged - no MBOX headers in header section
"From: ok\nTo: Ok": "From: ok\nTo: Ok", "From: ok\nTo: Ok": "From: ok\nTo: Ok",
"From: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123",
"From: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", "From: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123",
">From: ok\nTo: Ok": ">From: ok\nTo: Ok", // Unchanged - no MBOX headers.
">From: ok\nTo: Ok\n\nFrom - 123": ">From: ok\nTo: Ok\n\nFrom - 123", "From: ok\nTo: ok\nDate: ok": "From: ok\nTo: ok\nDate: ok",
">From: ok\nTo: Ok\n\n>From - 123": ">From: ok\nTo: Ok\n\n>From - 123", "From: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n",
"From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", // MBOX headers should be removed
"From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From ok\nFrom: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n",
"From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", ">From ok\nFrom: ok\nTo: ok\nDate: ok\n\n": "From: ok\nTo: ok\nDate: ok\n\n",
"From: ok\nFrom - 123\nTo: Ok": "From: ok\nTo: Ok", // MBOX header mixed in-between.
"From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From: sender\nFrom line\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n",
"From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", "From: sender\n>From line\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n",
"From line\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n",
">From - 123\nFrom: ok\nTo: Ok": "From: ok\nTo: Ok", // Multiple MBOX headers.
">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From line1\nFrom line2\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n",
">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", "From line1\n>From line2\nFrom: sender\nTo: recipient\nDate: today\n\n": "From: sender\nTo: recipient\nDate: today\n\n",
"From: ok\n>From - 123\nTo: Ok": "From: ok\nTo: Ok", // Incomplete required headers - should not process.
"From: ok\n>From - 123\nTo: Ok\n\nFrom - 123": "From: ok\nTo: Ok\n\nFrom - 123", "From line\nFrom: sender\nTo: recipient\n\n": "From line\nFrom: sender\nTo: recipient\n\n",
"From: ok\n>From - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123", "From line\nFrom: sender\nDate: today\n\n": "From line\nFrom: sender\nDate: today\n\n",
// MBOX and complete required headers - No separation between header part - should not be processed.
"From ok\nFrom: ok\nTo: ok\nDate: ok": "From ok\nFrom: ok\nTo: ok\nDate: ok",
">From ok\nFrom: ok\nTo: ok\nDate: ok": ">From ok\nFrom: ok\nTo: ok\nDate: ok",
} }
test := func(t *testing.T, given, want string, useCRLF bool) { test := func(t *testing.T, given, want string, useCRLF bool) {
@ -1385,6 +1430,9 @@ func TestSanitizeMBOXHeaderLine(t *testing.T) {
require.NoError(t, sanitizeMBOXHeaderLine(decrypted)) require.NoError(t, sanitizeMBOXHeaderLine(decrypted))
require.Equal(t, []byte(want), decrypted.Body.Bytes()) require.Equal(t, []byte(want), decrypted.Body.Bytes())
require.NoError(t, sanitizeMBOXHeaderLine(decrypted))
require.Equal(t, []byte(want), decrypted.Body.Bytes())
} }
for given, want := range cases { for given, want := range cases {