fix(GODT-3163): filter MBOX format delimiter.

This commit is contained in:
Jakub
2024-03-06 08:15:55 +01:00
committed by Jakub Cuth
parent 1c7c342e19
commit d2fbbc3e25
4 changed files with 180 additions and 0 deletions

View File

@ -46,6 +46,7 @@ func defaultMessageJobOpts() message.JobOptions {
AddExternalID: true, // Whether to include ExternalID as X-Pm-External-Id.
AddMessageDate: true, // Whether to include message time as X-Pm-Date.
AddMessageIDReference: true, // Whether to include the MessageID in References.
SanitizeMBOXHeaderLine: true, // Whether to ignore header line representing MBOX delimiter
}
}

View File

@ -19,6 +19,7 @@ package message
import (
"bytes"
"fmt"
"mime"
"net/mail"
"strings"
@ -46,6 +47,12 @@ var (
const InternalIDDomain = `protonmail.internalid`
func BuildRFC822Into(kr *crypto.KeyRing, decrypted *DecryptedMessage, opts JobOptions, buf *bytes.Buffer) error {
if opts.SanitizeMBOXHeaderLine {
if err := sanitizeMBOXHeaderLine(decrypted); err != nil {
return fmt.Errorf("failed to sanitize MBOX header: %w", err)
}
}
switch {
case len(decrypted.Msg.Attachments) > 0:
return buildMultipartRFC822(decrypted, opts, buf)
@ -560,3 +567,80 @@ func (bw *boundary) gen() string {
bw.val = algo.HashHexSHA256(bw.val)
return bw.val
}
// mboxFrom returns the byte prefix "From " that is matched against the start
// of a line when looking for an MBOX delimiter. Every call yields a newly
// allocated slice, so callers may modify the result freely.
func mboxFrom() []byte {
	const prefix = "From "

	return []byte(prefix)
}
// mboxGtFrom returns the byte prefix ">From ", the '>'-prefixed variant of the
// "From " prefix, which is matched against the start of a line as well. Every
// call yields a newly allocated slice, so callers may modify the result freely.
func mboxGtFrom() []byte {
	const prefix = ">From "

	return []byte(prefix)
}
// sanitizeMBOXHeaderLine removes from decrypted.Body every line that
// indexMBOXHeaderLine reports as an MBOX delimiter (a header-section line
// starting with "From " or ">From "). The line is dropped together with its
// line terminator; all other bytes are kept in their original order.
//
// A nil message or an empty body is left untouched. A delimiter line that is
// the last line of the buffer and has no trailing '\n' is removed up to the
// end of the buffer instead of being treated as an error, so the body is never
// left partially consumed.
//
// The returned error is always nil; the error result is kept so existing
// callers do not need to change.
func sanitizeMBOXHeaderLine(decrypted *DecryptedMessage) error {
	if decrypted == nil || decrypted.Body.Len() == 0 {
		return nil
	}

	// Re-scan the rebuilt message after every removal: the header boundary is
	// computed from the whole buffer, so the search must always see the full
	// remaining message rather than only the part after the removed line.
	for {
		start := indexMBOXHeaderLine(decrypted)
		if start < 0 {
			return nil
		}

		raw := decrypted.Body.Bytes()

		// The line ends after the next '\n' (this also covers "\r\n"), or at
		// the end of the buffer when the line is not terminated.
		end := len(raw)
		if eol := bytes.IndexByte(raw[start:], '\n'); eol >= 0 {
			end = start + eol + 1
		}

		// bytes.Buffer.Write always returns a nil error, so the results are
		// intentionally not checked.
		var cleaned bytes.Buffer
		cleaned.Grow(len(raw) - (end - start))
		cleaned.Write(raw[:start])
		cleaned.Write(raw[end:])

		decrypted.Body = cleaned
	}
}
// indexMBOXHeaderLine returns the byte offset, within the unread part of
// decrypted.Body, of the first header-section line that starts with "From "
// or ">From ". It returns -1 when there is no such line.
//
// The header section is taken to end at the first empty line, i.e. at the
// earliest occurrence of either "\n\n" or "\r\n\r\n"; when neither occurs the
// whole buffer is scanned. Using the earliest of the two separators keeps a
// bare "\n\n" inside the body of a CRLF message from extending the header
// section into the body.
//
// decrypted must not be nil.
func indexMBOXHeaderLine(decrypted *DecryptedMessage) int {
	b := decrypted.Body.Bytes()

	headerEnd := len(b)
	for _, sep := range []string{"\n\n", "\r\n\r\n"} {
		if at := bytes.Index(b, []byte(sep)); at >= 0 && at < headerEnd {
			headerEnd = at
		}
	}

	// Build the prefixes once instead of once per examined line.
	from, gtFrom := mboxFrom(), mboxGtFrom()

	// Visit the start of every line inside the header section, including the
	// line that follows a leading newline.
	for lineStart := 0; lineStart < headerEnd; {
		line := b[lineStart:]
		if bytes.HasPrefix(line, from) || bytes.HasPrefix(line, gtFrom) {
			return lineStart
		}

		nl := bytes.IndexByte(b[lineStart:headerEnd], '\n')
		if nl < 0 {
			break
		}

		lineStart += nl + 1
	}

	return -1
}

View File

@ -18,6 +18,7 @@
package message
import (
"bytes"
"net/mail"
"os"
"path/filepath"
@ -1298,3 +1299,96 @@ func TestBuildComplexMIMEType(t *testing.T) {
expectContentTypeParam(`name`, is(`Cat_August_2010-4.jpeg`)).
expectContentDispositionParam(`filename`, is(`Cat_August_2010-4.jpeg`))
}
// TestHasMBOXHeaderLine verifies the offset returned by indexMBOXHeaderLine.
// Each table key is a message written with LF line endings; the value holds
// the expected offset for that input as-is and for the same input with every
// "\n" replaced by "\r\n".
func TestHasMBOXHeaderLine(t *testing.T) {
	type offsets struct {
		lf, crlf int
	}

	table := map[string]offsets{
		"From: ok\nTo: Ok":                            {-1, -1},
		"From: ok\nTo: Ok\n\nFrom - 123":              {-1, -1},
		"From: ok\nTo: Ok\n\n>From - 123":             {-1, -1},
		">From: ok\nTo: Ok":                           {-1, -1},
		">From: ok\nTo: Ok\n\nFrom - 123":             {-1, -1},
		">From: ok\nTo: Ok\n\n>From - 123":            {-1, -1},
		"From - 123\nFrom: ok\nTo: Ok":                {0, 0},
		"From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123":  {0, 0},
		"From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0},
		"From: ok\nFrom - 123\nTo: Ok":                {9, 10},
		"From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123":  {9, 10},
		"From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": {9, 10},
		">From - 123\nFrom: ok\nTo: Ok":                {0, 0},
		">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123":  {0, 0},
		">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": {0, 0},
		"From: ok\n>From - 123\nTo: Ok":                {9, 10},
		"From: ok\n>From - 123\nTo: Ok\n\nFrom - 123":  {9, 10},
		"From: ok\n>From - 123\nTo: Ok\n\n>From - 123": {9, 10},
	}

	// check loads raw into a fresh message body and compares the reported
	// offset with expected.
	check := func(t *testing.T, raw string, expected int) {
		msg := &DecryptedMessage{}
		msg.Body = *bytes.NewBufferString(raw)

		require.Equal(t, expected, indexMBOXHeaderLine(msg))
	}

	for input, expected := range table {
		crlfInput := strings.ReplaceAll(input, "\n", "\r\n")

		t.Run("LF-"+input, func(t *testing.T) { check(t, input, expected.lf) })
		t.Run("CRLF-"+input, func(t *testing.T) { check(t, crlfInput, expected.crlf) })
	}
}
// TestSanitizeMBOXHeaderLine verifies the body left behind by
// sanitizeMBOXHeaderLine. The table maps an input message to the expected
// result, both written with LF line endings; every entry is also run with
// "\n" replaced by "\r\n" in both the input and the expectation.
func TestSanitizeMBOXHeaderLine(t *testing.T) {
	table := map[string]string{
		"From: ok\nTo: Ok":                            "From: ok\nTo: Ok",
		"From: ok\nTo: Ok\n\nFrom - 123":              "From: ok\nTo: Ok\n\nFrom - 123",
		"From: ok\nTo: Ok\n\n>From - 123":             "From: ok\nTo: Ok\n\n>From - 123",
		">From: ok\nTo: Ok":                           ">From: ok\nTo: Ok",
		">From: ok\nTo: Ok\n\nFrom - 123":             ">From: ok\nTo: Ok\n\nFrom - 123",
		">From: ok\nTo: Ok\n\n>From - 123":            ">From: ok\nTo: Ok\n\n>From - 123",
		"From - 123\nFrom: ok\nTo: Ok":                "From: ok\nTo: Ok",
		"From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123":  "From: ok\nTo: Ok\n\nFrom - 123",
		"From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123",
		"From: ok\nFrom - 123\nTo: Ok":                "From: ok\nTo: Ok",
		"From: ok\nFrom - 123\nTo: Ok\n\nFrom - 123":  "From: ok\nTo: Ok\n\nFrom - 123",
		"From: ok\nFrom - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123",
		">From - 123\nFrom: ok\nTo: Ok":                "From: ok\nTo: Ok",
		">From - 123\nFrom: ok\nTo: Ok\n\nFrom - 123":  "From: ok\nTo: Ok\n\nFrom - 123",
		">From - 123\nFrom: ok\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123",
		"From: ok\n>From - 123\nTo: Ok":                "From: ok\nTo: Ok",
		"From: ok\n>From - 123\nTo: Ok\n\nFrom - 123":  "From: ok\nTo: Ok\n\nFrom - 123",
		"From: ok\n>From - 123\nTo: Ok\n\n>From - 123": "From: ok\nTo: Ok\n\n>From - 123",
	}

	// check sanitizes a fresh message holding raw and compares the resulting
	// body bytes with expected.
	check := func(t *testing.T, raw, expected string) {
		msg := &DecryptedMessage{}
		msg.Body = *bytes.NewBufferString(raw)

		require.NoError(t, sanitizeMBOXHeaderLine(msg))
		require.Equal(t, []byte(expected), msg.Body.Bytes())
	}

	toCRLF := func(s string) string {
		return strings.ReplaceAll(s, "\n", "\r\n")
	}

	for input, expected := range table {
		crlfInput, crlfExpected := toCRLF(input), toCRLF(expected)

		t.Run("LF"+input, func(t *testing.T) { check(t, input, expected) })
		t.Run("CRLF"+input, func(t *testing.T) { check(t, crlfInput, crlfExpected) })
	}
}

View File

@ -24,4 +24,5 @@ type JobOptions struct {
AddExternalID bool // Whether to include ExternalID as X-Pm-External-Id.
AddMessageDate bool // Whether to include message time as X-Pm-Date.
AddMessageIDReference bool // Whether to include the MessageID in References.
SanitizeMBOXHeaderLine bool // Whether to ignore header line representing MBOX delimiter
}