GODT-1146: Refactor header filtering

This commit is contained in:
James Houlahan
2021-04-30 10:04:41 +02:00
parent ce64aeb05f
commit 0b95ed4dea
14 changed files with 230 additions and 83 deletions

View File

@ -77,7 +77,7 @@ type GoQMLInterface struct {
_ string `property:"credentialsNotRemoved"`
_ string `property:"versionCheckFailed"`
//
_ func(isAvailable bool) `signal:"setConnectionStatus"`
_ func(isAvailable bool) `signal:"setConnectionStatus"`
_ func() `slot:"setToRestart"`

View File

@ -83,7 +83,7 @@ type GoQMLInterface struct {
_ float32 `property:"progress"`
_ string `property:"progressDescription"`
_ func(isAvailable bool) `signal:"setConnectionStatus"`
_ func(isAvailable bool) `signal:"setConnectionStatus"`
_ func() `slot:"setToRestart"`

View File

@ -20,9 +20,6 @@ package imap
import (
"bytes"
"context"
"fmt"
"net/textproto"
"sort"
"github.com/ProtonMail/proton-bridge/internal/imap/cache"
"github.com/ProtonMail/proton-bridge/pkg/message"
@ -53,7 +50,7 @@ func (im *imapMailbox) getMessage(
case imap.FetchEnvelope:
// No need to check IsFullHeaderCached here. API header
// contain enough information to build the envelope.
msg.Envelope = message.GetEnvelope(m, storeMessage.GetHeader())
msg.Envelope = message.GetEnvelope(m, storeMessage.GetMIMEHeader())
case imap.FetchBody, imap.FetchBodyStructure:
structure, err := im.getBodyStructure(storeMessage)
if err != nil {
@ -238,30 +235,31 @@ func isMessageInDraftFolder(m *pmapi.Message) bool {
// This will download message (or read from cache) and pick up the section,
// extract data (header,body, both) and trim the output if needed.
func (im *imapMailbox) getMessageBodySection( //nolint[funlen]
//
// In order to speed up (avoid download and decryptions) we
// cache the header. If a mail header was requested and DB
// contains full header (it means it was already built once)
// the DB header can be used without downloading and decrypting.
// Otherwise header is incomplete and clients would have issues
// e.g. AppleMail expects `text/plain` in HTML mails.
//
// For all other cases it is necessary to download and decrypt the message
// and drop the header which was obtained from cache. The header will
// will be stored in DB once successfully built. Check `getBodyAndStructure`.
func (im *imapMailbox) getMessageBodySection(
storeMessage storeMessageProvider,
section *imap.BodySectionName,
msgBuildCountHistogram *msgBuildCountHistogram,
) (imap.Literal, error) {
var header textproto.MIMEHeader
var extraNewlineAfterHeader bool
var header []byte
var response []byte
im.log.WithField("msgID", storeMessage.ID()).Trace("Getting message body")
isMainHeaderRequested := len(section.Path) == 0 && section.Specifier == imap.HeaderSpecifier
if isMainHeaderRequested && storeMessage.IsFullHeaderCached() {
// In order to speed up (avoid download and decryptions) we
// cache the header. If a mail header was requested and DB
// contains full header (it means it was already built once)
// the DB header can be used without downloading and decrypting.
// Otherwise header is incomplete and clients would have issues
// e.g. AppleMail expects `text/plain` in HTML mails.
header = storeMessage.GetHeader()
} else {
// For all other cases it is necessary to download and decrypt the message
// and drop the header which was obtained from cache. The header will
// will be stored in DB once successfully built. Check `getBodyAndStructure`.
structure, bodyReader, err := im.getBodyAndStructure(storeMessage, msgBuildCountHistogram)
if err != nil {
return nil, err
@ -278,10 +276,7 @@ func (im *imapMailbox) getMessageBodySection( //nolint[funlen]
case section.Specifier == imap.MIMESpecifier: // The MIME part specifier refers to the [MIME-IMB] header for this part.
fallthrough
case section.Specifier == imap.HeaderSpecifier:
if content, err := structure.GetSectionContent(bodyReader, section.Path); err == nil && content != nil {
extraNewlineAfterHeader = true
}
header, err = structure.GetSectionHeader(section.Path)
header, err = structure.GetSectionHeaderBytes(bodyReader, section.Path)
default:
err = errors.New("Unknown specifier " + string(section.Specifier))
}
@ -292,54 +287,13 @@ func (im *imapMailbox) getMessageBodySection( //nolint[funlen]
}
if header != nil {
response = filteredHeaderAsBytes(header, section)
// The blank line is included in all header fetches,
// except in the case of a message which has no body.
if extraNewlineAfterHeader {
response = append(response, []byte("\r\n")...)
}
response = filterHeader(header, section)
}
// Trim any output if requested.
return bytes.NewBuffer(section.ExtractPartial(response)), nil
}
// filteredHeaderAsBytes filters the header fields by section fields and it
// returns the filtered fields as bytes.
// Options are: all fields, only selected fields, all fields except selected.
func filteredHeaderAsBytes(header textproto.MIMEHeader, section *imap.BodySectionName) []byte {
// remove fields
if len(section.Fields) != 0 && section.NotFields {
for _, field := range section.Fields {
header.Del(field)
}
}
fields := make([]string, 0, len(header))
if len(section.Fields) == 0 || section.NotFields { // add all and sort
for f := range header {
fields = append(fields, f)
}
sort.Strings(fields)
} else { // add only requested (in requested order)
for _, f := range section.Fields {
fields = append(fields, textproto.CanonicalMIMEHeaderKey(f))
}
}
headerBuf := &bytes.Buffer{}
for _, canonical := range fields {
if values, ok := header[canonical]; !ok {
continue
} else {
for _, val := range values {
fmt.Fprintf(headerBuf, "%s: %s\r\n", canonical, val)
}
}
}
return headerBuf.Bytes()
}
// buildMessage from PM to IMAP.
func (im *imapMailbox) buildMessage(m *pmapi.Message) (*message.BodyStructure, []byte, error) {
body, err := im.builder.NewJobWithOptions(

View File

@ -0,0 +1,67 @@
// Copyright (c) 2021 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package imap
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
func TestFilterHeader(t *testing.T) {
const header = "To: somebody\r\nFrom: somebody else\r\nSubject: this is\r\n\ta multiline field\r\n\r\n"
assert.Equal(t, "To: somebody\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "To")
})))
assert.Equal(t, "From: somebody else\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "From")
})))
assert.Equal(t, "To: somebody\r\nFrom: somebody else\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "To") || strings.EqualFold(field, "From")
})))
assert.Equal(t, "Subject: this is\r\n\ta multiline field\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "Subject")
})))
}
// TestFilterHeaderNoNewline tests that we don't include a trailing newline when filtering
// if the original header also lacks one (which it can legally do if there is no body).
func TestFilterHeaderNoNewline(t *testing.T) {
const header = "To: somebody\r\nFrom: somebody else\r\nSubject: this is\r\n\ta multiline field\r\n"
assert.Equal(t, "To: somebody\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "To")
})))
assert.Equal(t, "From: somebody else\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "From")
})))
assert.Equal(t, "To: somebody\r\nFrom: somebody else\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "To") || strings.EqualFold(field, "From")
})))
assert.Equal(t, "Subject: this is\r\n\ta multiline field\r\n", string(filterHeaderLines([]byte(header), func(field string) bool {
return strings.EqualFold(field, "Subject")
})))
}

View File

@ -0,0 +1,104 @@
// Copyright (c) 2021 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package imap
import (
"bufio"
"bytes"
"io"
"strings"
"github.com/emersion/go-imap"
"github.com/pkg/errors"
)
func filterHeader(header []byte, section *imap.BodySectionName) []byte {
// Empty section.Fields means BODY[HEADER] was requested so we should return the full header.
if len(section.Fields) == 0 {
return header
}
fieldMap := make(map[string]struct{})
for _, field := range section.Fields {
fieldMap[strings.ToLower(field)] = struct{}{}
}
return filterHeaderLines(header, func(field string) bool {
_, ok := fieldMap[strings.ToLower(field)]
if section.NotFields {
ok = !ok
}
return ok
})
}
func filterHeaderLines(header []byte, wantField func(string) bool) []byte {
var res []byte
for _, line := range headerLines(header) {
if len(bytes.TrimSpace(line)) == 0 {
res = append(res, line...)
} else {
split := bytes.SplitN(line, []byte(": "), 2)
if len(split) != 2 {
continue
}
if wantField(string(bytes.ToLower(split[0]))) {
res = append(res, line...)
}
}
}
return res
}
// NOTE: This sucks because we trim and split stuff here already, only to do it again when we use this function!
func headerLines(header []byte) [][]byte {
var lines [][]byte
r := bufio.NewReader(bytes.NewReader(header))
for {
b, err := r.ReadBytes('\n')
if err != nil {
if err != io.EOF {
panic(errors.Wrap(err, "failed to read header line"))
}
break
}
switch {
case len(bytes.TrimSpace(b)) == 0:
lines = append(lines, b)
case len(bytes.SplitN(b, []byte(": "), 2)) != 2:
lines[len(lines)-1] = append(lines[len(lines)-1], b...)
default:
lines = append(lines, b)
}
}
return lines
}

View File

@ -18,7 +18,6 @@
package imap
import (
"errors"
"fmt"
"net/mail"
"strings"
@ -30,6 +29,7 @@ import (
"github.com/ProtonMail/proton-bridge/pkg/parallel"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
"github.com/emersion/go-imap"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -359,9 +359,8 @@ func (im *imapMailbox) SearchMessages(isUID bool, criteria *imap.SearchCriteria)
}
}
// In order to speed up search it is not needed to check
// if IsFullHeaderCached.
header := storeMessage.GetHeader()
// In order to speed up search it is not needed to check if IsFullHeaderCached.
header := storeMessage.GetMIMEHeader()
if !criteria.SentBefore.IsZero() || !criteria.SentSince.IsZero() {
t, err := mail.Header(header).Date()

View File

@ -102,7 +102,8 @@ type storeMessageProvider interface {
SetSize(int64) error
SetHeader([]byte) error
GetHeader() textproto.MIMEHeader
GetHeader() []byte
GetMIMEHeader() textproto.MIMEHeader
IsFullHeaderCached() bool
SetBodyStructure(*pkgMsg.BodyStructure) error
GetBodyStructure() (*pkgMsg.BodyStructure, error)

View File

@ -25,6 +25,7 @@ import (
pkgMsg "github.com/ProtonMail/proton-bridge/pkg/message"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
"github.com/pkg/errors"
bolt "go.etcd.io/bbolt"
)
@ -153,15 +154,27 @@ func (message *Message) getRawHeader() (raw []byte, err error) {
}
// GetHeader will return cached header from DB.
func (message *Message) GetHeader() textproto.MIMEHeader {
func (message *Message) GetHeader() []byte {
raw, err := message.getRawHeader()
if err != nil && raw == nil {
return textproto.MIMEHeader(message.msg.Header)
if err != nil {
panic(errors.Wrap(err, "failed to get raw message header"))
}
return raw
}
// GetMIMEHeader will return cached header from DB, parsed as a textproto.MIMEHeader.
func (message *Message) GetMIMEHeader() textproto.MIMEHeader {
raw, err := message.getRawHeader()
if err != nil {
panic(errors.Wrap(err, "failed to get raw message header"))
}
header, err := textproto.NewReader(bufio.NewReader(bytes.NewReader(raw))).ReadMIMEHeader()
if err != nil {
return textproto.MIMEHeader(message.msg.Header)
}
return header
}

View File

@ -6,9 +6,10 @@ package mocks
import (
context "context"
reflect "reflect"
pmapi "github.com/ProtonMail/proton-bridge/pkg/pmapi"
gomock "github.com/golang/mock/gomock"
reflect "reflect"
)
// MockPanicHandler is a mock of PanicHandler interface

View File

@ -5,9 +5,10 @@
package mocks
import (
gomock "github.com/golang/mock/gomock"
reflect "reflect"
time "time"
gomock "github.com/golang/mock/gomock"
)
// MockListener is a mock of Listener interface

View File

@ -5,10 +5,11 @@
package mocks
import (
reflect "reflect"
imap "github.com/emersion/go-imap"
sasl "github.com/emersion/go-sasl"
gomock "github.com/golang/mock/gomock"
reflect "reflect"
)
// MockPanicHandler is a mock of PanicHandler interface

View File

@ -5,9 +5,10 @@
package mocks
import (
gomock "github.com/golang/mock/gomock"
reflect "reflect"
time "time"
gomock "github.com/golang/mock/gomock"
)
// MockListener is a mock of Listener interface

View File

@ -5,10 +5,11 @@
package mocks
import (
reflect "reflect"
store "github.com/ProtonMail/proton-bridge/internal/store"
credentials "github.com/ProtonMail/proton-bridge/internal/users/credentials"
gomock "github.com/golang/mock/gomock"
reflect "reflect"
)
// MockLocator is a mock of Locator interface

View File

@ -247,12 +247,7 @@ func (bs *BodyStructure) GetMailHeader() (header textproto.MIMEHeader, err error
// GetMailHeaderBytes returns the bytes with main mail header.
// Warning: It can contain extra lines or multipart comment.
func (bs *BodyStructure) GetMailHeaderBytes(wholeMail io.ReadSeeker) (header []byte, err error) {
info, err := bs.getInfo([]int{})
if err != nil {
return
}
headerLength := info.Size - info.BSize
return goToOffsetAndReadNBytes(wholeMail, 0, headerLength)
return bs.GetSectionHeaderBytes(wholeMail, []int{})
}
func goToOffsetAndReadNBytes(wholeMail io.ReadSeeker, offset, length int) ([]byte, error) {
@ -279,6 +274,15 @@ func (bs *BodyStructure) GetSectionHeader(sectionPath []int) (header textproto.M
return
}
func (bs *BodyStructure) GetSectionHeaderBytes(wholeMail io.ReadSeeker, sectionPath []int) (header []byte, err error) {
info, err := bs.getInfo(sectionPath)
if err != nil {
return
}
headerLength := info.Size - info.BSize
return goToOffsetAndReadNBytes(wholeMail, info.Start, headerLength)
}
// IMAPBodyStructure will prepare imap bodystructure recurently for given part.
// Use empty path to create whole email structure.
func (bs *BodyStructure) IMAPBodyStructure(currentPart []int) (imapBS *imap.BodyStructure, err error) {