From 0b95ed4dea7e56128a085dc4678fb4594abb000b Mon Sep 17 00:00:00 2001 From: James Houlahan Date: Fri, 30 Apr 2021 10:04:41 +0200 Subject: [PATCH] GODT-1146: Refactor header filtering --- internal/frontend/qt-ie/ui.go | 2 +- internal/frontend/qt/ui.go | 2 +- internal/imap/mailbox_fetch.go | 78 ++++--------------- internal/imap/mailbox_fetch_test.go | 67 ++++++++++++++++ internal/imap/mailbox_header.go | 104 +++++++++++++++++++++++++ internal/imap/mailbox_messages.go | 7 +- internal/imap/store.go | 3 +- internal/store/message.go | 19 ++++- internal/store/mocks/mocks.go | 3 +- internal/store/mocks/utils_mocks.go | 3 +- internal/transfer/mocks/mocks.go | 3 +- internal/users/mocks/listener_mocks.go | 3 +- internal/users/mocks/mocks.go | 3 +- pkg/message/section.go | 16 ++-- 14 files changed, 230 insertions(+), 83 deletions(-) create mode 100644 internal/imap/mailbox_fetch_test.go create mode 100644 internal/imap/mailbox_header.go diff --git a/internal/frontend/qt-ie/ui.go b/internal/frontend/qt-ie/ui.go index 79f19c98..a5fdb0db 100644 --- a/internal/frontend/qt-ie/ui.go +++ b/internal/frontend/qt-ie/ui.go @@ -77,7 +77,7 @@ type GoQMLInterface struct { _ string `property:"credentialsNotRemoved"` _ string `property:"versionCheckFailed"` // - _ func(isAvailable bool) `signal:"setConnectionStatus"` + _ func(isAvailable bool) `signal:"setConnectionStatus"` _ func() `slot:"setToRestart"` diff --git a/internal/frontend/qt/ui.go b/internal/frontend/qt/ui.go index c8e2eda1..21be8472 100644 --- a/internal/frontend/qt/ui.go +++ b/internal/frontend/qt/ui.go @@ -83,7 +83,7 @@ type GoQMLInterface struct { _ float32 `property:"progress"` _ string `property:"progressDescription"` - _ func(isAvailable bool) `signal:"setConnectionStatus"` + _ func(isAvailable bool) `signal:"setConnectionStatus"` _ func() `slot:"setToRestart"` diff --git a/internal/imap/mailbox_fetch.go b/internal/imap/mailbox_fetch.go index 31916198..b1f8d724 100644 --- a/internal/imap/mailbox_fetch.go +++ b/internal/imap/mailbox_fetch.go @@ -20,9 +20,6 @@ package imap import ( "bytes" "context" - "fmt" - "net/textproto" - "sort" "github.com/ProtonMail/proton-bridge/internal/imap/cache" "github.com/ProtonMail/proton-bridge/pkg/message" @@ -53,7 +50,7 @@ func (im *imapMailbox) getMessage( case imap.FetchEnvelope: // No need to check IsFullHeaderCached here. API header // contain enough information to build the envelope. - msg.Envelope = message.GetEnvelope(m, storeMessage.GetHeader()) + msg.Envelope = message.GetEnvelope(m, storeMessage.GetMIMEHeader()) case imap.FetchBody, imap.FetchBodyStructure: structure, err := im.getBodyStructure(storeMessage) if err != nil { @@ -238,30 +235,31 @@ func isMessageInDraftFolder(m *pmapi.Message) bool { // This will download message (or read from cache) and pick up the section, // extract data (header,body, both) and trim the output if needed. -func (im *imapMailbox) getMessageBodySection( //nolint[funlen] +// +// In order to speed up (avoid download and decryptions) we +// cache the header. If a mail header was requested and DB +// contains full header (it means it was already built once) +// the DB header can be used without downloading and decrypting. +// Otherwise header is incomplete and clients would have issues +// e.g. AppleMail expects `text/plain` in HTML mails. +// +// For all other cases it is necessary to download and decrypt the message +// and drop the header which was obtained from cache. The header will +// will be stored in DB once successfully built. Check `getBodyAndStructure`. +func (im *imapMailbox) getMessageBodySection( storeMessage storeMessageProvider, section *imap.BodySectionName, msgBuildCountHistogram *msgBuildCountHistogram, ) (imap.Literal, error) { - var header textproto.MIMEHeader - var extraNewlineAfterHeader bool + var header []byte var response []byte im.log.WithField("msgID", storeMessage.ID()).Trace("Getting message body") isMainHeaderRequested := len(section.Path) == 0 && section.Specifier == imap.HeaderSpecifier if isMainHeaderRequested && storeMessage.IsFullHeaderCached() { - // In order to speed up (avoid download and decryptions) we - // cache the header. If a mail header was requested and DB - // contains full header (it means it was already built once) - // the DB header can be used without downloading and decrypting. - // Otherwise header is incomplete and clients would have issues - // e.g. AppleMail expects `text/plain` in HTML mails. header = storeMessage.GetHeader() } else { - // For all other cases it is necessary to download and decrypt the message - // and drop the header which was obtained from cache. The header will - // will be stored in DB once successfully built. Check `getBodyAndStructure`. structure, bodyReader, err := im.getBodyAndStructure(storeMessage, msgBuildCountHistogram) if err != nil { return nil, err @@ -278,10 +276,7 @@ func (im *imapMailbox) getMessageBodySection( //nolint[funlen] case section.Specifier == imap.MIMESpecifier: // The MIME part specifier refers to the [MIME-IMB] header for this part. fallthrough case section.Specifier == imap.HeaderSpecifier: - if content, err := structure.GetSectionContent(bodyReader, section.Path); err == nil && content != nil { - extraNewlineAfterHeader = true - } - header, err = structure.GetSectionHeader(section.Path) + header, err = structure.GetSectionHeaderBytes(bodyReader, section.Path) default: err = errors.New("Unknown specifier " + string(section.Specifier)) } @@ -292,54 +287,13 @@ func (im *imapMailbox) getMessageBodySection( //nolint[funlen] } if header != nil { - response = filteredHeaderAsBytes(header, section) - // The blank line is included in all header fetches, - // except in the case of a message which has no body. - if extraNewlineAfterHeader { - response = append(response, []byte("\r\n")...) - } + response = filterHeader(header, section) } // Trim any output if requested. return bytes.NewBuffer(section.ExtractPartial(response)), nil } -// filteredHeaderAsBytes filters the header fields by section fields and it -// returns the filtered fields as bytes. -// Options are: all fields, only selected fields, all fields except selected. -func filteredHeaderAsBytes(header textproto.MIMEHeader, section *imap.BodySectionName) []byte { - // remove fields - if len(section.Fields) != 0 && section.NotFields { - for _, field := range section.Fields { - header.Del(field) - } - } - - fields := make([]string, 0, len(header)) - if len(section.Fields) == 0 || section.NotFields { // add all and sort - for f := range header { - fields = append(fields, f) - } - sort.Strings(fields) - } else { // add only requested (in requested order) - for _, f := range section.Fields { - fields = append(fields, textproto.CanonicalMIMEHeaderKey(f)) - } - } - - headerBuf := &bytes.Buffer{} - for _, canonical := range fields { - if values, ok := header[canonical]; !ok { - continue - } else { - for _, val := range values { - fmt.Fprintf(headerBuf, "%s: %s\r\n", canonical, val) - } - } - } - return headerBuf.Bytes() -} - // buildMessage from PM to IMAP. func (im *imapMailbox) buildMessage(m *pmapi.Message) (*message.BodyStructure, []byte, error) { body, err := im.builder.NewJobWithOptions( diff --git a/internal/imap/mailbox_fetch_test.go b/internal/imap/mailbox_fetch_test.go new file mode 100644 index 00000000..22408888 --- /dev/null +++ b/internal/imap/mailbox_fetch_test.go @@ -0,0 +1,67 @@ +// Copyright (c) 2021 Proton Technologies AG +// +// This file is part of ProtonMail Bridge. +// +// ProtonMail Bridge is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// ProtonMail Bridge is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with ProtonMail Bridge. If not, see . + +package imap + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFilterHeader(t *testing.T) { + const header = "To: somebody\r\nFrom: somebody else\r\nSubject: this is\r\n\ta multiline field\r\n\r\n" + + assert.Equal(t, "To: somebody\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "To") + }))) + + assert.Equal(t, "From: somebody else\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "From") + }))) + + assert.Equal(t, "To: somebody\r\nFrom: somebody else\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "To") || strings.EqualFold(field, "From") + }))) + + assert.Equal(t, "Subject: this is\r\n\ta multiline field\r\n\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "Subject") + }))) +} + +// TestFilterHeaderNoNewline tests that we don't include a trailing newline when filtering +// if the original header also lacks one (which it can legally do if there is no body). +func TestFilterHeaderNoNewline(t *testing.T) { + const header = "To: somebody\r\nFrom: somebody else\r\nSubject: this is\r\n\ta multiline field\r\n" + + assert.Equal(t, "To: somebody\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "To") + }))) + + assert.Equal(t, "From: somebody else\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "From") + }))) + + assert.Equal(t, "To: somebody\r\nFrom: somebody else\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "To") || strings.EqualFold(field, "From") + }))) + + assert.Equal(t, "Subject: this is\r\n\ta multiline field\r\n", string(filterHeaderLines([]byte(header), func(field string) bool { + return strings.EqualFold(field, "Subject") + }))) +} diff --git a/internal/imap/mailbox_header.go b/internal/imap/mailbox_header.go new file mode 100644 index 00000000..fd5d3fe0 --- /dev/null +++ b/internal/imap/mailbox_header.go @@ -0,0 +1,104 @@ +// Copyright (c) 2021 Proton Technologies AG +// +// This file is part of ProtonMail Bridge. +// +// ProtonMail Bridge is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// ProtonMail Bridge is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with ProtonMail Bridge. If not, see . + +package imap + +import ( + "bufio" + "bytes" + "io" + "strings" + + "github.com/emersion/go-imap" + "github.com/pkg/errors" +) + +func filterHeader(header []byte, section *imap.BodySectionName) []byte { + // Empty section.Fields means BODY[HEADER] was requested so we should return the full header. + if len(section.Fields) == 0 { + return header + } + + fieldMap := make(map[string]struct{}) + + for _, field := range section.Fields { + fieldMap[strings.ToLower(field)] = struct{}{} + } + + return filterHeaderLines(header, func(field string) bool { + _, ok := fieldMap[strings.ToLower(field)] + + if section.NotFields { + ok = !ok + } + + return ok + }) +} + +func filterHeaderLines(header []byte, wantField func(string) bool) []byte { + var res []byte + + for _, line := range headerLines(header) { + if len(bytes.TrimSpace(line)) == 0 { + res = append(res, line...) + } else { + split := bytes.SplitN(line, []byte(": "), 2) + + if len(split) != 2 { + continue + } + + if wantField(string(bytes.ToLower(split[0]))) { + res = append(res, line...) + } + } + } + + return res +} + +// NOTE: This sucks because we trim and split stuff here already, only to do it again when we use this function! +func headerLines(header []byte) [][]byte { + var lines [][]byte + + r := bufio.NewReader(bytes.NewReader(header)) + + for { + b, err := r.ReadBytes('\n') + if err != nil { + if err != io.EOF { + panic(errors.Wrap(err, "failed to read header line")) + } + + break + } + + switch { + case len(bytes.TrimSpace(b)) == 0: + lines = append(lines, b) + + case len(bytes.SplitN(b, []byte(": "), 2)) != 2: + lines[len(lines)-1] = append(lines[len(lines)-1], b...) + + default: + lines = append(lines, b) + } + } + + return lines +} diff --git a/internal/imap/mailbox_messages.go b/internal/imap/mailbox_messages.go index e92ae88c..3a28fca1 100644 --- a/internal/imap/mailbox_messages.go +++ b/internal/imap/mailbox_messages.go @@ -18,7 +18,6 @@ package imap import ( - "errors" "fmt" "net/mail" "strings" @@ -30,6 +29,7 @@ import ( "github.com/ProtonMail/proton-bridge/pkg/parallel" "github.com/ProtonMail/proton-bridge/pkg/pmapi" "github.com/emersion/go-imap" + "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -359,9 +359,8 @@ func (im *imapMailbox) SearchMessages(isUID bool, criteria *imap.SearchCriteria) } } - // In order to speed up search it is not needed to check - // if IsFullHeaderCached. - header := storeMessage.GetHeader() + // In order to speed up search it is not needed to check if IsFullHeaderCached. + header := storeMessage.GetMIMEHeader() if !criteria.SentBefore.IsZero() || !criteria.SentSince.IsZero() { t, err := mail.Header(header).Date() diff --git a/internal/imap/store.go b/internal/imap/store.go index 44e9191d..7088cd87 100644 --- a/internal/imap/store.go +++ b/internal/imap/store.go @@ -102,7 +102,8 @@ type storeMessageProvider interface { SetSize(int64) error SetHeader([]byte) error - GetHeader() textproto.MIMEHeader + GetHeader() []byte + GetMIMEHeader() textproto.MIMEHeader IsFullHeaderCached() bool SetBodyStructure(*pkgMsg.BodyStructure) error GetBodyStructure() (*pkgMsg.BodyStructure, error) diff --git a/internal/store/message.go b/internal/store/message.go index cd9d9a64..fe05609e 100644 --- a/internal/store/message.go +++ b/internal/store/message.go @@ -25,6 +25,7 @@ import ( pkgMsg "github.com/ProtonMail/proton-bridge/pkg/message" "github.com/ProtonMail/proton-bridge/pkg/pmapi" + "github.com/pkg/errors" bolt "go.etcd.io/bbolt" ) @@ -153,15 +154,27 @@ func (message *Message) getRawHeader() (raw []byte, err error) { } // GetHeader will return cached header from DB. -func (message *Message) GetHeader() textproto.MIMEHeader { +func (message *Message) GetHeader() []byte { raw, err := message.getRawHeader() - if err != nil && raw == nil { - return textproto.MIMEHeader(message.msg.Header) + if err != nil { + panic(errors.Wrap(err, "failed to get raw message header")) } + + return raw +} + +// GetMIMEHeader will return cached header from DB, parsed as a textproto.MIMEHeader. +func (message *Message) GetMIMEHeader() textproto.MIMEHeader { + raw, err := message.getRawHeader() + if err != nil { + panic(errors.Wrap(err, "failed to get raw message header")) + } + header, err := textproto.NewReader(bufio.NewReader(bytes.NewReader(raw))).ReadMIMEHeader() if err != nil { return textproto.MIMEHeader(message.msg.Header) } + return header } diff --git a/internal/store/mocks/mocks.go b/internal/store/mocks/mocks.go index 80813820..acb846be 100644 --- a/internal/store/mocks/mocks.go +++ b/internal/store/mocks/mocks.go @@ -6,9 +6,10 @@ package mocks import ( context "context" + reflect "reflect" + pmapi "github.com/ProtonMail/proton-bridge/pkg/pmapi" gomock "github.com/golang/mock/gomock" - reflect "reflect" ) // MockPanicHandler is a mock of PanicHandler interface diff --git a/internal/store/mocks/utils_mocks.go b/internal/store/mocks/utils_mocks.go index e7f1cbb3..8a6c8677 100644 --- a/internal/store/mocks/utils_mocks.go +++ b/internal/store/mocks/utils_mocks.go @@ -5,9 +5,10 @@ package mocks import ( - gomock "github.com/golang/mock/gomock" reflect "reflect" time "time" + + gomock "github.com/golang/mock/gomock" ) // MockListener is a mock of Listener interface diff --git a/internal/transfer/mocks/mocks.go b/internal/transfer/mocks/mocks.go index afac025d..1c8040e0 100644 --- a/internal/transfer/mocks/mocks.go +++ b/internal/transfer/mocks/mocks.go @@ -5,10 +5,11 @@ package mocks import ( + reflect "reflect" + imap "github.com/emersion/go-imap" sasl "github.com/emersion/go-sasl" gomock "github.com/golang/mock/gomock" - reflect "reflect" ) // MockPanicHandler is a mock of PanicHandler interface diff --git a/internal/users/mocks/listener_mocks.go b/internal/users/mocks/listener_mocks.go index e7f1cbb3..8a6c8677 100644 --- a/internal/users/mocks/listener_mocks.go +++ b/internal/users/mocks/listener_mocks.go @@ -5,9 +5,10 @@ package mocks import ( - gomock "github.com/golang/mock/gomock" reflect "reflect" time "time" + + gomock "github.com/golang/mock/gomock" ) // MockListener is a mock of Listener interface diff --git a/internal/users/mocks/mocks.go b/internal/users/mocks/mocks.go index a4a8c616..3304b3af 100644 --- a/internal/users/mocks/mocks.go +++ b/internal/users/mocks/mocks.go @@ -5,10 +5,11 @@ package mocks import ( + reflect "reflect" + store "github.com/ProtonMail/proton-bridge/internal/store" credentials "github.com/ProtonMail/proton-bridge/internal/users/credentials" gomock "github.com/golang/mock/gomock" - reflect "reflect" ) // MockLocator is a mock of Locator interface diff --git a/pkg/message/section.go b/pkg/message/section.go index 05049c2c..515b68ca 100644 --- a/pkg/message/section.go +++ b/pkg/message/section.go @@ -247,12 +247,7 @@ func (bs *BodyStructure) GetMailHeader() (header textproto.MIMEHeader, err error // GetMailHeaderBytes returns the bytes with main mail header. // Warning: It can contain extra lines or multipart comment. func (bs *BodyStructure) GetMailHeaderBytes(wholeMail io.ReadSeeker) (header []byte, err error) { - info, err := bs.getInfo([]int{}) - if err != nil { - return - } - headerLength := info.Size - info.BSize - return goToOffsetAndReadNBytes(wholeMail, 0, headerLength) + return bs.GetSectionHeaderBytes(wholeMail, []int{}) } func goToOffsetAndReadNBytes(wholeMail io.ReadSeeker, offset, length int) ([]byte, error) { @@ -279,6 +274,15 @@ func (bs *BodyStructure) GetSectionHeader(sectionPath []int) (header textproto.M return } +func (bs *BodyStructure) GetSectionHeaderBytes(wholeMail io.ReadSeeker, sectionPath []int) (header []byte, err error) { + info, err := bs.getInfo(sectionPath) + if err != nil { + return + } + headerLength := info.Size - info.BSize + return goToOffsetAndReadNBytes(wholeMail, info.Start, headerLength) +} + // IMAPBodyStructure will prepare imap bodystructure recurently for given part. // Use empty path to create whole email structure. func (bs *BodyStructure) IMAPBodyStructure(currentPart []int) (imapBS *imap.BodyStructure, err error) {