From 8ab05a000c2f6d02fdca48e24d4398550fba587b Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 13 Apr 2021 07:49:13 +0200 Subject: [PATCH] GODT-1136 DB Cache header from builder and test --- internal/imap/mailbox_message.go | 187 +++++++----------- internal/imap/mailbox_messages.go | 28 ++- internal/imap/store.go | 5 +- internal/store/message.go | 44 +++++ internal/store/store.go | 53 ++--- internal/users/users.go | 5 + pkg/message/address.go | 46 ----- pkg/message/build_build.go | 5 + pkg/message/build_rfc822.go | 46 +++-- pkg/message/envelope.go | 55 ++++-- pkg/message/message.go | 4 +- pkg/message/section.go | 46 +++-- pkg/message/section_test.go | 18 ++ test/fakeapi/attachments.go | 37 +++- test/fakeapi/controller_control.go | 5 + .../bridge/imap/message/fetch.feature | 4 +- .../bridge/imap/message/fetch_header.feature | 41 ++++ test/imap_actions_messages_test.go | 14 ++ test/liveapi/messages.go | 4 + test/store_checks_test.go | 22 ++- test/store_setup_test.go | 108 ++++++---- 21 files changed, 471 insertions(+), 306 deletions(-) delete mode 100644 pkg/message/address.go create mode 100644 test/features/bridge/imap/message/fetch_header.feature diff --git a/internal/imap/mailbox_message.go b/internal/imap/mailbox_message.go index d52e8932..52854259 100644 --- a/internal/imap/mailbox_message.go +++ b/internal/imap/mailbox_message.go @@ -239,7 +239,9 @@ func (im *imapMailbox) getMessage(storeMessage storeMessageProvider, items []ima for _, item := range items { switch item { case imap.FetchEnvelope: - msg.Envelope = message.GetEnvelope(m) + // No need to check IsFullHeaderCached here. API header + // contain enough information to build the envelope. + msg.Envelope = message.GetEnvelope(m, storeMessage.GetHeader()) case imap.FetchBody, imap.FetchBodyStructure: var structure *message.BodyStructure structure, err = im.getBodyStructure(storeMessage) @@ -297,6 +299,7 @@ func (im *imapMailbox) getMessage(storeMessage storeMessageProvider, items []ima func (im *imapMailbox) getLiteralForSection(itemSection imap.FetchItem, msg *imap.Message, storeMessage storeMessageProvider, msgBuildCountHistogram *msgBuildCountHistogram) error { section, err := imap.ParseBodySectionName(itemSection) if err != nil { + log.WithError(err).Warn("Failed to parse body section name; part will be skipped") return nil //nolint[nilerr] ignore error } @@ -331,6 +334,7 @@ func (im *imapMailbox) getBodyStructure(storeMessage storeMessageProvider) (bs * return } +//nolint[funlen] Jakub will fix in refactor func (im *imapMailbox) getBodyAndStructure(storeMessage storeMessageProvider, msgBuildCountHistogram *msgBuildCountHistogram) ( structure *message.BodyStructure, bodyReader *bytes.Reader, err error, @@ -358,10 +362,17 @@ func (im *imapMailbox) getBodyAndStructure(storeMessage storeMessageProvider, ms } } if err == nil && structure != nil && len(body) > 0 { - if err := storeMessage.SetContentTypeAndHeader(m.MIMEType, m.Header); err != nil { - im.log.WithError(err). + header, errHead := structure.GetMailHeaderBytes(bytes.NewReader(body)) + if errHead == nil { + if errHead := storeMessage.SetHeader(header); errHead != nil { + im.log.WithError(errHead). + WithField("msgID", m.ID). + Warn("Cannot update header after building") + } + } else { + im.log.WithError(errHead). WithField("msgID", m.ID). - Warn("Cannot update header while building") + Warn("Cannot get header bytes after building") } if msgBuildCountHistogram != nil { times, err := storeMessage.IncreaseBuildCount() @@ -399,40 +410,32 @@ func isMessageInDraftFolder(m *pmapi.Message) bool { // This will download message (or read from cache) and pick up the section, // extract data (header,body, both) and trim the output if needed. -func (im *imapMailbox) getMessageBodySection(storeMessage storeMessageProvider, section *imap.BodySectionName, msgBuildCountHistogram *msgBuildCountHistogram) (literal imap.Literal, err error) { // nolint[funlen] - var ( - structure *message.BodyStructure - bodyReader *bytes.Reader - header textproto.MIMEHeader - response []byte - ) +func (im *imapMailbox) getMessageBodySection( + storeMessage storeMessageProvider, + section *imap.BodySectionName, + msgBuildCountHistogram *msgBuildCountHistogram, +) (imap.Literal, error) { + var header textproto.MIMEHeader + var response []byte im.log.WithField("msgID", storeMessage.ID()).Trace("Getting message body") - m := storeMessage.Message() - - if len(section.Path) == 0 && section.Specifier == imap.HeaderSpecifier { - // We can extract message header without decrypting. - header = message.GetHeader(m) - // We need to ensure we use the correct content-type, - // otherwise AppleMail expects `text/plain` in HTML mails. - if header.Get("Content-Type") == "" { - if err = im.fetchMessage(m); err != nil { - return - } - if _, err = im.setMessageContentType(m); err != nil { - return - } - if err = storeMessage.SetContentTypeAndHeader(m.MIMEType, m.Header); err != nil { - return - } - header = message.GetHeader(m) - } + isMainHeaderRequested := len(section.Path) == 0 && section.Specifier == imap.HeaderSpecifier + if isMainHeaderRequested && storeMessage.IsFullHeaderCached() { + // In order to speed up (avoid download and decryptions) we + // cache the header. If a mail header was requested and DB + // contains full header (it means it was already built once) + // the DB header can be used without downloading and decrypting. + // Otherwise header is incomplete and clients would have issues + // e.g. AppleMail expects `text/plain` in HTML mails. + header = storeMessage.GetHeader() } else { - // The rest of cases need download and decrypt. - structure, bodyReader, err = im.getBodyAndStructure(storeMessage, msgBuildCountHistogram) + // For all other cases it is necessary to download and decrypt the message + // and drop the header which was obtained from cache. The header will + // will be stored in DB once successfully built. Check `getBodyAndStructure`. + structure, bodyReader, err := im.getBodyAndStructure(storeMessage, msgBuildCountHistogram) if err != nil { - return + return nil, err } switch { @@ -443,107 +446,61 @@ func (im *imapMailbox) getMessageBodySection(storeMessage storeMessageProvider, // The TEXT specifier refers to the content of the message (or section), omitting the [RFC-2822] header. // Non-empty section with no specifier (imap.EntireSpecifier) refers to section content without header. response, err = structure.GetSectionContent(bodyReader, section.Path) - case section.Specifier == imap.MIMESpecifier: - // The MIME part specifier refers to the [MIME-IMB] header for this part. + case section.Specifier == imap.MIMESpecifier: // The MIME part specifier refers to the [MIME-IMB] header for this part. fallthrough case section.Specifier == imap.HeaderSpecifier: header, err = structure.GetSectionHeader(section.Path) default: err = errors.New("Unknown specifier " + string(section.Specifier)) } + + if err != nil { + return nil, err + } } - if err != nil { - return - } - - // Filter header. Options are: all fields, only selected fields, all fields except selected. if header != nil { - // remove fields - if len(section.Fields) != 0 && section.NotFields { - for _, field := range section.Fields { - header.Del(field) - } - } - - fields := make([]string, 0, len(header)) - if len(section.Fields) == 0 || section.NotFields { // add all and sort - for f := range header { - fields = append(fields, f) - } - sort.Strings(fields) - } else { // add only requested (in requested order) - for _, f := range section.Fields { - fields = append(fields, textproto.CanonicalMIMEHeaderKey(f)) - } - } - - headerBuf := &bytes.Buffer{} - for _, canonical := range fields { - if values, ok := header[canonical]; !ok { - continue - } else { - for _, val := range values { - fmt.Fprintf(headerBuf, "%s: %s\r\n", canonical, val) - } - } - } - response = headerBuf.Bytes() + response = filteredHeaderAsBytes(header, section) } // Trim any output if requested. - literal = bytes.NewBuffer(section.ExtractPartial(response)) - return literal, nil + return bytes.NewBuffer(section.ExtractPartial(response)), nil } -func (im *imapMailbox) fetchMessage(m *pmapi.Message) (err error) { - im.log.Trace("Fetching message") - - complete, err := im.storeMailbox.FetchMessage(m.ID) - if err != nil { - im.log.WithError(err).Error("Could not get message from store") - return +// filteredHeaderAsBytes filters the header fields by section fields and it +// returns the filtered fields as bytes. +// Options are: all fields, only selected fields, all fields except selected. +func filteredHeaderAsBytes(header textproto.MIMEHeader, section *imap.BodySectionName) []byte { + // remove fields + if len(section.Fields) != 0 && section.NotFields { + for _, field := range section.Fields { + header.Del(field) + } } - *m = *complete.Message() - - return -} - -const ( - noMultipart = iota // only body - simpleMultipart // body + attachment or inline - complexMultipart // mixed, rfc822, alternatives, ... -) - -func (im *imapMailbox) setMessageContentType(m *pmapi.Message) (multipartType int, err error) { - if m.MIMEType == "" { - err = fmt.Errorf("trying to set Content-Type without MIME TYPE") - return - } - // message.MIMEType can have just three values from our server: - // * `text/html` (refers to body type, but might contain attachments and inlines) - // * `text/plain` (refers to body type, but might contain attachments and inlines) - // * `multipart/mixed` (refers to external message with multipart structure) - // The proper header content fields must be set and saved to DB based MIMEType and content. - multipartType = noMultipart - if m.MIMEType == pmapi.ContentTypeMultipartMixed { - multipartType = complexMultipart - } else if m.NumAttachments != 0 { - multipartType = simpleMultipart + fields := make([]string, 0, len(header)) + if len(section.Fields) == 0 || section.NotFields { // add all and sort + for f := range header { + fields = append(fields, f) + } + sort.Strings(fields) + } else { // add only requested (in requested order) + for _, f := range section.Fields { + fields = append(fields, textproto.CanonicalMIMEHeaderKey(f)) + } } - h := textproto.MIMEHeader(m.Header) - if multipartType == noMultipart { - message.SetBodyContentFields(&h, m) - } else { - h.Set("Content-Type", - fmt.Sprintf("%s; boundary=%s", "multipart/mixed", message.GetBoundary(m)), - ) + headerBuf := &bytes.Buffer{} + for _, canonical := range fields { + if values, ok := header[canonical]; !ok { + continue + } else { + for _, val := range values { + fmt.Fprintf(headerBuf, "%s: %s\r\n", canonical, val) + } + } } - m.Header = mail.Header(h) - - return + return headerBuf.Bytes() } // buildMessage from PM to IMAP. diff --git a/internal/imap/mailbox_messages.go b/internal/imap/mailbox_messages.go index 1a26761a..00e3f2a9 100644 --- a/internal/imap/mailbox_messages.go +++ b/internal/imap/mailbox_messages.go @@ -358,23 +358,29 @@ func (im *imapMailbox) SearchMessages(isUID bool, criteria *imap.SearchCriteria) continue } } + + // In order to speed up search it is not needed to check + // if IsFullHeaderCached. + header := storeMessage.GetHeader() + if !criteria.SentBefore.IsZero() || !criteria.SentSince.IsZero() { - if t, err := m.Header.Date(); err == nil && !t.IsZero() { - if !criteria.SentBefore.IsZero() { - if truncated := criteria.SentBefore.Truncate(24 * time.Hour); t.Unix() > truncated.Unix() { - continue - } + t, err := mail.Header(header).Date() + if err != nil || t.IsZero() { + t = time.Unix(m.Time, 0) + } + if !criteria.SentBefore.IsZero() { + if truncated := criteria.SentBefore.Truncate(24 * time.Hour); t.Unix() > truncated.Unix() { + continue } - if !criteria.SentSince.IsZero() { - if truncated := criteria.SentSince.Truncate(24 * time.Hour); t.Unix() < truncated.Unix() { - continue - } + } + if !criteria.SentSince.IsZero() { + if truncated := criteria.SentSince.Truncate(24 * time.Hour); t.Unix() < truncated.Unix() { + continue } } } // Filter by headers. - header := message.GetHeader(m) headerMatch := true for criteriaKey, criteriaValues := range criteria.Header { for _, criteriaValue := range criteriaValues { @@ -382,6 +388,8 @@ func (im *imapMailbox) SearchMessages(isUID bool, criteria *imap.SearchCriteria) continue } switch criteriaKey { + case "Subject": + headerMatch = strings.Contains(strings.ToLower(m.Subject), strings.ToLower(criteriaValue)) case "From": headerMatch = addressMatch([]*mail.Address{m.Sender}, criteriaValue) case "To": diff --git a/internal/imap/store.go b/internal/imap/store.go index 1566ccba..44e9191d 100644 --- a/internal/imap/store.go +++ b/internal/imap/store.go @@ -20,6 +20,7 @@ package imap import ( "io" "net/mail" + "net/textproto" "github.com/ProtonMail/gopenpgp/v2/crypto" "github.com/ProtonMail/proton-bridge/internal/imap/uidplus" @@ -100,7 +101,9 @@ type storeMessageProvider interface { IsMarkedDeleted() bool SetSize(int64) error - SetContentTypeAndHeader(string, mail.Header) error + SetHeader([]byte) error + GetHeader() textproto.MIMEHeader + IsFullHeaderCached() bool SetBodyStructure(*pkgMsg.BodyStructure) error GetBodyStructure() (*pkgMsg.BodyStructure, error) IncreaseBuildCount() (uint32, error) diff --git a/internal/store/message.go b/internal/store/message.go index 172fe2c0..cd9d9a64 100644 --- a/internal/store/message.go +++ b/internal/store/message.go @@ -18,7 +18,10 @@ package store import ( + "bufio" + "bytes" "net/mail" + "net/textproto" pkgMsg "github.com/ProtonMail/proton-bridge/pkg/message" "github.com/ProtonMail/proton-bridge/pkg/pmapi" @@ -103,6 +106,8 @@ func (message *Message) SetSize(size int64) error { // header of decrypted message. This should not trigger any IMAP update. // NOTE: Content type depends on details of decrypted message which we want to // cache. +// +// Deprecated: Use SetHeader instead. func (message *Message) SetContentTypeAndHeader(mimeType string, header mail.Header) error { message.msg.MIMEType = mimeType message.msg.Header = header @@ -121,6 +126,45 @@ func (message *Message) SetContentTypeAndHeader(mimeType string, header mail.Hea return message.store.db.Update(txUpdate) } +// SetHeader checks header can be parsed and if yes it stores header bytes in +// database. +func (message *Message) SetHeader(header []byte) error { + _, err := textproto.NewReader(bufio.NewReader(bytes.NewReader(header))).ReadMIMEHeader() + if err != nil { + return err + } + return message.store.db.Update(func(tx *bolt.Tx) error { + return tx.Bucket(headersBucket).Put([]byte(message.ID()), header) + }) +} + +// IsFullHeaderCached will check that valid full header is stored in DB. +func (message *Message) IsFullHeaderCached() bool { + header, err := message.getRawHeader() + return err == nil && header != nil +} + +func (message *Message) getRawHeader() (raw []byte, err error) { + err = message.store.db.View(func(tx *bolt.Tx) error { + raw = tx.Bucket(headersBucket).Get([]byte(message.ID())) + return nil + }) + return +} + +// GetHeader will return cached header from DB. +func (message *Message) GetHeader() textproto.MIMEHeader { + raw, err := message.getRawHeader() + if err != nil && raw == nil { + return textproto.MIMEHeader(message.msg.Header) + } + header, err := textproto.NewReader(bufio.NewReader(bytes.NewReader(raw))).ReadMIMEHeader() + if err != nil { + return textproto.MIMEHeader(message.msg.Header) + } + return header +} + // SetBodyStructure stores serialized body structure in database. func (message *Message) SetBodyStructure(bs *pkgMsg.BodyStructure) error { txUpdate := func(tx *bolt.Tx) error { diff --git a/internal/store/store.go b/internal/store/store.go index 78c80c79..2f767df7 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -51,7 +51,9 @@ var ( // Database structure: // * metadata - // * {messageID} -> message data (subject, from, to, time, headers, body size, ...) + // * {messageID} -> message data (subject, from, to, time, body size, ...) + // * headers + // * {messageID} -> header bytes // * bodystructure // * {messageID} -> message body structure // * msgbuildcount @@ -77,6 +79,7 @@ var ( // * deleted_ids (can be missing or have no keys) // * {messageID} -> true metadataBucket = []byte("metadata") //nolint[gochecknoglobals] + headersBucket = []byte("headers") //nolint[gochecknoglobals] bodystructureBucket = []byte("bodystructure") //nolint[gochecknoglobals] msgBuildCountBucket = []byte("msgbuildcount") //nolint[gochecknoglobals] countsBucket = []byte("counts") //nolint[gochecknoglobals] @@ -199,40 +202,24 @@ func openBoltDatabase(filePath string) (db *bolt.DB, err error) { } tx := func(tx *bolt.Tx) (err error) { - if _, err = tx.CreateBucketIfNotExists(metadataBucket); err != nil { - return + buckets := [][]byte{ + metadataBucket, + headersBucket, + bodystructureBucket, + msgBuildCountBucket, + countsBucket, + addressInfoBucket, + addressModeBucket, + syncStateBucket, + mailboxesBucket, + mboxVersionBucket, } - if _, err = tx.CreateBucketIfNotExists(bodystructureBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(msgBuildCountBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(countsBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(addressInfoBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(addressModeBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(syncStateBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(mailboxesBucket); err != nil { - return - } - - if _, err = tx.CreateBucketIfNotExists(mboxVersionBucket); err != nil { - return + for _, bucket := range buckets { + if _, err = tx.CreateBucketIfNotExists(bucket); err != nil { + err = errors.Wrap(err, string(bucket)) + return + } } return diff --git a/internal/users/users.go b/internal/users/users.go index 2a34c6ae..0e087557 100644 --- a/internal/users/users.go +++ b/internal/users/users.go @@ -23,6 +23,7 @@ import ( "sync" "github.com/ProtonMail/proton-bridge/internal/events" + imapcache "github.com/ProtonMail/proton-bridge/internal/imap/cache" "github.com/ProtonMail/proton-bridge/internal/metrics" "github.com/ProtonMail/proton-bridge/pkg/listener" "github.com/ProtonMail/proton-bridge/pkg/pmapi" @@ -404,6 +405,10 @@ func (u *Users) ClearData() error { result = multierror.Append(result, err) } + // Need to clear imap cache otherwise fetch response will be remembered + // from previous test + imapcache.Clear() + return result } diff --git a/pkg/message/address.go b/pkg/message/address.go deleted file mode 100644 index f18c856f..00000000 --- a/pkg/message/address.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2021 Proton Technologies AG -// -// This file is part of ProtonMail Bridge. -// -// ProtonMail Bridge is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// ProtonMail Bridge is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with ProtonMail Bridge. If not, see . - -package message - -import ( - "net/mail" - "strings" - - "github.com/emersion/go-imap" -) - -func getAddresses(addrs []*mail.Address) (imapAddrs []*imap.Address) { - for _, a := range addrs { - if a == nil { - continue - } - - parts := strings.SplitN(a.Address, "@", 2) - if len(parts) != 2 { - continue - } - - imapAddrs = append(imapAddrs, &imap.Address{ - PersonalName: a.Name, - MailboxName: parts[0], - HostName: parts[1], - }) - } - - return -} diff --git a/pkg/message/build_build.go b/pkg/message/build_build.go index c8d386c0..596f0e7f 100644 --- a/pkg/message/build_build.go +++ b/pkg/message/build_build.go @@ -73,9 +73,14 @@ func buildWorker(buildReqCh <-chan fetchRes, buildResCh chan<- buildRes, wg *syn defer wg.Done() for req := range buildReqCh { + l := log. + WithField("addrID", req.msg.AddressID). + WithField("msgID", req.msg.ID) if kr, err := req.api.KeyRingForAddressID(req.msg.AddressID); err != nil { + l.WithError(err).Warn("Cannot find keyring for address") buildResCh <- newBuildResFailure(req.msg.ID, errors.Wrap(ErrNoSuchKeyRing, err.Error())) } else if literal, err := buildRFC822(kr, req.msg, req.atts, req.opts); err != nil { + l.WithError(err).Warn("Build failed") buildResCh <- newBuildResFailure(req.msg.ID, err) } else { buildResCh <- newBuildResSuccess(req.msg.ID, literal) diff --git a/pkg/message/build_rfc822.go b/pkg/message/build_rfc822.go index d7434097..4a117843 100644 --- a/pkg/message/build_rfc822.go +++ b/pkg/message/build_rfc822.go @@ -187,6 +187,12 @@ func writeAttachmentPart( return errors.Wrap(ErrDecryptionFailed, err.Error()) } + log. + WithField("attID", att.ID). + WithField("msgID", att.MessageID). + WithError(err). + Warn("Attachment decryption failed") + return writeCustomAttachmentPart(w, att, msg, err) } @@ -309,25 +315,13 @@ func getMessageHeader(msg *pmapi.Message, opts JobOptions) message.Header { // n hdr.Set("Bcc", toAddressList(msg.BCCList)) } - // Set Message-Id from ExternalID or ID if it's not already set. - if hdr.Get("Message-Id") == "" { - if msg.ExternalID != "" { - hdr.Set("Message-Id", "<"+msg.ExternalID+">") - } else { - hdr.Set("Message-Id", "<"+msg.ID+"@"+pmapi.InternalIDDomain+">") - } - } + setMessageIDIfNeeded(msg, &hdr) // Sanitize the date; it needs to have a valid unix timestamp. if opts.SanitizeDate { if date, err := rfc5322.ParseDateTime(hdr.Get("Date")); err != nil || date.Before(time.Unix(0, 0)) { - if msgTime := time.Unix(msg.Time, 0); msgTime.After(time.Unix(0, 0)) { - hdr.Set("Date", msgTime.In(time.UTC).Format(time.RFC1123Z)) - } else { - // No message should realistically be older than RFC822 itself. - hdr.Set("Date", time.Date(1982, 8, 13, 0, 0, 0, 0, time.UTC).Format(time.RFC1123Z)) - } - + msgDate := sanitizeMessageDate(msg.Time) + hdr.Set("Date", msgDate.In(time.UTC).Format(time.RFC1123Z)) // We clobbered the date so we save it under X-Original-Date. hdr.Set("X-Original-Date", date.In(time.UTC).Format(time.RFC1123Z)) } @@ -361,6 +355,28 @@ func getMessageHeader(msg *pmapi.Message, opts JobOptions) message.Header { // n return hdr } +// sanitizeMessageDate will return time from msgTime timestamp. If timestamp is +// not after epoch the RFC822 publish day will be used. No message should +// realistically be older than RFC822 itself. +func sanitizeMessageDate(msgTime int64) time.Time { + if msgTime := time.Unix(msgTime, 0); msgTime.After(time.Unix(0, 0)) { + return msgTime + } + return time.Date(1982, 8, 13, 0, 0, 0, 0, time.UTC) +} + +// setMessageIDIfNeeded sets Message-Id from ExternalID or ID if it's not +// already set. +func setMessageIDIfNeeded(msg *pmapi.Message, hdr *message.Header) { + if hdr.Get("Message-Id") == "" { + if msg.ExternalID != "" { + hdr.Set("Message-Id", "<"+msg.ExternalID+">") + } else { + hdr.Set("Message-Id", "<"+msg.ID+"@"+pmapi.InternalIDDomain+">") + } + } +} + func getTextPartHeader(hdr message.Header, body []byte, mimeType string) message.Header { params := make(map[string]string) diff --git a/pkg/message/envelope.go b/pkg/message/envelope.go index 19e1e6cf..6f20de04 100644 --- a/pkg/message/envelope.go +++ b/pkg/message/envelope.go @@ -19,30 +19,49 @@ package message import ( "net/mail" - "time" + "net/textproto" + "strings" "github.com/ProtonMail/proton-bridge/pkg/pmapi" "github.com/emersion/go-imap" ) -func GetEnvelope(m *pmapi.Message) *imap.Envelope { - messageID := m.ExternalID - if messageID == "" { - messageID = m.Header.Get("Message-Id") - } else { - messageID = "<" + messageID + ">" - } +// GetEnvelope will prepare envelope from pmapi message and cached header. +func GetEnvelope(msg *pmapi.Message, header textproto.MIMEHeader) *imap.Envelope { + hdr := toMessageHeader(mail.Header(header)) + setMessageIDIfNeeded(msg, &hdr) return &imap.Envelope{ - Date: time.Unix(m.Time, 0), - Subject: m.Subject, - From: getAddresses([]*mail.Address{m.Sender}), - Sender: getAddresses([]*mail.Address{m.Sender}), - ReplyTo: getAddresses(m.ReplyTos), - To: getAddresses(m.ToList), - Cc: getAddresses(m.CCList), - Bcc: getAddresses(m.BCCList), - InReplyTo: m.Header.Get("In-Reply-To"), - MessageId: messageID, + Date: sanitizeMessageDate(msg.Time), + Subject: msg.Subject, + From: getAddresses([]*mail.Address{msg.Sender}), + Sender: getAddresses([]*mail.Address{msg.Sender}), + ReplyTo: getAddresses(msg.ReplyTos), + To: getAddresses(msg.ToList), + Cc: getAddresses(msg.CCList), + Bcc: getAddresses(msg.BCCList), + InReplyTo: hdr.Get("In-Reply-To"), + MessageId: hdr.Get("Message-Id"), } } + +func getAddresses(addrs []*mail.Address) (imapAddrs []*imap.Address) { + for _, a := range addrs { + if a == nil { + continue + } + + parts := strings.SplitN(a.Address, "@", 2) + if len(parts) != 2 { + continue + } + + imapAddrs = append(imapAddrs, &imap.Address{ + PersonalName: a.Name, + MailboxName: parts[0], + HostName: parts[1], + }) + } + + return +} diff --git a/pkg/message/message.go b/pkg/message/message.go index 1652f67b..453d14a2 100644 --- a/pkg/message/message.go +++ b/pkg/message/message.go @@ -18,8 +18,6 @@ package message import ( - "crypto/sha512" - "fmt" "strings" "github.com/ProtonMail/proton-bridge/pkg/pmapi" @@ -35,7 +33,7 @@ var log = logrus.WithField("pkg", "pkg/message") //nolint[gochecknoglobals] func GetBoundary(m *pmapi.Message) string { // The boundary needs to be deterministic because messages are not supposed to // change. - return fmt.Sprintf("%x", sha512.Sum512_256([]byte(m.ID))) + return newBoundary(m.ID).gen() } func SeparateInlineAttachments(m *pmapi.Message) (atts, inlines []*pmapi.Attachment) { diff --git a/pkg/message/section.go b/pkg/message/section.go index 87333c31..738bfae4 100644 --- a/pkg/message/section.go +++ b/pkg/message/section.go @@ -337,30 +337,40 @@ func (bs *BodyStructure) GetSectionContent(wholeMail io.ReadSeeker, sectionPath if err != nil { return } - if _, err = wholeMail.Seek(int64(info.Start+info.Size-info.BSize), io.SeekStart); err != nil { - return - } - section = make([]byte, info.BSize) - _, err = wholeMail.Read(section) - return + return goToOffsetAndReadNBytes(wholeMail, info.Start+info.Size-info.BSize, info.BSize) +} - /* This is slow: - sectionBuf, err := bs.GetSection(wholeMail, sectionPath) +// GetMailHeader returns the main header of mail. +func (bs *BodyStructure) GetMailHeader() (header textproto.MIMEHeader, err error) { + return bs.GetSectionHeader([]int{}) +} + +// GetMailHeaderBytes returns the bytes with main mail header. +// Warning: It can contain extra lines or multipart comment. +func (bs *BodyStructure) GetMailHeaderBytes(wholeMail io.ReadSeeker) (header []byte, err error) { + info, err := bs.getInfo([]int{}) if err != nil { return } - - tp := textproto.NewReader(bufio.NewReader(buf)) - if _, err = tp.ReadMIMEHeader(); err != nil { - return err - } - - sectionBuf = &bytes.Buffer{} - _, err = io.Copy(sectionBuf, tp.R) - return - */ + headerLength := info.Size - info.BSize + return goToOffsetAndReadNBytes(wholeMail, 0, headerLength) } +func goToOffsetAndReadNBytes(wholeMail io.ReadSeeker, offset, length int) ([]byte, error) { + if length < 1 { + return nil, errors.New("requested non positive length") + } + if offset > 0 { + if _, err := wholeMail.Seek(int64(offset), io.SeekStart); err != nil { + return nil, err + } + } + out := make([]byte, length) + _, err := wholeMail.Read(out) + return out, err +} + +// GetSectionHeader returns the mime header of specified section. func (bs *BodyStructure) GetSectionHeader(sectionPath []int) (header textproto.MIMEHeader, err error) { info, err := bs.getInfo(sectionPath) if err != nil { diff --git a/pkg/message/section_test.go b/pkg/message/section_test.go index e0d7eb27..baf5129f 100644 --- a/pkg/message/section_test.go +++ b/pkg/message/section_test.go @@ -108,6 +108,24 @@ func TestGetSection(t *testing.T) { } } +func TestGetMainHeaderBytes(t *testing.T) { + wantHeader := []byte(`Subject: Sample mail +From: John Doe +To: Mary Smith +Date: Fri, 21 Nov 1997 09:55:06 -0600 +Content-Type: multipart/mixed; boundary="0000MAIN" + +`) + + structReader := strings.NewReader(sampleMail) + bs, err := NewBodyStructure(structReader) + require.NoError(t, err) + + haveHeader, err := bs.GetMailHeaderBytes(strings.NewReader(sampleMail)) + require.NoError(t, err) + require.Equal(t, wantHeader, haveHeader) +} + /* Structure example: HEADER ([RFC-2822] header of the message) TEXT ([RFC-2822] text body of the message) MULTIPART/MIXED diff --git a/test/fakeapi/attachments.go b/test/fakeapi/attachments.go index 1419a65b..3accb176 100644 --- a/test/fakeapi/attachments.go +++ b/test/fakeapi/attachments.go @@ -18,20 +18,51 @@ package fakeapi import ( + "bytes" "encoding/base64" + "fmt" "io" "io/ioutil" - "strings" + "net/textproto" "github.com/ProtonMail/proton-bridge/pkg/pmapi" ) +// dataPacketOutlineLightInstagram48png is data packet with encrypted data and +// session key +// +// gpg: encrypted with 2048-bit RSA key, ID 70B8CA23079F2167, created 2019-09-23 +// "james-test@protonmail.blue " +// +// If you need to rebuild you can dump KeyPacket string from `CreateAttachment` +// function when called during message sending test. +const dataPacketOutlineLightInstagram48png = `wcBMA3C4yiMHnyFnAQgAnH7Qs4lvnbpwdFh5fTgJVTwKZnoLHcbukczf1dld1h9+83wv1FUNosy08KAX3IbDPGnFf5hMTGyEvEcNI0/2HWgootPPCWVvHKfKjjBmstUxbBgRJWi35bBz0WzMarB4BWM8xO2ffqrylhgUhBdK5c26qZvU6veq4cfkKA4SFT9fld8KHY+Ph+v1Q3lP5p3lLX0CyH0CtX7HxdPXk30J+HkugyYOcQ/2upiGzmnIobmZE9kedEA5CWNQa1gxoQTzuxDOzYRZFQPidywMj8pOPCK+1825O/8IeIL/NbXpb1qEW0roOAjYVO/NQzWyWSuIOc6F/Y+ZxzpjhRhe8son+tLBuQEm1rzWfj5Yg4x56fkRPCtfNKaIfYCGEexQNXZWX8zMxjDWM93JDjLeb0C5FzlY2tME2+zYM/SegzFHiagWlZROgQvoiPxcl67FbcOd1YrNP63gw1dwBt4tg1kbwlcSQ76cHeJ6r/Sjg2Q2v52Ee7h3K5Q2h8UDCcgIfTrS4SAKbWjPRRypMJXBp+GH6LASD4m/A6cjjcuOg5Ssh2KaKjGsYrrHVnplmWfKZ18/OrFYSHKxytiIHrLd3GE7SxbyI6LvfKxa5QAKbPBxL39FjcryaJ9l7iWI63zYhOS6bi6fRWLCq6vK30kPvgn+mivGtAxLfSrgAlODmXPxBM4ZIVQNxYv35Fhxk/ENEvRALRv4Lmdv+lHwK1DdtAY/XozkghpLG9ySThqoktK16BKtPeCAEjktlRUt8x/F3Nl5IhOCxxt21xTnne4erz4g6HtCy1Q4fblJTi5iS4C27MdDAPZLZYxz70vxuoeXK4g61pPNVaYNLRE6vdlQUM0YYy58pHarW1+YQ54HXT5eFP/wz+xvNDW/2RCkvIBxioxiTiJNRzEtfivYGowImkxJkMlhv6g9i17Dz5ANca9y4hEKi+u55drRn1Nfikw8c8JmopFELF1eQ1gbDYdb4X40qV1AApTEH1hRYNZeaMwmLkGDvBUDy99p0i6+BTO/nr9wghRCyxv9urDvNMbGxLnbNq2wdNtYdR4UurjmrpyctqyeaERxUbHhGFqAlkRGbd8ZOFHWrCVpKKYihptSbITZnK4d1vJ7IKu/+nIRI9mwN5IRnJjVo31AvzWpSGnJDtidAjAHeIVwrKk/eONETXwBXuAaVUd8PuMJCv4xuw==` + +func newTestAttachment(iAtt int, msgID string) *pmapi.Attachment { + attID := fmt.Sprintf("attID%d", iAtt) + attContentID := fmt.Sprintf("<%s@protonmail.com>", attID) + return &pmapi.Attachment{ + ID: attID, + MessageID: msgID, + Name: "outline-light-instagram-48.png", + MIMEType: "image/png", + Disposition: "attachment", + ContentID: attContentID, + Header: textproto.MIMEHeader{}, + KeyPackets: dataPacketOutlineLightInstagram48png, + } +} + func (api *FakePMAPI) GetAttachment(attachmentID string) (io.ReadCloser, error) { if err := api.checkAndRecordCall(GET, "/mail/v4/attachments/"+attachmentID, nil); err != nil { return nil, err } - data := strings.NewReader("data") - return ioutil.NopCloser(data), nil + b, err := base64.StdEncoding.DecodeString(dataPacketOutlineLightInstagram48png) + if err != nil { + return nil, err + } + r := bytes.NewReader(b) + return ioutil.NopCloser(r), nil } func (api *FakePMAPI) CreateAttachment(attachment *pmapi.Attachment, data io.Reader, signature io.Reader) (*pmapi.Attachment, error) { diff --git a/test/fakeapi/controller_control.go b/test/fakeapi/controller_control.go index 4759a8c4..250309f7 100644 --- a/test/fakeapi/controller_control.go +++ b/test/fakeapi/controller_control.go @@ -142,6 +142,11 @@ func (ctl *Controller) AddUserMessage(username string, message *pmapi.Message) ( message.ID = ctl.messageIDGenerator.next("") message.LabelIDs = append(message.LabelIDs, pmapi.AllMailLabel) message.Header = mail.Header(messageUtils.GetHeader(message)) + + for iAtt := 0; iAtt < message.NumAttachments; iAtt++ { + message.Attachments = append(message.Attachments, newTestAttachment(iAtt, message.ID)) + } + ctl.messagesByUsername[username] = append(ctl.messagesByUsername[username], message) ctl.resetUsers() return message.ID, nil diff --git a/test/features/bridge/imap/message/fetch.feature b/test/features/bridge/imap/message/fetch.feature index e7ad8946..29986fa9 100644 --- a/test/features/bridge/imap/message/fetch.feature +++ b/test/features/bridge/imap/message/fetch.feature @@ -63,8 +63,8 @@ Feature: IMAP fetch messages Scenario: Fetch of very old message sent from the moon succeeds with modified date Given there are messages in mailbox "Folders/mbox" for "user" - | from | to | subject | time | - | john.doe@mail.com | user@pm.me | foo | 1969-07-20T00:00:00 | + | from | to | subject | time | + | john.doe@mail.com | user@pm.me | Very old email | 1969-07-20T00:00:00 | And there is IMAP client logged in as "user" And there is IMAP client selected in "Folders/mbox" When IMAP client sends command "FETCH 1:* rfc822" diff --git a/test/features/bridge/imap/message/fetch_header.feature b/test/features/bridge/imap/message/fetch_header.feature new file mode 100644 index 00000000..702f546f --- /dev/null +++ b/test/features/bridge/imap/message/fetch_header.feature @@ -0,0 +1,41 @@ +Feature: IMAP fetch header of message + Background: Fetch header deterministic content type and boundary + Given there is connected user "user" + And there are messages in mailbox "INBOX" for "user" + | id | from | to | subject | n attachments | content type | body | + | 1 | f@m.co | t@pm.me | A message with attachment | 2 | html | body | + | 2 | f@m.co | t@pm.me | A simple html message | 0 | html | body | + | 3 | f@m.co | t@pm.me | A simple plain message | 0 | plain | body | + # | 4 | f@m.co | t@pm.me | An externally encrypted message | 0 | mixed | body | + # | 5 | f@m.co | t@pm.me | A simple plain message in latin1 | 0 | plain-latin1 | body | + + And there is IMAP client logged in as "user" + And there is IMAP client selected in "INBOX" + + @ignore-live + Scenario Outline: Fetch header deterministic content type and boundary + Given header is not cached for message "" in "INBOX" for "user" + # First time need to download and cache + When IMAP client fetches header of "" + Then IMAP response is "OK" + And IMAP response contains "Content-Type: " + And IMAP response contains "" + And header is cached for message "" in "INBOX" for "user" + # Second time it's taken from imap cache + When IMAP client fetches body "" + Then IMAP response is "OK" + And IMAP response contains "Content-Type: " + And IMAP response contains "" + # Third time header taken from DB + When IMAP client fetches header of "" + Then IMAP response is "OK" + And IMAP response contains "Content-Type: " + And IMAP response contains "" + + Examples: + | id | contentType | parameter | + | 1 | multipart/mixed | boundary=4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce | + | 2 | text/html | charset=utf-8 | + | 3 | text/plain | charset=utf-8 | + + diff --git a/test/imap_actions_messages_test.go b/test/imap_actions_messages_test.go index 88eb49fd..722d87dd 100644 --- a/test/imap_actions_messages_test.go +++ b/test/imap_actions_messages_test.go @@ -30,6 +30,8 @@ import ( func IMAPActionsMessagesFeatureContext(s *godog.Suite) { s.Step(`^IMAP client sends command "([^"]*)"$`, imapClientSendsCommand) s.Step(`^IMAP client fetches "([^"]*)"$`, imapClientFetches) + s.Step(`^IMAP client fetches header(?:s)? of "([^"]*)"$`, imapClientFetchesHeader) + s.Step(`^IMAP client fetches body "([^"]*)"$`, imapClientFetchesBody) s.Step(`^IMAP client fetches by UID "([^"]*)"$`, imapClientFetchesByUID) s.Step(`^IMAP client searches for "([^"]*)"$`, imapClientSearchesFor) s.Step(`^IMAP client copies message seq "([^"]*)" to "([^"]*)"$`, imapClientCopiesMessagesTo) @@ -76,6 +78,18 @@ func imapClientFetches(fetchRange string) error { return nil } +func imapClientFetchesHeader(fetchRange string) error { + res := ctx.GetIMAPClient("imap").Fetch(fetchRange, "BODY.PEEK[HEADER]") + ctx.SetIMAPLastResponse("imap", res) + return nil +} + +func imapClientFetchesBody(fetchRange string) error { + res := ctx.GetIMAPClient("imap").Fetch(fetchRange, "BODY.PEEK[]") + ctx.SetIMAPLastResponse("imap", res) + return nil +} + func imapClientFetchesByUID(fetchRange string) error { res := ctx.GetIMAPClient("imap").FetchUID(fetchRange, "UID") ctx.SetIMAPLastResponse("imap", res) diff --git a/test/liveapi/messages.go b/test/liveapi/messages.go index 1a77c511..41ebf6e1 100644 --- a/test/liveapi/messages.go +++ b/test/liveapi/messages.go @@ -30,6 +30,10 @@ import ( ) func (ctl *Controller) AddUserMessage(username string, message *pmapi.Message) (string, error) { + if message.NumAttachments != 0 { + return "", errors.New("add user messages with attachments is not implemented for live") + } + client, ok := ctl.pmapiByUsername[username] if !ok { return "", fmt.Errorf("user %s does not exist", username) diff --git a/test/store_checks_test.go b/test/store_checks_test.go index 762c8e7e..42042814 100644 --- a/test/store_checks_test.go +++ b/test/store_checks_test.go @@ -46,6 +46,8 @@ func StoreChecksFeatureContext(s *godog.Suite) { s.Step(`^message "([^"]*)" in "([^"]*)" for "([^"]*)" is marked as unstarred$`, messagesInMailboxForUserIsMarkedAsUnstarred) s.Step(`^message "([^"]*)" in "([^"]*)" for "([^"]*)" is marked as deleted$`, messagesInMailboxForUserIsMarkedAsDeleted) s.Step(`^message "([^"]*)" in "([^"]*)" for "([^"]*)" is marked as undeleted$`, messagesInMailboxForUserIsMarkedAsUndeleted) + s.Step(`^header is not cached for message "([^"]*)" in "([^"]*)" for "([^"]*)"$`, messageHeaderIsNotCached) + s.Step(`^header is cached for message "([^"]*)" in "([^"]*)" for "([^"]*)"$`, messageHeaderIsCached) } func userHasMailbox(bddUserID, mailboxName string) error { @@ -260,7 +262,7 @@ func messagesContainsMessageRow(account *accounts.TestAccount, allMessages []int if message.Unread != unread { matches = false } - case "deleted": + case "deleted": //nolint[goconst] if storeMessage == nil { return false, fmt.Errorf("deleted column not supported for pmapi message object") } @@ -345,6 +347,24 @@ func messagesInMailboxForUserIsMarkedAsUndeleted(bddMessageIDs, mailboxName, bdd }) } +func messageHeaderIsNotCached(bddMessageIDs, mailboxName, bddUserID string) error { + return checkMessages(bddUserID, mailboxName, bddMessageIDs, func(message *store.Message) error { + if !message.IsFullHeaderCached() { + return nil + } + return fmt.Errorf("message %s \"%s\" is expected to not have cached header but has one", message.ID(), message.Message().Subject) + }) +} + +func messageHeaderIsCached(bddMessageIDs, mailboxName, bddUserID string) error { + return checkMessages(bddUserID, mailboxName, bddMessageIDs, func(message *store.Message) error { + if message.IsFullHeaderCached() { + return nil + } + return fmt.Errorf("message %s \"%s\" is expected to have cached header but it doesn't have", message.ID(), message.Message().Subject) + }) +} + func checkMessages(bddUserID, mailboxName, bddMessageIDs string, callback func(*store.Message) error) error { account := ctx.GetTestAccount(bddUserID) if account == nil { diff --git a/test/store_setup_test.go b/test/store_setup_test.go index 7270473c..f522d5cd 100644 --- a/test/store_setup_test.go +++ b/test/store_setup_test.go @@ -94,7 +94,7 @@ func thereAreMessagesInMailboxesForAddressOfUser(mailboxNames, bddAddressID, bdd for i := len(messages.Rows) - 1; i > 0; i-- { row := messages.Rows[i] message := &pmapi.Message{ - MIMEType: "text/plain", + MIMEType: pmapi.ContentTypePlainText, LabelIDs: labelIDs, AddressID: account.AddressID(), } @@ -108,48 +108,16 @@ func thereAreMessagesInMailboxesForAddressOfUser(mailboxNames, bddAddressID, bdd hasDeletedFlag := false for n, cell := range row.Cells { - switch head[n].Value { - case "id": + column := head[n].Value + if column == "id" { bddMessageID = cell.Value - case "from": - message.Sender = &mail.Address{ - Address: ctx.EnsureAddress(account.Username(), cell.Value), - } - case "to": - message.AddressID = ctx.EnsureAddressID(account.Username(), cell.Value) - message.ToList = []*mail.Address{{ - Address: ctx.EnsureAddress(account.Username(), cell.Value), - }} - case "cc": - message.AddressID = ctx.EnsureAddressID(account.Username(), cell.Value) - message.CCList = []*mail.Address{{ - Address: ctx.EnsureAddress(account.Username(), cell.Value), - }} - case "subject": - message.Subject = cell.Value - case "body": - message.Body = cell.Value - case "read": - unread := 1 - if cell.Value == "true" { - unread = 0 - } - message.Unread = unread - case "starred": - if cell.Value == "true" { - message.LabelIDs = append(message.LabelIDs, "10") - } - case "time": //nolint[goconst] - date, err := time.Parse(timeFormat, cell.Value) - if err != nil { - return internalError(err, "parsing time") - } - message.Time = date.Unix() - header.Set("Date", date.Format(time.RFC1123Z)) - case "deleted": + } + if column == "deleted" { hasDeletedFlag = cell.Value == "true" - default: - return fmt.Errorf("unexpected column name: %s", head[n].Value) + } + err := processMessageTableCell(column, cell.Value, account.Username(), message, header) + if err != nil { + return err } } message.Header = mail.Header(header) @@ -183,6 +151,64 @@ func thereAreMessagesInMailboxesForAddressOfUser(mailboxNames, bddAddressID, bdd return nil } +func processMessageTableCell(column, cellValue, username string, message *pmapi.Message, header textproto.MIMEHeader) error { + switch column { + case "deleted", "id": // it is processed in the main function + case "from": + message.Sender = &mail.Address{ + Address: ctx.EnsureAddress(username, cellValue), + } + case "to": + message.AddressID = ctx.EnsureAddressID(username, cellValue) + message.ToList = []*mail.Address{{ + Address: ctx.EnsureAddress(username, cellValue), + }} + case "cc": + message.AddressID = ctx.EnsureAddressID(username, cellValue) + message.CCList = []*mail.Address{{ + Address: ctx.EnsureAddress(username, cellValue), + }} + case "subject": + message.Subject = cellValue + case "body": + message.Body = cellValue + case "read": + unread := 1 + if cellValue == "true" { + unread = 0 + } + message.Unread = unread + case "starred": + if cellValue == "true" { + message.LabelIDs = append(message.LabelIDs, "10") + } + case "time": //nolint[goconst] It is more easy to read like this + date, err := time.Parse(timeFormat, cellValue) + if err != nil { + return internalError(err, "parsing time") + } + message.Time = date.Unix() + header.Set("Date", date.Format(time.RFC1123Z)) + case "n attachments": + numAttachments, err := strconv.Atoi(cellValue) + if err != nil { + return internalError(err, "parsing n attachments") + } + message.NumAttachments = numAttachments + case "content type": + switch cellValue { + case "html": + message.MIMEType = pmapi.ContentTypeHTML + case "plain": + message.MIMEType = pmapi.ContentTypePlainText + } + default: + return fmt.Errorf("unexpected column name: %s", column) + } + + return nil +} + func thereAreSomeMessagesInMailboxesForUser(numberOfMessages int, mailboxNames, bddUserID string) error { return thereAreSomeMessagesInMailboxesForAddressOfUser(numberOfMessages, mailboxNames, "", bddUserID) }