Cache body structure in order to reduce network traffic

This commit is contained in:
Jakub
2020-12-30 10:06:11 +01:00
committed by Michal Horejsek
parent 516ca018d3
commit 8ab852277c
15 changed files with 219 additions and 56 deletions

View File

@ -254,7 +254,8 @@ lint-license:
./utils/missing_license.sh check
lint-changelog:
./utils/changelog_linter.sh
./utils/changelog_linter.sh Changelog.md
./utils/changelog_linter.sh unreleased.md
lint-golang:
which golangci-lint || $(MAKE) install-linter

2
go.mod
View File

@ -64,6 +64,8 @@ require (
github.com/therecipe/qt v0.0.0-20200701200531-7f61353ee73e
github.com/twinj/uuid v1.0.0 // indirect
github.com/urfave/cli/v2 v2.2.0
github.com/urfave/cli v1.22.4
github.com/vmihailenco/msgpack/v5 v5.1.3
go.etcd.io/bbolt v1.3.5
golang.org/x/net v0.0.0-20200707034311-ab3426394381
golang.org/x/text v0.3.5-0.20201125200606-c27b9fd57aec

5
go.sum
View File

@ -281,6 +281,11 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC
github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
github.com/vmihailenco/msgpack/v5 v5.1.3 h1:FwC9KPjyW8OqTUqMt6rQw9y50vA2cTLXPKCcBCRbQgg=
github.com/vmihailenco/msgpack/v5 v5.1.3/go.mod h1:C5gboKD0TJPqWDTVTtrQNfRbiBwHZGo8UTqP/9/XvLI=
github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vbd1qPqc=
github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=

View File

@ -240,7 +240,8 @@ func (im *imapMailbox) getMessage(storeMessage storeMessageProvider, items []ima
msg.Envelope = message.GetEnvelope(m)
case imap.FetchBody, imap.FetchBodyStructure:
var structure *message.BodyStructure
if structure, _, err = im.getBodyStructure(storeMessage); err != nil {
structure, err = im.getBodyStructure(storeMessage)
if err != nil {
return
}
if msg.BodyStructure, err = structure.IMAPBodyStructure([]int{}); err != nil {
@ -264,7 +265,7 @@ func (im *imapMailbox) getMessage(storeMessage storeMessageProvider, items []ima
// on our part and we need to compute "real" size of decrypted data.
if m.Size <= 0 {
im.log.WithField("msgID", storeMessage.ID()).Trace("Size unknown - downloading body")
if _, _, err = im.getBodyStructure(storeMessage); err != nil {
if _, _, err = im.getBodyAndStructure(storeMessage); err != nil {
return
}
}
@ -299,7 +300,24 @@ func (im *imapMailbox) getLiteralForSection(itemSection imap.FetchItem, msg *ima
return nil
}
func (im *imapMailbox) getBodyStructure(storeMessage storeMessageProvider) (
// getBodyStructure returns the body structure of storeMessage. It prefers the
// copy stored in the local database and falls back to getBodyAndStructure when
// no usable copy is stored.
//
// Apple Mail requests body structure for all messages irregularly, so the
// structure is cached in the local database in order to not re-download all
// messages from the server.
//
// A failure to read the cached copy is logged and otherwise ignored: it never
// reaches the caller, and whatever value accompanied the error is discarded.
func (im *imapMailbox) getBodyStructure(storeMessage storeMessageProvider) (bs *message.BodyStructure, err error) {
	cached, cacheErr := storeMessage.GetBodyStructure()
	switch {
	case cacheErr != nil:
		// Reading the cache is best-effort; keep the error local so it
		// cannot leak out next to a successfully rebuilt structure.
		im.log.WithError(cacheErr).Debug("Fail to retrieve bodystructure from database")
	case cached != nil:
		return cached, nil
	}

	if bs, _, err = im.getBodyAndStructure(storeMessage); err != nil {
		return nil, err
	}
	return bs, nil
}
func (im *imapMailbox) getBodyAndStructure(storeMessage storeMessageProvider) (
structure *message.BodyStructure,
bodyReader *bytes.Reader, err error,
) {
@ -324,6 +342,11 @@ func (im *imapMailbox) getBodyStructure(storeMessage storeMessageProvider) (
}
// Drafts can change and we don't want to cache them.
if !isMessageInDraftFolder(m) {
if err := storeMessage.SetBodyStructure(structure); err != nil {
im.log.WithError(err).
WithField("msgID", m.ID).
Warn("Cannot update bodystructure while building")
}
cache.SaveMail(id, body, structure)
}
bodyReader = bytes.NewReader(body)
@ -380,7 +403,7 @@ func (im *imapMailbox) getMessageBodySection(storeMessage storeMessageProvider,
}
} else {
// The rest of cases need download and decrypt.
structure, bodyReader, err = im.getBodyStructure(storeMessage)
structure, bodyReader, err = im.getBodyAndStructure(storeMessage)
if err != nil {
return
}

View File

@ -277,7 +277,7 @@ func (im *imapMailbox) SearchMessages(isUID bool, criteria *imap.SearchCriteria)
}
if criteria.Body != nil || criteria.Text != nil {
log.Warn("Body and Text criteria not applied.")
log.Warn("Body and Text criteria not applied")
}
var apiIDs []string

View File

@ -24,6 +24,7 @@ import (
"github.com/ProtonMail/gopenpgp/v2/crypto"
"github.com/ProtonMail/proton-bridge/internal/imap/uidplus"
"github.com/ProtonMail/proton-bridge/internal/store"
backendMessage "github.com/ProtonMail/proton-bridge/pkg/message"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
)
@ -99,6 +100,8 @@ type storeMessageProvider interface {
SetSize(int64) error
SetContentTypeAndHeader(string, mail.Header) error
SetBodyStructure(*backendMessage.BodyStructure) error
GetBodyStructure() (*backendMessage.BodyStructure, error)
}
type storeUserWrap struct {

View File

@ -214,7 +214,7 @@ func (iu *imapUpdates) sendIMAPUpdate(update goIMAPBackend.Update, block bool) {
select {
case <-done:
case <-time.After(1 * time.Second):
log.Warn("IMAP update could not be delivered (timeout).")
log.Warn("IMAP update could not be delivered (timeout)")
return
}
}

View File

@ -20,6 +20,7 @@ package store
import (
"net/mail"
backendMessage "github.com/ProtonMail/proton-bridge/pkg/message"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
bolt "go.etcd.io/bbolt"
)
@ -119,3 +120,31 @@ func (message *Message) SetContentTypeAndHeader(mimeType string, header mail.Hea
}
return message.store.db.Update(txUpdate)
}
// SetBodyStructure stores the serialized body structure in the database under
// this message's ID, using a single update transaction.
func (message *Message) SetBodyStructure(bs *backendMessage.BodyStructure) error {
	return message.store.db.Update(func(tx *bolt.Tx) error {
		bucket := tx.Bucket(bodystructureBucket)
		return message.store.txPutBodyStructure(bucket, message.ID(), bs)
	})
}
// GetBodyStructure deserializes the body structure stored in the database for
// this message. If no body structure is stored it returns a nil body structure
// and a nil error. If an error occurs it returns a nil body structure together
// with that error.
func (message *Message) GetBodyStructure() (bs *backendMessage.BodyStructure, err error) {
	var stored *backendMessage.BodyStructure

	err = message.store.db.View(func(tx *bolt.Tx) error {
		var txErr error
		stored, txErr = message.store.txGetBodyStructure(
			tx.Bucket(bodystructureBucket),
			message.ID(),
		)
		return txErr
	})
	if err != nil {
		return nil, err
	}
	return stored, nil
}

View File

@ -71,16 +71,17 @@ var (
// * {messageID} -> uint32 imapUID
// * deleted_ids (can be missing or have no keys)
// * {messageID} -> true
metadataBucket = []byte("metadata") //nolint[gochecknoglobals]
countsBucket = []byte("counts") //nolint[gochecknoglobals]
addressInfoBucket = []byte("address_info") //nolint[gochecknoglobals]
addressModeBucket = []byte("address_mode") //nolint[gochecknoglobals]
syncStateBucket = []byte("sync_state") //nolint[gochecknoglobals]
mailboxesBucket = []byte("mailboxes") //nolint[gochecknoglobals]
imapIDsBucket = []byte("imap_ids") //nolint[gochecknoglobals]
apiIDsBucket = []byte("api_ids") //nolint[gochecknoglobals]
deletedIDsBucket = []byte("deleted_ids") //nolint[gochecknoglobals]
mboxVersionBucket = []byte("mailboxes_version") //nolint[gochecknoglobals]
metadataBucket = []byte("metadata") //nolint[gochecknoglobals]
bodystructureBucket = []byte("bodystructure") //nolint[gochecknoglobals]
countsBucket = []byte("counts") //nolint[gochecknoglobals]
addressInfoBucket = []byte("address_info") //nolint[gochecknoglobals]
addressModeBucket = []byte("address_mode") //nolint[gochecknoglobals]
syncStateBucket = []byte("sync_state") //nolint[gochecknoglobals]
mailboxesBucket = []byte("mailboxes") //nolint[gochecknoglobals]
imapIDsBucket = []byte("imap_ids") //nolint[gochecknoglobals]
apiIDsBucket = []byte("api_ids") //nolint[gochecknoglobals]
deletedIDsBucket = []byte("deleted_ids") //nolint[gochecknoglobals]
mboxVersionBucket = []byte("mailboxes_version") //nolint[gochecknoglobals]
// ErrNoSuchAPIID when mailbox does not have API ID.
ErrNoSuchAPIID = errors.New("no such api id") //nolint[gochecknoglobals]
@ -193,6 +194,10 @@ func openBoltDatabase(filePath string) (db *bolt.DB, err error) {
return
}
if _, err = tx.CreateBucketIfNotExists(bodystructureBucket); err != nil {
return
}
if _, err = tx.CreateBucketIfNotExists(countsBucket); err != nil {
return
}

View File

@ -27,6 +27,7 @@ import (
"strings"
"github.com/ProtonMail/gopenpgp/v2/crypto"
backendMessage "github.com/ProtonMail/proton-bridge/pkg/message"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@ -170,6 +171,26 @@ func (store *Store) txPutMessage(metaBucket *bolt.Bucket, onlyMeta *pmapi.Messag
return nil
}
// txPutBodyStructure serializes bs and stores the result in bsBucket under the
// key msgID. A serialization error is returned as is; a failure to write to
// the bucket is wrapped with additional context.
func (store *Store) txPutBodyStructure(bsBucket *bolt.Bucket, msgID string, bs *backendMessage.BodyStructure) error {
	serialized, err := bs.Serialize()
	if err != nil {
		return err
	}
	if putErr := bsBucket.Put([]byte(msgID), serialized); putErr != nil {
		return errors.Wrap(putErr, "cannot put bodystructure bucket")
	}
	return nil
}
// txGetBodyStructure looks up msgID in bsBucket and deserializes the stored
// value. When the bucket holds no (or an empty) value for msgID it returns a
// nil body structure and a nil error.
func (store *Store) txGetBodyStructure(bsBucket *bolt.Bucket, msgID string) (*backendMessage.BodyStructure, error) {
	if stored := bsBucket.Get([]byte(msgID)); len(stored) > 0 {
		return backendMessage.DeserializeBodyStructure(stored)
	}
	return nil, nil
}
// createOrUpdateMessageEvent is helper to create only one message with
// createOrUpdateMessagesEvent.
func (store *Store) createOrUpdateMessageEvent(msg *pmapi.Message) error {

View File

@ -20,7 +20,6 @@ package message
import (
"bufio"
"bytes"
"errors"
"io"
"io/ioutil"
"net/textproto"
@ -29,19 +28,21 @@ import (
pmmime "github.com/ProtonMail/proton-bridge/pkg/mime"
"github.com/emersion/go-imap"
"github.com/pkg/errors"
"github.com/vmihailenco/msgpack/v5"
)
// SectionInfo describes one section of a parsed message: its MIME header and
// the counters collected while the section was read. The exported fields are
// the ones kept when a body structure is serialized; reader is unexported and
// only used while parsing.
type SectionInfo struct {
	Header                    textproto.MIMEHeader
	Start, BSize, Size, Lines int

	reader io.Reader
}

// Read reads from the wrapped reader into p and updates the counters: Size
// grows by the number of bytes read and Lines by the number of newline
// characters among those bytes.
//
// Only p[:n] is inspected. Anything in p beyond the n bytes just read is
// leftover from earlier use of the buffer and must not be counted.
func (si *SectionInfo) Read(p []byte) (n int, err error) {
	n, err = si.reader.Read(p)
	si.Size += n
	si.Lines += bytes.Count(p[:n], []byte("\n"))
	return n, err
}
@ -149,7 +150,7 @@ func (br *boundaryReader) WriteNextPartTo(part io.Writer) (err error) {
}
}
type BodyStructure map[string]*sectionInfo
type BodyStructure map[string]*SectionInfo
func NewBodyStructure(reader io.Reader) (structure *BodyStructure, err error) {
structure = &BodyStructure{}
@ -157,30 +158,47 @@ func NewBodyStructure(reader io.Reader) (structure *BodyStructure, err error) {
return
}
// DeserializeBodyStructure decodes raw with msgpack into a new BodyStructure.
// On failure it returns a nil structure and the wrapped decoding error.
func DeserializeBodyStructure(raw []byte) (*BodyStructure, error) {
	decoded := &BodyStructure{}
	if err := msgpack.Unmarshal(raw, decoded); err != nil {
		return nil, errors.Wrap(err, "cannot deserialize bodystructure")
	}
	return decoded, nil
}
// Serialize encodes the body structure with msgpack. On failure it returns nil
// data and the wrapped encoding error.
func (bs *BodyStructure) Serialize() ([]byte, error) {
	encoded, err := msgpack.Marshal(bs)
	if err == nil {
		return encoded, nil
	}
	return nil, errors.Wrap(err, "cannot serialize bodystructure")
}
func (bs *BodyStructure) Parse(r io.Reader) error {
return bs.parseAllChildSections(r, []int{}, 0)
}
func (bs *BodyStructure) parseAllChildSections(r io.Reader, currentPath []int, start int) (err error) { //nolint[funlen]
info := &sectionInfo{
start: start,
size: 0,
bsize: 0,
lines: 0,
info := &SectionInfo{
Start: start,
Size: 0,
BSize: 0,
Lines: 0,
reader: r,
}
bufInfo := bufio.NewReader(info)
tp := textproto.NewReader(bufInfo)
if info.header, err = tp.ReadMIMEHeader(); err != nil {
if info.Header, err = tp.ReadMIMEHeader(); err != nil {
return
}
bodyInfo := &sectionInfo{reader: tp.R}
bodyInfo := &SectionInfo{reader: tp.R}
bodyReader := bufio.NewReader(bodyInfo)
mediaType, params, _ := pmmime.ParseMediaType(info.header.Get("Content-Type"))
mediaType, params, _ := pmmime.ParseMediaType(info.Header.Get("Content-Type"))
// If multipart, call getAllParts, else read to count lines.
if (strings.HasPrefix(mediaType, "multipart/") || mediaType == "message/rfc822") && params["boundary"] != "" {
@ -227,18 +245,18 @@ func (bs *BodyStructure) parseAllChildSections(r io.Reader, currentPath []int, s
info.reader = nil
// Store boundaries.
info.bsize = bodyInfo.size
info.BSize = bodyInfo.Size
path := stringPathFromInts(currentPath)
(*bs)[path] = info
// Fix start of subsections.
newPath := append(currentPath, 1)
shift := info.size - info.bsize
shift := info.Size - info.BSize
subInfo, err := bs.getInfo(newPath)
// If it has subparts.
for err == nil {
subInfo.start += shift
subInfo.Start += shift
// Level down.
subInfo, err = bs.getInfo(append(newPath, 1))
@ -287,7 +305,7 @@ func stringPathFromInts(ints []int) (ret string) {
return
}
func (bs *BodyStructure) getInfo(sectionPath []int) (sectionInfo *sectionInfo, err error) {
func (bs *BodyStructure) getInfo(sectionPath []int) (sectionInfo *SectionInfo, err error) {
path := stringPathFromInts(sectionPath)
sectionInfo, ok := (*bs)[path]
if !ok {
@ -301,10 +319,10 @@ func (bs *BodyStructure) GetSection(wholeMail io.ReadSeeker, sectionPath []int)
if err != nil {
return
}
if _, err = wholeMail.Seek(int64(info.start), io.SeekStart); err != nil {
if _, err = wholeMail.Seek(int64(info.Start), io.SeekStart); err != nil {
return
}
section = make([]byte, info.size)
section = make([]byte, info.Size)
_, err = wholeMail.Read(section)
return
}
@ -314,10 +332,10 @@ func (bs *BodyStructure) GetSectionContent(wholeMail io.ReadSeeker, sectionPath
if err != nil {
return
}
if _, err = wholeMail.Seek(int64(info.start+info.size-info.bsize), io.SeekStart); err != nil {
if _, err = wholeMail.Seek(int64(info.Start+info.Size-info.BSize), io.SeekStart); err != nil {
return
}
section = make([]byte, info.bsize)
section = make([]byte, info.BSize)
_, err = wholeMail.Read(section)
return
@ -343,17 +361,17 @@ func (bs *BodyStructure) GetSectionHeader(sectionPath []int) (header textproto.M
if err != nil {
return
}
header = info.header
header = info.Header
return
}
func (bs *BodyStructure) IMAPBodyStructure(currentPart []int) (imapBS *imap.BodyStructure, err error) {
var info *sectionInfo
var info *SectionInfo
if info, err = bs.getInfo(currentPart); err != nil {
return
}
mediaType, params, _ := pmmime.ParseMediaType(info.header.Get("Content-Type"))
mediaType, params, _ := pmmime.ParseMediaType(info.Header.Get("Content-Type"))
mediaTypeSep := strings.Split(mediaType, "/")
@ -364,23 +382,23 @@ func (bs *BodyStructure) IMAPBodyStructure(currentPart []int) (imapBS *imap.Body
MIMEType: mediaTypeSep[0],
MIMESubType: mediaTypeSep[1],
Params: params,
Size: uint32(info.bsize),
Lines: uint32(info.lines),
Size: uint32(info.BSize),
Lines: uint32(info.Lines),
}
if val := info.header.Get("Content-ID"); val != "" {
if val := info.Header.Get("Content-ID"); val != "" {
imapBS.Id = val
}
if val := info.header.Get("Content-Transfer-Encoding"); val != "" {
if val := info.Header.Get("Content-Transfer-Encoding"); val != "" {
imapBS.Encoding = val
}
if val := info.header.Get("Content-Description"); val != "" {
if val := info.Header.Get("Content-Description"); val != "" {
imapBS.Description = val
}
if val := info.header.Get("Content-Disposition"); val != "" {
if val := info.Header.Get("Content-Disposition"); val != "" {
imapBS.Disposition = val
}

View File

@ -18,7 +18,9 @@
package message
import (
"bytes"
"fmt"
"net/textproto"
"path/filepath"
"runtime"
"sort"
@ -68,8 +70,8 @@ func TestParseBodyStructure(t *testing.T) {
debug("%10s: %-50s %5s %5s %5s %5s", "section", "type", "start", "size", "bsize", "lines")
for _, path := range paths {
sec := (*bs)[path]
contentType := sec.header.Get("Content-Type")
debug("%10s: %-50s %5d %5d %5d %5d", path, contentType, sec.start, sec.size, sec.bsize, sec.lines)
contentType := sec.Header.Get("Content-Type")
debug("%10s: %-50s %5d %5d %5d %5d", path, contentType, sec.Start, sec.Size, sec.BSize, sec.Lines)
require.Equal(t, expectedStructure[path], contentType)
}
@ -88,7 +90,7 @@ func TestGetSection(t *testing.T) {
section, err := bs.GetSection(mailReader, try.path)
require.NoError(t, err)
debug("section %v: %d %d\n___\n%s\n‾‾‾\n", try.path, info.start, info.size, string(section))
debug("section %v: %d %d\n___\n%s\n‾‾‾\n", try.path, info.Start, info.Size, string(section))
require.True(t, string(section) == try.expectedSection, "not same as expected:\n___\n%s\n‾‾‾", try.expectedSection)
}
@ -100,7 +102,7 @@ func TestGetSection(t *testing.T) {
section, err := bs.GetSectionContent(mailReader, try.path)
require.NoError(t, err)
debug("content %v: %d %d\n___\n%s\n‾‾‾\n", try.path, info.start+info.size-info.bsize, info.bsize, string(section))
debug("content %v: %d %d\n___\n%s\n‾‾‾\n", try.path, info.Start+info.Size-info.BSize, info.BSize, string(section))
require.True(t, string(section) == try.expectedBody, "not same as expected:\n___\n%s\n‾‾‾", try.expectedBody)
}
@ -418,3 +420,37 @@ Content-Transfer-Encoding: base64
`,
},
}
func TestBodyStructureSerialize(t *testing.T) {
r := require.New(t)
want := &BodyStructure{
"1": {
Header: textproto.MIMEHeader{
"Content": []string{"type"},
},
Start: 1,
Size: 2,
BSize: 3,
Lines: 4,
},
"1.1.1": {
Header: textproto.MIMEHeader{
"X-Pm-Key": []string{"id"},
},
Start: 11,
Size: 12,
BSize: 13,
Lines: 14,
reader: bytes.NewBuffer([]byte("this should not be serialized")),
},
}
raw, err := want.Serialize()
r.NoError(err)
have, err := DeserializeBodyStructure(raw)
r.NoError(err)
// Before compare remove reader (should not be serialized)
(*want)["1.1.1"].reader = nil
r.Equal(want, have)
}

14
pkg/message/testdata/wrong_base64.eml vendored Normal file
View File

@ -0,0 +1,14 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: multipart/mixed; boundary=longrandomstring
--longrandomstring
Content-Type: text/html
<html><body>This is body of <b>HTML mail</b> with attachment</body></html>
--longrandomstring
Content-Type: application/octet-stream
Content-Transfer-Encoding: base64
b'aWYgeW91IGFyZSByZWFkaW5nIHRoaXMsIGhpIQ=='
--longrandomstring--

View File

@ -34,6 +34,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
* Bump gopenpgp dependency to v2.1.3 for improved memory usage.
* GODT-912 Changed scroll bar behaviour in settings tab
* GODT-149 Send heartbeat ASAP on each new calendar day.
* GODT-792 GODT-908 Cache body structure in order to reduce network traffic.
### Removed
* GODT-208 Remove deprecated use of BuildNameToCertificate.

View File

@ -17,7 +17,12 @@
# You should have received a copy of the GNU General Public License
# along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
CHANGELOG_FILE="`dirname $0`/../Changelog.md"
if [ $# -ne 1 ]; then
echo "First argument must be path to file"
exit 2
fi
CHANGELOG_FILE=$1
ERROR_COUNT_FILE="`mktemp`"
echo "0">$ERROR_COUNT_FILE