Files
proton-bridge/pkg/message/section.go
James Houlahan 6bd0739013 GODT-1158: Store full messages bodies on disk
- GODT-1158: simple on-disk cache in store
- GODT-1158: better member naming in event loop
- GODT-1158: create on-disk cache during bridge setup
- GODT-1158: better job options
- GODT-1158: rename GetLiteral to GetRFC822
- GODT-1158: rename events -> currentEvents
- GODT-1158: unlock cache per-user
- GODT-1158: clean up cache after logout
- GODT-1158: randomized encrypted cache passphrase
- GODT-1158: Opt out of on-disk cache in settings
- GODT-1158: free space in cache
- GODT-1158: make tests compile
- GODT-1158: optional compression
- GODT-1158: cache custom location
- GODT-1158: basic capacity checker
- GODT-1158: cache free space config
- GODT-1158: only unlock cache if pmapi client is unlocked as well
- GODT-1158: simple background sync worker
- GODT-1158: set size/bodystructure when caching message
- GODT-1158: limit store db update blocking with semaphore
- GODT-1158: dumb 10-semaphore
- GODT-1158: properly handle delete; remove bad bodystructure handling
- GODT-1158: hacky fix for caching after logout... baaaaad
- GODT-1158: cache worker
- GODT-1158: compute body structure lazily
- GODT-1158: cache size in store
- GODT-1158: notify cacher when adding to store
- GODT-1158: 15 second store cache watcher
- GODT-1158: enable cacher
- GODT-1158: better cache worker starting/stopping
- GODT-1158: limit cacher to less concurrency than disk cache
- GODT-1158: message builder prio + pchan pkg
- GODT-1158: fix pchan, use in message builder
- GODT-1158: no sem in cacher (rely on message builder prio)
- GODT-1158: raise priority of existing jobs when requested
- GODT-1158: pending messages in on-disk cache
- GODT-1158: WIP just a note about deleting messages from disk cache
- GODT-1158: pending wait when trying to write
- GODT-1158: pending.add to return bool
- GODT-1225: Headers in bodystructure are stored as bytes.
- GODT-1158: fixing header caching
- GODT-1158: don't cache in background
- GODT-1158: all concurrency set in settings
- GODT-1158: worker pools inside message builder
- GODT-1158: fix linter issues
- GODT-1158: remove completed builds from builder
- GODT-1158: remove builder pool
- GODT-1158: cacher defer job done properly
- GODT-1158: fix linter
- GODT-1299: Continue with bodystructure build if deserialization failed
- GODT-1324: Delete messages from the cache when they are deleted on the server
- GODT-1158: refactor cache tests
- GODT-1158: move builder to app/bridge
- GODT-1306: Migrate cache on disk when location is changed (and delete when disabled)
2021-11-30 10:12:36 +01:00

397 lines
10 KiB
Go

// Copyright (c) 2021 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package message
import (
"bufio"
"bytes"
"io"
"io/ioutil"
"net/textproto"
"strconv"
"strings"
pmmime "github.com/ProtonMail/proton-bridge/pkg/mime"
"github.com/emersion/go-imap"
"github.com/pkg/errors"
"github.com/vmihailenco/msgpack/v5"
)
// BodyStructure is used to parse an email into MIME sections and then generate
// body structure for IMAP server.
//
// Keys are section paths written as dot-separated part numbers (for example
// "1.2"); the whole message is stored under the empty key.
type BodyStructure map[string]*SectionInfo
// SectionInfo holds the raw header and the measurements of a single MIME
// section. While a message is parsed it also acts as a counting io.Reader
// wrapped around the section's source.
type SectionInfo struct {
	// Header holds the raw header bytes, up to and including the blank line
	// that terminates the header (once that blank line has been seen).
	Header []byte
	// Start is the offset assigned to the section by the parser, BSize the
	// size of the section body, Size the number of bytes read through Read
	// and Lines the number of '\n' bytes seen by Read.
	Start, BSize, Size, Lines int
	reader                    io.Reader
	isHeaderReadFinished      bool
}

// Read reads from the wrapped reader and updates Size, Lines and Header
// along the way.
//
// Only the n bytes actually delivered by the wrapped reader are inspected:
// the rest of p belongs to the caller and may hold stale data from earlier
// reads, which must neither be counted as lines nor copied into the header.
func (si *SectionInfo) Read(p []byte) (n int, err error) {
	n, err = si.reader.Read(p)
	fresh := p[:n]

	si.Size += n
	si.Lines += bytes.Count(fresh, []byte("\n"))
	si.readHeader(fresh)

	return n, err
}

// readHeader appends read data to Header until the blank line ending the
// header is found; everything after that blank line is cut off.
func (si *SectionInfo) readHeader(p []byte) {
	if si.isHeaderReadFinished {
		return
	}

	si.Header = append(si.Header, p...)

	// textproto accepts both CRLF and bare LF line endings, so be liberal as
	// well. The earliest blank line wins, otherwise an LF-only header followed
	// by a body containing a CRLF blank line would swallow part of the body.
	end := 0
	for _, blank := range [][]byte{[]byte("\n\r\n"), []byte("\n\n")} {
		i := bytes.Index(si.Header, blank)
		if i <= 0 {
			continue
		}
		if stop := i + len(blank); end == 0 || stop < end {
			end = stop
		}
	}

	if end > 0 {
		si.Header = si.Header[:end]
		si.isHeaderReadFinished = true
	}
}

// GetMIMEHeader parses the captured Header bytes and returns them as a MIME
// header.
func (si *SectionInfo) GetMIMEHeader() (textproto.MIMEHeader, error) {
	return textproto.NewReader(bufio.NewReader(bytes.NewReader(si.Header))).ReadMIMEHeader()
}
// NewBodyStructure creates an empty body structure and fills it by parsing
// the message read from reader. The structure is returned together with any
// parsing error.
func NewBodyStructure(reader io.Reader) (structure *BodyStructure, err error) {
	parsed := &BodyStructure{}
	parseErr := parsed.Parse(reader)

	return parsed, parseErr
}
// DeserializeBodyStructure decodes msgpack bytes into a new body structure.
// A decoding failure is returned wrapped, with a nil structure.
func DeserializeBodyStructure(raw []byte) (*BodyStructure, error) {
	structure := &BodyStructure{}

	if err := msgpack.Unmarshal(raw, structure); err != nil {
		return nil, errors.Wrap(err, "cannot deserialize bodystructure")
	}

	return structure, nil
}
// Serialize encodes the body structure as msgpack bytes. An encoding failure
// is returned wrapped, with nil data.
func (bs *BodyStructure) Serialize() ([]byte, error) {
	packed, err := msgpack.Marshal(bs)
	if err == nil {
		return packed, nil
	}

	return nil, errors.Wrap(err, "cannot serialize bodystructure")
}
// Parse reads the whole mail from r and records every section it finds,
// starting with the root section (empty path) at offset zero.
func (bs *BodyStructure) Parse(r io.Reader) error {
	rootPath := []int{}
	const rootStart = 0

	return bs.parseAllChildSections(r, rootPath, rootStart)
}
// parseAllChildSections reads one MIME section from r, stores its SectionInfo
// under the dotted form of currentPath and, when the section is a multipart
// with a boundary, recurses into every part delimited by that boundary.
//
// currentPath is the path of the section being parsed (empty for the whole
// message) and start is the value recorded as the section's Start. After the
// section is stored, the Start of every stored descendant is increased by
// info.Size - info.BSize, i.e. by the number of bytes of this section that
// were not read as body.
//
// An error from reading the MIME header or from the boundary reader is
// returned as is; io.EOF reached while iterating over parts is treated as the
// normal end of the multipart.
func (bs *BodyStructure) parseAllChildSections(r io.Reader, currentPath []int, start int) (err error) { //nolint[funlen]
// info counts every byte and line read from r and captures the raw header.
info := &SectionInfo{
Start: start,
Size: 0,
BSize: 0,
Lines: 0,
reader: r,
}
bufInfo := bufio.NewReader(info)
tp := textproto.NewReader(bufInfo)
tpHeader, err := tp.ReadMIMEHeader()
if err != nil {
return
}
// bodyInfo reads from the same buffered reader the header was parsed from, so
// it only counts what is consumed after the header.
bodyInfo := &SectionInfo{reader: tp.R}
bodyReader := bufio.NewReader(bodyInfo)
// The error of ParseMediaType is ignored; the returned media type and params
// are used whatever they are.
mediaType, params, _ := pmmime.ParseMediaType(tpHeader.Get("Content-Type"))
// If the section has a boundary and is multipart/* or rfc822Message (constant
// declared outside this block), parse every part; otherwise just drain the
// body so the counters see all of it.
if (strings.HasPrefix(mediaType, "multipart/") || mediaType == rfc822Message) && params["boundary"] != "" {
nextPath := getChildPath(currentPath)
var br *boundaryReader
br, err = newBoundaryReader(bodyReader, params["boundary"])
// New reader seeks first boundary.
if err != nil {
// Return also EOF.
return
}
for err == nil {
// NOTE(review): br.skipped is presumably the number of bytes the boundary
// reader has consumed so far, measured from the start of the body — confirm
// against boundaryReader.
start += br.skipped
part := &bytes.Buffer{}
err = br.writeNextPartTo(part)
if err != nil {
break
}
// Parse the buffered part as a child section, then move on to the next
// sibling number.
err = bs.parseAllChildSections(part, nextPath, start)
part.Reset()
nextPath[len(nextPath)-1]++
}
br.reader = nil
// EOF only means there are no more parts.
if err == io.EOF {
err = nil
}
if err != nil {
return
}
} else {
// Count length: reading through bodyReader updates bodyInfo and info. Errors
// while draining are deliberately ignored.
_, _ = bodyReader.WriteTo(ioutil.Discard)
}
// Clear all buffers.
bodyReader = nil //nolint[wastedassign] just to be sure we clear garbage collector
bodyInfo.reader = nil
tp.R = nil
tp = nil //nolint[wastedassign] just to be sure we clear garbage collector
bufInfo = nil //nolint[ineffassign] just to be sure we clear garbage collector
info.reader = nil
// Store boundaries: the body size is what bodyInfo counted; the section is
// registered under its dotted path.
info.BSize = bodyInfo.Size
path := stringPathFromInts(currentPath)
(*bs)[path] = info
// Fix start of subsections: walk the stored descendants depth-first, beginning
// with the first child, and shift each Start by the non-body size of this
// section.
newPath := getChildPath(currentPath)
shift := info.Size - info.BSize
subInfo, err := bs.getInfo(newPath)
// If it has subparts.
for err == nil {
subInfo.Start += shift
// Level down: try the first child of the section just shifted.
subInfo, err = bs.getInfo(append(newPath, 1))
if err == nil {
newPath = append(newPath, 1)
continue
}
// Next: try the following sibling.
newPath[len(newPath)-1]++
subInfo, err = bs.getInfo(newPath)
if err == nil {
continue
}
// Level up: drop the last path element and try the parent's next sibling,
// repeating until one exists or the path is exhausted.
for {
newPath = newPath[:len(newPath)-1]
if len(newPath) > 0 {
newPath[len(newPath)-1]++
subInfo, err = bs.getInfo(newPath)
if err != nil {
err = nil
continue
}
}
break
}
// The end: the path ran out, so every stored descendant has been visited.
if len(newPath) == 0 {
break
}
}
// Lookup errors from the walk only signal "no such section" and are not
// reported.
return nil
}
// getChildPath returns the path of the first child of parent, i.e. parent
// followed by 1.
// NOTE: The result is typically incremented in place to iterate over sibling
// parts, so it is always a fresh copy that shares no memory with parent.
func getChildPath(parent []int) []int {
	child := make([]int, len(parent)+1)
	copy(child, parent)
	child[len(parent)] = 1

	return child
}
// stringPathFromInts renders a section path as its decimal part numbers
// joined by dots, e.g. [1 2 3] becomes "1.2.3". An empty path gives "".
func stringPathFromInts(ints []int) (ret string) {
	parts := make([]string, len(ints))
	for i, n := range ints {
		parts[i] = strconv.Itoa(n)
	}

	return strings.Join(parts, ".")
}
// hasInfo reports whether a section is stored under sectionPath.
func (bs *BodyStructure) hasInfo(sectionPath []int) bool {
	if _, err := bs.getInfo(sectionPath); err != nil {
		return false
	}

	return true
}
// getInfoCheckSection looks up a section like getInfo, with one exception:
// when the structure holds exactly one section, path [1] is answered with
// the section stored under the empty path.
func (bs *BodyStructure) getInfoCheckSection(sectionPath []int) (sectionInfo *SectionInfo, err error) {
	singleSection := len(*bs) == 1
	asksForFirstPart := len(sectionPath) == 1 && sectionPath[0] == 1

	if singleSection && asksForFirstPart {
		return bs.getInfo([]int{})
	}

	return bs.getInfo(sectionPath)
}
// getInfo returns the section stored under sectionPath, or a "wrong section"
// error naming the dotted path when no such section exists.
func (bs *BodyStructure) getInfo(sectionPath []int) (sectionInfo *SectionInfo, err error) {
	path := stringPathFromInts(sectionPath)

	if found, exists := (*bs)[path]; exists {
		return found, nil
	}

	return nil, errors.New("wrong section " + path)
}
// GetSection returns the bytes of a section including its MIME header, read
// from wholeMail at the section's recorded start and size.
func (bs *BodyStructure) GetSection(wholeMail io.ReadSeeker, sectionPath []int) (section []byte, err error) {
	info, lookupErr := bs.getInfoCheckSection(sectionPath)
	if lookupErr != nil {
		return nil, lookupErr
	}

	offset, length := info.Start, info.Size

	return goToOffsetAndReadNBytes(wholeMail, offset, length)
}
// GetSectionContent returns the bytes of a section's content (excluding the
// MIME header), i.e. the last BSize bytes of the section.
func (bs *BodyStructure) GetSectionContent(wholeMail io.ReadSeeker, sectionPath []int) (section []byte, err error) {
	info, lookupErr := bs.getInfoCheckSection(sectionPath)
	if lookupErr != nil {
		return nil, lookupErr
	}

	// The body begins where the section ends minus the body size.
	bodyOffset := info.Start + info.Size - info.BSize

	return goToOffsetAndReadNBytes(wholeMail, bodyOffset, info.BSize)
}
// GetMailHeader returns the parsed main header of the mail, which is the
// header of the section with the empty path.
func (bs *BodyStructure) GetMailHeader() (header textproto.MIMEHeader, err error) {
	rootPath := []int{}

	return bs.GetSectionHeader(rootPath)
}
// GetMailHeaderBytes returns the raw bytes of the main mail header, which is
// the header of the section with the empty path.
// Warning: It can contain extra lines.
func (bs *BodyStructure) GetMailHeaderBytes() (header []byte, err error) {
	rootPath := []int{}

	return bs.GetSectionHeaderBytes(rootPath)
}
// goToOffsetAndReadNBytes returns length bytes of wholeMail starting at
// offset, measured from the beginning of the mail.
//
// A zero length yields an empty slice without touching wholeMail and a
// negative length is an error. The reader is always positioned explicitly,
// also for offset zero, because it may have been advanced by an earlier read.
// The bytes are collected with io.ReadFull since a single Read call is allowed
// to deliver fewer bytes than requested; when the mail ends early the bytes
// read so far are returned together with the error.
func goToOffsetAndReadNBytes(wholeMail io.ReadSeeker, offset, length int) ([]byte, error) {
	if length == 0 {
		return []byte{}, nil
	}

	if length < 0 {
		return nil, errors.New("requested negative length")
	}

	if _, err := wholeMail.Seek(int64(offset), io.SeekStart); err != nil {
		return nil, err
	}

	out := make([]byte, length)
	n, err := io.ReadFull(wholeMail, out)

	return out[:n], err
}
// GetSectionHeader returns the parsed MIME header of the specified section,
// or the lookup error when the section does not exist.
func (bs *BodyStructure) GetSectionHeader(sectionPath []int) (textproto.MIMEHeader, error) {
	info, err := bs.getInfoCheckSection(sectionPath)
	if err == nil {
		return info.GetMIMEHeader()
	}

	return nil, err
}
// GetSectionHeaderBytes returns the raw header bytes captured for the
// specified section, or the lookup error when the section does not exist.
func (bs *BodyStructure) GetSectionHeaderBytes(sectionPath []int) ([]byte, error) {
	info, err := bs.getInfoCheckSection(sectionPath)
	if err == nil {
		return info.Header, nil
	}

	return nil, err
}
// IMAPBodyStructure recursively builds the IMAP body structure for the given
// part. Use an empty path to create the structure of the whole email.
//
// The MIME type, parameters and the Content-ID, Content-Transfer-Encoding,
// Content-Description and Content-Disposition values are taken from the
// section's stored header; Size is the section's body size and Lines its
// recorded line count. Stored child sections (currentPart.1, currentPart.2,
// ...) are appended to Parts in order, stopping at the first missing number.
//
// An error is returned when the section does not exist or its header cannot
// be parsed, or when building any child fails.
func (bs *BodyStructure) IMAPBodyStructure(currentPart []int) (imapBS *imap.BodyStructure, err error) {
	info, err := bs.getInfo(currentPart)
	if err != nil {
		return nil, err
	}

	tpHeader, err := info.GetMIMEHeader()
	if err != nil {
		return nil, err
	}

	// A media type that fails to parse is treated like a missing one.
	mediaType, params, _ := pmmime.ParseMediaType(tpHeader.Get("Content-Type"))

	// The extra empty element guarantees index 1 exists even if the media type
	// is empty or has no subtype.
	mediaTypeSep := append(strings.Split(mediaType, "/"), "")

	imapBS = &imap.BodyStructure{
		MIMEType:    mediaTypeSep[0],
		MIMESubType: mediaTypeSep[1],
		Params:      params,
		Size:        uint32(info.BSize),
		Lines:       uint32(info.Lines),
		Id:          tpHeader.Get("Content-ID"),
		Encoding:    tpHeader.Get("Content-Transfer-Encoding"),
		Description: tpHeader.Get("Content-Description"),
		Disposition: tpHeader.Get("Content-Disposition"),
	}

	// The child path is incremented in place below, so it must be a copy:
	// appending to currentPart directly could write into the caller's backing
	// array whenever that slice has spare capacity.
	for nextPart := getChildPath(currentPart); bs.hasInfo(nextPart); nextPart[len(nextPart)-1]++ {
		subStruct, err := bs.IMAPBodyStructure(nextPart)
		if err != nil {
			return imapBS, err
		}

		imapBS.Parts = append(imapBS.Parts, subStruct)
	}

	return imapBS, nil
}