Files
proton-bridge/pkg/message/parser/part.go
James Houlahan 3bd39b3ea5 fix(GODT-1804): Only promote content headers if non-empty
When attaching public key, we take the root mime part, create a new root,
and put the old root alongside an additional public key mime part.
But when moving the root, we would copy all content headers, even empty ones.
So we’d be left with Content-Disposition: "" which would fail to parse.
2023-02-06 15:57:24 +00:00

209 lines
4.8 KiB
Go

// Copyright (c) 2023 Proton AG
//
// This file is part of Proton Mail Bridge.
//
// Proton Mail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Proton Mail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (
"bytes"
"errors"
"mime"
"unicode/utf8"
pmmime "github.com/ProtonMail/proton-bridge/v3/pkg/mime"
"github.com/PuerkitoBio/goquery"
"github.com/emersion/go-message"
"github.com/sirupsen/logrus"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
)
// Parts is an ordered list of message parts.
type Parts []*Part
// Part is a single node of a parsed message: a header, a raw body and any
// nested child parts.
type Part struct {
// Header holds the header fields of this part (Content-Type and friends).
Header message.Header
// Body is the raw body of this part.
Body []byte
// children are the parts nested directly under this one; they are exposed
// through Child and Children and extended through AddChild.
children Parts
}
// ContentType returns the media type and parameters declared in the part's
// Content-Type header.
//
// The header is first parsed with go-message. That parser rejects duplicate
// parameters (e.g. "text/plain; charset=utf-8; charset=UTF-8"), so on failure
// the raw header value is parsed again with pmmime, which accepts them; the
// result of that second attempt (including its error, if any) is returned.
func (p *Part) ContentType() (string, map[string]string, error) {
	if mediaType, mediaParams, err := p.Header.ContentType(); err == nil {
		return mediaType, mediaParams, nil
	}

	return pmmime.ParseMediaType(p.Header.Get("Content-Type"))
}
// Child returns the n-th direct child of the part, counting from 1.
//
// It returns a "no such part" error when n does not identify an existing
// child, i.e. when n is smaller than 1 or larger than the number of children.
func (p *Part) Child(n int) (part *Part, err error) {
	// The index is 1-based, so both bounds must be checked: without the lower
	// bound, n <= 0 would index the slice with a negative value and panic.
	if n < 1 || n > len(p.children) {
		return nil, errors.New("no such part")
	}

	return p.children[n-1], nil
}
// Children returns the direct children of the part. The returned slice is the
// part's own slice, not a copy.
func (p *Part) Children() Parts { return p.children }
// AddChild attaches child to the part.
//
// If the part is already multipart/mixed, child is simply appended to its
// children. Otherwise the part's current content (its non-empty content
// headers, body and children) is moved into a new first child, child becomes
// the second child, and the part itself is turned into a multipart/mixed
// container with no body and no other content headers.
func (p *Part) AddChild(child *Part) {
	if p.isMultipartMixed() {
		p.children = append(p.children, child)
		return
	}

	// Snapshot the current content before the part is rewritten in place.
	formerRoot := &Part{
		Header:   getContentHeaders(p.Header),
		Body:     p.Body,
		children: p.children,
	}

	p.Body = nil
	p.children = Parts{formerRoot, child}

	// The content headers now live on formerRoot; the container only
	// advertises its multipart type.
	stripContentHeaders(&p.Header)
	p.Header.Set("Content-Type", "multipart/mixed")
}
// ConvertToUTF8 decodes the part's body with the decoder chosen by
// selectSuitableDecoder and then rewrites the Content-Type header so that its
// charset parameter is "UTF-8".
//
// It returns an error if the content type cannot be parsed or if decoding
// fails. In the latter case the body has already been replaced by whatever
// the decoder returned.
func (p *Part) ConvertToUTF8() error {
	logrus.Trace("Converting part to utf-8")

	mediaType, mediaParams, err := p.ContentType()
	if err != nil {
		return err
	}

	decoded, err := selectSuitableDecoder(p, mediaType, mediaParams).Bytes(p.Body)
	p.Body = decoded
	if err != nil {
		return err
	}

	// A content type without parameters may come back with a nil map.
	if mediaParams == nil {
		mediaParams = map[string]string{}
	}
	mediaParams["charset"] = "UTF-8"

	p.Header.SetContentType(mediaType, mediaParams)

	return nil
}
// ConvertMetaCharset parses the body as HTML, points the charset declared in
// its <meta> elements at UTF-8 and replaces the body with the re-rendered
// document.
//
// For every <meta> element:
//   - a "content" attribute that parses as a media type gets its charset
//     parameter set to "UTF-8"; if it does not parse, the element is left
//     untouched entirely;
//   - a "charset" attribute, when present, is set to "UTF-8".
//
// It returns an error if the body cannot be parsed or rendered as HTML.
func (p *Part) ConvertMetaCharset() error {
	root, err := html.Parse(bytes.NewReader(p.Body))
	if err != nil {
		return err
	}

	rewriteMeta := func(_ int, meta *goquery.Selection) {
		if content, found := meta.Attr("content"); found {
			mediaType, mediaParams, parseErr := pmmime.ParseMediaType(content)
			if parseErr != nil {
				// Not a media type: skip this element altogether.
				return
			}

			mediaParams["charset"] = "UTF-8"
			meta.SetAttr("content", mime.FormatMediaType(mediaType, mediaParams))
		}

		if _, found := meta.Attr("charset"); found {
			meta.SetAttr("charset", "UTF-8")
		}
	}

	goquery.NewDocumentFromNode(root).Find("meta").Each(rewriteMeta)

	var rendered bytes.Buffer
	if err := html.Render(&rendered, root); err != nil {
		return err
	}

	p.Body = rendered.Bytes()

	return nil
}
// selectSuitableDecoder picks the decoder used to convert the body of p to
// UTF-8. t and params are the part's media type and content-type parameters.
//
// The choice is made in this order:
//  1. the decoder for the charset named in params, if pmmime knows it;
//  2. a no-op decoder, if the body is already valid UTF-8;
//  3. the decoder for the encoding detected from the body itself.
func selectSuitableDecoder(p *Part, t string, params map[string]string) *encoding.Decoder {
	if declared, found := params["charset"]; found {
		logrus.WithField("charset", declared).Trace("The part has a specified charset")

		decoder, err := pmmime.SelectDecoder(declared)
		if err == nil {
			logrus.Trace("The charset is known; decoder has been selected")
			return decoder
		}

		logrus.Warn("The charset is unknown; no decoder could be selected")
	}

	if utf8.Valid(p.Body) {
		logrus.Trace("The part is already valid utf-8, returning noop encoder")
		return encoding.Nop.NewDecoder()
	}

	detected, detectedName, _ := charset.DetermineEncoding(p.Body, t)

	logrus.WithField("name", detectedName).Warn("Determined encoding by reading body")

	return detected.NewDecoder()
}
// is7BitClean reports whether every byte of the body fits in 7 bits, i.e. has
// a value below 0x80. An empty body is 7-bit clean.
func (p *Part) is7BitClean() bool {
	for _, b := range p.Body {
		// utf8.RuneSelf is 0x80, the first value with the high bit set. The
		// comparison must be inclusive: 0x80 itself is not a 7-bit value.
		if b >= utf8.RuneSelf {
			return false
		}
	}

	return true
}
// isMultipartMixed reports whether the part's content type parses
// successfully and is exactly "multipart/mixed". A part whose content type
// cannot be parsed is not considered multipart/mixed.
func (p *Part) isMultipartMixed() bool {
	mediaType, _, err := p.ContentType()

	return err == nil && mediaType == "multipart/mixed"
}
// getContentHeaders returns a new header holding only the Content-Type,
// Content-Disposition and Content-Transfer-Encoding fields of header.
//
// Fields whose value is empty are not copied, so the result never carries an
// empty content header.
func getContentHeaders(header message.Header) message.Header {
	var res message.Header

	for _, key := range []string{
		"Content-Type",
		"Content-Disposition",
		"Content-Transfer-Encoding",
	} {
		if value := header.Get(key); value != "" {
			res.Set(key, value)
		}
	}

	return res
}
// stripContentHeaders removes the Content-Type, Content-Disposition and
// Content-Transfer-Encoding fields from header in place.
func stripContentHeaders(header *message.Header) {
	for _, key := range []string{
		"Content-Type",
		"Content-Disposition",
		"Content-Transfer-Encoding",
	} {
		header.Del(key)
	}
}