forked from Silverfish/proton-bridge
feat: handle foreign encodings
This commit is contained in:
@ -205,7 +205,7 @@ func joinChildParts(childParts []parser.Parts) parser.Parts {
|
||||
func bestChoice(childParts []parser.Parts, preferredContentType string) (parser.Parts, error) {
|
||||
// If one of the parts has preferred content type, use that.
|
||||
for i := len(childParts) - 1; i >= 0; i-- {
|
||||
if allHaveContentType(childParts[i], preferredContentType) {
|
||||
if allPartsHaveContentType(childParts[i], preferredContentType) {
|
||||
return childParts[i], nil
|
||||
}
|
||||
}
|
||||
@ -214,7 +214,7 @@ func bestChoice(childParts []parser.Parts, preferredContentType string) (parser.
|
||||
return childParts[len(childParts)-1], nil
|
||||
}
|
||||
|
||||
func allHaveContentType(parts parser.Parts, contentType string) bool {
|
||||
func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
|
||||
for _, part := range parts {
|
||||
t, _, err := part.Header.ContentType()
|
||||
if err != nil {
|
||||
@ -272,23 +272,11 @@ func getPlainBody(part *parser.Part) []byte {
|
||||
}
|
||||
}
|
||||
|
||||
func writeMIMEMessage(p *parser.Parser) (mime string, err error) {
|
||||
writer := p.
|
||||
NewWriter().
|
||||
WithCondition(func(p *parser.Part) (keep bool) {
|
||||
disp, _, err := p.Header.ContentDisposition()
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// TODO: Is it true that we don't want to write attachments? I thought this was for externals...
|
||||
return disp != "attachment"
|
||||
})
|
||||
|
||||
func writeMIMEMessage(p *parser.Parser) (string, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
if err = writer.Write(buf); err != nil {
|
||||
return
|
||||
if err := p.NewWriter().Write(buf); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/emersion/go-message"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
@ -13,14 +13,19 @@ type Parser struct {
|
||||
root *Part
|
||||
}
|
||||
|
||||
func New(r io.Reader) (p *Parser, err error) {
|
||||
p = new(Parser)
|
||||
func New(r io.Reader) (*Parser, error) {
|
||||
p := new(Parser)
|
||||
|
||||
if err = p.parse(r); err != nil {
|
||||
return
|
||||
entity, err := message.Read(r)
|
||||
if err != nil && !message.IsUnknownCharset(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return
|
||||
if err := p.parseEntity(entity); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (p *Parser) NewWalker() *Walker {
|
||||
@ -51,32 +56,25 @@ func (p *Parser) Part(number []int) (part *Part, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func (p *Parser) parse(r io.Reader) error {
|
||||
entity, err := message.Read(r)
|
||||
if err != nil {
|
||||
if !message.IsUnknownCharset(err) {
|
||||
return err
|
||||
} else {
|
||||
fmt.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
return p.parseEntity(entity)
|
||||
}
|
||||
|
||||
func (p *Parser) enter() {
|
||||
func (p *Parser) beginPart() {
|
||||
p.stack = append(p.stack, &Part{})
|
||||
}
|
||||
|
||||
func (p *Parser) exit() {
|
||||
var built *Part
|
||||
func (p *Parser) endPart() {
|
||||
var part *Part
|
||||
|
||||
p.stack, built = p.stack[:len(p.stack)-1], p.stack[len(p.stack)-1]
|
||||
p.stack, part = p.stack[:len(p.stack)-1], p.stack[len(p.stack)-1]
|
||||
|
||||
if len(p.stack) > 0 {
|
||||
p.top().children = append(p.top().children, built)
|
||||
p.top().children = append(p.top().children, part)
|
||||
} else {
|
||||
p.root = built
|
||||
p.root = part
|
||||
}
|
||||
|
||||
if !part.isUTF8() {
|
||||
if err := part.convertToUTF8(); err != nil {
|
||||
logrus.WithError(err).Error("failed to convert part to utf-8")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,9 +94,9 @@ func (p *Parser) withBody(bytes []byte) {
|
||||
p.top().Body = bytes
|
||||
}
|
||||
|
||||
func (p *Parser) parseEntity(e *message.Entity) (err error) {
|
||||
p.enter()
|
||||
defer p.exit()
|
||||
func (p *Parser) parseEntity(e *message.Entity) error {
|
||||
p.beginPart()
|
||||
defer p.endPart()
|
||||
|
||||
p.withHeader(e.Header)
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@ -19,6 +20,30 @@ func newTestParser(t *testing.T, msg string) *Parser {
|
||||
return p
|
||||
}
|
||||
|
||||
func TestParserSpecifiedLatin1Charset(t *testing.T) {
|
||||
p := newTestParser(t, "text_plain_latin1.eml")
|
||||
|
||||
checkBodies(t, p, "ééééééé")
|
||||
}
|
||||
|
||||
func TestParserUnspecifiedLatin1Charset(t *testing.T) {
|
||||
p := newTestParser(t, "text_plain_unknown_latin1.eml")
|
||||
|
||||
checkBodies(t, p, "ééééééé")
|
||||
}
|
||||
|
||||
func TestParserSpecifiedLatin2Charset(t *testing.T) {
|
||||
p := newTestParser(t, "text_plain_latin2.eml")
|
||||
|
||||
checkBodies(t, p, "řšřšřš")
|
||||
}
|
||||
|
||||
func TestParserEmbeddedLatin2Charset(t *testing.T) {
|
||||
p := newTestParser(t, "text_html_embedded_latin2_encoding.eml")
|
||||
|
||||
checkBodies(t, p, `<html><head><meta charset="ISO-8859-2"></head><body>latin2 řšřš</body></html>`)
|
||||
}
|
||||
|
||||
func f(filename string) io.ReadCloser {
|
||||
f, err := os.Open(filepath.Join("testdata", filename))
|
||||
|
||||
@ -37,3 +62,21 @@ func s(filename string) string {
|
||||
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func checkBodies(t *testing.T, p *Parser, wantBodies ...string) {
|
||||
var partBodies, expectedBodies [][]byte
|
||||
|
||||
require.NoError(t, p.NewWalker().RegisterDefaultHandler(func(p *Part) (err error) {
|
||||
if p.Body != nil {
|
||||
partBodies = append(partBodies, p.Body)
|
||||
}
|
||||
|
||||
return
|
||||
}).Walk())
|
||||
|
||||
for _, body := range wantBodies {
|
||||
expectedBodies = append(expectedBodies, []byte(body))
|
||||
}
|
||||
|
||||
assert.ElementsMatch(t, expectedBodies, partBodies)
|
||||
}
|
||||
|
||||
@ -2,8 +2,12 @@ package parser
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
pmmime "github.com/ProtonMail/proton-bridge/pkg/mime"
|
||||
"github.com/emersion/go-message"
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
|
||||
type Parts []*Part
|
||||
@ -30,35 +34,32 @@ func (p *Part) AddChild(child *Part) {
|
||||
p.children = append(p.children, child)
|
||||
}
|
||||
|
||||
func (p *Part) write(writer *message.Writer, w *Writer) (err error) {
|
||||
if len(p.children) > 0 {
|
||||
for _, child := range p.children {
|
||||
if err = child.writeAsChild(writer, w); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, err = writer.Write(p.Body); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
func (p *Part) isUTF8() bool {
|
||||
return utf8.Valid(p.Body)
|
||||
}
|
||||
|
||||
func (p *Part) writeAsChild(writer *message.Writer, w *Writer) (err error) {
|
||||
if !w.shouldWrite(p) {
|
||||
return
|
||||
}
|
||||
|
||||
childWriter, err := writer.CreatePart(p.Header)
|
||||
// TODO: Do we then need to set charset to utf-8? What if it's embedded in html?
|
||||
func (p *Part) convertToUTF8() error {
|
||||
t, params, err := p.Header.ContentType()
|
||||
if err != nil {
|
||||
return
|
||||
return err
|
||||
}
|
||||
|
||||
if err = p.write(childWriter, w); err != nil {
|
||||
return
|
||||
var decoder *encoding.Decoder
|
||||
|
||||
if knownCharset, ok := params["charset"]; !ok {
|
||||
encoding, _, _ := charset.DetermineEncoding(p.Body, t)
|
||||
decoder = encoding.NewDecoder()
|
||||
} else if decoder, err = pmmime.SelectDecoder(knownCharset); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return childWriter.Close()
|
||||
if p.Body, err = decoder.Bytes(p.Body); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
params["charset"] = "utf-8"
|
||||
p.Header.SetContentType(t, params)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
5
pkg/message/parser/testdata/text_html_embedded_latin2_encoding.eml
vendored
Normal file
5
pkg/message/parser/testdata/text_html_embedded_latin2_encoding.eml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
From: Sender <sender@pm.me>
|
||||
To: Receiver <receiver@pm.me>
|
||||
Content-Type: text/html
|
||||
|
||||
<html><head><meta charset="ISO-8859-2"></head><body>latin2 řšřš</body></html>
|
||||
5
pkg/message/parser/testdata/text_plain_latin1.eml
vendored
Normal file
5
pkg/message/parser/testdata/text_plain_latin1.eml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
From: Sender <sender@pm.me>
|
||||
To: Receiver <receiver@pm.me>
|
||||
Content-Type: text/plain; charset=ISO-8859-1
|
||||
|
||||
ééééééé
|
||||
5
pkg/message/parser/testdata/text_plain_latin2.eml
vendored
Normal file
5
pkg/message/parser/testdata/text_plain_latin2.eml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
From: Sender <sender@pm.me>
|
||||
To: Receiver <receiver@pm.me>
|
||||
Content-Type: text/plain; charset=ISO-8859-2
|
||||
|
||||
řšřšřš
|
||||
5
pkg/message/parser/testdata/text_plain_unknown_latin1.eml
vendored
Normal file
5
pkg/message/parser/testdata/text_plain_unknown_latin1.eml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
From: Sender <sender@pm.me>
|
||||
To: Receiver <receiver@pm.me>
|
||||
Content-Type: text/plain
|
||||
|
||||
ééééééé
|
||||
5
pkg/message/parser/testdata/text_plain_unknown_latin2.eml
vendored
Normal file
5
pkg/message/parser/testdata/text_plain_unknown_latin2.eml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
From: Sender <sender@pm.me>
|
||||
To: Receiver <receiver@pm.me>
|
||||
Content-Type: text/plain
|
||||
|
||||
řšřšřš
|
||||
@ -19,19 +19,27 @@ func newWriter(root *Part) *Writer {
|
||||
}
|
||||
}
|
||||
|
||||
// WithCondition allows setting a condition when parts should be written.
|
||||
// Parts are passed to each condition set and if any condition returns false,
|
||||
// the part is not written.
|
||||
// This initially seemed like a good idea but is now kinda useless.
|
||||
func (w *Writer) WithCondition(cond Condition) *Writer {
|
||||
w.cond = append(w.cond, cond)
|
||||
return w
|
||||
}
|
||||
|
||||
func (w *Writer) Write(ww io.Writer) (err error) {
|
||||
msgWriter, err := message.CreateWriter(ww, w.root.Header)
|
||||
if err != nil {
|
||||
return
|
||||
func (w *Writer) Write(ww io.Writer) error {
|
||||
if w.shouldFilter(w.root) {
|
||||
w.root.Header.Add("Content-Transfer-Encoding", "base64")
|
||||
}
|
||||
|
||||
if err = w.root.write(msgWriter, w); err != nil {
|
||||
return
|
||||
msgWriter, err := message.CreateWriter(ww, w.root.Header)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := w.write(msgWriter, w.root); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return msgWriter.Close()
|
||||
@ -46,3 +54,56 @@ func (w *Writer) shouldWrite(p *Part) bool {
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (w *Writer) shouldFilter(p *Part) bool {
|
||||
encoding := p.Header.Get("Content-Transfer-Encoding")
|
||||
|
||||
if encoding != "" && encoding == "quoted-printable" || encoding == "base64" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, b := range p.Body {
|
||||
if uint8(b) > 1<<7 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (w *Writer) write(writer *message.Writer, p *Part) error {
|
||||
if len(p.children) > 0 {
|
||||
for _, child := range p.children {
|
||||
if err := w.writeAsChild(writer, child); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := writer.Write(p.Body); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *Writer) writeAsChild(writer *message.Writer, p *Part) error {
|
||||
if !w.shouldWrite(p) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if w.shouldFilter(p) {
|
||||
p.Header.Add("Content-Transfer-Encoding", "base64")
|
||||
}
|
||||
|
||||
childWriter, err := writer.CreatePart(p.Header)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := w.write(childWriter, p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return childWriter.Close()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user