feat(GODT-906): support rfc2047-encoded content transfer encodings

This commit is contained in:
James Houlahan
2020-12-07 12:27:43 +01:00
parent f469d34781
commit 38eb9fdac7
7 changed files with 103 additions and 5 deletions

View File

@ -45,6 +45,11 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeBody, plainB
return return
} }
if err = convertEncodedTransferEncoding(p); err != nil {
err = errors.Wrap(err, "failed to convert encoded transfer encodings")
return
}
if err = convertForeignEncodings(p); err != nil { if err = convertForeignEncodings(p); err != nil {
err = errors.Wrap(err, "failed to convert foreign encodings") err = errors.Wrap(err, "failed to convert foreign encodings")
return return
@ -89,6 +94,30 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeBody, plainB
return m, mimeBodyBuffer.String(), plainBody, attReaders, nil return m, mimeBodyBuffer.String(), plainBody, attReaders, nil
} }
// convertEncodedTransferEncoding rewrites Content-Transfer-Encoding header
// values that were written as RFC2047 encoded words into their decoded form.
// Encoding this header is not permitted by the RFC, yet such messages are
// still encountered in practice.
//
// The conversion is registered as the default handler of a walker obtained
// from p; a decoding failure is returned from that handler.
func convertEncodedTransferEncoding(p *parser.Parser) error {
	logrus.Trace("Converting encoded transfer encoding")

	const headerKey = "Content-Transfer-Encoding"

	decodePart := func(part *parser.Part) error {
		raw := part.Header.Get(headerKey)
		if raw == "" {
			// No transfer encoding header on this part: nothing to decode.
			return nil
		}

		decoded, err := pmmime.WordDec.DecodeHeader(raw)
		if err == nil {
			// Only overwrite the header once decoding has succeeded.
			part.Header.Set(headerKey, decoded)
		}

		return err
	}

	return p.NewWalker().RegisterDefaultHandler(decodePart).Walk()
}
func convertForeignEncodings(p *parser.Parser) error { func convertForeignEncodings(p *parser.Parser) error {
logrus.Trace("Converting foreign encodings") logrus.Trace("Converting foreign encodings")
@ -104,12 +133,11 @@ func convertForeignEncodings(p *parser.Parser) error {
return p.ConvertToUTF8() return p.ConvertToUTF8()
}). }).
RegisterDefaultHandler(func(p *parser.Part) error { RegisterDefaultHandler(func(p *parser.Part) error {
t, params, _ := p.ContentType()
// multipart/alternative, for example, can contain extra charset. // multipart/alternative, for example, can contain extra charset.
if params != nil && params["charset"] != "" { if _, params, _ := p.ContentType(); params != nil && params["charset"] != "" {
return p.ConvertToUTF8() return p.ConvertToUTF8()
} }
logrus.WithField("type", t).Trace("Not converting part to utf-8")
return nil return nil
}). }).
Walk() Walk()

View File

@ -22,6 +22,7 @@ import (
"io/ioutil" "io/ioutil"
"github.com/emersion/go-message" "github.com/emersion/go-message"
"github.com/sirupsen/logrus"
) )
type Parser struct { type Parser struct {
@ -33,8 +34,15 @@ func New(r io.Reader) (*Parser, error) {
p := new(Parser) p := new(Parser)
entity, err := message.Read(newEndOfMailTrimmer(r)) entity, err := message.Read(newEndOfMailTrimmer(r))
if err != nil && !message.IsUnknownCharset(err) { if err != nil {
return nil, err switch {
case message.IsUnknownCharset(err):
logrus.WithError(err).Warning("Message has an unknown charset")
case message.IsUnknownEncoding(err):
logrus.WithError(err).Warning("Message has an unknown encoding")
default:
return nil, err
}
} }
if err := p.parseEntity(entity); err != nil { if err := p.parseEntity(entity); err != nil {

View File

@ -480,6 +480,37 @@ func TestParseWithTrailingEndOfMailIndicator(t *testing.T) {
assert.Equal(t, "boo!", plainBody) assert.Equal(t, "boo!", plainBody)
} }
// TestParseEncodedContentType parses the fixture
// rfc2047-content-transfer-encoding.eml and expects parsing to succeed with
// the expected sender, first recipient and plain body.
func TestParseEncodedContentType(t *testing.T) {
	msg, _, body, _, err := Parse(getFileReader("rfc2047-content-transfer-encoding.eml"), "", "")
	// Abort immediately on a parse failure; the checks below need msg.
	require.NoError(t, err)

	assert.Equal(t, `"Sender" <sender@sender.com>`, msg.Sender.String())
	assert.Equal(t, `<user@somewhere.org>`, msg.ToList[0].String())

	assert.Equal(t, "bodybodybody\n", body)
}
// TestParseNonEncodedContentType parses the fixture
// non-encoded-content-transfer-encoding.eml and expects parsing to succeed
// with the expected sender, first recipient and plain body.
func TestParseNonEncodedContentType(t *testing.T) {
	msg, _, body, _, err := Parse(getFileReader("non-encoded-content-transfer-encoding.eml"), "", "")
	// Abort immediately on a parse failure; the checks below need msg.
	require.NoError(t, err)

	assert.Equal(t, `"Sender" <sender@sender.com>`, msg.Sender.String())
	assert.Equal(t, `<user@somewhere.org>`, msg.ToList[0].String())

	assert.Equal(t, "bodybodybody\n", body)
}
// TestParseEncodedContentTypeBad parses the fixture
// rfc2047-content-transfer-encoding-bad.eml and expects Parse to report an
// error; all other return values are ignored.
func TestParseEncodedContentTypeBad(t *testing.T) {
	_, _, _, _, err := Parse(getFileReader("rfc2047-content-transfer-encoding-bad.eml"), "", "") // nolint[dogsled]

	require.Error(t, err)
}
func getFileReader(filename string) io.Reader { func getFileReader(filename string) io.Reader {
f, err := os.Open(filepath.Join("testdata", filename)) f, err := os.Open(filepath.Join("testdata", filename))
if err != nil { if err != nil {

View File

@ -0,0 +1,10 @@
To: user@somewhere.org
Subject: =?utf-8?Q?aoeuaoeuaoeu?=
Date: Sat, 16 Jun 2020 17:36:02 +0200
MIME-Version: 1.0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: 8bit
From: =?utf-8?Q?Sender?= <sender@sender.com>
bodybodybody

View File

@ -0,0 +1,10 @@
To: user@somewhere.org
Subject: =?utf-8?Q?aoeuaoeuaoeu?=
Date: Sat, 16 Jun 2020 17:36:02 +0200
MIME-Version: 1.0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: =?utf-8?Q?8bit
From: =?utf-8?Q?Sender?= <sender@sender.com>
bodybodybody

View File

@ -0,0 +1,10 @@
To: user@somewhere.org
Subject: =?utf-8?Q?aoeuaoeuaoeu?=
Date: Sat, 16 Jun 2020 17:36:02 +0200
MIME-Version: 1.0
Content-Type: text/plain;
charset="utf-8"
Content-Transfer-Encoding: =?utf-8?Q?8bit?=
From: =?utf-8?Q?Sender?= <sender@sender.com>
bodybodybody

View File

@ -5,6 +5,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
## Unreleased ## Unreleased
### Added ### Added
* GODT-906 Handle RFC2047-encoded content transfer encoding values.
### Changed ### Changed
* GODT-893 Bump go-rfc5322 dependency to v0.2.1 to properly detect syntax errors during parsing. * GODT-893 Bump go-rfc5322 dependency to v0.2.1 to properly detect syntax errors during parsing.