From 38eb9fdac7be9c8db5d9d12c69c9be3bf561cc93 Mon Sep 17 00:00:00 2001 From: James Houlahan Date: Mon, 7 Dec 2020 12:27:43 +0100 Subject: [PATCH] feat(GODT-906): support rfc2047-encoded content transfer encodings --- pkg/message/parser.go | 34 +++++++++++++++++-- pkg/message/parser/parser.go | 12 +++++-- pkg/message/parser_test.go | 31 +++++++++++++++++ .../non-encoded-content-transfer-encoding.eml | 10 ++++++ .../rfc2047-content-transfer-encoding-bad.eml | 10 ++++++ .../rfc2047-content-transfer-encoding.eml | 10 ++++++ unreleased.md | 1 + 7 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 pkg/message/testdata/non-encoded-content-transfer-encoding.eml create mode 100644 pkg/message/testdata/rfc2047-content-transfer-encoding-bad.eml create mode 100644 pkg/message/testdata/rfc2047-content-transfer-encoding.eml diff --git a/pkg/message/parser.go b/pkg/message/parser.go index c350316f..d06a0288 100644 --- a/pkg/message/parser.go +++ b/pkg/message/parser.go @@ -45,6 +45,11 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeBody, plainB return } + if err = convertEncodedTransferEncoding(p); err != nil { + err = errors.Wrap(err, "failed to convert encoded transfer encodings") + return + } + if err = convertForeignEncodings(p); err != nil { err = errors.Wrap(err, "failed to convert foreign encodings") return @@ -89,6 +94,30 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeBody, plainB return m, mimeBodyBuffer.String(), plainBody, attReaders, nil } +// convertEncodedTransferEncoding decodes any RFC2047-encoded content transfer encodings. +// Such content transfer encodings go against RFC but still exist in the wild anyway. +func convertEncodedTransferEncoding(p *parser.Parser) error { + logrus.Trace("Converting encoded transfer encoding") + + return p.NewWalker(). + RegisterDefaultHandler(func(p *parser.Part) error { + encoding := p.Header.Get("Content-Transfer-Encoding") + if encoding == "" { + return nil + } + + dec, err := pmmime.WordDec.DecodeHeader(encoding) + if err != nil { + return err + } + + p.Header.Set("Content-Transfer-Encoding", dec) + + return nil + }). + Walk() +} + func convertForeignEncodings(p *parser.Parser) error { logrus.Trace("Converting foreign encodings") @@ -104,12 +133,11 @@ func convertForeignEncodings(p *parser.Parser) error { return p.ConvertToUTF8() }). RegisterDefaultHandler(func(p *parser.Part) error { - t, params, _ := p.ContentType() // multipart/alternative, for example, can contain extra charset. - if params != nil && params["charset"] != "" { + if _, params, _ := p.ContentType(); params != nil && params["charset"] != "" { return p.ConvertToUTF8() } - logrus.WithField("type", t).Trace("Not converting part to utf-8") + return nil }). Walk() diff --git a/pkg/message/parser/parser.go b/pkg/message/parser/parser.go index 6639006e..5432c282 100644 --- a/pkg/message/parser/parser.go +++ b/pkg/message/parser/parser.go @@ -22,6 +22,7 @@ import ( "io/ioutil" "github.com/emersion/go-message" + "github.com/sirupsen/logrus" ) type Parser struct { @@ -33,8 +34,15 @@ func New(r io.Reader) (*Parser, error) { p := new(Parser) entity, err := message.Read(newEndOfMailTrimmer(r)) - if err != nil && !message.IsUnknownCharset(err) { - return nil, err + if err != nil { + switch { + case message.IsUnknownCharset(err): + logrus.WithError(err).Warning("Message has an unknown charset") + case message.IsUnknownEncoding(err): + logrus.WithError(err).Warning("Message has an unknown encoding") + default: + return nil, err + } } if err := p.parseEntity(entity); err != nil { diff --git a/pkg/message/parser_test.go b/pkg/message/parser_test.go index a4c5e04d..2918b984 100644 --- a/pkg/message/parser_test.go +++ b/pkg/message/parser_test.go @@ -480,6 +480,37 @@ func TestParseWithTrailingEndOfMailIndicator(t *testing.T) { assert.Equal(t, "boo!", plainBody) } +func TestParseEncodedContentType(t *testing.T) { + f := getFileReader("rfc2047-content-transfer-encoding.eml") + + m, _, plainBody, _, err := Parse(f, "", "") + require.NoError(t, err) + + assert.Equal(t, `"Sender" `, m.Sender.String()) + assert.Equal(t, ``, m.ToList[0].String()) + + assert.Equal(t, "bodybodybody\n", plainBody) +} + +func TestParseNonEncodedContentType(t *testing.T) { + f := getFileReader("non-encoded-content-transfer-encoding.eml") + + m, _, plainBody, _, err := Parse(f, "", "") + require.NoError(t, err) + + assert.Equal(t, `"Sender" `, m.Sender.String()) + assert.Equal(t, ``, m.ToList[0].String()) + + assert.Equal(t, "bodybodybody\n", plainBody) +} + +func TestParseEncodedContentTypeBad(t *testing.T) { + f := getFileReader("rfc2047-content-transfer-encoding-bad.eml") + + _, _, _, _, err := Parse(f, "", "") // nolint[dogsled] + require.Error(t, err) +} + func getFileReader(filename string) io.Reader { f, err := os.Open(filepath.Join("testdata", filename)) if err != nil { diff --git a/pkg/message/testdata/non-encoded-content-transfer-encoding.eml b/pkg/message/testdata/non-encoded-content-transfer-encoding.eml new file mode 100644 index 00000000..8ca7403a --- /dev/null +++ b/pkg/message/testdata/non-encoded-content-transfer-encoding.eml @@ -0,0 +1,10 @@ +To: user@somewhere.org +Subject: =?utf-8?Q?aoeuaoeuaoeu?= +Date: Sat, 16 Jun 2020 17:36:02 +0200 +MIME-Version: 1.0 +Content-Type: text/plain; + charset="utf-8" +Content-Transfer-Encoding: 8bit +From: =?utf-8?Q?Sender?= + +bodybodybody diff --git a/pkg/message/testdata/rfc2047-content-transfer-encoding-bad.eml b/pkg/message/testdata/rfc2047-content-transfer-encoding-bad.eml new file mode 100644 index 00000000..bbbe3047 --- /dev/null +++ b/pkg/message/testdata/rfc2047-content-transfer-encoding-bad.eml @@ -0,0 +1,10 @@ +To: user@somewhere.org +Subject: =?utf-8?Q?aoeuaoeuaoeu?= +Date: Sat, 16 Jun 2020 17:36:02 +0200 +MIME-Version: 1.0 +Content-Type: text/plain; + charset="utf-8" +Content-Transfer-Encoding: =?utf-8?Q?8bit +From: =?utf-8?Q?Sender?= + +bodybodybody diff --git a/pkg/message/testdata/rfc2047-content-transfer-encoding.eml b/pkg/message/testdata/rfc2047-content-transfer-encoding.eml new file mode 100644 index 00000000..9a110b25 --- /dev/null +++ b/pkg/message/testdata/rfc2047-content-transfer-encoding.eml @@ -0,0 +1,10 @@ +To: user@somewhere.org +Subject: =?utf-8?Q?aoeuaoeuaoeu?= +Date: Sat, 16 Jun 2020 17:36:02 +0200 +MIME-Version: 1.0 +Content-Type: text/plain; + charset="utf-8" +Content-Transfer-Encoding: =?utf-8?Q?8bit?= +From: =?utf-8?Q?Sender?= + +bodybodybody diff --git a/unreleased.md b/unreleased.md index 5686e88a..4d79f360 100644 --- a/unreleased.md +++ b/unreleased.md @@ -5,6 +5,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/) ## Unreleased ### Added +* GODT-906 Handle RFC2047-encoded content transfer encoding values. ### Changed * GODT-893 Bump go-rfc5322 dependency to v0.2.1 to properly detect syntax errors during parsing.