diff --git a/Changelog.md b/Changelog.md index bc634d8a..1cbb1a00 100644 --- a/Changelog.md +++ b/Changelog.md @@ -7,6 +7,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/) ### Fixed * GODT-752 Parsing message with empty CC. * GODT-752 Parsing non-utf8 multipart/alternative message. +* GODT-752 Parsing message with duplicate charset parameter. ## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX) diff --git a/pkg/message/parser.go b/pkg/message/parser.go index 1419b8b6..aeb96154 100644 --- a/pkg/message/parser.go +++ b/pkg/message/parser.go @@ -103,7 +103,7 @@ func convertForeignEncodings(p *parser.Parser) error { return p.ConvertToUTF8() }). RegisterDefaultHandler(func(p *parser.Part) error { - t, params, _ := p.Header.ContentType() + t, params, _ := p.ContentType() // multipart/alternative, for example, can contain extra charset. if params != nil && params["charset"] != "" { return p.ConvertToUTF8() @@ -297,7 +297,7 @@ func allPartsHaveContentType(parts parser.Parts, contentType string) bool { } for _, part := range parts { - t, _, err := part.Header.ContentType() + t, _, err := part.ContentType() if err != nil { return false } @@ -333,7 +333,7 @@ func determineMIMEType(p *parser.Parser) (string, error) { // getPlainBody returns the body of the given part, converting html to // plaintext where possible. func getPlainBody(part *parser.Part) []byte { - contentType, _, err := part.Header.ContentType() + contentType, _, err := part.ContentType() if err != nil { return part.Body } diff --git a/pkg/message/parser/handler.go b/pkg/message/parser/handler.go index f34d2035..60274b82 100644 --- a/pkg/message/parser/handler.go +++ b/pkg/message/parser/handler.go @@ -17,7 +17,9 @@ package parser -import "regexp" +import ( + "regexp" +) type HandlerFunc func(*Part) error @@ -35,7 +37,7 @@ func (h *handler) matchType(p *Part) bool { return false } - t, _, err := p.Header.ContentType() + t, _, err := p.ContentType() if err != nil { t = "" } diff --git a/pkg/message/parser/part.go b/pkg/message/parser/part.go index 52e22dcf..5b79a7fb 100644 --- a/pkg/message/parser/part.go +++ b/pkg/message/parser/part.go @@ -40,6 +40,18 @@ type Part struct { children Parts } +func (p *Part) ContentType() (string, map[string]string, error) { + t, params, err := p.Header.ContentType() + if err != nil { + // go-message's implementation of ContentType() doesn't handle duplicate parameters + // e.g. Content-Type: text/plain; charset=utf-8; charset=UTF-8 + // so if it fails, we try again with pmmime's implementation, which does. + t, params, err = pmmime.ParseMediaType(p.Header.Get("Content-Type")) + } + + return t, params, err +} + func (p *Part) Child(n int) (part *Part, err error) { if len(p.children) < n { return nil, errors.New("no such part") @@ -72,7 +84,7 @@ func (p *Part) AddChild(child *Part) { func (p *Part) ConvertToUTF8() error { logrus.Trace("Converting part to utf-8") - t, params, err := p.Header.ContentType() + t, params, err := p.ContentType() if err != nil { return err } @@ -163,7 +175,7 @@ func (p *Part) is7BitClean() bool { } func (p *Part) isMultipartMixed() bool { - t, _, err := p.Header.ContentType() + t, _, err := p.ContentType() if err != nil { return false } diff --git a/pkg/message/parser/part_test.go b/pkg/message/parser/part_test.go index 9818f3b9..2e4f2c78 100644 --- a/pkg/message/parser/part_test.go +++ b/pkg/message/parser/part_test.go @@ -49,7 +49,7 @@ func TestPart(t *testing.T) { part, err := p.Section(getSectionNumber(partNumber)) require.NoError(t, err) - contType, _, err := part.Header.ContentType() + contType, _, err := part.ContentType() require.NoError(t, err) assert.Equal(t, wantContType, contType) } diff --git a/pkg/message/parser/visitor.go b/pkg/message/parser/visitor.go index 42a1b4d9..de34c8c2 100644 --- a/pkg/message/parser/visitor.go +++ b/pkg/message/parser/visitor.go @@ -58,7 +58,7 @@ func (v *Visitor) Visit() (interface{}, error) { } func (v *Visitor) visit(p *Part) (interface{}, error) { - t, _, err := p.Header.ContentType() + t, _, err := p.ContentType() if err != nil { return nil, err } diff --git a/pkg/message/parser_test.go b/pkg/message/parser_test.go index fae2d851..ceff707d 100644 --- a/pkg/message/parser_test.go +++ b/pkg/message/parser_test.go @@ -259,6 +259,21 @@ func TestParseTextPlainWithImageInline(t *testing.T) { assert.Equal(t, 8, img.Height) } +func TestParseTextPlainWithDuplicateCharset(t *testing.T) { + f := getFileReader("text_plain_duplicate_charset.eml") + + m, _, plainBody, attReaders, err := Parse(f, "", "") + require.NoError(t, err) + + assert.Equal(t, `"Sender" `, m.Sender.String()) + assert.Equal(t, `"Receiver" `, m.ToList[0].String()) + + assert.Equal(t, "body", m.Body) + assert.Equal(t, "body", plainBody) + + assert.Len(t, attReaders, 0) +} + func TestParseWithMultipleTextParts(t *testing.T) { f := getFileReader("multiple_text_parts.eml") diff --git a/pkg/message/testdata/text_plain_duplicate_charset.eml b/pkg/message/testdata/text_plain_duplicate_charset.eml new file mode 100644 index 00000000..a73178f0 --- /dev/null +++ b/pkg/message/testdata/text_plain_duplicate_charset.eml @@ -0,0 +1,5 @@ +From: Sender +To: Receiver +Content-Type: text/plain; charset=utf-8; charset=UTF-8 + +body \ No newline at end of file diff --git a/test/features/bridge/imap/message/import.feature b/test/features/bridge/imap/message/import.feature index da8ad4a4..4482963e 100644 --- a/test/features/bridge/imap/message/import.feature +++ b/test/features/bridge/imap/message/import.feature @@ -4,7 +4,6 @@ Feature: IMAP import messages And there is IMAP client logged in as "user" And there is IMAP client selected in "INBOX" - @ignore Scenario: Import message with double charset in content type When IMAP client imports message to "INBOX" """