fix: duplicate charset param

This commit is contained in:
James Houlahan
2020-09-24 14:18:05 +02:00
parent a89a3f6612
commit 15c1d7bc24
9 changed files with 44 additions and 10 deletions

View File

@ -7,6 +7,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
### Fixed ### Fixed
* GODT-752 Parsing message with empty CC. * GODT-752 Parsing message with empty CC.
* GODT-752 Parsing non-utf8 multipart/alternative message. * GODT-752 Parsing non-utf8 multipart/alternative message.
* GODT-752 Parsing message with duplicate charset parameter.
## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX) ## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX)

View File

@ -103,7 +103,7 @@ func convertForeignEncodings(p *parser.Parser) error {
return p.ConvertToUTF8() return p.ConvertToUTF8()
}). }).
RegisterDefaultHandler(func(p *parser.Part) error { RegisterDefaultHandler(func(p *parser.Part) error {
t, params, _ := p.Header.ContentType() t, params, _ := p.ContentType()
// multipart/alternative, for example, can contain extra charset. // multipart/alternative, for example, can contain extra charset.
if params != nil && params["charset"] != "" { if params != nil && params["charset"] != "" {
return p.ConvertToUTF8() return p.ConvertToUTF8()
@ -297,7 +297,7 @@ func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
} }
for _, part := range parts { for _, part := range parts {
t, _, err := part.Header.ContentType() t, _, err := part.ContentType()
if err != nil { if err != nil {
return false return false
} }
@ -333,7 +333,7 @@ func determineMIMEType(p *parser.Parser) (string, error) {
// getPlainBody returns the body of the given part, converting html to // getPlainBody returns the body of the given part, converting html to
// plaintext where possible. // plaintext where possible.
func getPlainBody(part *parser.Part) []byte { func getPlainBody(part *parser.Part) []byte {
contentType, _, err := part.Header.ContentType() contentType, _, err := part.ContentType()
if err != nil { if err != nil {
return part.Body return part.Body
} }

View File

@ -17,7 +17,9 @@
package parser package parser
import "regexp" import (
"regexp"
)
type HandlerFunc func(*Part) error type HandlerFunc func(*Part) error
@ -35,7 +37,7 @@ func (h *handler) matchType(p *Part) bool {
return false return false
} }
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
t = "" t = ""
} }

View File

@ -40,6 +40,18 @@ type Part struct {
children Parts children Parts
} }
// ContentType returns the media type and parameters of the part's
// Content-Type header. The header's own ContentType() is tried first; only
// when that returns an error is the raw header value re-parsed with
// pmmime.ParseMediaType, whose result (including its error) is then returned.
func (p *Part) ContentType() (string, map[string]string, error) {
	if mediaType, params, err := p.Header.ContentType(); err == nil {
		return mediaType, params, nil
	}

	// go-message's implementation of ContentType() doesn't handle duplicate
	// parameters, e.g.
	//   Content-Type: text/plain; charset=utf-8; charset=UTF-8
	// so on failure we retry with pmmime's implementation, which does.
	return pmmime.ParseMediaType(p.Header.Get("Content-Type"))
}
func (p *Part) Child(n int) (part *Part, err error) { func (p *Part) Child(n int) (part *Part, err error) {
if len(p.children) < n { if len(p.children) < n {
return nil, errors.New("no such part") return nil, errors.New("no such part")
@ -72,7 +84,7 @@ func (p *Part) AddChild(child *Part) {
func (p *Part) ConvertToUTF8() error { func (p *Part) ConvertToUTF8() error {
logrus.Trace("Converting part to utf-8") logrus.Trace("Converting part to utf-8")
t, params, err := p.Header.ContentType() t, params, err := p.ContentType()
if err != nil { if err != nil {
return err return err
} }
@ -163,7 +175,7 @@ func (p *Part) is7BitClean() bool {
} }
func (p *Part) isMultipartMixed() bool { func (p *Part) isMultipartMixed() bool {
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
return false return false
} }

View File

@ -49,7 +49,7 @@ func TestPart(t *testing.T) {
part, err := p.Section(getSectionNumber(partNumber)) part, err := p.Section(getSectionNumber(partNumber))
require.NoError(t, err) require.NoError(t, err)
contType, _, err := part.Header.ContentType() contType, _, err := part.ContentType()
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, wantContType, contType) assert.Equal(t, wantContType, contType)
} }

View File

@ -58,7 +58,7 @@ func (v *Visitor) Visit() (interface{}, error) {
} }
func (v *Visitor) visit(p *Part) (interface{}, error) { func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -259,6 +259,21 @@ func TestParseTextPlainWithImageInline(t *testing.T) {
assert.Equal(t, 8, img.Height) assert.Equal(t, 8, img.Height)
} }
func TestParseTextPlainWithDuplicateCharset(t *testing.T) {
f := getFileReader("text_plain_duplicate_charset.eml")
m, _, plainBody, attReaders, err := Parse(f, "", "")
require.NoError(t, err)
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
assert.Equal(t, "body", m.Body)
assert.Equal(t, "body", plainBody)
assert.Len(t, attReaders, 0)
}
func TestParseWithMultipleTextParts(t *testing.T) { func TestParseWithMultipleTextParts(t *testing.T) {
f := getFileReader("multiple_text_parts.eml") f := getFileReader("multiple_text_parts.eml")

View File

@ -0,0 +1,5 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=utf-8; charset=UTF-8
body

View File

@ -4,7 +4,6 @@ Feature: IMAP import messages
And there is IMAP client logged in as "user" And there is IMAP client logged in as "user"
And there is IMAP client selected in "INBOX" And there is IMAP client selected in "INBOX"
@ignore
Scenario: Import message with double charset in content type Scenario: Import message with double charset in content type
When IMAP client imports message to "INBOX" When IMAP client imports message to "INBOX"
""" """