Compare commits

...

6 Commits

12 changed files with 133 additions and 13 deletions

View File

@ -4,13 +4,20 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
## Unreleased ## Unreleased
## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX) ## [IE 1.1.1] Danube (beta 2020-09-xx) [Bridge 1.4.1] Forth (beta 2020-09-xx)
### Fixed
* GODT-752 Parsing message with empty addresses.
* GODT-752 Parsing non-utf8 multipart/alternative message.
* GODT-752 Parsing message with duplicate charset parameter.
## [IE 1.1.0] Danube
### Fixed ### Fixed
* GODT-703 Import-Export always showed at least one total message. * GODT-703 Import-Export always showed at least one total message.
* GODT-738 Fix for mbox files with long lines. * GODT-738 Fix for mbox files with long lines.
## [Bridge 1.4.x] Forth (v1.4.0 beta 2020-09-XX) ## [Bridge 1.4.0] Forth
### Added ### Added
* GODT-682 Persistent anonymous API cookies for Import-Export. * GODT-682 Persistent anonymous API cookies for Import-Export.

View File

@ -152,6 +152,10 @@ func parseAddressComment(raw string) string {
} }
func parseAddressList(val string) (addrs []*mail.Address, err error) { func parseAddressList(val string) (addrs []*mail.Address, err error) {
if val == "" || val == "<>" {
return
}
addrs, err = mail.ParseAddressList(parseAddressComment(val)) addrs, err = mail.ParseAddressList(parseAddressComment(val))
if err == nil { if err == nil {
if addrs == nil { if addrs == nil {

View File

@ -103,7 +103,11 @@ func convertForeignEncodings(p *parser.Parser) error {
return p.ConvertToUTF8() return p.ConvertToUTF8()
}). }).
RegisterDefaultHandler(func(p *parser.Part) error { RegisterDefaultHandler(func(p *parser.Part) error {
t, _, _ := p.Header.ContentType() t, params, _ := p.ContentType()
// multipart/alternative, for example, can contain extra charset.
if params != nil && params["charset"] != "" {
return p.ConvertToUTF8()
}
logrus.WithField("type", t).Trace("Not converting part to utf-8") logrus.WithField("type", t).Trace("Not converting part to utf-8")
return nil return nil
}). }).
@ -293,7 +297,7 @@ func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
} }
for _, part := range parts { for _, part := range parts {
t, _, err := part.Header.ContentType() t, _, err := part.ContentType()
if err != nil { if err != nil {
return false return false
} }
@ -329,7 +333,7 @@ func determineMIMEType(p *parser.Parser) (string, error) {
// getPlainBody returns the body of the given part, converting html to // getPlainBody returns the body of the given part, converting html to
// plaintext where possible. // plaintext where possible.
func getPlainBody(part *parser.Part) []byte { func getPlainBody(part *parser.Part) []byte {
contentType, _, err := part.Header.ContentType() contentType, _, err := part.ContentType()
if err != nil { if err != nil {
return part.Body return part.Body
} }

View File

@ -17,7 +17,9 @@
package parser package parser
import "regexp" import (
"regexp"
)
type HandlerFunc func(*Part) error type HandlerFunc func(*Part) error
@ -35,7 +37,7 @@ func (h *handler) matchType(p *Part) bool {
return false return false
} }
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
t = "" t = ""
} }

View File

@ -40,6 +40,18 @@ type Part struct {
children Parts children Parts
} }
// ContentType returns the media type and parameters parsed from the part's
// Content-Type header.
//
// The header's own ContentType() is tried first. go-message's implementation
// does not handle duplicate parameters
// (e.g. "Content-Type: text/plain; charset=utf-8; charset=UTF-8"), so when it
// reports an error the raw header value is parsed again with pmmime's
// ParseMediaType, which does. The result of that second attempt, including
// its error, is what the caller receives in that case.
func (p *Part) ContentType() (string, map[string]string, error) {
	if mediaType, params, err := p.Header.ContentType(); err == nil {
		return mediaType, params, nil
	}

	return pmmime.ParseMediaType(p.Header.Get("Content-Type"))
}
func (p *Part) Child(n int) (part *Part, err error) { func (p *Part) Child(n int) (part *Part, err error) {
if len(p.children) < n { if len(p.children) < n {
return nil, errors.New("no such part") return nil, errors.New("no such part")
@ -72,7 +84,7 @@ func (p *Part) AddChild(child *Part) {
func (p *Part) ConvertToUTF8() error { func (p *Part) ConvertToUTF8() error {
logrus.Trace("Converting part to utf-8") logrus.Trace("Converting part to utf-8")
t, params, err := p.Header.ContentType() t, params, err := p.ContentType()
if err != nil { if err != nil {
return err return err
} }
@ -102,7 +114,7 @@ func (p *Part) ConvertMetaCharset() error {
goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) { goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) {
if val, ok := sel.Attr("content"); ok { if val, ok := sel.Attr("content"); ok {
t, params, err := mime.ParseMediaType(val) t, params, err := pmmime.ParseMediaType(val)
if err != nil { if err != nil {
return return
} }
@ -163,7 +175,7 @@ func (p *Part) is7BitClean() bool {
} }
func (p *Part) isMultipartMixed() bool { func (p *Part) isMultipartMixed() bool {
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
return false return false
} }

View File

@ -49,7 +49,7 @@ func TestPart(t *testing.T) {
part, err := p.Section(getSectionNumber(partNumber)) part, err := p.Section(getSectionNumber(partNumber))
require.NoError(t, err) require.NoError(t, err)
contType, _, err := part.Header.ContentType() contType, _, err := part.ContentType()
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, wantContType, contType) assert.Equal(t, wantContType, contType)
} }

View File

@ -58,7 +58,7 @@ func (v *Visitor) Visit() (interface{}, error) {
} }
func (v *Visitor) visit(p *Part) (interface{}, error) { func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType() t, _, err := p.ContentType()
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -224,6 +224,21 @@ func TestParseTextPlainWithPlainAttachment(t *testing.T) {
assert.Equal(t, readerToString(attReaders[0]), "attachment") assert.Equal(t, readerToString(attReaders[0]), "attachment")
} }
// TestParseTextPlainEmptyAddresses parses the text_plain_empty_addresses.eml
// fixture and checks that parsing succeeds and that the sender, the first
// recipient, both bodies and the (empty) attachment list come out as expected.
func TestParseTextPlainEmptyAddresses(t *testing.T) {
	f := getFileReader("text_plain_empty_addresses.eml")

	m, _, plainBody, attReaders, err := Parse(f, "", "")
	require.NoError(t, err)

	// Stop here with a readable failure instead of panicking on the
	// dereference / index below if the parser dropped the addresses.
	require.NotNil(t, m.Sender)
	require.NotEmpty(t, m.ToList)

	assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
	assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())

	assert.Equal(t, "body", m.Body)
	assert.Equal(t, "body", plainBody)

	assert.Len(t, attReaders, 0)
}
func TestParseTextPlainWithImageInline(t *testing.T) { func TestParseTextPlainWithImageInline(t *testing.T) {
f := getFileReader("text_plain_image_inline.eml") f := getFileReader("text_plain_image_inline.eml")
@ -244,6 +259,21 @@ func TestParseTextPlainWithImageInline(t *testing.T) {
assert.Equal(t, 8, img.Height) assert.Equal(t, 8, img.Height)
} }
// TestParseTextPlainWithDuplicateCharset parses the
// text_plain_duplicate_charset.eml fixture and checks that parsing succeeds
// and that the sender, the first recipient, both bodies and the (empty)
// attachment list come out as expected.
func TestParseTextPlainWithDuplicateCharset(t *testing.T) {
	f := getFileReader("text_plain_duplicate_charset.eml")

	m, _, plainBody, attReaders, err := Parse(f, "", "")
	require.NoError(t, err)

	// Stop here with a readable failure instead of panicking on the
	// dereference / index below if the parser dropped the addresses.
	require.NotNil(t, m.Sender)
	require.NotEmpty(t, m.ToList)

	assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
	assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())

	assert.Equal(t, "body", m.Body)
	assert.Equal(t, "body", plainBody)

	assert.Len(t, attReaders, 0)
}
func TestParseWithMultipleTextParts(t *testing.T) { func TestParseWithMultipleTextParts(t *testing.T) {
f := getFileReader("multiple_text_parts.eml") f := getFileReader("multiple_text_parts.eml")
@ -416,6 +446,27 @@ func TestParseMultipartAlternativeNested(t *testing.T) {
assert.Equal(t, "*multipart 2.1*\n\n", plainBody) assert.Equal(t, "*multipart 2.1*\n\n", plainBody)
} }
// TestParseMultipartAlternativeLatin1 parses the
// multipart_alternative_latin1.eml fixture and checks that parsing succeeds,
// that the sender and first recipient are extracted, and that the HTML body
// and the plain-text body match the expected output (the expected HTML
// carries a UTF-8 meta charset).
func TestParseMultipartAlternativeLatin1(t *testing.T) {
	f := getFileReader("multipart_alternative_latin1.eml")

	m, _, plainBody, _, err := Parse(f, "", "")
	require.NoError(t, err)

	// Stop here with a readable failure instead of panicking on the
	// dereference / index below if the parser dropped the addresses.
	require.NotNil(t, m.Sender)
	require.NotEmpty(t, m.ToList)

	assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
	assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())

	assert.Equal(t, `<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
</head>
<body>
<b>aoeuaoeu</b>
</body></html>`, m.Body)

	assert.Equal(t, "*aoeuaoeu*\n\n", plainBody)
}
func getFileReader(filename string) io.Reader { func getFileReader(filename string) io.Reader {
f, err := os.Open(filepath.Join("testdata", filename)) f, err := os.Open(filepath.Join("testdata", filename))
if err != nil { if err != nil {

View File

@ -0,0 +1,30 @@
To: pmbridgeietest@outlook.com
From: schizofrenic <schizofrenic@pm.me>
Subject: aoeuaoeu
Date: Thu, 30 Jul 2020 13:35:24 +0200
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="------------22BC647264E52252E386881A"; charset="iso-8859-1"
Content-Language: en-US
This is a multi-part message in MIME format.
--------------22BC647264E52252E386881A
Content-Type: text/plain
Content-Transfer-Encoding: 7bit
*aoeuaoeu*
--------------22BC647264E52252E386881A
Content-Type: text/html
Content-Transfer-Encoding: 7bit
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
</head>
<body>
<b>aoeuaoeu</b>
</body>
</html>
--------------22BC647264E52252E386881A--

View File

@ -0,0 +1,5 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=utf-8; charset=UTF-8
body

View File

@ -0,0 +1,6 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
CC:
Reply-To: <>
body

View File

@ -4,7 +4,6 @@ Feature: IMAP import messages
And there is IMAP client logged in as "user" And there is IMAP client logged in as "user"
And there is IMAP client selected in "INBOX" And there is IMAP client selected in "INBOX"
@ignore
Scenario: Import message with double charset in content type Scenario: Import message with double charset in content type
When IMAP client imports message to "INBOX" When IMAP client imports message to "INBOX"
""" """