Compare commits

...

6 Commits

12 changed files with 133 additions and 13 deletions

View File

@ -4,13 +4,20 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
## Unreleased
## [IE 1.1.x] Danube (v1.1.0 beta 2020-09-XX)
## [IE 1.1.1] Danube (beta 2020-09-xx) [Bridge 1.4.1] Forth (beta 2020-09-xx)
### Fixed
* GODT-752 Parsing message with empty addresses.
* GODT-752 Parsing non-utf8 multipart/alternative message.
* GODT-752 Parsing message with duplicate charset parameter.
## [IE 1.1.0] Danube
### Fixed
* GODT-703 Import-Export always showed at least one total message.
* GODT-738 Fix for mbox files with long lines.
## [Bridge 1.4.x] Forth (v1.4.0 beta 2020-09-XX)
## [Bridge 1.4.0] Forth
### Added
* GODT-682 Persistent anonymous API cookies for Import-Export.

View File

@ -152,6 +152,10 @@ func parseAddressComment(raw string) string {
}
func parseAddressList(val string) (addrs []*mail.Address, err error) {
if val == "" || val == "<>" {
return
}
addrs, err = mail.ParseAddressList(parseAddressComment(val))
if err == nil {
if addrs == nil {

View File

@ -103,7 +103,11 @@ func convertForeignEncodings(p *parser.Parser) error {
return p.ConvertToUTF8()
}).
RegisterDefaultHandler(func(p *parser.Part) error {
t, _, _ := p.Header.ContentType()
t, params, _ := p.ContentType()
// multipart/alternative, for example, can contain extra charset.
if params != nil && params["charset"] != "" {
return p.ConvertToUTF8()
}
logrus.WithField("type", t).Trace("Not converting part to utf-8")
return nil
}).
@ -293,7 +297,7 @@ func allPartsHaveContentType(parts parser.Parts, contentType string) bool {
}
for _, part := range parts {
t, _, err := part.Header.ContentType()
t, _, err := part.ContentType()
if err != nil {
return false
}
@ -329,7 +333,7 @@ func determineMIMEType(p *parser.Parser) (string, error) {
// getPlainBody returns the body of the given part, converting html to
// plaintext where possible.
func getPlainBody(part *parser.Part) []byte {
contentType, _, err := part.Header.ContentType()
contentType, _, err := part.ContentType()
if err != nil {
return part.Body
}

View File

@ -17,7 +17,9 @@
package parser
import "regexp"
import (
"regexp"
)
type HandlerFunc func(*Part) error
@ -35,7 +37,7 @@ func (h *handler) matchType(p *Part) bool {
return false
}
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
t = ""
}

View File

@ -40,6 +40,18 @@ type Part struct {
children Parts
}
// ContentType returns the media type and parameters of the part's
// Content-Type header. It asks the header's own ContentType method first and,
// only if that reports an error, re-parses the raw "Content-Type" header value
// with pmmime.ParseMediaType. The result of whichever parser ran last is
// returned, including its error.
func (p *Part) ContentType() (string, map[string]string, error) {
	mediaType, mediaParams, err := p.Header.ContentType()
	if err == nil {
		return mediaType, mediaParams, nil
	}

	// go-message's ContentType() rejects duplicate parameters, e.g.
	//   Content-Type: text/plain; charset=utf-8; charset=UTF-8
	// pmmime's parser accepts them, so it is used as the fallback.
	mediaType, mediaParams, err = pmmime.ParseMediaType(p.Header.Get("Content-Type"))
	return mediaType, mediaParams, err
}
func (p *Part) Child(n int) (part *Part, err error) {
if len(p.children) < n {
return nil, errors.New("no such part")
@ -72,7 +84,7 @@ func (p *Part) AddChild(child *Part) {
func (p *Part) ConvertToUTF8() error {
logrus.Trace("Converting part to utf-8")
t, params, err := p.Header.ContentType()
t, params, err := p.ContentType()
if err != nil {
return err
}
@ -102,7 +114,7 @@ func (p *Part) ConvertMetaCharset() error {
goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) {
if val, ok := sel.Attr("content"); ok {
t, params, err := mime.ParseMediaType(val)
t, params, err := pmmime.ParseMediaType(val)
if err != nil {
return
}
@ -163,7 +175,7 @@ func (p *Part) is7BitClean() bool {
}
func (p *Part) isMultipartMixed() bool {
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
return false
}

View File

@ -49,7 +49,7 @@ func TestPart(t *testing.T) {
part, err := p.Section(getSectionNumber(partNumber))
require.NoError(t, err)
contType, _, err := part.Header.ContentType()
contType, _, err := part.ContentType()
require.NoError(t, err)
assert.Equal(t, wantContType, contType)
}

View File

@ -58,7 +58,7 @@ func (v *Visitor) Visit() (interface{}, error) {
}
func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType()
t, _, err := p.ContentType()
if err != nil {
return nil, err
}

View File

@ -224,6 +224,21 @@ func TestParseTextPlainWithPlainAttachment(t *testing.T) {
assert.Equal(t, readerToString(attReaders[0]), "attachment")
}
func TestParseTextPlainEmptyAddresses(t *testing.T) {
f := getFileReader("text_plain_empty_addresses.eml")
m, _, plainBody, attReaders, err := Parse(f, "", "")
require.NoError(t, err)
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
assert.Equal(t, "body", m.Body)
assert.Equal(t, "body", plainBody)
assert.Len(t, attReaders, 0)
}
func TestParseTextPlainWithImageInline(t *testing.T) {
f := getFileReader("text_plain_image_inline.eml")
@ -244,6 +259,21 @@ func TestParseTextPlainWithImageInline(t *testing.T) {
assert.Equal(t, 8, img.Height)
}
func TestParseTextPlainWithDuplicateCharset(t *testing.T) {
f := getFileReader("text_plain_duplicate_charset.eml")
m, _, plainBody, attReaders, err := Parse(f, "", "")
require.NoError(t, err)
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
assert.Equal(t, "body", m.Body)
assert.Equal(t, "body", plainBody)
assert.Len(t, attReaders, 0)
}
func TestParseWithMultipleTextParts(t *testing.T) {
f := getFileReader("multiple_text_parts.eml")
@ -416,6 +446,27 @@ func TestParseMultipartAlternativeNested(t *testing.T) {
assert.Equal(t, "*multipart 2.1*\n\n", plainBody)
}
func TestParseMultipartAlternativeLatin1(t *testing.T) {
f := getFileReader("multipart_alternative_latin1.eml")
m, _, plainBody, _, err := Parse(f, "", "")
require.NoError(t, err)
assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())
assert.Equal(t, `<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
</head>
<body>
<b>aoeuaoeu</b>
</body></html>`, m.Body)
assert.Equal(t, "*aoeuaoeu*\n\n", plainBody)
}
func getFileReader(filename string) io.Reader {
f, err := os.Open(filepath.Join("testdata", filename))
if err != nil {

View File

@ -0,0 +1,30 @@
To: pmbridgeietest@outlook.com
From: schizofrenic <schizofrenic@pm.me>
Subject: aoeuaoeu
Date: Thu, 30 Jul 2020 13:35:24 +0200
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="------------22BC647264E52252E386881A"; charset="iso-8859-1"
Content-Language: en-US
This is a multi-part message in MIME format.
--------------22BC647264E52252E386881A
Content-Type: text/plain
Content-Transfer-Encoding: 7bit
*aoeuaoeu*
--------------22BC647264E52252E386881A
Content-Type: text/html
Content-Transfer-Encoding: 7bit
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
</head>
<body>
<b>aoeuaoeu</b>
</body>
</html>
--------------22BC647264E52252E386881A--

View File

@ -0,0 +1,5 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=utf-8; charset=UTF-8
body

View File

@ -0,0 +1,6 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
CC:
Reply-To: <>
body

View File

@ -4,7 +4,6 @@ Feature: IMAP import messages
And there is IMAP client logged in as "user"
And there is IMAP client selected in "INBOX"
@ignore
Scenario: Import message with double charset in content type
When IMAP client imports message to "INBOX"
"""