mirror of
https://github.com/ProtonMail/proton-bridge.git
synced 2025-12-11 05:06:51 +00:00
refactor: tidy up DecodeCharset
This commit is contained in:
@ -9,7 +9,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/)
|
|||||||
* IMAP mailbox info update when new mailbox is created
|
* IMAP mailbox info update when new mailbox is created
|
||||||
* IMAP extension Unselect
|
* IMAP extension Unselect
|
||||||
* More logs about event loop activity
|
* More logs about event loop activity
|
||||||
* GODT-72 Try ISO-8859-1 encoding if charset is not specified and it isn't UTF-8
|
* GODT-72 Use ISO-8859-1 encoding if charset is not specified and it isn't UTF-8
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
* GODT-162 User Agent does not contain bridge version, only client in format `client name/client version (os)`
|
* GODT-162 User Agent does not contain bridge version, only client in format `client name/client version (os)`
|
||||||
|
|||||||
@ -18,7 +18,6 @@
|
|||||||
package pmmime
|
package pmmime
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"mime"
|
"mime"
|
||||||
@ -29,10 +28,10 @@ import (
|
|||||||
|
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
"golang.org/x/text/encoding"
|
"golang.org/x/text/encoding"
|
||||||
"golang.org/x/text/encoding/charmap"
|
"golang.org/x/text/encoding/charmap"
|
||||||
"golang.org/x/text/encoding/htmlindex"
|
"golang.org/x/text/encoding/htmlindex"
|
||||||
"golang.org/x/text/transform"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var wordDec = &mime.WordDecoder{
|
var wordDec = &mime.WordDecoder{
|
||||||
@ -161,7 +160,7 @@ func getEncoding(charset string) (enc encoding.Encoding, err error) {
|
|||||||
|
|
||||||
enc, _ = htmlindex.Get(preparsed)
|
enc, _ = htmlindex.Get(preparsed)
|
||||||
if enc == nil {
|
if enc == nil {
|
||||||
err = fmt.Errorf("can not get encodig for '%s' (or '%s')", charset, preparsed)
|
err = fmt.Errorf("can not get encoding for '%s' (or '%s')", charset, preparsed)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -202,41 +201,34 @@ func EncodeHeader(s string) string {
|
|||||||
// If it isn't, it checks whether the content is valid latin1 (iso-8859-1), and if so,
|
// If it isn't, it checks whether the content is valid latin1 (iso-8859-1), and if so,
|
||||||
// reencodes it as utf-8.
|
// reencodes it as utf-8.
|
||||||
func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) {
|
func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) {
|
||||||
var decoder *encoding.Decoder
|
// If the charset is specified, use that.
|
||||||
var err error
|
|
||||||
|
|
||||||
if charset, ok := contentTypeParams["charset"]; ok {
|
if charset, ok := contentTypeParams["charset"]; ok {
|
||||||
decoder, err = selectDecoder(charset)
|
decoder, err := selectDecoder(charset)
|
||||||
} else if utf8.Valid(original) {
|
if err != nil {
|
||||||
|
return original, errors.Wrap(err, "unknown charset was specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
return decoder.Bytes(original)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The charset was not specified. First try utf8.
|
||||||
|
if utf8.Valid(original) {
|
||||||
return original, nil
|
return original, nil
|
||||||
} else if decoded, err = charmap.ISO8859_1.NewDecoder().Bytes(original); err == nil {
|
|
||||||
return decoded, nil
|
|
||||||
} else {
|
|
||||||
err = fmt.Errorf("non-utf8 content without charset specification")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fall back to latin1.
|
||||||
|
// In the future this should fall back to whatever default encoding the user specified.
|
||||||
|
decoded, err := charmap.ISO8859_1.NewDecoder().Bytes(original)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return original, err
|
return original, errors.Wrap(err, "failed to decode as latin1")
|
||||||
}
|
}
|
||||||
|
|
||||||
utf8 := make([]byte, len(original))
|
// If the decoded string is not valid utf8, it wasn't latin1, so give up.
|
||||||
nDst, nSrc, err := decoder.Transform(utf8, original, false)
|
if !utf8.Valid(decoded) {
|
||||||
for err == transform.ErrShortDst {
|
return original, errors.Wrap(err, "failed to decode as latin1")
|
||||||
if nDst < 1 {
|
|
||||||
nDst = 1
|
|
||||||
}
|
|
||||||
if nSrc < 1 {
|
|
||||||
nSrc = 1
|
|
||||||
}
|
|
||||||
utf8 = make([]byte, (nDst/nSrc+1)*len(original))
|
|
||||||
nDst, nSrc, err = decoder.Transform(utf8, original, false)
|
|
||||||
}
|
}
|
||||||
if err != nil {
|
|
||||||
return original, err
|
|
||||||
}
|
|
||||||
utf8 = bytes.Trim(utf8, "\x00")
|
|
||||||
|
|
||||||
return utf8, nil
|
return decoded, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DecodeContentEncoding wraps the reader with decoder based on content encoding.
|
// DecodeContentEncoding wraps the reader with decoder based on content encoding.
|
||||||
|
|||||||
@ -3,21 +3,6 @@ Feature: SMTP wrong messages
|
|||||||
Given there is connected user "user"
|
Given there is connected user "user"
|
||||||
And there is SMTP client logged in as "user"
|
And there is SMTP client logged in as "user"
|
||||||
|
|
||||||
Scenario: Message with no charset and bad character
|
|
||||||
When SMTP client sends message
|
|
||||||
"""
|
|
||||||
From: Bridge Test <bridgetest@pm.test>
|
|
||||||
To: External Bridge <pm.bridge.qa@gmail.com>
|
|
||||||
Subject: Plain text, no charset, wrong base64 external
|
|
||||||
Content-Disposition: inline
|
|
||||||
Content-Type: text/plain;
|
|
||||||
Content-Transfer-Encoding: base64
|
|
||||||
|
|
||||||
sdfsdfsd
|
|
||||||
|
|
||||||
"""
|
|
||||||
Then SMTP response is "SMTP error: 554 Error: transaction failed, blame it on the weather: non-utf8 content without charset specification"
|
|
||||||
|
|
||||||
Scenario: Message with attachment and wrong boundaries
|
Scenario: Message with attachment and wrong boundaries
|
||||||
When SMTP client sends message
|
When SMTP client sends message
|
||||||
"""
|
"""
|
||||||
@ -53,4 +38,4 @@ Feature: SMTP wrong messages
|
|||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Then SMTP response is "SMTP error: 554 Error: transaction failed, blame it on the weather: multipart: NextPart: EOF"
|
Then SMTP response is "SMTP error: 554 Error: transaction failed, blame it on the weather: multipart: NextPart: EOF"
|
||||||
|
|||||||
@ -145,7 +145,7 @@ Feature: SMTP sending of plain messages
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Scenario: Message without charset
|
Scenario: Message without charset is utf8
|
||||||
When SMTP client sends message
|
When SMTP client sends message
|
||||||
"""
|
"""
|
||||||
From: Bridge Test <bridgetest@pm.test>
|
From: Bridge Test <bridgetest@pm.test>
|
||||||
@ -156,6 +156,46 @@ Feature: SMTP sending of plain messages
|
|||||||
|
|
||||||
This is body of mail without charset. Please assume utf8
|
This is body of mail without charset. Please assume utf8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Then SMTP response is "OK"
|
||||||
|
And mailbox "Sent" for "user" has messages
|
||||||
|
| time | from | to | subject |
|
||||||
|
| now | [userAddress] | pm.bridge.qa@gmail.com | Plain text no charset external |
|
||||||
|
And message is sent with API call:
|
||||||
|
"""
|
||||||
|
{
|
||||||
|
"Message": {
|
||||||
|
"Subject": "Plain text no charset external",
|
||||||
|
"Sender": {
|
||||||
|
"Name": "Bridge Test"
|
||||||
|
},
|
||||||
|
"ToList": [
|
||||||
|
{
|
||||||
|
"Address": "pm.bridge.qa@gmail.com",
|
||||||
|
"Name": "External Bridge"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"CCList": [],
|
||||||
|
"BCCList": [],
|
||||||
|
"MIMEType": "text/plain"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
Scenario: Message without charset is base64-encoded latin1
|
||||||
|
When SMTP client sends message
|
||||||
|
"""
|
||||||
|
From: Bridge Test <bridgetest@pm.test>
|
||||||
|
To: External Bridge <pm.bridge.qa@gmail.com>
|
||||||
|
Subject: Plain text no charset external
|
||||||
|
Content-Disposition: inline
|
||||||
|
Content-Type: text/plain;
|
||||||
|
Content-Transfer-Encoding: base64
|
||||||
|
|
||||||
|
dGhpcyBpcyBpbiBsYXRpbjEgYW5kIHRoZXJlIGFyZSBsb3RzIG9mIGVzIHdpdGggYWNjZW50czog
|
||||||
|
6enp6enp6enp6enp6enp
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Then SMTP response is "OK"
|
Then SMTP response is "OK"
|
||||||
And mailbox "Sent" for "user" has messages
|
And mailbox "Sent" for "user" has messages
|
||||||
|
|||||||
Reference in New Issue
Block a user