mirror of
https://github.com/ProtonMail/proton-bridge.git
synced 2025-12-15 22:56:48 +00:00
We build too many walls and not enough bridges
This commit is contained in:
24
pkg/mime/Changelog.md
Normal file
24
pkg/mime/Changelog.md
Normal file
@ -0,0 +1,24 @@
|
||||
# Do not modify this file!
|
||||
It is here for historical reasons only. All changes should be documented in the
|
||||
Changelog at the root of this repository.
|
||||
|
||||
|
||||
# Changelog
|
||||
|
||||
## [2019-12-10] v1.0.2
|
||||
|
||||
### Added
|
||||
* support for shift_JIS (cp932) encoding
|
||||
|
||||
## [2019-09-30] v1.0.1
|
||||
|
||||
### Changed
|
||||
* fix divide by zero
|
||||
|
||||
## [2019-09-26] v1.0.0
|
||||
|
||||
### Changed
|
||||
* Import-Export#192: filter header parameters
|
||||
* ignore twice the same parameter (take the latest)
|
||||
* convert non utf8 RFC2231 parameters to a single line utf8 RFC2231
|
||||
|
||||
254
pkg/mime/encoding.go
Normal file
254
pkg/mime/encoding.go
Normal file
@ -0,0 +1,254 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"mime/quotedprintable"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"encoding/base64"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/htmlindex"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
var wordDec = &mime.WordDecoder{
|
||||
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
|
||||
dec, err := selectDecoder(charset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if dec == nil { // utf-8
|
||||
return input, nil
|
||||
}
|
||||
return dec.Reader(input), nil
|
||||
},
|
||||
}
|
||||
|
||||
// Expects trimmed lowercase.
|
||||
func getEncoding(charset string) (enc encoding.Encoding, err error) {
|
||||
preparsed := strings.Trim(strings.ToLower(charset), " \t\r\n")
|
||||
|
||||
// koi
|
||||
re := regexp.MustCompile("(cs)?koi[-_ ]?8?[-_ ]?(r|ru|u|uk)?$")
|
||||
matches := re.FindAllStringSubmatch(preparsed, -1)
|
||||
if len(matches) == 1 && len(matches[0]) == 3 {
|
||||
preparsed = "koi8-"
|
||||
switch matches[0][2] {
|
||||
case "u", "uk":
|
||||
preparsed += "u"
|
||||
default:
|
||||
preparsed += "r"
|
||||
}
|
||||
}
|
||||
|
||||
// windows-XXXX
|
||||
re = regexp.MustCompile("(cp|(cs)?win(dows)?)[-_ ]?([0-9]{3,4})$")
|
||||
matches = re.FindAllStringSubmatch(preparsed, -1)
|
||||
if len(matches) == 1 && len(matches[0]) == 5 {
|
||||
switch matches[0][4] {
|
||||
case "874", "1250", "1251", "1252", "1253", "1254", "1255", "1256", "1257", "1258":
|
||||
preparsed = "windows-" + matches[0][4]
|
||||
}
|
||||
}
|
||||
|
||||
// iso
|
||||
re = regexp.MustCompile("iso[-_ ]?([0-9]{4})[-_ ]?([0-9]+|jp)?[-_ ]?(i|e)?")
|
||||
matches = re.FindAllStringSubmatch(preparsed, -1)
|
||||
if len(matches) == 1 && len(matches[0]) == 4 {
|
||||
if matches[0][1] == "2022" && matches[0][2] == "jp" {
|
||||
preparsed = "iso-2022-jp"
|
||||
}
|
||||
if matches[0][1] == "8859" {
|
||||
switch matches[0][2] {
|
||||
case "1", "2", "3", "4", "5", "7", "8", "9", "10", "11", "13", "14", "15", "16":
|
||||
preparsed = "iso-8859-" + matches[0][2]
|
||||
if matches[0][3] == "i" {
|
||||
preparsed += "-" + matches[0][3]
|
||||
}
|
||||
case "":
|
||||
preparsed = "iso-8859-1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Latin is tricky.
|
||||
re = regexp.MustCompile("^(cs|csiso)?l(atin)?[-_ ]?([0-9]{1,2})$")
|
||||
matches = re.FindAllStringSubmatch(preparsed, -1)
|
||||
if len(matches) == 1 && len(matches[0]) == 4 {
|
||||
switch matches[0][3] {
|
||||
case "1":
|
||||
preparsed = "windows-1252"
|
||||
case "2", "3", "4", "5":
|
||||
preparsed = "iso-8859-" + matches[0][3]
|
||||
case "6":
|
||||
preparsed = "iso-8859-10"
|
||||
case "8":
|
||||
preparsed = "iso-8859-14"
|
||||
case "9":
|
||||
preparsed = "iso-8859-15"
|
||||
case "10":
|
||||
preparsed = "iso-8859-16"
|
||||
}
|
||||
}
|
||||
|
||||
// Missing substitutions.
|
||||
switch preparsed {
|
||||
case "csutf8", "iso-utf-8", "utf8mb4":
|
||||
preparsed = "utf-8"
|
||||
|
||||
case "cp932", "windows-932", "windows-31J", "ibm-943", "cp943":
|
||||
preparsed = "shift_jis"
|
||||
case "eucjp", "ibm-eucjp":
|
||||
preparsed = "euc-jp"
|
||||
case "euckr", "ibm-euckr", "cp949":
|
||||
preparsed = "euc-kr"
|
||||
case "euccn", "ibm-euccn":
|
||||
preparsed = "gbk"
|
||||
case "zht16mswin950", "cp950":
|
||||
preparsed = "big5"
|
||||
|
||||
case "csascii",
|
||||
"ansi_x3.4-1968",
|
||||
"ansi_x3.4-1986",
|
||||
"ansi_x3.110-1983",
|
||||
"cp850",
|
||||
"cp858",
|
||||
"us",
|
||||
"iso646",
|
||||
"iso-646",
|
||||
"iso646-us",
|
||||
"iso_646.irv:1991",
|
||||
"cp367",
|
||||
"ibm367",
|
||||
"ibm-367",
|
||||
"iso-ir-6":
|
||||
preparsed = "ascii"
|
||||
|
||||
case "ibm852":
|
||||
preparsed = "iso-8859-2"
|
||||
case "iso-ir-199", "iso-celtic":
|
||||
preparsed = "iso-8859-14"
|
||||
case "iso-ir-226":
|
||||
preparsed = "iso-8859-16"
|
||||
|
||||
case "macroman":
|
||||
preparsed = "macintosh"
|
||||
}
|
||||
|
||||
enc, _ = htmlindex.Get(preparsed)
|
||||
if enc == nil {
|
||||
err = fmt.Errorf("can not get encodig for '%s' (or '%s')", charset, preparsed)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func selectDecoder(charset string) (decoder *encoding.Decoder, err error) {
|
||||
var enc encoding.Encoding
|
||||
lcharset := strings.Trim(strings.ToLower(charset), " \t\r\n")
|
||||
switch lcharset {
|
||||
case "utf7", "utf-7", "unicode-1-1-utf-7":
|
||||
return NewUtf7Decoder(), nil
|
||||
default:
|
||||
enc, err = getEncoding(lcharset)
|
||||
}
|
||||
if err == nil {
|
||||
decoder = enc.NewDecoder()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeHeader if needed. Returns error if raw contains non-utf8 characters.
|
||||
func DecodeHeader(raw string) (decoded string, err error) {
|
||||
if decoded, err = wordDec.DecodeHeader(raw); err != nil {
|
||||
decoded = raw
|
||||
}
|
||||
if !utf8.ValidString(decoded) {
|
||||
err = fmt.Errorf("header contains non utf8 chars: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodeHeader returns s as a header value encoded with the MIME Q-encoding
// and the "utf-8" charset label.
func EncodeHeader(s string) string {
	const charset = "utf-8"
	encoder := mime.QEncoding
	return encoder.Encode(charset, s)
}
|
||||
|
||||
// DecodeCharset decodes the orginal using content type parameters.
|
||||
// When charset is missing it checks thaht the content is valid utf8.
|
||||
func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) {
|
||||
var decoder *encoding.Decoder
|
||||
var err error
|
||||
if charset, ok := contentTypeParams["charset"]; ok {
|
||||
decoder, err = selectDecoder(charset)
|
||||
} else {
|
||||
if utf8.Valid(original) {
|
||||
return original, nil
|
||||
}
|
||||
err = fmt.Errorf("non-utf8 content without charset specification")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return original, err
|
||||
}
|
||||
|
||||
utf8 := make([]byte, len(original))
|
||||
nDst, nSrc, err := decoder.Transform(utf8, original, false)
|
||||
for err == transform.ErrShortDst {
|
||||
if nDst < 1 {
|
||||
nDst = 1
|
||||
}
|
||||
if nSrc < 1 {
|
||||
nSrc = 1
|
||||
}
|
||||
utf8 = make([]byte, (nDst/nSrc+1)*len(original))
|
||||
nDst, nSrc, err = decoder.Transform(utf8, original, false)
|
||||
}
|
||||
if err != nil {
|
||||
return original, err
|
||||
}
|
||||
utf8 = bytes.Trim(utf8, "\x00")
|
||||
|
||||
return utf8, nil
|
||||
}
|
||||
|
||||
// DecodeContentEncoding wraps r with a decoder chosen by contentEncoding
// (compared case-insensitively): "quoted-printable" and "base64" get the
// matching decoder, while "7bit", "8bit", "binary" and the empty string return
// r itself. Any other value yields a nil reader.
func DecodeContentEncoding(r io.Reader, contentEncoding string) (d io.Reader) {
	name := strings.ToLower(contentEncoding)

	if name == "quoted-printable" {
		return quotedprintable.NewReader(r)
	}
	if name == "base64" {
		return base64.NewDecoder(base64.StdEncoding, r)
	}
	if name == "7bit" || name == "8bit" || name == "binary" || name == "" {
		// Nothing to do.
		return r
	}
	return nil
}
|
||||
|
||||
// ParseMediaType from MIME doesn't support RFC2231 for non asci / utf8 encodings so we have to pre-parse it.
|
||||
func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
|
||||
v, _ = changeEncodingAndKeepLastParamDefinition(v)
|
||||
return mime.ParseMediaType(v)
|
||||
}
|
||||
445
pkg/mime/encoding_test.go
Normal file
445
pkg/mime/encoding_test.go
Normal file
@ -0,0 +1,445 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
//"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding/htmlindex"
|
||||
|
||||
a "github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestDecodeHeader(t *testing.T) {
|
||||
testData := []struct{ raw, expected string }{
|
||||
{
|
||||
"",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"=?iso-2022-jp?Q?=1B$B!Z=1B(BTimes_Car_PLUS=1B$B![JV5Q>Z=1B(B?=",
|
||||
"【Times Car PLUS】返却証",
|
||||
},
|
||||
{
|
||||
`=?iso-2022-jp?Q?iTunes_Movie_=1B$B%K%e!<%j%j!<%9$HCmL\:nIJ=1B(B?=`,
|
||||
"iTunes Movie ニューリリースと注目作品",
|
||||
},
|
||||
{
|
||||
"=?UTF-8?B?w4TDi8OPw5bDnA==?= =?UTF-8?B?IMOkw6vDr8O2w7w=?=",
|
||||
"ÄËÏÖÜ äëïöü",
|
||||
},
|
||||
{
|
||||
"=?ISO-8859-2?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
|
||||
"ÄËIÖÜ äëiöü",
|
||||
},
|
||||
{
|
||||
"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
|
||||
"=?uknown?B?xMtJ1tw=?= =?ISO-8859-2?B?IOTrafb8?=",
|
||||
},
|
||||
}
|
||||
|
||||
for _, val := range testData {
|
||||
if decoded, err := DecodeHeader(val.raw); strings.Compare(val.expected, decoded) != 0 {
|
||||
t.Errorf("Incorrect decoding of header %q expected %q but have %q; Error %v", val.raw, val.expected, decoded, err)
|
||||
} else {
|
||||
// fmt.Println("Header", val.raw, "successfully decoded", decoded, ". Error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type testParseMediaTypeData struct {
|
||||
arg, wantMediaType string
|
||||
wantParams map[string]string
|
||||
}
|
||||
|
||||
func (d *testParseMediaTypeData) run(t *testing.T) {
|
||||
gotMediaType, params, err := ParseMediaType(d.arg)
|
||||
a.Nil(t, err)
|
||||
a.Equal(t, d.wantMediaType, gotMediaType)
|
||||
a.Equal(t, d.wantParams, params)
|
||||
}
|
||||
|
||||
func TestParseMediaType(t *testing.T) {
|
||||
testTable := map[string]testParseMediaTypeData{
|
||||
"TwiceTheSameParameter": {
|
||||
arg: "attachment; filename=joy.txt; filename=JOY.TXT; title=hi;",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "JOY.TXT", "title": "hi"},
|
||||
},
|
||||
"SingleLineUTF8": {
|
||||
arg: "attachment;\nfilename*=utf-8''%F0%9F%98%81%F0%9F%98%82.txt;\n title=smile",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "😁😂.txt", "title": "smile"},
|
||||
},
|
||||
"MultiLineUTF8": {
|
||||
arg: "attachment;\nfilename*0*=utf-8''%F0%9F%98%81; title=smile;\nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "😁😂.txt", "title": "smile"},
|
||||
},
|
||||
"MultiLineFirstNoEncNextUTF8": {
|
||||
arg: "attachment;\nfilename*0*=utf-8''joy ;\n title*=utf-8''smile; \nfilename*1*=%F0%9F%98%82;\nfilename*2=.txt",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "joy😂.txt", "title": "smile"},
|
||||
},
|
||||
"SingleLineBig5": {
|
||||
arg: "attachment;\nfilename*=big5''%B3%C6%A7%D1%BF%FD.m4a; title*=utf8''memorandum",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "備忘錄.m4a", "title": "memorandum"},
|
||||
},
|
||||
"MultiLineBig5": {
|
||||
arg: "attachment;\nfilename*0*=big5''%B3%C6a; title*0=utf8''memorandum; filename*2=%BF%FD.m4a; \nfilename*1*=%A7%D1b;",
|
||||
wantMediaType: "attachment",
|
||||
wantParams: map[string]string{"filename": "備a忘b錄.m4a", "title": "memorandum"},
|
||||
},
|
||||
}
|
||||
for name, testData := range testTable {
|
||||
t.Run(name, testData.run)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetEncoding(t *testing.T) {
|
||||
// All MIME charsets with aliases can be found here:
|
||||
// https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||
mimesets := map[string][]string{
|
||||
"utf-8": []string{ // MIB 16
|
||||
"utf8",
|
||||
"csutf8",
|
||||
"unicode-1-1-utf-8",
|
||||
"iso-utf-8",
|
||||
"utf8mb4",
|
||||
},
|
||||
"gbk": []string{
|
||||
"gb2312", // MIB 2025
|
||||
//"euc-cn": []string{
|
||||
"euccn",
|
||||
"ibm-euccn",
|
||||
},
|
||||
//"utf7": []string{"utf-7", "unicode-1-1-utf-7"},
|
||||
"iso-8859-2": []string{ // MIB 5
|
||||
"iso-ir-101",
|
||||
"iso_8859-2",
|
||||
"iso8859-2",
|
||||
"latin2",
|
||||
"l2",
|
||||
"csisolatin2",
|
||||
"ibm852",
|
||||
//"FAILEDibm852",
|
||||
},
|
||||
"iso-8859-3": []string{ // MIB 6
|
||||
"iso-ir-109",
|
||||
"iso_8859-3",
|
||||
"latin3",
|
||||
"l3",
|
||||
"csisolatin3",
|
||||
},
|
||||
"iso-8859-4": []string{ // MIB 7
|
||||
"iso-ir-110",
|
||||
"iso_8859-4",
|
||||
"latin4",
|
||||
"l4",
|
||||
"csisolatin4",
|
||||
},
|
||||
"iso-8859-5": []string{ // MIB 8
|
||||
"iso-ir-144",
|
||||
"iso_8859-5",
|
||||
"cyrillic",
|
||||
"csisolatincyrillic",
|
||||
},
|
||||
"iso-8859-6": []string{ // MIB 9
|
||||
"iso-ir-127",
|
||||
"iso_8859-6",
|
||||
"ecma-114",
|
||||
"asmo-708",
|
||||
"arabic",
|
||||
"csisolatinarabic",
|
||||
//"iso-8859-6e": []string{ // MIB 81 just direction
|
||||
"csiso88596e",
|
||||
"iso-8859-6-e",
|
||||
//"iso-8859-6i": []string{ // MIB 82
|
||||
"csiso88596i",
|
||||
"iso-8859-6-i"},
|
||||
"iso-8859-7": []string{ // MIB 10
|
||||
"iso-ir-126",
|
||||
"iso_8859-7",
|
||||
"elot_928",
|
||||
"ecma-118",
|
||||
"greek",
|
||||
"greek8",
|
||||
"csisolatingreek"},
|
||||
"iso-8859-8": []string{ // MIB 11
|
||||
"iso-ir-138",
|
||||
"iso_8859-8",
|
||||
"hebrew",
|
||||
"csisolatinhebrew",
|
||||
//"iso-8859-8e": []string{ // MIB 84 (directionality
|
||||
"csiso88598e",
|
||||
"iso-8859-8-e",
|
||||
},
|
||||
"iso-8859-8-i": []string{ // MIB 85
|
||||
"logical",
|
||||
"csiso88598i",
|
||||
"iso-8859-8-i", // Hebrew, the "i" means right-to-left, probably unnecessary with ISO cleaning above.
|
||||
},
|
||||
"iso-8859-10": []string{ // MIB 13
|
||||
"iso-ir-157",
|
||||
"l6",
|
||||
"iso_8859-10:1992",
|
||||
"csisolatin6",
|
||||
"latin6"},
|
||||
"iso-8859-13": []string{ // MIB 109
|
||||
"csiso885913"},
|
||||
"iso-8859-14": []string{ // MIB 110
|
||||
"iso-ir-199",
|
||||
"iso_8859-14:1998",
|
||||
"iso_8859-14",
|
||||
"latin8",
|
||||
"iso-celtic",
|
||||
"l8",
|
||||
"csiso885914"},
|
||||
"iso-8859-15": []string{ // MIB 111
|
||||
"iso_8859-15",
|
||||
"latin-9",
|
||||
"csiso885915",
|
||||
"ISO8859-15"},
|
||||
"iso-8859-16": []string{ // MIB 112
|
||||
"iso-ir-226",
|
||||
"iso_8859-16:2001",
|
||||
"iso_8859-16",
|
||||
"latin10",
|
||||
"l10",
|
||||
"csiso885916",
|
||||
},
|
||||
"windows-874": []string{ // MIB 2109
|
||||
"cswindows874",
|
||||
"cp874",
|
||||
"iso-8859-11",
|
||||
"tis-620",
|
||||
},
|
||||
"windows-1250": []string{ // MIB 2250
|
||||
"cswindows1250",
|
||||
"cp1250",
|
||||
},
|
||||
"windows-1251": []string{ // MIB 2251
|
||||
"cswindows1251",
|
||||
"cp1251",
|
||||
},
|
||||
"windows-1252": []string{ // MIB 2252
|
||||
"cswindows1252",
|
||||
"cp1252",
|
||||
"3dwindows-1252",
|
||||
"we8mswin1252",
|
||||
"us-ascii", // MIB 3
|
||||
"ansi_x3.110-1983", // MIB 74 // usascii
|
||||
//"iso-8859-1": []string{ // MIB 4 succeed by win1252
|
||||
"iso8859-1",
|
||||
"iso-ir-100",
|
||||
"iso_8859-1",
|
||||
"latin1",
|
||||
"l1",
|
||||
"ibm819",
|
||||
"cp819",
|
||||
"csisolatin1",
|
||||
"ansi_x3.4-1968",
|
||||
"ansi_x3.4-1986",
|
||||
"cp850",
|
||||
"cp858", // "cp850" Mostly correct except for the Euro sign.
|
||||
"iso_646.irv:1991",
|
||||
"iso646-us",
|
||||
"us",
|
||||
"ibm367",
|
||||
"cp367",
|
||||
"csascii",
|
||||
"ascii",
|
||||
"iso-ir-6",
|
||||
"we8iso8859p1",
|
||||
},
|
||||
"windows-1253": []string{"cswindows1253", "cp1253"}, // MIB 2253
|
||||
"windows-1254": []string{"cswindows1254", "cp1254"}, // MIB 2254
|
||||
"windows-1255": []string{"cSwindows1255", "cp1255"}, // MIB 2255
|
||||
"windows-1256": []string{"cswIndows1256", "cp1256"}, // MIB 2256
|
||||
"windows-1257": []string{"cswinDows1257", "cp1257"}, // MIB 2257
|
||||
"windows-1258": []string{"cswindoWs1258", "cp1258"}, // MIB 2257
|
||||
"koi8-r": []string{"cskoi8r", "koi8r"}, // MIB 2084
|
||||
"koi8-u": []string{"cskoi8u", "koi8u"}, // MIB 2088
|
||||
"macintosh": []string{"mac", "macroman", "csmacintosh"}, // MIB 2027
|
||||
"big5": []string{
|
||||
"zht16mswin950", // cp950
|
||||
"cp950",
|
||||
},
|
||||
"euc-kr": []string{
|
||||
"euckr", // MIB 38
|
||||
"ibm-euckr",
|
||||
//"uhc": []string{ // Korea
|
||||
"ks_c_5601-1987",
|
||||
"ksc5601",
|
||||
"cp949",
|
||||
},
|
||||
"euc-jp": []string{
|
||||
"eucjp",
|
||||
"ibm-eucjp",
|
||||
},
|
||||
"shift_jis": []string{
|
||||
"CP932",
|
||||
"MS932",
|
||||
"Windows-932",
|
||||
"Windows-31J",
|
||||
"MS_Kanji",
|
||||
"IBM-943",
|
||||
"CP943",
|
||||
},
|
||||
"iso-2022-jp": []string{ // MIB 39
|
||||
"iso2022jp",
|
||||
"csiso2022jp",
|
||||
},
|
||||
}
|
||||
|
||||
for expected, names := range mimesets {
|
||||
expenc, _ := htmlindex.Get(expected)
|
||||
if canonical, err := htmlindex.Name(expenc); canonical != expected || err != nil {
|
||||
t.Fatalf("Error while get canonical name. Expected '%v' but have %v `%#v`: %v", expected, canonical, expenc, err)
|
||||
}
|
||||
for _, name := range names {
|
||||
enc, err := getEncoding(name)
|
||||
if err != nil || enc == nil {
|
||||
t.Errorf("Error while getting encoding for %v returned: '%#v' and error: '%v'", name, enc, err)
|
||||
}
|
||||
if expenc != enc {
|
||||
t.Errorf("For %v expected %v '%v' but have '%v'", name, expected, expenc, enc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sample text for UTF8 http://www.columbia.edu/~fdc/utf8/index.html
|
||||
func TestEncodeReader(t *testing.T) {
|
||||
// define test data
|
||||
testData := []struct {
|
||||
params map[string]string
|
||||
original []byte
|
||||
message string
|
||||
}{
|
||||
// russian
|
||||
{
|
||||
map[string]string{"charset": "koi8-r"},
|
||||
// а, з, б, у, к, а, а, б, в, г, д, е, ё
|
||||
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "KOI8-R"},
|
||||
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "csKOI8R"},
|
||||
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "koi8-u"},
|
||||
[]byte{0xC1, 0xDA, 0xC2, 0xD5, 0xCB, 0xC1, 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xA3},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "iso-8859-5"},
|
||||
// а , з , б , у , к , а , а , б , в , г , д , е , ё
|
||||
[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF1},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "csWrong"},
|
||||
[]byte{0xD0, 0xD7, 0xD1, 0xE3, 0xDA, 0xD0, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6},
|
||||
"",
|
||||
},
|
||||
{
|
||||
map[string]string{"charset": "utf8"},
|
||||
[]byte{0xD0, 0xB0, 0xD0, 0xB7, 0xD0, 0xB1, 0xD1, 0x83, 0xD0, 0xBA, 0xD0, 0xB0, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD1, 0x91},
|
||||
"азбукаабвгдеё",
|
||||
},
|
||||
// czechoslovakia
|
||||
{
|
||||
map[string]string{"charset": "windows-1250"},
|
||||
[]byte{225, 228, 232, 233, 236, 244},
|
||||
"áäčéěô",
|
||||
},
|
||||
// umlauts
|
||||
{
|
||||
map[string]string{"charset": "iso-8859-1"},
|
||||
[]byte{196, 203, 214, 220, 228, 235, 246, 252},
|
||||
"ÄËÖÜäëöü",
|
||||
},
|
||||
// latvia
|
||||
{
|
||||
map[string]string{"charset": "iso-8859-4"},
|
||||
[]byte{224, 239, 243, 182, 254},
|
||||
"āīķļū",
|
||||
},
|
||||
{ // encoded by https://www.motobit.com/util/charset-codepage-conversion.asp
|
||||
map[string]string{"charset": "utf7"},
|
||||
[]byte("He wes Leovena+APA-es sone -- li+APA-e him be Drihten.+A6QDtw- +A7MDuwPOA8MDwwOx- +A7wDvwPF- +A60DtAPJA8MDsQO9- +A7UDuwO7A7cDvQO5A7oDrg-. +BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+BCcENQRABD0ENQQ7BDg- +BDgENwQxBEs- +BDcENAQ1BEEETA- +BDg- +BEIEMAQ8-,+C68LvguuC7ELvwuoC80LpA- +C64Lygu0C78LlQuzC78LsgvH- +C6QLrgu/C7QLzQuuC8oLtAu/- +C6oLywuyC80- +C4cLqQu/C6QLvgu1C6QLwQ- +C44LmQvNC5ULwQuuC80- +C5ULvgujC8sLrgvN-."),
|
||||
"He wes Leovenaðes sone -- liðe him be Drihten.Τη γλώσσα μου έδωσαν ελληνική. Чернели избы здесь и там,Чернели избы здесь и там,யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம்.",
|
||||
},
|
||||
|
||||
// iconv -f UTF8 -t GB2312 utf8.txt | hexdump -v -e '"0x" 1/1 "%x, "'
|
||||
{ // encoded by iconv; dump by `cat gb2312.txt | hexdump -v -e '"0x" 1/1 "%x "'` and reformat; text from https://zh.wikipedia.org/wiki/GB_2312
|
||||
map[string]string{"charset": "GB2312"},
|
||||
[]byte{0x47, 0x42, 0x20, 0x32, 0x33, 0x31, 0x32, 0xb5, 0xc4, 0xb3, 0xf6, 0xcf, 0xd6, 0xa3, 0xac, 0xbb, 0xf9, 0xb1, 0xbe, 0xc2, 0xfa, 0xd7, 0xe3, 0xc1, 0xcb, 0xba, 0xba, 0xd7, 0xd6, 0xb5, 0xc4, 0xbc, 0xc6, 0xcb, 0xe3, 0xbb, 0xfa, 0xb4, 0xa6, 0xc0, 0xed, 0xd0, 0xe8, 0xd2, 0xaa, 0xa3, 0xac, 0xcb, 0xfc, 0xcb, 0xf9, 0xca, 0xd5, 0xc2, 0xbc, 0xb5, 0xc4, 0xba, 0xba, 0xd7, 0xd6, 0xd2, 0xd1, 0xbe, 0xad, 0xb8, 0xb2, 0xb8, 0xc7, 0xd6, 0xd0, 0xb9, 0xfa, 0xb4, 0xf3, 0xc2, 0xbd, 0x39, 0x39, 0x2e, 0x37, 0x35, 0x25, 0xb5, 0xc4, 0xca, 0xb9, 0xd3, 0xc3, 0xc6, 0xb5, 0xc2, 0xca, 0xa1, 0xa3, 0xb5, 0xab, 0xb6, 0xd4, 0xd3, 0xda, 0xc8, 0xcb, 0xc3, 0xfb},
|
||||
"GB 2312的出现,基本满足了汉字的计算机处理需要,它所收录的汉字已经覆盖中国大陆99.75%的使用频率。但对于人名",
|
||||
},
|
||||
|
||||
{ // encoded by iconv; text from https://jp.wikipedia.org/wiki/Shift_JIS
|
||||
map[string]string{"charset": "shift-jis"},
|
||||
[]byte{0x95, 0xb6, 0x8e, 0x9a, 0x95, 0x84, 0x8d, 0x86, 0x89, 0xbb, 0x95, 0xfb, 0x8e, 0xae, 0x53, 0x68, 0x69, 0x66, 0x74, 0x5f, 0x4a, 0x49, 0x53, 0x82, 0xcc, 0x90, 0xdd, 0x8c, 0x76, 0x8e, 0xd2, 0x82, 0xe7, 0x82, 0xcd, 0x81, 0x41, 0x90, 0xe6, 0x8d, 0x73, 0x82, 0xb5, 0x82, 0xc4, 0x82, 0xe6, 0x82, 0xad, 0x97, 0x98, 0x97, 0x70, 0x82, 0xb3, 0x82, 0xea, 0x82, 0xc4, 0x82, 0xa2, 0x82, 0xbd, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x30, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x31, 0x81, 0x6a, 0x82, 0xcc, 0x38, 0x83, 0x72, 0x83, 0x62, 0x83, 0x67, 0x95, 0x84, 0x8d, 0x86, 0x81, 0x69, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x89, 0x70, 0x90, 0x94, 0x8e, 0x9a, 0x81, 0x45, 0x94, 0xbc, 0x8a, 0x70, 0x83, 0x4a, 0x83, 0x69, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xc6, 0x81, 0x41, 0x4a, 0x49, 0x53, 0x20, 0x43, 0x20, 0x36, 0x32, 0x32, 0x36, 0x81, 0x69, 0x8c, 0xbb, 0x8d, 0xdd, 0x82, 0xcc, 0x4a, 0x49, 0x53, 0x20, 0x58, 0x20, 0x30, 0x32, 0x30, 0x38, 0x81, 0x41, 0x88, 0xc8, 0x89, 0xba, 0x81, 0x75, 0x8a, 0xbf, 0x8e, 0x9a, 0x81, 0x76, 0x81, 0x6a, 0x82, 0xcc, 0x97, 0xbc, 0x95, 0xb6, 0x8e, 0x9a, 0x8f, 0x57, 0x8d, 0x87, 0x82, 0xf0, 0x95, 0x5c, 0x8c, 0xbb, 0x82, 0xb5, 0x82, 0xe6, 0x82, 0xa4, 0x82, 0xc6, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42, 0x82, 0xdc, 0x82, 0xbd, 0x81, 0x41, 0x83, 0x74, 0x83, 0x40, 0x83, 0x43, 0x83, 0x8b, 0x82, 0xcc, 0x91, 0xe5, 0x82, 0xab, 0x82, 0xb3, 0x82, 0xe2, 0x8f, 0x88, 0x97, 0x9d, 0x8e, 0x9e, 0x8a, 0xd4, 0x82, 0xcc, 0x92, 0x5a, 0x8f, 0x6b, 0x82, 0xf0, 0x90, 0x7d, 0x82, 0xe9, 0x82, 0xbd, 0x82, 0xdf, 0x81, 0x41, 0x83, 0x47, 0x83, 0x58, 0x83, 0x50, 0x81, 0x5b, 0x83, 0x76, 0x83, 0x56, 0x81, 0x5b, 0x83, 0x50, 0x83, 0x93, 0x83, 0x58, 0x82, 0xc8, 0x82, 0xb5, 0x82, 0xc5, 0x8d, 0xac, 0x8d, 0xdd, 0x89, 0xc2, 0x94, 0x5c, 0x82, 0xc9, 0x82, 0xb7, 0x82, 0xe9, 0x82, 0xb1, 0x82, 0xc6, 0x82, 0xf0, 0x8a, 0xe9, 0x90, 0x7d, 0x82, 0xb5, 0x82, 0xbd, 0x81, 0x42},
|
||||
"文字符号化方式Shift_JISの設計者らは、先行してよく利用されていたJIS C 6220(現在のJIS X 0201)の8ビット符号(以下「英数字・半角カナ」)と、JIS C 6226(現在のJIS X 0208、以下「漢字」)の両文字集合を表現しようとした。また、ファイルの大きさや処理時間の短縮を図るため、エスケープシーケンスなしで混在可能にすることを企図した。",
|
||||
},
|
||||
|
||||
// add more from mutations of https://en.wikipedia.org/wiki/World_Wide_Web
|
||||
|
||||
}
|
||||
|
||||
// run tests
|
||||
for _, val := range testData {
|
||||
//fmt.Println("Testing ", val)
|
||||
expected := []byte(val.message)
|
||||
decoded, err := DecodeCharset(val.original, val.params)
|
||||
if len(expected) == 0 {
|
||||
if err == nil {
|
||||
t.Error("Expected err but have ", err)
|
||||
} else {
|
||||
//fmt.Println("Expected err: ", err)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Error("Expected ok but have ", err)
|
||||
}
|
||||
}
|
||||
|
||||
if bytes.Equal(decoded, expected) {
|
||||
// fmt.Println("Succesfull decoding of ", val.params, ":", string(decoded))
|
||||
} else {
|
||||
t.Error("Wrong encoding of ", val.params, ".Expected\n", expected, "\nbut have\n", decoded)
|
||||
}
|
||||
if strings.Compare(val.message, string(decoded)) != 0 {
|
||||
t.Error("Wrong message for ", val.params, ".Expected\n", val.message, "\nbut have\n", string(decoded))
|
||||
}
|
||||
}
|
||||
}
|
||||
364
pkg/mime/mediaType.go
Normal file
364
pkg/mime/mediaType.go
Normal file
@ -0,0 +1,364 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// changeEncodingAndKeepLastParamDefinition is necessary to modify behaviour
|
||||
// provided by the golang standard libraries.
|
||||
func changeEncodingAndKeepLastParamDefinition(v string) (out string, err error) {
|
||||
log := logrus.WithField("pkg", "pm-mime")
|
||||
|
||||
out = v // By default don't do anything with that.
|
||||
keepOrig := true
|
||||
|
||||
i := strings.Index(v, ";")
|
||||
if i == -1 {
|
||||
i = len(v)
|
||||
}
|
||||
mediatype := strings.TrimSpace(strings.ToLower(v[0:i]))
|
||||
|
||||
params := map[string]string{}
|
||||
var continuation map[string]map[string]string
|
||||
|
||||
v = v[i:]
|
||||
for len(v) > 0 {
|
||||
v = strings.TrimLeftFunc(v, unicode.IsSpace)
|
||||
if len(v) == 0 {
|
||||
break
|
||||
}
|
||||
key, value, rest := consumeMediaParam(v)
|
||||
if key == "" {
|
||||
break
|
||||
}
|
||||
|
||||
pmap := params
|
||||
if idx := strings.Index(key, "*"); idx != -1 {
|
||||
baseName := key[:idx]
|
||||
if continuation == nil {
|
||||
continuation = make(map[string]map[string]string)
|
||||
}
|
||||
var ok bool
|
||||
if pmap, ok = continuation[baseName]; !ok {
|
||||
continuation[baseName] = make(map[string]string)
|
||||
pmap = continuation[baseName]
|
||||
}
|
||||
if isFirstContinuation(key) {
|
||||
charset, _, err := get2231Charset(value)
|
||||
if err != nil {
|
||||
log.Errorln("Filter params:", err)
|
||||
continue
|
||||
}
|
||||
if charset != "utf-8" && charset != "us-ascii" {
|
||||
keepOrig = false
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, exists := pmap[key]; exists {
|
||||
keepOrig = false
|
||||
}
|
||||
pmap[key] = value
|
||||
v = rest
|
||||
}
|
||||
|
||||
if keepOrig {
|
||||
return
|
||||
}
|
||||
|
||||
if continuation != nil {
|
||||
for paramKey, contMap := range continuation {
|
||||
value, err := mergeContinuations(paramKey, contMap)
|
||||
if err == nil {
|
||||
params[paramKey+"*"] = value
|
||||
continue
|
||||
}
|
||||
|
||||
// Fallback.
|
||||
log.Errorln("Merge param", paramKey, ":", err)
|
||||
for ck, cv := range contMap {
|
||||
params[ck] = cv
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge ;
|
||||
out = mediatype
|
||||
for k, v := range params {
|
||||
out += ";"
|
||||
out += k
|
||||
out += "="
|
||||
out += v
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// isFirstContinuation reports whether key is the first segment of an RFC 2231
// parameter, i.e. whether the part of key starting at its first "*" is exactly
// "*" or "*0*".
func isFirstContinuation(key string) bool {
	star := strings.Index(key, "*")
	if star == -1 {
		return false
	}
	switch key[star:] {
	case "*", "*0*":
		return true
	}
	return false
}
|
||||
|
||||
// get2231Charset splits an RFC 2231 extended value of the form
// charset'language'value. It returns the lower-cased charset and the part
// after the second apostrophe, or an error when fewer than two apostrophes
// are present. Partially taken from `decode2231Enc` in mime/mediatype.go.
func get2231Charset(v string) (charset, value string, err error) {
	parts := strings.SplitN(v, "'", 3)
	if len(parts) != 3 {
		return "", "", errors.New("incorrect RFC2231 charset format")
	}
	return strings.ToLower(parts[0]), parts[2], nil
}
|
||||
|
||||
func mergeContinuations(paramKey string, contMap map[string]string) (string, error) {
|
||||
var err error
|
||||
var charset, value string
|
||||
|
||||
// Single value.
|
||||
if contValue, ok := contMap[paramKey+"*"]; ok {
|
||||
if charset, value, err = get2231Charset(contValue); err != nil {
|
||||
return "", err
|
||||
}
|
||||
} else {
|
||||
for n := 0; ; n++ {
|
||||
contKey := fmt.Sprintf("%s*%d", paramKey, n)
|
||||
contValue, isLast := contMap[contKey]
|
||||
if !isLast {
|
||||
var ok bool
|
||||
contValue, ok = contMap[contKey+"*"]
|
||||
if !ok {
|
||||
return "", errors.New("not valid RFC2231 continuation")
|
||||
}
|
||||
}
|
||||
if n == 0 {
|
||||
if charset, value, err = get2231Charset(contValue); err != nil || charset == "" {
|
||||
return "", err
|
||||
}
|
||||
} else {
|
||||
value += contValue
|
||||
}
|
||||
if isLast {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return convertHexToUTF(charset, value)
|
||||
}
|
||||
|
||||
// convertHexToUTF converts hex values string with charset to UTF8 in RFC2231 format.
|
||||
func convertHexToUTF(charset, value string) (string, error) {
|
||||
raw, err := percentHexUnescape(value)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
utf8, err := DecodeCharset(raw, map[string]string{"charset": charset})
|
||||
return "utf-8''" + percentHexEscape(utf8), err
|
||||
}
|
||||
|
||||
// consumeMediaParam copy paste mime/mediatype.go:297.
|
||||
func consumeMediaParam(v string) (param, value, rest string) {
|
||||
rest = strings.TrimLeftFunc(v, unicode.IsSpace)
|
||||
if !strings.HasPrefix(rest, ";") {
|
||||
return "", "", v
|
||||
}
|
||||
|
||||
rest = rest[1:] // Consume semicolon.
|
||||
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
|
||||
param, rest = consumeToken(rest)
|
||||
param = strings.ToLower(param)
|
||||
if param == "" {
|
||||
return "", "", v
|
||||
}
|
||||
|
||||
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
|
||||
if !strings.HasPrefix(rest, "=") {
|
||||
return "", "", v
|
||||
}
|
||||
rest = rest[1:] // Consume equals sign.
|
||||
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
|
||||
value, rest2 := consumeValue(rest)
|
||||
if value == "" && rest2 == rest {
|
||||
return "", "", v
|
||||
}
|
||||
rest = rest2
|
||||
return param, value, rest
|
||||
}
|
||||
|
||||
// consumeToken copy paste mime/mediatype.go:238.
|
||||
// consumeToken consumes a token from the beginning of the provided string,
|
||||
// per RFC 2045 section 5.1 (referenced from 2183), and returns
|
||||
// the token consumed and the rest of the string.
|
||||
// Returns ("", v) on failure to consume at least one character.
|
||||
func consumeToken(v string) (token, rest string) {
|
||||
notPos := strings.IndexFunc(v, isNotTokenChar)
|
||||
if notPos == -1 {
|
||||
return v, ""
|
||||
}
|
||||
if notPos == 0 {
|
||||
return "", v
|
||||
}
|
||||
return v[0:notPos], v[notPos:]
|
||||
}
|
||||
|
||||
// consumeValue copy paste mime/mediatype.go:253
|
||||
// consumeValue consumes a "value" per RFC 2045, where a value is
|
||||
// either a 'token' or a 'quoted-string'. On success, consumeValue
|
||||
// returns the value consumed (and de-quoted/escaped, if a
|
||||
// quoted-string) and the rest of the string.
|
||||
// On failure, returns ("", v).
|
||||
func consumeValue(v string) (value, rest string) {
|
||||
if v == "" {
|
||||
return
|
||||
}
|
||||
if v[0] != '"' {
|
||||
return consumeToken(v)
|
||||
}
|
||||
|
||||
// parse a quoted-string
|
||||
buffer := new(strings.Builder)
|
||||
for i := 1; i < len(v); i++ {
|
||||
r := v[i]
|
||||
if r == '"' {
|
||||
return buffer.String(), v[i+1:]
|
||||
}
|
||||
// When MSIE sends a full file path (in "intranet mode"), it does not
|
||||
// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
|
||||
//
|
||||
// No known MIME generators emit unnecessary backslash escapes
|
||||
// for simple token characters like numbers and letters.
|
||||
//
|
||||
// If we see an unnecessary backslash escape, assume it is from MSIE
|
||||
// and intended as a literal backslash. This makes Go servers deal better
|
||||
// with MSIE without affecting the way they handle conforming MIME
|
||||
// generators.
|
||||
if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
|
||||
buffer.WriteByte(v[i+1])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if r == '\r' || r == '\n' {
|
||||
return "", v
|
||||
}
|
||||
buffer.WriteByte(v[i])
|
||||
}
|
||||
// Did not find end quote.
|
||||
return "", v
|
||||
}
|
||||
|
||||
// isNotTokenChar copy paste from mime/mediatype.go:234.
|
||||
func isNotTokenChar(r rune) bool {
|
||||
return !isTokenChar(r)
|
||||
}
|
||||
|
||||
// isTokenChar copy paste from mime/grammar.go:19.
|
||||
// isTokenChar reports whether rune is in 'token' as defined by RFC 1521 and RFC 2045.
|
||||
func isTokenChar(r rune) bool {
|
||||
// token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
|
||||
// or tspecials>
|
||||
return r > 0x20 && r < 0x7f && !isTSpecial(r)
|
||||
}
|
||||
|
||||
// isTSpecial reports whether r is one of the 'tspecials' defined by RFC 1521
// and RFC 2045.
// Copy paste from mime/grammar.go:13.
func isTSpecial(r rune) bool {
	const tspecials = `()<>@,;:\"/[]?=`
	return strings.IndexRune(tspecials, r) >= 0
}
|
||||
|
||||
// percentHexEscape encodes every byte of raw as a percent sign followed by
// exactly two lower-case hex digits, which is the form RFC 2231 extended
// values use and percentHexUnescape accepts. Empty input yields "".
func percentHexEscape(raw []byte) (out string) {
	var escaped strings.Builder
	escaped.Grow(3 * len(raw))
	for _, v := range raw {
		// %02x: bytes below 0x10 need the leading zero, "%a" is not a valid escape.
		fmt.Fprintf(&escaped, "%%%02x", v)
	}
	return escaped.String()
}
|
||||
|
||||
// percentHexUnescape copy paste from mime/mediatype.go:325.
|
||||
func percentHexUnescape(s string) ([]byte, error) {
|
||||
// Count %, check that they're well-formed.
|
||||
percents := 0
|
||||
for i := 0; i < len(s); {
|
||||
if s[i] != '%' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
percents++
|
||||
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
||||
s = s[i:]
|
||||
if len(s) > 3 {
|
||||
s = s[0:3]
|
||||
}
|
||||
return []byte{}, fmt.Errorf("mime: bogus characters after %%: %q", s)
|
||||
}
|
||||
i += 3
|
||||
}
|
||||
if percents == 0 {
|
||||
return []byte(s), nil
|
||||
}
|
||||
|
||||
t := make([]byte, len(s)-2*percents)
|
||||
j := 0
|
||||
for i := 0; i < len(s); {
|
||||
switch s[i] {
|
||||
case '%':
|
||||
t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
|
||||
j++
|
||||
i += 3
|
||||
default:
|
||||
t[j] = s[i]
|
||||
j++
|
||||
i++
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
// Copy paste from mime/mediatype.go:364.
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
|
||||
|
||||
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Any other byte yields 0.
// Copy paste from mime/mediatype.go:376.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
|
||||
544
pkg/mime/parser.go
Normal file
544
pkg/mime/parser.go
Normal file
@ -0,0 +1,544 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/mail"
|
||||
"net/textproto"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// VisitAcceptor decides what happens with a MIME part while a message is
// being walked. MimeVisitor reports the parts it visits to a VisitAcceptor.
type VisitAcceptor interface {
	// Accept is handed the reader and header of a part. MimeVisitor calls it
	// with isFirst set when a part is reached; a multipart part is reported
	// again with isFirst unset after each of its children, and isLast is set
	// on the report that follows the final child. hasPlainSibling lets the
	// acceptor take alternatives into account.
	Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling, isFirst, isLast bool) (err error)
}
|
||||
|
||||
func VisitAll(part io.Reader, h textproto.MIMEHeader, accepter VisitAcceptor) (err error) {
|
||||
mediaType, _, err := getContentType(h)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return accepter.Accept(part, h, mediaType == "text/plain", true, true)
|
||||
}
|
||||
|
||||
// IsLeaf reports whether the part described by h is a leaf of the MIME tree,
// i.e. its Content-Type is not a multipart/* type. A part without Content-Type
// is a leaf. Media type names are matched case-insensitively (RFC 2045 section
// 5.1), so "Multipart/Mixed" is recognised as multipart just like
// getContentType, which lower-cases the media type, would report it.
func IsLeaf(h textproto.MIMEHeader) bool {
	contentType := strings.ToLower(strings.TrimSpace(h.Get("Content-Type")))
	return !strings.HasPrefix(contentType, "multipart/")
}
|
||||
|
||||
// MIMEVisitor is main object to parse (visit) and process (accept) all parts of MIME message.
|
||||
type MimeVisitor struct {
|
||||
target VisitAcceptor
|
||||
}
|
||||
|
||||
// Accept reads part recursively if needed.
|
||||
// hasPlainSibling is there when acceptor want to check alternatives.
|
||||
func (mv *MimeVisitor) Accept(part io.Reader, h textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
||||
if !isFirst {
|
||||
return
|
||||
}
|
||||
|
||||
parentMediaType, params, err := getContentType(h)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = mv.target.Accept(part, h, hasPlainSibling, true, false); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !IsLeaf(h) {
|
||||
var multiparts []io.Reader
|
||||
var multipartHeaders []textproto.MIMEHeader
|
||||
if multiparts, multipartHeaders, err = GetMultipartParts(part, params); err != nil {
|
||||
return
|
||||
}
|
||||
hasPlainChild := false
|
||||
for _, header := range multipartHeaders {
|
||||
mediaType, _, _ := getContentType(header)
|
||||
if mediaType == "text/plain" {
|
||||
hasPlainChild = true
|
||||
}
|
||||
}
|
||||
if hasPlainSibling && parentMediaType == "multipart/related" {
|
||||
hasPlainChild = true
|
||||
}
|
||||
|
||||
for i, p := range multiparts {
|
||||
if err = mv.Accept(p, multipartHeaders[i], hasPlainChild, true, true); err != nil {
|
||||
return
|
||||
}
|
||||
if err = mv.target.Accept(part, h, hasPlainSibling, false, i == (len(multiparts)-1)); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// NewMIMEVisitor returns a new mime visitor initialised with an acceptor.
|
||||
func NewMimeVisitor(targetAccepter VisitAcceptor) *MimeVisitor {
|
||||
return &MimeVisitor{targetAccepter}
|
||||
}
|
||||
|
||||
// GetRawMimePart extracts the raw bytes of the first MIME part of rawdata.
//
// The first returned reader replays everything that was read from rawdata.
// The second one yields the bytes between the first line starting with
// boundary and the next line starting with boundary, without the line break
// (LF or CRLF) in front of that closing line. All other bytes, including CRLF
// line endings, are passed through unchanged. If no line starts with boundary
// the second reader is empty; if the closing line is missing it yields
// everything after the first boundary line.
//
// boundary is matched as a plain line prefix.
// NOTE(review): callers presumably pass it including the leading "--" — confirm.
func GetRawMimePart(rawdata io.Reader, boundary string) (io.Reader, io.Reader) {
	// Best effort: a read error is ignored and whatever was read is processed.
	b, _ := ioutil.ReadAll(rawdata)
	tee := bytes.NewReader(b)

	reader := bufio.NewReader(bytes.NewReader(b))
	byteBoundary := []byte(boundary)

	// Skip everything up to and including the opening boundary line.
	// ReadBytes keeps the line terminator and never splits long lines.
	for {
		line, err := reader.ReadBytes('\n')
		if bytes.HasPrefix(line, byteBoundary) {
			break
		}
		if err != nil {
			return tee, bytes.NewReader(nil)
		}
	}

	// Collect the raw lines up to the closing boundary line.
	bodyBuffer := &bytes.Buffer{}
	for {
		line, err := reader.ReadBytes('\n')
		if bytes.HasPrefix(line, byteBoundary) {
			// The line break in front of the boundary belongs to the
			// boundary delimiter, not to the part.
			data := bytes.TrimSuffix(bodyBuffer.Bytes(), []byte("\n"))
			data = bytes.TrimSuffix(data, []byte("\r"))
			return tee, bytes.NewReader(data)
		}
		bodyBuffer.Write(line)
		if err != nil {
			// End of data without closing boundary.
			return tee, bytes.NewReader(bodyBuffer.Bytes())
		}
	}
}
|
||||
|
||||
func GetAllChildParts(part io.Reader, h textproto.MIMEHeader) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
|
||||
mediaType, params, err := getContentType(h)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if strings.HasPrefix(mediaType, "multipart/") {
|
||||
var multiparts []io.Reader
|
||||
var multipartHeaders []textproto.MIMEHeader
|
||||
if multiparts, multipartHeaders, err = GetMultipartParts(part, params); err != nil {
|
||||
return
|
||||
}
|
||||
if strings.Contains(mediaType, "alternative") {
|
||||
var chosenPart io.Reader
|
||||
var chosenHeader textproto.MIMEHeader
|
||||
if chosenPart, chosenHeader, err = pickAlternativePart(multiparts, multipartHeaders); err != nil {
|
||||
return
|
||||
}
|
||||
var childParts []io.Reader
|
||||
var childHeaders []textproto.MIMEHeader
|
||||
if childParts, childHeaders, err = GetAllChildParts(chosenPart, chosenHeader); err != nil {
|
||||
return
|
||||
}
|
||||
parts = append(parts, childParts...)
|
||||
headers = append(headers, childHeaders...)
|
||||
} else {
|
||||
for i, p := range multiparts {
|
||||
var childParts []io.Reader
|
||||
var childHeaders []textproto.MIMEHeader
|
||||
if childParts, childHeaders, err = GetAllChildParts(p, multipartHeaders[i]); err != nil {
|
||||
return
|
||||
}
|
||||
parts = append(parts, childParts...)
|
||||
headers = append(headers, childHeaders...)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parts = append(parts, part)
|
||||
headers = append(headers, h)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetMultipartParts splits the multipart body r at params["boundary"] and
// returns one in-memory reader and one header per part, in order of
// appearance. Both slices are non-nil even when there are no parts. When
// advancing to the next part fails, the parts read so far are returned
// together with the error. Errors while reading the content of a part are
// ignored; the bytes read up to that point are kept.
func GetMultipartParts(r io.Reader, params map[string]string) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
	parts = make([]io.Reader, 0)
	headers = make([]textproto.MIMEHeader, 0)

	mr := multipart.NewReader(r, params["boundary"])
	for {
		p, nextErr := mr.NextPart()
		if nextErr == io.EOF {
			// Regular end of the multipart body.
			return parts, headers, nil
		}
		if nextErr != nil {
			return parts, headers, nextErr
		}

		content, _ := ioutil.ReadAll(p)
		parts = append(parts, bytes.NewBuffer(content))
		headers = append(headers, p.Header)
	}
}
|
||||
|
||||
func pickAlternativePart(parts []io.Reader, headers []textproto.MIMEHeader) (part io.Reader, h textproto.MIMEHeader, err error) {
|
||||
|
||||
for i, h := range headers {
|
||||
mediaType, _, err := getContentType(h)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(mediaType, "multipart/") {
|
||||
return parts[i], headers[i], nil
|
||||
}
|
||||
}
|
||||
for i, h := range headers {
|
||||
mediaType, _, err := getContentType(h)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if mediaType == "text/html" {
|
||||
return parts[i], headers[i], nil
|
||||
}
|
||||
}
|
||||
for i, h := range headers {
|
||||
mediaType, _, err := getContentType(h)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if mediaType == "text/plain" {
|
||||
return parts[i], headers[i], nil
|
||||
}
|
||||
}
|
||||
|
||||
// If we get all the way here, part will be nil.
|
||||
return
|
||||
}
|
||||
|
||||
// Patterns used by parseAddressComment, compiled once instead of on every call.
var (
	// addressListSeparator splits an address list at commas and semicolons.
	addressListSeparator = regexp.MustCompile("[,;]")
	// addressComment matches one parenthesised comment (no nesting).
	addressComment = regexp.MustCompile("[(][^)]*[)]")
)

// parseAddressComment does "Parse address comment" as defined in
// http://tools.wordtothewise.com/rfc/822
//
// raw is split at commas and semicolons. From every item the parenthesised
// comments are removed before it is parsed as a mail address; if the parsed
// address has no display name, the comment texts joined by a space are used as
// name. Items that do not parse are dropped. The result is the formatted
// addresses joined by ", ".
//
// FIXME: Does not work for address groups.
// NOTE: This should be removed for go>1.10 (please check).
func parseAddressComment(raw string) string {
	parsed := []string{}
	for _, item := range addressListSeparator.Split(raw, -1) {
		comments := strings.Join(addressComment.FindAllString(item, -1), " ")
		comments = strings.Replace(comments, "(", "", -1)
		comments = strings.Replace(comments, ")", "", -1)

		withoutComments := addressComment.ReplaceAllString(item, "")
		addr, err := mail.ParseAddress(withoutComments)
		if err != nil {
			// Not an address (e.g. empty item): skip it.
			continue
		}
		if addr.Name == "" {
			addr.Name = comments
		}
		parsed = append(parsed, addr.String())
	}
	return strings.Join(parsed, ", ")
}
|
||||
|
||||
func checkHeaders(headers []textproto.MIMEHeader) bool {
|
||||
foundAttachment := false
|
||||
|
||||
for i := 0; i < len(headers); i++ {
|
||||
h := headers[i]
|
||||
|
||||
mediaType, _, _ := getContentType(h)
|
||||
|
||||
if !strings.HasPrefix(mediaType, "text/") {
|
||||
foundAttachment = true
|
||||
} else if foundAttachment {
|
||||
// This means that there is a text part after the first attachment,
|
||||
// so we will have to convert the body from plain->HTML.
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func decodePart(partReader io.Reader, header textproto.MIMEHeader) (decodedPart io.Reader) {
|
||||
decodedPart = DecodeContentEncoding(partReader, header.Get("Content-Transfer-Encoding"))
|
||||
if decodedPart == nil {
|
||||
log.Warnf("Unsupported Content-Transfer-Encoding '%v'", header.Get("Content-Transfer-Encoding"))
|
||||
decodedPart = partReader
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getContentType parses the Content-Type field of header with
// mime.ParseMediaType and returns its result.
// Assume 'text/plain' if missing.
func getContentType(header textproto.MIMEHeader) (mediatype string, params map[string]string, err error) {
	if contentType := header.Get("Content-Type"); contentType != "" {
		return mime.ParseMediaType(contentType)
	}
	return mime.ParseMediaType("text/plain")
}
|
||||
|
||||
// ===================== MIME Printer ===================================
|
||||
// Simply print resulting MIME tree into text form.
|
||||
// TODO move this to file mime_printer.go.
|
||||
|
||||
// stack is a last-in-first-out list of strings. Push and Pop do not modify the
// receiver variable; they return the resulting stack.
type stack []string

// Push returns the stack with v added on top.
func (s stack) Push(v string) stack {
	grown := append(s, v)
	return grown
}

// Pop returns the stack without its top element, and that element.
// The stack must not be empty.
func (s stack) Pop() (stack, string) {
	top := len(s) - 1
	return s[:top], s[top]
}

// Peek returns the top element without removing it.
// The stack must not be empty.
func (s stack) Peek() string {
	top := len(s) - 1
	return s[top]
}
|
||||
|
||||
type MIMEPrinter struct {
|
||||
result *bytes.Buffer
|
||||
boundaryStack stack
|
||||
}
|
||||
|
||||
func NewMIMEPrinter() (pd *MIMEPrinter) {
|
||||
return &MIMEPrinter{
|
||||
result: bytes.NewBuffer([]byte("")),
|
||||
boundaryStack: stack{},
|
||||
}
|
||||
}
|
||||
|
||||
func (pd *MIMEPrinter) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
||||
if isFirst {
|
||||
http.Header(header).Write(pd.result)
|
||||
pd.result.Write([]byte("\n"))
|
||||
if IsLeaf(header) {
|
||||
pd.result.ReadFrom(partReader)
|
||||
} else {
|
||||
_, params, _ := getContentType(header)
|
||||
boundary := params["boundary"]
|
||||
pd.boundaryStack = pd.boundaryStack.Push(boundary)
|
||||
pd.result.Write([]byte("\nThis is a multi-part message in MIME format.\n--" + boundary + "\n"))
|
||||
}
|
||||
} else {
|
||||
if !isLast {
|
||||
pd.result.Write([]byte("\n--" + pd.boundaryStack.Peek() + "\n"))
|
||||
} else {
|
||||
var boundary string
|
||||
pd.boundaryStack, boundary = pd.boundaryStack.Pop()
|
||||
pd.result.Write([]byte("\n--" + boundary + "--\n.\n"))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pd *MIMEPrinter) String() string {
|
||||
return pd.result.String()
|
||||
}
|
||||
|
||||
// ======================== PlainText Collector =========================
|
||||
// Collect contents of all non-attachment text/plain parts and return it as a string.
|
||||
// TODO move this to file collector_plaintext.go.
|
||||
|
||||
type PlainTextCollector struct {
|
||||
target VisitAcceptor
|
||||
plainTextContents *bytes.Buffer
|
||||
}
|
||||
|
||||
func NewPlainTextCollector(targetAccepter VisitAcceptor) *PlainTextCollector {
|
||||
return &PlainTextCollector{
|
||||
target: targetAccepter,
|
||||
plainTextContents: bytes.NewBuffer([]byte("")),
|
||||
}
|
||||
}
|
||||
|
||||
func (ptc *PlainTextCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
||||
if isFirst {
|
||||
if IsLeaf(header) {
|
||||
mediaType, params, _ := getContentType(header)
|
||||
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
||||
if mediaType == "text/plain" && disp != "attachment" {
|
||||
partData, _ := ioutil.ReadAll(partReader)
|
||||
decodedPart := decodePart(bytes.NewReader(partData), header)
|
||||
|
||||
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
||||
buffer, err = DecodeCharset(buffer, params)
|
||||
if err != nil {
|
||||
log.Warnln("Decode charset error:", err)
|
||||
return err
|
||||
}
|
||||
ptc.plainTextContents.Write(buffer)
|
||||
}
|
||||
|
||||
err = ptc.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
err = ptc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
|
||||
func (ptc PlainTextCollector) GetPlainText() string {
|
||||
return ptc.plainTextContents.String()
|
||||
}
|
||||
|
||||
// ======================== Body Collector ==============
|
||||
// Collect contents of all non-attachment parts and return it as a string.
|
||||
// TODO move this to file collector_body.go.
|
||||
|
||||
type BodyCollector struct {
|
||||
target VisitAcceptor
|
||||
htmlBodyBuffer *bytes.Buffer
|
||||
plainBodyBuffer *bytes.Buffer
|
||||
htmlHeaderBuffer *bytes.Buffer
|
||||
plainHeaderBuffer *bytes.Buffer
|
||||
hasHtml bool
|
||||
}
|
||||
|
||||
func NewBodyCollector(targetAccepter VisitAcceptor) *BodyCollector {
|
||||
return &BodyCollector{
|
||||
target: targetAccepter,
|
||||
htmlBodyBuffer: bytes.NewBuffer([]byte("")),
|
||||
plainBodyBuffer: bytes.NewBuffer([]byte("")),
|
||||
htmlHeaderBuffer: bytes.NewBuffer([]byte("")),
|
||||
plainHeaderBuffer: bytes.NewBuffer([]byte("")),
|
||||
}
|
||||
}
|
||||
|
||||
func (bc *BodyCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
||||
// TODO: Collect html and plaintext - if there's html with plain sibling don't include plain/text.
|
||||
if isFirst {
|
||||
if IsLeaf(header) {
|
||||
mediaType, params, _ := getContentType(header)
|
||||
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
||||
if disp != "attachment" {
|
||||
partData, _ := ioutil.ReadAll(partReader)
|
||||
decodedPart := decodePart(bytes.NewReader(partData), header)
|
||||
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
||||
buffer, err = DecodeCharset(buffer, params)
|
||||
if err != nil {
|
||||
log.Warnln("Decode charset error:", err)
|
||||
return err
|
||||
}
|
||||
if mediaType == "text/html" {
|
||||
bc.hasHtml = true
|
||||
http.Header(header).Write(bc.htmlHeaderBuffer)
|
||||
bc.htmlBodyBuffer.Write(buffer)
|
||||
} else if mediaType == "text/plain" {
|
||||
http.Header(header).Write(bc.plainHeaderBuffer)
|
||||
bc.plainBodyBuffer.Write(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
err = bc.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
err = bc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
|
||||
func (bc *BodyCollector) GetBody() (string, string) {
|
||||
if bc.hasHtml {
|
||||
return bc.htmlBodyBuffer.String(), "text/html"
|
||||
} else {
|
||||
return bc.plainBodyBuffer.String(), "text/plain"
|
||||
}
|
||||
}
|
||||
|
||||
func (bc *BodyCollector) GetHeaders() string {
|
||||
if bc.hasHtml {
|
||||
return bc.htmlHeaderBuffer.String()
|
||||
} else {
|
||||
return bc.plainHeaderBuffer.String()
|
||||
}
|
||||
}
|
||||
|
||||
// ======================== Attachments Collector ==============
|
||||
// Collect contents of all attachment parts and return them as a string.
|
||||
// TODO move this to file collector_attachment.go.
|
||||
|
||||
type AttachmentsCollector struct {
|
||||
target VisitAcceptor
|
||||
attBuffers []string
|
||||
attHeaders []string
|
||||
}
|
||||
|
||||
func NewAttachmentsCollector(targetAccepter VisitAcceptor) *AttachmentsCollector {
|
||||
return &AttachmentsCollector{
|
||||
target: targetAccepter,
|
||||
attBuffers: []string{},
|
||||
attHeaders: []string{},
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *AttachmentsCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
||||
if isFirst {
|
||||
if IsLeaf(header) {
|
||||
mediaType, params, _ := getContentType(header)
|
||||
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
||||
if (mediaType != "text/html" && mediaType != "text/plain") || disp == "attachment" {
|
||||
partData, _ := ioutil.ReadAll(partReader)
|
||||
decodedPart := decodePart(bytes.NewReader(partData), header)
|
||||
|
||||
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
||||
buffer, err = DecodeCharset(buffer, params)
|
||||
if err != nil {
|
||||
log.Warnln("Decode charset error:", err)
|
||||
return err
|
||||
}
|
||||
headerBuf := new(bytes.Buffer)
|
||||
http.Header(header).Write(headerBuf)
|
||||
ac.attHeaders = append(ac.attHeaders, headerBuf.String())
|
||||
ac.attBuffers = append(ac.attBuffers, string(buffer))
|
||||
}
|
||||
|
||||
err = ac.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
err = ac.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
||||
return
|
||||
}
|
||||
|
||||
func (ac AttachmentsCollector) GetAttachments() []string {
|
||||
return ac.attBuffers
|
||||
}
|
||||
|
||||
func (ac AttachmentsCollector) GetAttHeaders() []string {
|
||||
return ac.attHeaders
|
||||
}
|
||||
228
pkg/mime/parser_test.go
Normal file
228
pkg/mime/parser_test.go
Normal file
@ -0,0 +1,228 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"io/ioutil"
|
||||
"net/mail"
|
||||
|
||||
"net/textproto"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func minimalParse(mimeBody string) (readBody string, plainContents string, err error) {
|
||||
mm, err := mail.ReadMessage(strings.NewReader(mimeBody))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
h := textproto.MIMEHeader(mm.Header)
|
||||
mmBodyData, err := ioutil.ReadAll(mm.Body)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
printAccepter := NewMIMEPrinter()
|
||||
plainTextCollector := NewPlainTextCollector(printAccepter)
|
||||
visitor := NewMimeVisitor(plainTextCollector)
|
||||
err = VisitAll(bytes.NewReader(mmBodyData), h, visitor)
|
||||
|
||||
readBody = printAccepter.String()
|
||||
plainContents = plainTextCollector.GetPlainText()
|
||||
|
||||
return readBody, plainContents, err
|
||||
}
|
||||
|
||||
func androidParse(mimeBody string) (body, headers string, atts, attHeaders []string, err error) {
|
||||
mm, err := mail.ReadMessage(strings.NewReader(mimeBody))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
h := textproto.MIMEHeader(mm.Header)
|
||||
mmBodyData, err := ioutil.ReadAll(mm.Body)
|
||||
|
||||
printAccepter := NewMIMEPrinter()
|
||||
bodyCollector := NewBodyCollector(printAccepter)
|
||||
attachmentsCollector := NewAttachmentsCollector(bodyCollector)
|
||||
mimeVisitor := NewMimeVisitor(attachmentsCollector)
|
||||
err = VisitAll(bytes.NewReader(mmBodyData), h, mimeVisitor)
|
||||
|
||||
body, _ = bodyCollector.GetBody()
|
||||
headers = bodyCollector.GetHeaders()
|
||||
atts = attachmentsCollector.GetAttachments()
|
||||
attHeaders = attachmentsCollector.GetAttHeaders()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// TestParseBoundaryIsEmpty checks that minimalParse reports an error for a
// message whose first nested part declares "multipart/mixed" without a
// boundary parameter. The printed body and collected content are only logged.
func TestParseBoundaryIsEmpty(t *testing.T) {
|
||||
testMessage :=
|
||||
`Date: Sun, 10 Mar 2019 11:10:06 -0600
|
||||
In-Reply-To: <abcbase64@protonmail.com>
|
||||
X-Original-To: enterprise@protonmail.com
|
||||
References: <abc64@unicoderns.com> <abc63@protonmail.com> <abc64@protonmail.com> <abc65@mail.gmail.com> <abc66@protonmail.com>
|
||||
To: "ProtonMail" <enterprise@protonmail.com>
|
||||
X-Pm-Origin: external
|
||||
Delivered-To: enterprise@protonmail.com
|
||||
Content-Type: multipart/mixed; boundary=ac7e36bd45425e70b4dab2128f34172e4dc3f9ff2eeb47e909267d4252794ec7
|
||||
Reply-To: XYZ <xyz@xyz.com>
|
||||
Mime-Version: 1.0
|
||||
Subject: Encrypted Message
|
||||
Return-Path: <xyz@xyz.com>
|
||||
From: XYZ <xyz@xyz.com>
|
||||
X-Pm-Conversationid-Id: gNX9bDPLmBgFZ-C3Tdlb628cas1Xl0m4dql5nsWzQAEI-WQv0ytfwPR4-PWELEK0_87XuFOgetc239Y0pjPYHQ==
|
||||
X-Pm-Date: Sun, 10 Mar 2019 18:10:06 +0100
|
||||
Message-Id: <68c11e46-e611-d9e4-edc1-5ec96bac77cc@unicoderns.com>
|
||||
X-Pm-Transfer-Encryption: TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)
|
||||
X-Pm-External-Id: <68c11e46-e611-d9e4-edc1-5ec96bac77cc@unicoderns.com>
|
||||
X-Pm-Internal-Id: _iJ8ETxcqXTSK8IzCn0qFpMUTwvRf-xJUtldRA1f6yHdmXjXzKleG3F_NLjZL3FvIWVHoItTxOuuVXcukwwW3g==
|
||||
Openpgp: preference=signencrypt
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.4.0
|
||||
X-Pm-Content-Encryption: end-to-end
|
||||
|
||||
--ac7e36bd45425e70b4dab2128f34172e4dc3f9ff2eeb47e909267d4252794ec7
|
||||
Content-Disposition: inline
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
Content-Type: multipart/mixed; charset=utf-8
|
||||
|
||||
Content-Type: multipart/mixed; boundary="xnAIW3Turb9YQZ2rXc2ZGZH45WepHIZyy";
|
||||
protected-headers="v1"
|
||||
From: XYZ <xyz@xyz.com>
|
||||
To: "ProtonMail" <enterprise@protonmail.com>
|
||||
Subject: Encrypted Message
|
||||
Message-ID: <68c11e46-e611-d9e4-edc1-5ec96bac77cc@unicoderns.com>
|
||||
References: <abc64@unicoderns.com> <abc63@protonmail.com> <abc64@protonmail.com> <abc65@mail.gmail.com> <abc66@protonmail.com>
|
||||
In-Reply-To: <abcbase64@protonmail.com>
|
||||
|
||||
--xnAIW3Turb9YQZ2rXc2ZGZH45WepHIZyy
|
||||
Content-Type: text/rfc822-headers; protected-headers="v1"
|
||||
Content-Disposition: inline
|
||||
|
||||
From: XYZ <xyz@xyz.com>
|
||||
To: ProtonMail <enterprise@protonmail.com>
|
||||
Subject: Re: Encrypted Message
|
||||
|
||||
--xnAIW3Turb9YQZ2rXc2ZGZH45WepHIZyy
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="------------F9E5AA6D49692F51484075E3"
|
||||
Content-Language: en-US
|
||||
|
||||
This is a multi-part message in MIME format.
|
||||
--------------F9E5AA6D49692F51484075E3
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
Hi ...
|
||||
|
||||
--------------F9E5AA6D49692F51484075E3
|
||||
Content-Type: text/html; charset=utf-8
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body text=3D"#000000" bgcolor=3D"#FFFFFF">
|
||||
<p>Hi .. </p>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
--------------F9E5AA6D49692F51484075E3--
|
||||
|
||||
--xnAIW3Turb9YQZ2rXc2ZGZH45WepHIZyy--
|
||||
|
||||
--ac7e36bd45425e70b4dab2128f34172e4dc3f9ff2eeb47e909267d4252794ec7--
|
||||
|
||||
|
||||
`
|
||||
|
||||
body, content, err := minimalParse(testMessage)
|
||||
if err == nil {
|
||||
t.Fatal("should have error but is", err)
|
||||
}
|
||||
t.Log("==BODY==")
|
||||
t.Log(body)
|
||||
t.Log("==CONTENT==")
|
||||
t.Log(content)
|
||||
}
|
||||
|
||||
// TestParse runs androidParse on a multipart/mixed message with a text/plain
// part, a text/html part and a text/plain attachment. It reports a parse
// error and prints the collected body, body headers, attachments and
// attachment headers; the printed values are not asserted.
func TestParse(t *testing.T) {
|
||||
testMessage :=
|
||||
`From: John Doe <example@example.com>
|
||||
MIME-Version: 1.0
|
||||
Content-Type: multipart/mixed;
|
||||
boundary="XXXXboundary text"
|
||||
|
||||
This is a multipart message in MIME format.
|
||||
|
||||
--XXXXboundary text
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
this is the body text
|
||||
|
||||
--XXXXboundary text
|
||||
Content-Type: text/html; charset=utf-8
|
||||
|
||||
<html><body>this is the html body text</body></html>
|
||||
|
||||
--XXXXboundary text
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
Content-Disposition: attachment;
|
||||
filename="test.txt"
|
||||
|
||||
this is the attachment text
|
||||
|
||||
--XXXXboundary text--
|
||||
|
||||
|
||||
`
|
||||
body, heads, att, attHeads, err := androidParse(testMessage)
|
||||
if err != nil {
|
||||
t.Error("parse error", err)
|
||||
}
|
||||
|
||||
fmt.Println("==BODY:")
|
||||
fmt.Println(body)
|
||||
fmt.Println("==BODY HEADERS:")
|
||||
fmt.Println(heads)
|
||||
|
||||
fmt.Println("==ATTACHMENTS:")
|
||||
fmt.Println(att)
|
||||
fmt.Println("==ATTACHMENT HEADERS:")
|
||||
fmt.Println(attHeads)
|
||||
}
|
||||
|
||||
func TestParseAddressComment(t *testing.T) {
|
||||
parsingExamples := map[string]string{
|
||||
"": "",
|
||||
"(Only Comment) here@pm.me": "\"Only Comment\" <here@pm.me>",
|
||||
"Normal Name (With Comment) <here@pm.me>": "\"Normal Name\" <here@pm.me>",
|
||||
"<Muhammed.(I am the greatest)Ali@(the)Vegas.WBA>": "\"I am the greatest the\" <Muhammed.Ali@Vegas.WBA>",
|
||||
}
|
||||
|
||||
for raw, expected := range parsingExamples {
|
||||
parsed := parseAddressComment(raw)
|
||||
if expected != parsed {
|
||||
t.Errorf("When parsing %q expected %q but have %q", raw, expected, parsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
188
pkg/mime/utf7Decoder.go
Normal file
188
pkg/mime/utf7Decoder.go
Normal file
@ -0,0 +1,188 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package pmmime
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// utf7Decoder copied from: https://github.com/cention-sany/utf7/blob/master/utf7.go
|
||||
// We need `encoding.Decoder` instead of function `UTF7DecodeBytes`.
|
||||
type utf7Decoder struct {
|
||||
transform.NopResetter
|
||||
}
|
||||
|
||||
// NewUtf7Decoder returns a new decoder for utf7.
|
||||
func NewUtf7Decoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: utf7Decoder{}}
|
||||
}
|
||||
|
||||
// uRepl is the Unicode replacement code point; utf7dec treats it as the
// failure marker when combining a UTF-16 surrogate pair.
const uRepl = '\uFFFD'

// u7min and u7max bound the byte values that Transform accepts as
// self-representing outside a Base64 section.
const (
	u7min = 0x20 // Minimum self-representing UTF-7 value
	u7max = 0x7E // Maximum self-representing UTF-7 value
)
|
||||
|
||||
// modifiedbase64 is the 64-character alphabet used for the Base64 sections
// of UTF-7 input.
const modifiedbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

var (
	// ErrBadUTF7 is returned to indicate the invalid modified UTF-7 encoding.
	ErrBadUTF7 = errors.New("utf7: bad utf-7 encoding")

	// u7enc is the Base64 codec built over the modifiedbase64 alphabet.
	u7enc = base64.NewEncoding(modifiedbase64)
)
|
||||
|
||||
// isModifiedBase64 reports whether r is one of the characters allowed inside
// a Base64 section: an ASCII letter, an ASCII digit, '+' or '/'.
func isModifiedBase64(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z',
		'a' <= r && r <= 'z',
		'0' <= r && r <= '9',
		r == '+',
		r == '/':
		return true
	default:
		return false
	}
}
|
||||
|
||||
func (d utf7Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var implicit bool
|
||||
var tmp int
|
||||
|
||||
nd, n := len(dst), len(src)
|
||||
if n == 0 && !atEOF {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
for ; nSrc < n; nSrc++ {
|
||||
if nDst >= nd {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
if c := src[nSrc]; ((c < u7min || c > u7max) &&
|
||||
c != '\t' && c != '\r' && c != '\n') ||
|
||||
c == '~' || c == '\\' {
|
||||
return nDst, nSrc, ErrBadUTF7 // Illegal code point in ASCII mode.
|
||||
} else if c != '+' {
|
||||
dst[nDst] = c // Character is self-representing.
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
// Found '+'.
|
||||
start := nSrc + 1
|
||||
tmp = nSrc // nSrc still points to '+', tmp points to the end of BASE64.
|
||||
|
||||
// Find the end of the Base64 or "+-" segment.
|
||||
implicit = false
|
||||
for tmp++; tmp < n && src[tmp] != '-'; tmp++ {
|
||||
if !isModifiedBase64(src[tmp]) {
|
||||
if tmp == start {
|
||||
return nDst, tmp, ErrBadUTF7 // '+' next char must modified base64.
|
||||
}
|
||||
// Implicit shift back to ASCII, no need for '-' character.
|
||||
implicit = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if tmp == start {
|
||||
if tmp == n {
|
||||
// Did not find '-' sign and '+' is the last character.
|
||||
// Total nSrc does not include '+'.
|
||||
if atEOF {
|
||||
return nDst, nSrc, ErrBadUTF7 // '+' can not be at the end.
|
||||
}
|
||||
// '+' can not be at the end, the source is too short.
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
dst[nDst] = '+' // Escape sequence "+-".
|
||||
nDst++
|
||||
} else if tmp == n && !atEOF {
|
||||
// No EOF found, the source is too short.
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
} else if b := utf7dec(src[start:tmp]); len(b) > 0 {
|
||||
if len(b)+nDst > nd {
|
||||
// Need more space in dst for the decoded modified BASE64 unicode.
|
||||
// Total nSrc does not include '+'.
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
copy(dst[nDst:], b) // Control or non-ASCII code points in Base64.
|
||||
nDst += len(b)
|
||||
if implicit {
|
||||
if nDst >= nd {
|
||||
return nDst, tmp, transform.ErrShortDst
|
||||
}
|
||||
dst[nDst] = src[tmp] // Implicit shift.
|
||||
nDst++
|
||||
}
|
||||
if tmp == n {
|
||||
return nDst, tmp, nil
|
||||
}
|
||||
} else {
|
||||
return nDst, nSrc, ErrBadUTF7 // Bad encoding.
|
||||
}
|
||||
nSrc = tmp
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// utf7dec extracts UTF-16-BE bytes from Base64 data and converts them to UTF-8.
|
||||
// A nil slice is returned if the encoding is invalid.
|
||||
func utf7dec(b64 []byte) []byte {
|
||||
var b []byte
|
||||
|
||||
// Allocate a single block of memory large enough to store the Base64 data
|
||||
// (if padding is required), UTF-16-BE bytes, and decoded UTF-8 bytes.
|
||||
// Since a 2-byte UTF-16 sequence may expand into a 3-byte UTF-8 sequence,
|
||||
// double the space allocation for UTF-8.
|
||||
if n := len(b64); b64[n-1] == '=' {
|
||||
return nil
|
||||
} else if n&3 == 0 {
|
||||
b = make([]byte, u7enc.DecodedLen(n)*3)
|
||||
} else {
|
||||
n += 4 - n&3
|
||||
b = make([]byte, n+u7enc.DecodedLen(n)*3)
|
||||
copy(b[copy(b, b64):n], []byte("=="))
|
||||
b64, b = b[:n], b[n:]
|
||||
}
|
||||
|
||||
// Decode Base64 into the first 1/3rd of b.
|
||||
n, err := u7enc.Decode(b, b64)
|
||||
if err != nil || n&1 == 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode UTF-16-BE into the remaining 2/3rds of b.
|
||||
b, s := b[:n], b[n:]
|
||||
j := 0
|
||||
for i := 0; i < n; i += 2 {
|
||||
r := rune(b[i])<<8 | rune(b[i+1])
|
||||
if utf16.IsSurrogate(r) {
|
||||
if i += 2; i == n {
|
||||
return nil
|
||||
}
|
||||
r2 := rune(b[i])<<8 | rune(b[i+1])
|
||||
if r = utf16.DecodeRune(r, r2); r == uRepl {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
j += utf8.EncodeRune(s[j:], r)
|
||||
}
|
||||
return s[:j]
|
||||
}
|
||||
Reference in New Issue
Block a user