refactor: don't reconstruct mimeBody

This commit is contained in:
James Houlahan
2020-08-06 10:27:08 +02:00
parent 7e1af9ff4e
commit 0e7e13211b
39 changed files with 311 additions and 533 deletions

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import "regexp"

View File

@ -1,11 +1,28 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (
"bytes"
"io"
"io/ioutil"
"github.com/emersion/go-message"
"github.com/sirupsen/logrus"
)
type Parser struct {
@ -13,10 +30,10 @@ type Parser struct {
root *Part
}
func New(r io.Reader) (*Parser, error) {
func New(b []byte) (*Parser, error) {
p := new(Parser)
entity, err := message.Read(r)
entity, err := message.Read(bytes.NewReader(b))
if err != nil && !message.IsUnknownCharset(err) {
return nil, err
}
@ -70,12 +87,6 @@ func (p *Parser) endPart() {
} else {
p.root = part
}
if !part.isUTF8() {
if err := part.convertToUTF8(); err != nil {
logrus.WithError(err).Error("failed to convert part to utf-8")
}
}
}
func (p *Parser) top() *Part {

View File

@ -1,49 +1,48 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (
"bytes"
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// newTestParser builds a Parser from the fixture named msg (opened through f)
// and fails the test immediately, via require.NoError, if New returns an error.
func newTestParser(t *testing.T, msg string) *Parser {
r := f(msg)
// NOTE(review): this call and the New(buf.Bytes()) call below both appear in
// this view; presumably this one is the pre-change form, from when New
// accepted a reader — confirm against the commit.
p, err := New(r)
// Read the whole fixture into memory so its bytes can be handed to New.
buf := new(bytes.Buffer)
if _, err := buf.ReadFrom(r); err != nil {
panic(err)
}
p, err := New(buf.Bytes())
require.NoError(t, err)
return p
}
// TestParserSpecifiedLatin1Charset parses the text_plain_latin1.eml fixture
// and checks that the collected part bodies are exactly "ééééééé".
func TestParserSpecifiedLatin1Charset(t *testing.T) {
p := newTestParser(t, "text_plain_latin1.eml")
checkBodies(t, p, "ééééééé")
}
// TestParserUnspecifiedLatin1Charset parses the text_plain_unknown_latin1.eml
// fixture and checks that the collected part bodies are exactly "ééééééé".
func TestParserUnspecifiedLatin1Charset(t *testing.T) {
p := newTestParser(t, "text_plain_unknown_latin1.eml")
checkBodies(t, p, "ééééééé")
}
// TestParserSpecifiedLatin2Charset parses the text_plain_latin2.eml fixture
// and checks that the collected part bodies are exactly "řšřšřš".
func TestParserSpecifiedLatin2Charset(t *testing.T) {
p := newTestParser(t, "text_plain_latin2.eml")
checkBodies(t, p, "řšřšřš")
}
// TestParserEmbeddedLatin2Charset parses the
// text_html_embedded_latin2_encoding.eml fixture and checks that the collected
// part bodies are exactly the expected HTML document, whose markup carries a
// <meta charset="ISO-8859-2"> tag.
func TestParserEmbeddedLatin2Charset(t *testing.T) {
p := newTestParser(t, "text_html_embedded_latin2_encoding.eml")
checkBodies(t, p, `<html><head><meta charset="ISO-8859-2"></head><body>latin2 řšřš</body></html>`)
}
func f(filename string) io.ReadCloser {
f, err := os.Open(filepath.Join("testdata", filename))
@ -62,21 +61,3 @@ func s(filename string) string {
return string(b)
}
// checkBodies runs a walker over p with a default handler that records the
// Body of each visited part whose Body is non-nil, then asserts that the
// recorded bodies and wantBodies contain the same elements
// (assert.ElementsMatch, so ordering is not compared).
// The walk itself must finish without error or the test stops.
func checkBodies(t *testing.T, p *Parser, wantBodies ...string) {
var partBodies, expectedBodies [][]byte
// Note: the handler's parameter p shadows the outer *Parser p.
require.NoError(t, p.NewWalker().RegisterDefaultHandler(func(p *Part) (err error) {
if p.Body != nil {
partBodies = append(partBodies, p.Body)
}
return
}).Walk())
// Convert the expected strings to byte slices so both sides share a type.
for _, body := range wantBodies {
expectedBodies = append(expectedBodies, []byte(body))
}
assert.ElementsMatch(t, expectedBodies, partBodies)
}

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (
@ -34,11 +51,11 @@ func (p *Part) AddChild(child *Part) {
p.children = append(p.children, child)
}
// isUTF8 reports whether the part's Body consists entirely of valid UTF-8.
func (p *Part) isUTF8() bool {
return utf8.Valid(p.Body)
}
func (p *Part) ConvertToUTF8() error {
if utf8.Valid(p.Body) {
return nil
}
func (p *Part) convertToUTF8() error {
t, params, err := p.Header.ContentType()
if err != nil {
return err
@ -57,7 +74,7 @@ func (p *Part) convertToUTF8() error {
return err
}
// TODO: Is this okay? What about when the charset is embedded in structured text type eg html/xml?
// HELP: Is this okay? What about when the charset is embedded in structured text type eg html/xml?
params["charset"] = "utf-8"
p.Header.SetContentType(t, params)

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (

View File

@ -1,5 +0,0 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/html
<html><head><meta charset="ISO-8859-2"></head><body>latin2 řšřš</body></html>

View File

@ -1,5 +0,0 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=ISO-8859-1
ééééééé

View File

@ -1,5 +0,0 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain; charset=ISO-8859-2
řšřšřš

View File

@ -1,5 +0,0 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain
ééééééé

View File

@ -1,5 +0,0 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: text/plain
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import "regexp"

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
type Walker struct {

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (

View File

@ -1,3 +1,20 @@
// Copyright (c) 2020 Proton Technologies AG
//
// This file is part of ProtonMail Bridge.
//
// ProtonMail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ProtonMail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
package parser
import (