mirror of
https://github.com/ProtonMail/proton-bridge.git
synced 2025-12-11 13:16:53 +00:00
feat: convert content type in html meta tags
This commit is contained in:
1
go.mod
1
go.mod
@ -19,6 +19,7 @@ require (
|
||||
github.com/ProtonMail/go-imap-id v0.0.0-20190926060100-f94a56b9ecde
|
||||
github.com/ProtonMail/go-vcard v0.0.0-20180326232728-33aaa0a0c8a5
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.0.1
|
||||
github.com/PuerkitoBio/goquery v1.5.1
|
||||
github.com/abiosoft/ishell v2.0.0+incompatible
|
||||
github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db // indirect
|
||||
github.com/allan-simon/go-singleinstance v0.0.0-20160830203053-79edcfdc2dfc
|
||||
|
||||
6
go.sum
6
go.sum
@ -23,12 +23,16 @@ github.com/ProtonMail/go-vcard v0.0.0-20180326232728-33aaa0a0c8a5 h1:Uga1DHFN4GU
|
||||
github.com/ProtonMail/go-vcard v0.0.0-20180326232728-33aaa0a0c8a5/go.mod h1:oeP9CMN+ajWp5jKp1kue5daJNwMMxLF+ujPaUIoJWlA=
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.0.1 h1:x0uvDhry5WzoHeJO4J3dgMLhG4Z9PeBJ2O+sDOY0LcU=
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.0.1/go.mod h1:wQQCJo7DURO6S9VwH+kSDEYs/B63yZnAEfGlOg8YNBY=
|
||||
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
||||
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||
github.com/abiosoft/ishell v2.0.0+incompatible h1:zpwIuEHc37EzrsIYah3cpevrIc8Oma7oZPxr03tlmmw=
|
||||
github.com/abiosoft/ishell v2.0.0+incompatible/go.mod h1:HQR9AqF2R3P4XXpMpI0NAzgHf/aS6+zVXRj14cVk9qg=
|
||||
github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db h1:CjPUSXOiYptLbTdr1RceuZgSFDQ7U15ITERUGrUORx8=
|
||||
github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db/go.mod h1:rB3B4rKii8V21ydCbIzH5hZiCQE7f5E9SzUb/ZZx530=
|
||||
github.com/allan-simon/go-singleinstance v0.0.0-20160830203053-79edcfdc2dfc h1:mZca0/HZ/XWXP9txkfdl2GH6mUzBqAlyJz3u5Lg8fuA=
|
||||
github.com/allan-simon/go-singleinstance v0.0.0-20160830203053-79edcfdc2dfc/go.mod h1:qqsTQiwdyqxU05iDCsi0oN3P4nrVxAmn8xCtODDSf/U=
|
||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/certifi/gocertifi v0.0.0-20200211180108-c7c1fbc02894 h1:JLaf/iINcLyjwbtTsCJjc6rtlASgHeIJPrB6QmwURnA=
|
||||
github.com/certifi/gocertifi v0.0.0-20200211180108-c7c1fbc02894/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
|
||||
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
||||
@ -165,11 +169,13 @@ github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
|
||||
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
|
||||
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190420063019-afa5a82059c6/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
|
||||
@ -93,6 +93,13 @@ func convertForeignEncodings(p *parser.Parser) error {
|
||||
|
||||
// HELP: Is it correct to only do this to text types?
|
||||
return p.NewWalker().
|
||||
RegisterContentTypeHandler("text/html", func(p *parser.Part) error {
|
||||
if err := p.ConvertToUTF8(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return p.ConvertMetaCharset()
|
||||
}).
|
||||
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
|
||||
return p.ConvertToUTF8()
|
||||
}).
|
||||
|
||||
@ -18,12 +18,16 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"mime"
|
||||
"unicode/utf8"
|
||||
|
||||
pmmime "github.com/ProtonMail/proton-bridge/pkg/mime"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/emersion/go-message"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
@ -79,18 +83,51 @@ func (p *Part) ConvertToUTF8() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// HELP: Is this okay? What about when the charset is embedded in structured text type eg html/xml?
|
||||
if params == nil {
|
||||
params = make(map[string]string)
|
||||
}
|
||||
|
||||
params["charset"] = "utf-8"
|
||||
params["charset"] = "UTF-8"
|
||||
|
||||
p.Header.SetContentType(t, params)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Part) ConvertMetaCharset() error {
|
||||
doc, err := html.Parse(bytes.NewReader(p.Body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) {
|
||||
if val, ok := sel.Attr("content"); ok {
|
||||
t, params, err := mime.ParseMediaType(val)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
params["charset"] = "UTF-8"
|
||||
|
||||
sel.SetAttr("content", mime.FormatMediaType(t, params))
|
||||
}
|
||||
|
||||
if _, ok := sel.Attr("charset"); ok {
|
||||
sel.SetAttr("charset", "UTF-8")
|
||||
}
|
||||
})
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
if err := html.Render(buf, doc); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.Body = buf.Bytes()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func selectSuitableDecoder(p *Part, t string, params map[string]string) *encoding.Decoder {
|
||||
if charset, ok := params["charset"]; ok {
|
||||
logrus.WithField("charset", charset).Debug("The part has a specified charset")
|
||||
|
||||
@ -268,7 +268,7 @@ func TestParseTextHTML(t *testing.T) {
|
||||
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, "<html><body>This is body of <b>HTML mail</b> without attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "<html><head></head><body>This is body of <b>HTML mail</b> without attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "This is body of *HTML mail* without attachment", plainBody)
|
||||
|
||||
assert.Len(t, attReaders, 0)
|
||||
@ -283,7 +283,7 @@ func TestParseTextHTMLAlready7Bit(t *testing.T) {
|
||||
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, "<html><body>This is body of <b>HTML mail</b> without attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "<html><head></head><body>This is body of <b>HTML mail</b> without attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "This is body of *HTML mail* without attachment", plainBody)
|
||||
|
||||
assert.Len(t, attReaders, 0)
|
||||
@ -298,7 +298,7 @@ func TestParseTextHTMLWithOctetAttachment(t *testing.T) {
|
||||
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, "<html><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "<html><head></head><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "This is body of *HTML mail* with attachment", plainBody)
|
||||
|
||||
require.Len(t, attReaders, 1)
|
||||
@ -315,7 +315,7 @@ func TestParseTextHTMLWithPlainAttachment(t *testing.T) {
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
// BAD: plainBody should not be empty!
|
||||
assert.Equal(t, "<html><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "<html><head></head><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "This is body of *HTML mail* with attachment", plainBody)
|
||||
|
||||
require.Len(t, attReaders, 1)
|
||||
@ -331,7 +331,7 @@ func TestParseTextHTMLWithImageInline(t *testing.T) {
|
||||
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, "<html><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "<html><head></head><body>This is body of <b>HTML mail</b> with attachment</body></html>", m.Body)
|
||||
assert.Equal(t, "This is body of *HTML mail* with attachment", plainBody)
|
||||
|
||||
// The inline image is an 8x8 mic-dropping gopher.
|
||||
@ -368,8 +368,7 @@ func TestParseTextHTMLWithEmbeddedForeignEncoding(t *testing.T) {
|
||||
assert.Equal(t, `"Sender" <sender@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `"Receiver" <receiver@pm.me>`, m.ToList[0].String())
|
||||
|
||||
// BAD: Bridge does not detect the charset specified in the <meta> tag of the html.
|
||||
assert.Equal(t, `<html><head><meta charset="ISO-8859-2"></head><body>latin2 řšřš</body></html>`, m.Body)
|
||||
assert.Equal(t, `<html><head><meta charset="UTF-8"/></head><body>latin2 řšřš</body></html>`, m.Body)
|
||||
assert.Equal(t, `latin2 řšřš`, plainBody)
|
||||
|
||||
assert.Len(t, attReaders, 0)
|
||||
@ -384,15 +383,14 @@ func TestParseMultipartAlternative(t *testing.T) {
|
||||
assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, `<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
assert.Equal(t, `<html><head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
|
||||
</head>
|
||||
<body>
|
||||
<b>aoeuaoeu</b>
|
||||
</body>
|
||||
</html>
|
||||
`, m.Body)
|
||||
|
||||
|
||||
</body></html>`, m.Body)
|
||||
|
||||
assert.Equal(t, "*aoeuaoeu*\n\n", plainBody)
|
||||
}
|
||||
@ -406,15 +404,14 @@ func TestParseMultipartAlternativeNested(t *testing.T) {
|
||||
assert.Equal(t, `"schizofrenic" <schizofrenic@pm.me>`, m.Sender.String())
|
||||
assert.Equal(t, `<pmbridgeietest@outlook.com>`, m.ToList[0].String())
|
||||
|
||||
assert.Equal(t, `<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
assert.Equal(t, `<html><head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
|
||||
</head>
|
||||
<body>
|
||||
<b>multipart 2.2</b>
|
||||
</body>
|
||||
</html>
|
||||
`, m.Body)
|
||||
|
||||
|
||||
</body></html>`, m.Body)
|
||||
|
||||
assert.Equal(t, "*multipart 2.1*\n\n", plainBody)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user