From 83bbdbd63e7482def963932b55075e962a8c0a6e Mon Sep 17 00:00:00 2001 From: Romain Le Jeune Date: Fri, 10 Nov 2023 12:50:15 +0000 Subject: [PATCH] feat(GODT-3113): Only force UTF-8 charset for HTML part when needed. --- pkg/message/parser.go | 3 -- pkg/message/parser/part.go | 31 +++++++++++++------ pkg/message/parser_test.go | 12 +++---- .../smtp/send/html_to_internal.feature | 4 +-- .../smtp/send/plain_to_internal.feature | 2 +- 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/pkg/message/parser.go b/pkg/message/parser.go index f90fda79..76557e62 100644 --- a/pkg/message/parser.go +++ b/pkg/message/parser.go @@ -197,9 +197,6 @@ func convertForeignEncodings(p *parser.Parser) error { return p.NewWalker(). RegisterContentTypeHandler("text/html", func(p *parser.Part) error { - if p.IsAttachment() { - return nil - } if err := p.ConvertToUTF8(); err != nil { return err } diff --git a/pkg/message/parser/part.go b/pkg/message/parser/part.go index 8a259efc..1dbb36cb 100644 --- a/pkg/message/parser/part.go +++ b/pkg/message/parser/part.go @@ -41,6 +41,8 @@ type Part struct { children Parts } +const utf8Charset = "UTF-8" + func (p *Part) ContentType() (string, map[string]string, error) { t, params, err := p.Header.ContentType() if err != nil { @@ -116,7 +118,7 @@ func (p *Part) ConvertToUTF8() error { params = make(map[string]string) } - params["charset"] = "UTF-8" + params["charset"] = utf8Charset p.Header.SetContentType(t, params) @@ -129,6 +131,8 @@ func (p *Part) ConvertMetaCharset() error { return err } + // Override charset to UTF-8 in meta headers only if needed. + var metaModified = false goquery.NewDocumentFromNode(doc).Find("meta").Each(func(n int, sel *goquery.Selection) { if val, ok := sel.Attr("content"); ok { t, params, err := pmmime.ParseMediaType(val) @@ -136,24 +140,31 @@ func (p *Part) ConvertMetaCharset() error { return } - params["charset"] = "UTF-8" + if charset, ok := params["charset"]; ok && charset != utf8Charset { + params["charset"] = utf8Charset + } sel.SetAttr("content", mime.FormatMediaType(t, params)) + metaModified = true } - if _, ok := sel.Attr("charset"); ok { - sel.SetAttr("charset", "UTF-8") + if charset, ok := sel.Attr("charset"); ok && charset != utf8Charset { + sel.SetAttr("charset", utf8Charset) + metaModified = true } }) - buf := new(bytes.Buffer) + // Override the body part only if modification was applied + // as html.render will sanitise the html headers. + if metaModified { + buf := new(bytes.Buffer) - if err := html.Render(buf, doc); err != nil { - return err + if err := html.Render(buf, doc); err != nil { + return err + } + + p.Body = buf.Bytes() } - - p.Body = buf.Bytes() - return nil } diff --git a/pkg/message/parser_test.go b/pkg/message/parser_test.go index 65345981..24b2101d 100644 --- a/pkg/message/parser_test.go +++ b/pkg/message/parser_test.go @@ -431,7 +431,7 @@ func TestParseTextHTML(t *testing.T) { assert.Equal(t, `"Sender" `, m.Sender.String()) assert.Equal(t, `"Receiver" `, m.ToList[0].String()) - assert.Equal(t, "This is body of HTML mail without attachment", string(m.RichBody)) + assert.Equal(t, "This is body of HTML mail without attachment", string(m.RichBody)) assert.Equal(t, "This is body of *HTML mail* without attachment", string(m.PlainBody)) assert.Len(t, m.Attachments, 0) @@ -446,7 +446,7 @@ func TestParseTextHTMLAlready7Bit(t *testing.T) { assert.Equal(t, `"Sender" `, m.Sender.String()) assert.Equal(t, `"Receiver" `, m.ToList[0].String()) - assert.Equal(t, "This is body of HTML mail without attachment", string(m.RichBody)) + assert.Equal(t, "This is body of HTML mail without attachment", string(m.RichBody)) assert.Equal(t, "This is body of *HTML mail* without attachment", string(m.PlainBody)) assert.Len(t, m.Attachments, 0) @@ -461,7 +461,7 @@ func TestParseTextHTMLWithOctetAttachment(t *testing.T) { assert.Equal(t, `"Sender" `, m.Sender.String()) assert.Equal(t, `"Receiver" `, m.ToList[0].String()) - assert.Equal(t, "This is body of HTML mail with attachment", string(m.RichBody)) + assert.Equal(t, "This is body of HTML mail with attachment", string(m.RichBody)) assert.Equal(t, "This is body of *HTML mail* with attachment", string(m.PlainBody)) require.Len(t, m.Attachments, 1) @@ -478,7 +478,7 @@ func TestParseTextHTMLWithPlainAttachment(t *testing.T) { assert.Equal(t, `"Receiver" `, m.ToList[0].String()) // BAD: plainBody should not be empty! - assert.Equal(t, "This is body of HTML mail with attachment", string(m.RichBody)) + assert.Equal(t, "This is body of HTML mail with attachment", string(m.RichBody)) assert.Equal(t, "This is body of *HTML mail* with attachment", string(m.PlainBody)) require.Len(t, m.Attachments, 1) @@ -496,7 +496,7 @@ func TestParseTextHTMLWithImageInline(t *testing.T) { require.Len(t, m.Attachments, 1) - assert.Equal(t, fmt.Sprintf(`This is body of HTML mail with attachment`, m.Attachments[0].ContentID), string(m.RichBody)) + assert.Equal(t, fmt.Sprintf(`This is body of HTML mail with attachment`, m.Attachments[0].ContentID), string(m.RichBody)) assert.Equal(t, "This is body of *HTML mail* with attachment", string(m.PlainBody)) // The inline image is an 8x8 mic-dropping gopher. @@ -627,7 +627,7 @@ func TestParseWithTrailingEndOfMailIndicator(t *testing.T) { assert.Equal(t, `"Sender" `, m.Sender.String()) assert.Equal(t, `"Receiver" `, m.ToList[0].String()) - assert.Equal(t, "boo!", string(m.RichBody)) + assert.Equal(t, "\nboo!", string(m.RichBody)) assert.Equal(t, "boo!", string(m.PlainBody)) } diff --git a/tests/features/smtp/send/html_to_internal.feature b/tests/features/smtp/send/html_to_internal.feature index 33dda0b6..b35c3dec 100644 --- a/tests/features/smtp/send/html_to_internal.feature +++ b/tests/features/smtp/send/html_to_internal.feature @@ -241,6 +241,7 @@ Feature: SMTP sending of HTMl messages to Internal recipient { "content-type": "application/pgp-keys", "content-disposition": "attachment", + "transfer-encoding": "base64" } ] @@ -763,7 +764,7 @@ Feature: SMTP sending of HTMl messages to Internal recipient "content-disposition": "attachment", "content-disposition-filename": "index.html", "transfer-encoding": "base64", - "body-is": "PCFET0NUWVBFIGh0bWw+PGh0bWw+PGhlYWQ+Cjx0aXRsZT5QYWdlIFRpdGxlPC90aXRsZT4KPC9o\r\nZWFkPgo8Ym9keT4KCjxoMT5NeSBGaXJzdCBIZWFkaW5nPC9oMT4KPHA+TXkgZmlyc3QgcGFyYWdy\r\nYXBoLjwvcD4KCgogPC9ib2R5PjwvaHRtbD4=" + "body-is": "IDwhRE9DVFlQRSBodG1sPg0KPGh0bWw+DQo8aGVhZD4NCjx0aXRsZT5QYWdlIFRpdGxlPC90aXRs\r\nZT4NCjwvaGVhZD4NCjxib2R5Pg0KDQo8aDE+TXkgRmlyc3QgSGVhZGluZzwvaDE+DQo8cD5NeSBm\r\naXJzdCBwYXJhZ3JhcGguPC9wPg0KDQo8L2JvZHk+DQo8L2h0bWw+IA==" }, { "content-type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -1070,7 +1071,6 @@ Feature: SMTP sending of HTMl messages to Internal recipient } """ - Scenario: Replying to a message after enabling attach public key When SMTP client "1" sends the following message from "[user:user]@[domain]" to "[user:to]@[domain]": """ diff --git a/tests/features/smtp/send/plain_to_internal.feature b/tests/features/smtp/send/plain_to_internal.feature index 03feb069..795a08fc 100644 --- a/tests/features/smtp/send/plain_to_internal.feature +++ b/tests/features/smtp/send/plain_to_internal.feature @@ -1242,7 +1242,7 @@ Feature: SMTP sending of PLAIN messages to Internal recipient "content-disposition": "attachment", "content-disposition-filename": "index.html", "transfer-encoding": "base64", - "body-is": "PCFET0NUWVBFIGh0bWw+PGh0bWw+PGhlYWQ+Cjx0aXRsZT5QYWdlIFRpdGxlPC90aXRsZT4KPC9o\r\nZWFkPgo8Ym9keT4KCjxoMT5NeSBGaXJzdCBIZWFkaW5nPC9oMT4KPHA+TXkgZmlyc3QgcGFyYWdy\r\nYXBoLjwvcD4KCgogPC9ib2R5PjwvaHRtbD4=" + "body-is": "IDwhRE9DVFlQRSBodG1sPg0KPGh0bWw+DQo8aGVhZD4NCjx0aXRsZT5QYWdlIFRpdGxlPC90aXRs\r\nZT4NCjwvaGVhZD4NCjxib2R5Pg0KDQo8aDE+TXkgRmlyc3QgSGVhZGluZzwvaDE+DQo8cD5NeSBm\r\naXJzdCBwYXJhZ3JhcGguPC9wPg0KDQo8L2JvZHk+DQo8L2h0bWw+IA==" }, { "content-type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",