diff --git a/Changelog.md b/Changelog.md index abd9f5c1..fcd42b12 100644 --- a/Changelog.md +++ b/Changelog.md @@ -9,6 +9,7 @@ Changelog [format](http://keepachangelog.com/en/1.0.0/) * IMAP mailbox info update when new mailbox is created * IMAP extension Unselect * More logs about event loop activity +* GODT-72 Try ISO-8859-1 encoding if charset is not specified and it isn't UTF-8 ### Changed * GODT-162 User Agent does not contain bridge version, only client in format `client name/client version (os)` diff --git a/pkg/mime/encoding.go b/pkg/mime/encoding.go index b4e2c4ea..9a1d94bd 100644 --- a/pkg/mime/encoding.go +++ b/pkg/mime/encoding.go @@ -30,6 +30,7 @@ import ( "encoding/base64" "golang.org/x/text/encoding" + "golang.org/x/text/encoding/charmap" "golang.org/x/text/encoding/htmlindex" "golang.org/x/text/transform" ) @@ -197,16 +198,20 @@ func EncodeHeader(s string) string { } // DecodeCharset decodes the orginal using content type parameters. -// When charset is missing it checks thaht the content is valid utf8. +// When charset is missing it checks that the content is valid utf8. +// If it isn't, it checks whether the content is valid latin1 (iso-8859-1), and if so, +// reencodes it as utf-8. 
func DecodeCharset(original []byte, contentTypeParams map[string]string) ([]byte, error) { var decoder *encoding.Decoder var err error + if charset, ok := contentTypeParams["charset"]; ok { decoder, err = selectDecoder(charset) + } else if utf8.Valid(original) { + return original, nil + } else if decoded, decErr := charmap.ISO8859_1.NewDecoder().Bytes(original); decErr == nil { + return decoded, nil } else { - if utf8.Valid(original) { - return original, nil - } err = fmt.Errorf("non-utf8 content without charset specification") } diff --git a/test/features/imap/message/import.feature b/test/features/imap/message/import.feature new file mode 100644 index 00000000..da8ad4a4 --- /dev/null +++ b/test/features/imap/message/import.feature @@ -0,0 +1,87 @@ +Feature: IMAP import messages + Background: + Given there is connected user "user" + And there is IMAP client logged in as "user" + And there is IMAP client selected in "INBOX" + + @ignore + Scenario: Import message with double charset in content type + When IMAP client imports message to "INBOX" + """ + From: Bridge Test + To: Internal Bridge + Subject: Message with double charset in content type + Content-Type: text/plain; charset=utf-8; charset=utf-8 + Content-Disposition: inline + + Hello + + """ + Then IMAP response is "OK" + + @ignore + Scenario: Import message with attachment name encoded by RFC 2047 without quoting + When IMAP client imports message to "INBOX" + """ + From: Bridge Test + To: Internal Bridge + Subject: Message with attachment name encoded by RFC 2047 without quoting + Content-type: multipart/mixed; boundary="boundary" + + --boundary + Content-Type: text/plain + + Hello + + --boundary + Content-Type: application/pdf; name==?US-ASCII?Q?filename?= + Content-Disposition: attachment; filename==?US-ASCII?Q?filename?= + + somebytes + + --boundary-- + + """ + Then IMAP response is "OK" + + Scenario: Import message as latin1 without content type + When IMAP client imports message to "INBOX" with encoding 
"latin1" + """ + From: Bridge Test + To: Internal Bridge + Subject: Message in latin1 without content type + Content-Disposition: inline + + Hello íááá + + """ + Then IMAP response is "OK" + + Scenario: Import message as latin1 with content type + When IMAP client imports message to "INBOX" with encoding "latin1" + """ + From: Bridge Test + To: Internal Bridge + Subject: Message in latin1 with content type + Content-Disposition: inline + Content-Type: text/plain; charset=latin1 + + Hello íááá + + """ + Then IMAP response is "OK" + + Scenario: Import message as latin1 with wrong content type + When IMAP client imports message to "INBOX" with encoding "latin1" + """ + From: Bridge Test + To: Internal Bridge + Subject: Message in latin1 with wrong content type + Content-Disposition: inline + Content-Type: text/plain; charset=KOI8R + + Hello íááá + + """ + Then IMAP response is "OK" + diff --git a/test/imap_actions_messages_test.go b/test/imap_actions_messages_test.go index 99fd365a..195323b6 100644 --- a/test/imap_actions_messages_test.go +++ b/test/imap_actions_messages_test.go @@ -18,7 +18,11 @@ package tests import ( + "fmt" + "github.com/cucumber/godog" + "github.com/cucumber/godog/gherkin" + "golang.org/x/net/html/charset" ) func IMAPActionsMessagesFeatureContext(s *godog.Suite) { @@ -29,6 +33,8 @@ func IMAPActionsMessagesFeatureContext(s *godog.Suite) { s.Step(`^IMAP client "([^"]*)" deletes messages "([^"]*)"$`, imapClientNamedDeletesMessages) s.Step(`^IMAP client copies messages "([^"]*)" to "([^"]*)"$`, imapClientCopiesMessagesTo) s.Step(`^IMAP client moves messages "([^"]*)" to "([^"]*)"$`, imapClientMovesMessagesTo) + s.Step(`^IMAP client imports message to "([^"]*)"$`, imapClientCreatesMessage) + s.Step(`^IMAP client imports message to "([^"]*)" with encoding "([^"]*)"$`, imapClientCreatesMessageWithEncoding) s.Step(`^IMAP client creates message "([^"]*)" from "([^"]*)" to "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, 
imapClientCreatesMessageFromToWithBody) s.Step(`^IMAP client creates message "([^"]*)" from "([^"]*)" to address "([^"]*)" of "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, imapClientCreatesMessageFromToAddressOfUserWithBody) s.Step(`^IMAP client creates message "([^"]*)" from address "([^"]*)" of "([^"]*)" to "([^"]*)" with body "([^"]*)" in "([^"]*)"$`, imapClientCreatesMessageFromAddressOfUserToWithBody) @@ -84,8 +90,33 @@ func imapClientMovesMessagesTo(messageRange, newMailboxName string) error { return nil } +func imapClientCreatesMessage(mailboxName string, message *gherkin.DocString) error { + return imapClientCreatesMessageWithEncoding(mailboxName, "utf8", message) +} + +func imapClientCreatesMessageWithEncoding(mailboxName, encodingName string, message *gherkin.DocString) error { + encoding, _ := charset.Lookup(encodingName) + + msg := message.Content + if encodingName != "utf8" { + if encoding == nil { + return fmt.Errorf("unsupported encoding %s", encodingName) + } + + var err error + msg, err = encoding.NewEncoder().String(message.Content) + if err != nil { + return internalError(err, "encoding message content") + } + } + + res := ctx.GetIMAPClient("imap").Append(mailboxName, msg) + ctx.SetIMAPLastResponse("imap", res) + return nil +} + func imapClientCreatesMessageFromToWithBody(subject, from, to, body, mailboxName string) error { - res := ctx.GetIMAPClient("imap").Append(mailboxName, subject, from, to, body) + res := ctx.GetIMAPClient("imap").AppendBody(mailboxName, subject, from, to, body) ctx.SetIMAPLastResponse("imap", res) return nil } diff --git a/test/mocks/imap.go b/test/mocks/imap.go index 1e63ff69..0341b053 100644 --- a/test/mocks/imap.go +++ b/test/mocks/imap.go @@ -162,7 +162,12 @@ func (c *IMAPClient) Search(query string) *IMAPResponse { // Message -func (c *IMAPClient) Append(mailboxName, subject, from, to, body string) *IMAPResponse { +func (c *IMAPClient) Append(mailboxName, msg string) *IMAPResponse { + cmd := fmt.Sprintf("APPEND \"%s\" 
(\\Seen) \"25-Mar-2021 00:30:00 +0100\" {%d}\r\n%s", mailboxName, len(msg), msg) + return c.SendCommand(cmd) +} + +func (c *IMAPClient) AppendBody(mailboxName, subject, from, to, body string) *IMAPResponse { msg := fmt.Sprintf("Subject: %s\r\n", subject) msg += fmt.Sprintf("From: %s\r\n", from) msg += fmt.Sprintf("To: %s\r\n", to)