Support quoted printable and filter out some auto-generated Gmail labels

This commit is contained in:
Michal Horejsek
2020-10-15 13:53:48 +02:00
parent 64fbcdc1ca
commit 79cafee2eb
2 changed files with 32 additions and 2 deletions

View File

@ -21,6 +21,7 @@ import (
"bufio"
"bytes"
"io"
"mime"
"os"
"strings"
)
@ -29,6 +30,17 @@ type stringSet map[string]bool
const xGmailLabelsHeader = "X-Gmail-Labels"
// filteredOutGmailLabels lists the labels that are dropped while parsing an
// X-Gmail-Labels header value, because they might be auto-generated by Gmail
// and are not wanted in the list shown to users.
//
// getGmailLabelsFromValue compares each whitespace-trimmed label against these
// entries with ==, so matching is exact and case-sensitive.
var filteredOutGmailLabels = []string{ //nolint[gochecknoglobals]
"Unread",
"Opened",
"IMAP_Junk",
"IMAP_NonJunk",
"IMAP_NotJunk",
"IMAP_$NotJunk",
}
func getGmailLabelsFromMboxFile(filePath string) (stringSet, error) {
f, err := os.Open(filePath) //nolint[gosec]
if err != nil {
@ -38,7 +50,7 @@ func getGmailLabelsFromMboxFile(filePath string) (stringSet, error) {
}
func getGmailLabelsFromMboxReader(f io.Reader) (stringSet, error) {
allLabels := map[string]bool{}
allLabels := stringSet{}
// Scanner is not used as it does not support long lines and some mbox
// files contain very long lines even though that should not be happening.
@ -78,12 +90,28 @@ func getGmailLabelsFromMessage(body []byte) (stringSet, error) {
// getGmailLabelsFromValue parses the value of an X-Gmail-Labels header into a
// set of label names.
//
// A leading "X-Gmail-Labels:" prefix is stripped if present, MIME encoded-words
// are decoded, and the result is split on commas. Every label is trimmed of
// surrounding whitespace; empty labels and labels listed in
// filteredOutGmailLabels are skipped. If decoding fails, the error is logged
// and the undecoded value is parsed as is.
func getGmailLabelsFromValue(value string) stringSet {
value = strings.TrimPrefix(value, xGmailLabelsHeader+":")
labels := map[string]bool{}
// Decode encoded-words (e.g. "=?UTF-8?Q?...?=") before splitting, so that the
// commas inside a single encoded word are seen as label separators.
// A decoding failure is not fatal: it is logged and the raw value is kept.
if decoded, err := new(mime.WordDecoder).DecodeHeader(value); err != nil {
log.WithError(err).Error("Failed to decode header")
} else {
value = decoded
}
// NOTE(review): labels is also declared with := a few lines above as
// map[string]bool{}; both declarations cannot compile in the same scope.
// Presumably this view shows the old and the new line of a diff - confirm
// that only one of them exists in the real file.
labels := stringSet{}
for _, label := range strings.Split(value, ",") {
label = strings.TrimSpace(label)
// Skip empty items such as those produced by ",," or a trailing comma.
if label == "" {
continue
}
// Drop labels that are on the filtered-out list (exact match).
skip := false
for _, filteredOutLabel := range filteredOutGmailLabels {
if label == filteredOutLabel {
skip = true
break
}
}
if skip {
continue
}
labels[label] = true
}
return labels

View File

@ -114,6 +114,8 @@ func TestGetGmailLabelsFromValue(t *testing.T) {
{" , ", toSet()},
{" ", toSet()},
{"", toSet()},
{"=?UTF-8?Q?Archived,Category_personal,test_=F0=9F=98=80=F0=9F=99=83?=", toSet("Archived", "Category personal", "test 😀🙃")},
{"IMAP_NotJunk,Foo,Opened,bar,Unread", toSet("Foo", "bar")},
}
for _, tc := range tests {
tc := tc