We build too many walls and not enough bridges

2026-02-12 11:58:33 +00:00 · 2020-04-08 12:59:16 +02:00
commit 17f4d6097a
494 changed files with 62753 additions and 0 deletions
--- a/pkg/mime/utf7Decoder.go
+++ b/pkg/mime/utf7Decoder.go
@ -0,0 +1,188 @@
+// Copyright (c) 2020 Proton Technologies AG
+//
+// This file is part of ProtonMail Bridge.
+//
+// ProtonMail Bridge is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// ProtonMail Bridge is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with ProtonMail Bridge.  If not, see <https://www.gnu.org/licenses/>.
+
+package pmmime
+
+import (
+	"encoding/base64"
+	"errors"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/transform"
+)
+
+// utf7Decoder copied from: https://github.com/cention-sany/utf7/blob/master/utf7.go
+// We need `encoding.Decoder` instead of function `UTF7DecodeBytes`.
+type utf7Decoder struct {
+	transform.NopResetter
+}
+
+// NewUtf7Decoder returns a new decoder for utf7.
+func NewUtf7Decoder() *encoding.Decoder {
+	return &encoding.Decoder{Transformer: utf7Decoder{}}
+}
+
+const (
+	uRepl = '\uFFFD' // Unicode replacement code point
+	u7min = 0x20     // Minimum self-representing UTF-7 value
+	u7max = 0x7E     // Maximum self-representing UTF-7 value
+)
+
+// ErrBadUTF7 is returned to indicate the invalid modified UTF-7 encoding.
+var ErrBadUTF7 = errors.New("utf7: bad utf-7 encoding")
+
+const modifiedbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+
+var u7enc = base64.NewEncoding(modifiedbase64)
+
+func isModifiedBase64(r byte) bool {
+	if r >= 'A' && r <= 'Z' {
+		return true
+	} else if r >= 'a' && r <= 'z' {
+		return true
+	} else if r >= '0' && r <= '9' {
+		return true
+	} else if r == '+' || r == '/' {
+		return true
+	}
+	return false
+}
+
+func (d utf7Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	var implicit bool
+	var tmp int
+
+	nd, n := len(dst), len(src)
+	if n == 0 && !atEOF {
+		return 0, 0, transform.ErrShortSrc
+	}
+	for ; nSrc < n; nSrc++ {
+		if nDst >= nd {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		if c := src[nSrc]; ((c < u7min || c > u7max) &&
+			c != '\t' && c != '\r' && c != '\n') ||
+			c == '~' || c == '\\' {
+			return nDst, nSrc, ErrBadUTF7 // Illegal code point in ASCII mode.
+		} else if c != '+' {
+			dst[nDst] = c // Character is self-representing.
+			nDst++
+			continue
+		}
+		// Found '+'.
+		start := nSrc + 1
+		tmp = nSrc // nSrc still points to '+', tmp points to the end of BASE64.
+
+		// Find the end of the Base64 or "+-" segment.
+		implicit = false
+		for tmp++; tmp < n && src[tmp] != '-'; tmp++ {
+			if !isModifiedBase64(src[tmp]) {
+				if tmp == start {
+					return nDst, tmp, ErrBadUTF7 // '+' next char must modified base64.
+				}
+				// Implicit shift back to ASCII, no need for '-' character.
+				implicit = true
+				break
+			}
+		}
+		if tmp == start {
+			if tmp == n {
+				// Did not find '-' sign and '+' is the last character.
+				// Total nSrc does not include '+'.
+				if atEOF {
+					return nDst, nSrc, ErrBadUTF7 // '+' can not be at the end.
+				}
+				// '+' can not be at the end, the source is too short.
+				return nDst, nSrc, transform.ErrShortSrc
+			}
+			dst[nDst] = '+' // Escape sequence "+-".
+			nDst++
+		} else if tmp == n && !atEOF {
+			// No EOF found, the source is too short.
+			return nDst, nSrc, transform.ErrShortSrc
+		} else if b := utf7dec(src[start:tmp]); len(b) > 0 {
+			if len(b)+nDst > nd {
+				// Need more space in dst for the decoded modified BASE64 unicode.
+				// Total nSrc does not include '+'.
+				return nDst, nSrc, transform.ErrShortDst
+			}
+			copy(dst[nDst:], b) // Control or non-ASCII code points in Base64.
+			nDst += len(b)
+			if implicit {
+				if nDst >= nd {
+					return nDst, tmp, transform.ErrShortDst
+				}
+				dst[nDst] = src[tmp] // Implicit shift.
+				nDst++
+			}
+			if tmp == n {
+				return nDst, tmp, nil
+			}
+		} else {
+			return nDst, nSrc, ErrBadUTF7 // Bad encoding.
+		}
+		nSrc = tmp
+	}
+	return
+}
+
+// utf7dec extracts UTF-16-BE bytes from Base64 data and converts them to UTF-8.
+// A nil slice is returned if the encoding is invalid.
+func utf7dec(b64 []byte) []byte {
+	var b []byte
+
+	// Allocate a single block of memory large enough to store the Base64 data
+	// (if padding is required), UTF-16-BE bytes, and decoded UTF-8 bytes.
+	// Since a 2-byte UTF-16 sequence may expand into a 3-byte UTF-8 sequence,
+	// double the space allocation for UTF-8.
+	if n := len(b64); b64[n-1] == '=' {
+		return nil
+	} else if n&3 == 0 {
+		b = make([]byte, u7enc.DecodedLen(n)*3)
+	} else {
+		n += 4 - n&3
+		b = make([]byte, n+u7enc.DecodedLen(n)*3)
+		copy(b[copy(b, b64):n], []byte("=="))
+		b64, b = b[:n], b[n:]
+	}
+
+	// Decode Base64 into the first 1/3rd of b.
+	n, err := u7enc.Decode(b, b64)
+	if err != nil || n&1 == 1 {
+		return nil
+	}
+
+	// Decode UTF-16-BE into the remaining 2/3rds of b.
+	b, s := b[:n], b[n:]
+	j := 0
+	for i := 0; i < n; i += 2 {
+		r := rune(b[i])<<8 | rune(b[i+1])
+		if utf16.IsSurrogate(r) {
+			if i += 2; i == n {
+				return nil
+			}
+			r2 := rune(b[i])<<8 | rune(b[i+1])
+			if r = utf16.DecodeRune(r, r2); r == uRepl {
+				return nil
+			}
+		}
+		j += utf8.EncodeRune(s[j:], r)
+	}
+	return s[:j]
+}