feat: initial parser exposing walker/writer

This commit is contained in:
James Houlahan
2020-06-30 17:43:04 +02:00
parent 7207a5d59e
commit 6ea3fc1963
10 changed files with 513 additions and 0 deletions

View File

@ -34,6 +34,7 @@ import (
"strconv"
"strings"
"github.com/ProtonMail/proton-bridge/pkg/message/parser"
pmmime "github.com/ProtonMail/proton-bridge/pkg/mime"
"github.com/ProtonMail/proton-bridge/pkg/pmapi"
"github.com/jaytaylor/html2text"
@ -412,6 +413,55 @@ func (pka *PublicKeyAttacher) Accept(partReader io.Reader, header textproto.MIME
// ======= Parser ==========
// ParseGoMessage parses the message read from r with the parser package and
// extracts the pieces needed to build a message.
//
// Results:
//   - m is not assigned anywhere in this function and is therefore always nil.
//   - mimeBody is the message re-serialised without the parts whose content
//     disposition is "attachment".
//   - plainContents is the concatenated plain-text rendering of every
//     text/html part that is not an attachment (the attachment handler takes
//     precedence and does not forward to the content-type handler).
//   - atts holds one reader per attachment part, positioned at the start of
//     the part body.
//   - err is non-nil if parsing, walking or writing the message fails.
func ParseGoMessage(r io.Reader) (m *pmapi.Message, mimeBody string, plainContents string, atts []io.Reader, err error) {
	p, err := parser.New(r)
	if err != nil {
		return
	}

	walker := p.
		NewWalker().
		WithContentDispositionHandler("attachment", func(part *parser.Part, _ parser.PartHandler) error {
			atts = append(atts, bytes.NewReader(part.Body))
			return nil
		}).
		WithContentTypeHandler("text/html", func(part *parser.Part) error {
			// A conversion failure must not abort the walk: fall back to the
			// raw HTML instead. The conversion error is kept in its own
			// variable so that it is not propagated as the handler's result.
			plain, convErr := html2text.FromString(string(part.Body))
			if convErr != nil {
				plain = string(part.Body)
			}
			plainContents += plain
			return nil
		})

	if err = walker.Walk(); err != nil {
		return
	}

	writer := p.
		NewWriter().
		WithCondition(func(part *parser.Part) bool {
			// We don't write if the content disposition says it's an attachment.
			if disp, _, dispErr := part.Header.ContentDisposition(); dispErr == nil && disp == "attachment" {
				return false
			}
			return true
		})

	buf := new(bytes.Buffer)
	if err = writer.Write(buf); err != nil {
		return
	}

	mimeBody = buf.String()

	return
}
func Parse(r io.Reader, attachedPublicKey, attachedPublicKeyName string) (m *pmapi.Message, mimeBody string, plainContents string, atts []io.Reader, err error) {
secondReader := new(bytes.Buffer)
_, _ = secondReader.ReadFrom(r)

View File

@ -0,0 +1,114 @@
package parser
import (
"io"
"io/ioutil"
"github.com/emersion/go-message"
)
// Parser reads a message into a tree of Parts. The tree can then be
// traversed with a Walker or serialised with a Writer.
type Parser struct {
// stack holds the parts that are currently being built, innermost last.
stack []*Part
// root is the outermost part; it is set when the last open part is closed.
root *Part
}
// New creates a Parser and immediately parses the message read from r.
// The parser is returned together with any error encountered while parsing.
func New(r io.Reader) (p *Parser, err error) {
	p = &Parser{}
	err = p.parse(r)
	return p, err
}
// NewWalker returns a Walker that traverses the parsed part tree starting at
// its root.
func (p *Parser) NewWalker() *Walker {
	walker := newWalker(p.root)
	return walker
}
// NewWriter returns a Writer that serialises the parsed part tree starting at
// its root.
func (p *Parser) NewWriter() *Writer {
	writer := newWriter(p.root)
	return writer
}
// parse reads the top-level entity from r and builds the part tree from it.
func (p *Parser) parse(r io.Reader) (err error) {
	entity, readErr := message.Read(r)
	if readErr != nil {
		return readErr
	}

	return p.parseEntity(entity)
}
// enter opens a new, empty part and makes it the current one by pushing it
// onto the stack.
func (p *Parser) enter() {
	fresh := new(Part)
	p.stack = append(p.stack, fresh)
}
// exit closes the current part: it is popped off the stack and attached as a
// child of the part below it, or recorded as the root when it was the last
// open part. It must only be called after a matching enter.
func (p *Parser) exit() {
	last := len(p.stack) - 1
	built := p.stack[last]
	p.stack = p.stack[:last]

	if len(p.stack) == 0 {
		p.root = built
		return
	}

	parent := p.top()
	parent.children = append(parent.children, built)
}
// top returns the part currently being built, i.e. the last element of the
// stack. The stack must not be empty.
func (p *Parser) top() *Part {
	last := len(p.stack) - 1
	return p.stack[last]
}
// withHeader stores h as the header of the part currently being built.
func (p *Parser) withHeader(h message.Header) {
	current := p.top()
	current.Header = h
}
// withBody stores the given bytes as the body of the part currently being
// built.
func (p *Parser) withBody(bytes []byte) {
	current := p.top()
	current.Body = bytes
}
// parseEntity builds one part from e. The part is opened before and closed
// after its content is parsed, so parts created while parsing the content end
// up as its children. Multipart entities are parsed child by child; any other
// entity has its body read in full.
func (p *Parser) parseEntity(e *message.Entity) (err error) {
	p.enter()
	defer p.exit()

	p.withHeader(e.Header)

	mr := e.MultipartReader()
	if mr == nil {
		return p.parsePart(e)
	}

	return p.parseMultipart(mr)
}
// parsePart reads the whole body of e into memory and stores it on the part
// currently being built.
func (p *Parser) parsePart(e *message.Entity) (err error) {
	body, readErr := ioutil.ReadAll(e.Body)
	if readErr != nil {
		return readErr
	}

	p.withBody(body)

	return nil
}
// parseMultipart parses every child entity produced by r. Reaching the end of
// the multipart body (io.EOF) ends the loop successfully; any other error from
// the reader or from parsing a child is returned.
func (p *Parser) parseMultipart(r message.MultipartReader) (err error) {
	for {
		child, nextErr := r.NextPart()
		if nextErr != nil {
			return ignoreEOF(nextErr)
		}

		if err = p.parseEntity(child); err != nil {
			return err
		}
	}
}
// ignoreEOF maps io.EOF to nil and returns every other error (including nil)
// unchanged.
func ignoreEOF(err error) error {
	switch err {
	case io.EOF:
		return nil
	default:
		return err
	}
}

View File

@ -0,0 +1,39 @@
package parser
import (
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// newTestParser parses the fixture file msg from the testdata directory and
// returns the resulting Parser. The test fails immediately if parsing returns
// an error.
func newTestParser(t *testing.T, msg string) *Parser {
	t.Helper()

	r := f(msg)
	// New reads the complete message into memory before returning, so the
	// fixture file can be released as soon as it is done.
	defer func() { _ = r.Close() }()

	p, err := New(r)
	require.NoError(t, err)

	return p
}
// f opens the named fixture inside the testdata directory and returns it for
// reading. It panics if the file cannot be opened. The caller is responsible
// for closing the returned file.
func f(filename string) io.ReadCloser {
	path := filepath.Join("testdata", filename)

	file, err := os.Open(path)
	if err != nil {
		panic(err)
	}

	return file
}
// s returns the full contents of the named fixture inside the testdata
// directory as a string. It panics if the file cannot be opened or read.
func s(filename string) string {
	file := f(filename)
	// Release the file handle once the contents have been read.
	defer func() { _ = file.Close() }()

	b, err := ioutil.ReadAll(file)
	if err != nil {
		panic(err)
	}

	return string(b)
}

View File

@ -0,0 +1,96 @@
package parser
import (
"github.com/emersion/go-message"
)
// Part is one node of the parsed message tree.
type Part struct {
// Header is the header of the entity this part was built from.
Header message.Header
// Body is the fully read body of a non-multipart entity; the parser does
// not set it for multipart entities.
Body []byte
// children are the parts nested inside this part, in parsing order.
children []*Part
}
// visit handles this part and then visits each of its children in order
// (depth-first, parent before children). The first error returned by a handler
// stops the traversal and is returned.
func (p *Part) visit(w *Walker) (err error) {
	if err = p.handle(w); err != nil {
		return err
	}

	for i := range p.children {
		if err = p.children[i].visit(w); err != nil {
			return err
		}
	}

	return nil
}
// getTypeHandler looks up the handler registered on w for this part's content
// type. It returns nil when the content type cannot be determined or no
// handler is registered for it.
func (p *Part) getTypeHandler(w *Walker) (hdl PartHandler) {
	if contType, _, err := p.Header.ContentType(); err == nil {
		hdl = w.typeHandlers[contType]
	}

	return hdl
}
// getDispHandler looks up the handler registered on w for this part's content
// disposition. It returns nil when the disposition cannot be determined or no
// handler is registered for it.
func (p *Part) getDispHandler(w *Walker) (hdl DispHandler) {
	if contDisp, _, err := p.Header.ContentDisposition(); err == nil {
		hdl = w.dispHandlers[contDisp]
	}

	return hdl
}
// handle dispatches this part to the appropriate handler of w.
//
// A matching content-disposition handler takes precedence; it is given the
// handler that would otherwise have run (the matching content-type handler,
// or the default handler if there is none) and decides itself whether to
// call it. Without a disposition handler, the content-type handler runs, and
// the default handler is the last resort.
func (p *Part) handle(w *Walker) (err error) {
	byType := p.getTypeHandler(w)
	byDisp := p.getDispHandler(w)

	// next is the handler used when no disposition handler intervenes.
	next := byType
	if next == nil {
		next = w.defaultHandler
	}

	if byDisp != nil {
		return byDisp(p, next)
	}

	return next(p)
}
// write serialises the content of this part to writer: first every child
// part (subject to the conditions of w), then the part's own body.
func (p *Part) write(writer *message.Writer, w *Writer) (err error) {
	for i := range p.children {
		if err = p.children[i].writeAsChild(writer, w); err != nil {
			return err
		}
	}

	_, err = writer.Write(p.Body)

	return err
}
// writeAsChild writes this part as a sub-part of writer. Parts rejected by
// the conditions of w are skipped without error. Otherwise a new part is
// created from the header, the content is written into it and it is closed.
func (p *Part) writeAsChild(writer *message.Writer, w *Writer) (err error) {
	if skip := !w.shouldWrite(p); skip {
		return nil
	}

	childWriter, err := writer.CreatePart(p.Header)
	if err != nil {
		return err
	}

	if err = p.write(childWriter, w); err != nil {
		return err
	}

	return childWriter.Close()
}

View File

@ -0,0 +1,9 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: multipart/mixed; boundary=longrandomstring
--longrandomstring
Content-Type: text/html
<html><body>This is body of <b>HTML mail</b> with attachment</body></html>
--longrandomstring--

View File

@ -0,0 +1,15 @@
From: Sender <sender@pm.me>
To: Receiver <receiver@pm.me>
Content-Type: multipart/mixed; boundary=longrandomstring
--longrandomstring
Content-Type: text/html
<html><body>This is body of <b>HTML mail</b> with attachment</body></html>
--longrandomstring
Content-Type: application/octet-stream
Content-Transfer-Encoding: base64
Content-Disposition: attachment
aWYgeW91IGFyZSByZWFkaW5nIHRoaXMsIGhpIQ==
--longrandomstring--

View File

@ -0,0 +1,39 @@
package parser
// Walker traverses a tree of Parts and dispatches each part to a handler
// chosen by its content disposition or content type.
type Walker struct {
// root is the part the traversal starts from.
root *Part
// defaultHandler handles parts that match no other handler.
defaultHandler PartHandler
// typeHandlers maps a content type to its handler.
typeHandlers map[string]PartHandler
// dispHandlers maps a content disposition to its handler.
dispHandlers map[string]DispHandler
}
// PartHandler processes a single part during a walk. Returning a non-nil
// error stops the walk.
type PartHandler func(*Part) error
// DispHandler processes a part selected by its content disposition. It also
// receives the PartHandler that would have handled the part otherwise (its
// content-type handler, or the default handler) and may call it or not.
// Returning a non-nil error stops the walk.
type DispHandler func(*Part, PartHandler) error
// newWalker creates a Walker for the tree rooted at root. It starts with a
// default handler that does nothing and with no content-type or
// content-disposition handlers registered.
func newWalker(root *Part) *Walker {
	w := new(Walker)

	w.root = root
	w.defaultHandler = func(*Part) error { return nil }
	w.typeHandlers = map[string]PartHandler{}
	w.dispHandlers = map[string]DispHandler{}

	return w
}
// Walk traverses the tree from the root, passing each part to its handler.
// It returns the first error returned by a handler, if any.
func (w *Walker) Walk() (err error) {
	err = w.root.visit(w)
	return err
}
// WithDefaultHandler sets the handler used for parts that match no
// content-type or content-disposition handler, and returns the walker so
// calls can be chained.
//
// A nil handler is replaced by a handler that does nothing; storing nil would
// make the walk panic when it calls the default handler.
func (w *Walker) WithDefaultHandler(handler PartHandler) *Walker {
	if handler == nil {
		handler = func(*Part) error { return nil }
	}

	w.defaultHandler = handler

	return w
}
// WithContentTypeHandler registers handler for parts whose content type is
// contType, replacing any handler previously registered for that type, and
// returns the walker so calls can be chained.
func (w *Walker) WithContentTypeHandler(contType string, handler PartHandler) *Walker {
	handlers := w.typeHandlers
	handlers[contType] = handler

	return w
}
// WithContentDispositionHandler registers handler for parts whose content
// disposition is contDisp, replacing any handler previously registered for
// that disposition, and returns the walker so calls can be chained.
func (w *Walker) WithContentDispositionHandler(contDisp string, handler DispHandler) *Walker {
	handlers := w.dispHandlers
	handlers[contDisp] = handler

	return w
}

View File

@ -0,0 +1,64 @@
package parser
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestWalker checks that, with only a default handler registered, the walk
// reaches every part of the fixture and exposes the HTML body and the
// attachment body.
func TestWalker(t *testing.T) {
	parsed := newTestParser(t, "text_html_octet_attachment.eml")

	allBodies := make([][]byte, 0)
	collect := func(part *Part) error {
		// Parts without a body (such as the multipart root) are not recorded.
		if part.Body != nil {
			allBodies = append(allBodies, part.Body)
		}
		return nil
	}

	walker := parsed.NewWalker().WithDefaultHandler(collect)
	assert.NoError(t, walker.Walk())

	want := [][]byte{
		[]byte("<html><body>This is body of <b>HTML mail</b> with attachment</body></html>"),
		[]byte("if you are reading this, hi!"),
	}
	assert.ElementsMatch(t, want, allBodies)
}
// TestWalkerTypeHandler checks that a handler registered for "text/html" is
// called for the HTML part of the fixture and for nothing else.
func TestWalkerTypeHandler(t *testing.T) {
	parsed := newTestParser(t, "text_html_octet_attachment.eml")

	html := make([][]byte, 0)
	collectHTML := func(part *Part) error {
		html = append(html, part.Body)
		return nil
	}

	walker := parsed.NewWalker().WithContentTypeHandler("text/html", collectHTML)
	assert.NoError(t, walker.Walk())

	want := [][]byte{
		[]byte("<html><body>This is body of <b>HTML mail</b> with attachment</body></html>"),
	}
	assert.ElementsMatch(t, want, html)
}
// TestWalkerDispositionHandler checks that a handler registered for the
// "attachment" disposition is called for the attachment part of the fixture
// and for nothing else.
func TestWalkerDispositionHandler(t *testing.T) {
	parsed := newTestParser(t, "text_html_octet_attachment.eml")

	attachments := make([][]byte, 0)
	collectAttachment := func(part *Part, _ PartHandler) error {
		attachments = append(attachments, part.Body)
		return nil
	}

	walker := parsed.NewWalker().WithContentDispositionHandler("attachment", collectAttachment)
	assert.NoError(t, walker.Walk())

	want := [][]byte{
		[]byte("if you are reading this, hi!"),
	}
	assert.ElementsMatch(t, want, attachments)
}

View File

@ -0,0 +1,48 @@
package parser
import (
"io"
"github.com/emersion/go-message"
)
// Writer serialises a tree of Parts, optionally leaving out child parts that
// do not satisfy its conditions.
type Writer struct {
// root is the part serialisation starts from.
root *Part
// cond lists the conditions a child part must satisfy to be written.
cond []Condition
}
// Condition reports whether the part p should be written. A part is skipped
// as soon as one condition returns false.
type Condition func(p *Part) bool
// newWriter creates a Writer for the tree rooted at root, with no conditions.
func newWriter(root *Part) *Writer {
	w := new(Writer)
	w.root = root

	return w
}
// WithCondition adds cond to the conditions a child part must satisfy to be
// written, and returns the writer so calls can be chained.
func (w *Writer) WithCondition(cond Condition) *Writer {
	conditions := append(w.cond, cond)
	w.cond = conditions

	return w
}
// Write serialises the part tree to ww. A message writer is created from the
// root header, the root content is written through it and it is then closed.
// The first error encountered is returned.
func (w *Writer) Write(ww io.Writer) (err error) {
	msgWriter, createErr := message.CreateWriter(ww, w.root.Header)
	if createErr != nil {
		return createErr
	}

	if err = w.root.write(msgWriter, w); err != nil {
		return err
	}

	return msgWriter.Close()
}
// shouldWrite reports whether p satisfies every condition of the writer.
// Conditions are evaluated in registration order and evaluation stops at the
// first one that rejects the part. With no conditions, every part is written.
func (w *Writer) shouldWrite(p *Part) bool {
	for i := range w.cond {
		if keep := w.cond[i](p); !keep {
			return false
		}
	}

	return true
}

View File

@ -0,0 +1,39 @@
package parser
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
)
// TestParserWrite checks that parsing the fixture and writing it back without
// any condition reproduces the original file exactly.
func TestParserWrite(t *testing.T) {
	parsed := newTestParser(t, "text_html_octet_attachment.eml")
	writer := parsed.NewWriter()

	var out bytes.Buffer
	assert.NoError(t, writer.Write(&out))

	want := s("text_html_octet_attachment.eml")
	assert.Equal(t, want, out.String())
}
// TestParserWriteNoAttachments checks that writing the fixture with a
// condition rejecting attachment parts yields the attachment-free fixture.
func TestParserWriteNoAttachments(t *testing.T) {
	parsed := newTestParser(t, "text_html_octet_attachment.eml")

	// We don't write if the content disposition says it's an attachment.
	notAttachment := func(part *Part) bool {
		disp, _, err := part.Header.ContentDisposition()
		isAttachment := err == nil && disp == "attachment"
		return !isAttachment
	}

	writer := parsed.NewWriter().WithCondition(notAttachment)

	var out bytes.Buffer
	assert.NoError(t, writer.Write(&out))

	want := s("text_html.eml")
	assert.Equal(t, want, out.String())
}