From 6ea3fc19631e0010c078dfc219d9e229996a22ca Mon Sep 17 00:00:00 2001 From: James Houlahan Date: Tue, 30 Jun 2020 17:43:04 +0200 Subject: [PATCH] feat: initial parser exposing walker/writer --- pkg/message/parser.go | 50 ++++++++ pkg/message/parser/parser.go | 114 ++++++++++++++++++ pkg/message/parser/parser_test.go | 39 ++++++ pkg/message/parser/part.go | 96 +++++++++++++++ pkg/message/parser/testdata/text_html.eml | 9 ++ .../testdata/text_html_octet_attachment.eml | 15 +++ pkg/message/parser/walker.go | 39 ++++++ pkg/message/parser/walker_test.go | 64 ++++++++++ pkg/message/parser/writer.go | 48 ++++++++ pkg/message/parser/writer_test.go | 39 ++++++ 10 files changed, 513 insertions(+) create mode 100644 pkg/message/parser/parser.go create mode 100644 pkg/message/parser/parser_test.go create mode 100644 pkg/message/parser/part.go create mode 100644 pkg/message/parser/testdata/text_html.eml create mode 100644 pkg/message/parser/testdata/text_html_octet_attachment.eml create mode 100644 pkg/message/parser/walker.go create mode 100644 pkg/message/parser/walker_test.go create mode 100644 pkg/message/parser/writer.go create mode 100644 pkg/message/parser/writer_test.go diff --git a/pkg/message/parser.go b/pkg/message/parser.go index 80da7ab8..35ff7f65 100644 --- a/pkg/message/parser.go +++ b/pkg/message/parser.go @@ -34,6 +34,7 @@ import ( "strconv" "strings" + "github.com/ProtonMail/proton-bridge/pkg/message/parser" pmmime "github.com/ProtonMail/proton-bridge/pkg/mime" "github.com/ProtonMail/proton-bridge/pkg/pmapi" "github.com/jaytaylor/html2text" @@ -412,6 +413,55 @@ func (pka *PublicKeyAttacher) Accept(partReader io.Reader, header textproto.MIME // ======= Parser ========== +func ParseGoMessage(r io.Reader) (m *pmapi.Message, mimeBody string, plainContents string, atts []io.Reader, err error) { + p, err := parser.New(r) + if err != nil { + return + } + + walker := p. + NewWalker(). + WithContentDispositionHandler("attachment", func(p *parser.Part, _ parser.PartHandler) (err error) { + atts = append(atts, bytes.NewReader(p.Body)) + return + }). + WithContentTypeHandler("text/html", func(p *parser.Part) (err error) { + plain, err := html2text.FromString(string(p.Body)) + if err != nil { + plain = string(p.Body) + } + + plainContents += plain + + return + }) + + if err = walker.Walk(); err != nil { + return + } + + writer := p. + NewWriter(). + WithCondition(func(p *parser.Part) (keep bool) { + // We don't write if the content disposition says it's an attachment. + if disp, _, err := p.Header.ContentDisposition(); err == nil && disp == "attachment" { + return false + } + + return true + }) + + buf := new(bytes.Buffer) + + if err = writer.Write(buf); err != nil { + return + } + + mimeBody = buf.String() + + return +} + func Parse(r io.Reader, attachedPublicKey, attachedPublicKeyName string) (m *pmapi.Message, mimeBody string, plainContents string, atts []io.Reader, err error) { secondReader := new(bytes.Buffer) _, _ = secondReader.ReadFrom(r) diff --git a/pkg/message/parser/parser.go b/pkg/message/parser/parser.go new file mode 100644 index 00000000..009877fe --- /dev/null +++ b/pkg/message/parser/parser.go @@ -0,0 +1,114 @@ +package parser + +import ( + "io" + "io/ioutil" + + "github.com/emersion/go-message" +) + +type Parser struct { + stack []*Part + root *Part +} + +func New(r io.Reader) (p *Parser, err error) { + p = new(Parser) + + if err = p.parse(r); err != nil { + return + } + + return +} + +func (p *Parser) NewWalker() *Walker { + return newWalker(p.root) +} + +func (p *Parser) NewWriter() *Writer { + return newWriter(p.root) +} + +func (p *Parser) parse(r io.Reader) (err error) { + e, err := message.Read(r) + if err != nil { + return + } + + return p.parseEntity(e) +} + +func (p *Parser) enter() { + p.stack = append(p.stack, &Part{}) +} + +func (p *Parser) exit() { + var built *Part + + p.stack, built = p.stack[:len(p.stack)-1], p.stack[len(p.stack)-1] + + if len(p.stack) > 0 { + p.top().children = append(p.top().children, built) + } else { + p.root = built + } +} + +func (p *Parser) top() *Part { + return p.stack[len(p.stack)-1] +} + +func (p *Parser) withHeader(h message.Header) { + p.top().Header = h +} + +func (p *Parser) withBody(bytes []byte) { + p.top().Body = bytes +} + +func (p *Parser) parseEntity(e *message.Entity) (err error) { + p.enter() + defer p.exit() + + p.withHeader(e.Header) + + if mr := e.MultipartReader(); mr != nil { + return p.parseMultipart(mr) + } + + return p.parsePart(e) +} + +func (p *Parser) parsePart(e *message.Entity) (err error) { + bytes, err := ioutil.ReadAll(e.Body) + if err != nil { + return + } + + p.withBody(bytes) + + return +} + +func (p *Parser) parseMultipart(r message.MultipartReader) (err error) { + for { + var child *message.Entity + + if child, err = r.NextPart(); err != nil { + return ignoreEOF(err) + } + + if err = p.parseEntity(child); err != nil { + return + } + } +} + +func ignoreEOF(err error) error { + if err == io.EOF { + return nil + } + + return err +} diff --git a/pkg/message/parser/parser_test.go b/pkg/message/parser/parser_test.go new file mode 100644 index 00000000..baa83753 --- /dev/null +++ b/pkg/message/parser/parser_test.go @@ -0,0 +1,39 @@ +package parser + +import ( + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func newTestParser(t *testing.T, msg string) *Parser { + r := f(msg) + + p, err := New(r) + require.NoError(t, err) + + return p +} + +func f(filename string) io.ReadCloser { + f, err := os.Open(filepath.Join("testdata", filename)) + + if err != nil { + panic(err) + } + + return f +} + +func s(filename string) string { + b, err := ioutil.ReadAll(f(filename)) + if err != nil { + panic(err) + } + + return string(b) +} diff --git a/pkg/message/parser/part.go b/pkg/message/parser/part.go new file mode 100644 index 00000000..1e2e3fc8 --- /dev/null +++ b/pkg/message/parser/part.go @@ -0,0 +1,96 @@ +package parser + +import ( + "github.com/emersion/go-message" +) + +type Part struct { + Header message.Header + Body []byte + children []*Part +} + +func (p *Part) visit(w *Walker) (err error) { + if err = p.handle(w); err != nil { + return + } + + for _, child := range p.children { + if err = child.visit(w); err != nil { + return + } + } + + return +} + +func (p *Part) getTypeHandler(w *Walker) (hdl PartHandler) { + t, _, err := p.Header.ContentType() + if err != nil { + return + } + + return w.typeHandlers[t] +} + +func (p *Part) getDispHandler(w *Walker) (hdl DispHandler) { + t, _, err := p.Header.ContentDisposition() + if err != nil { + return + } + + return w.dispHandlers[t] +} + +func (p *Part) handle(w *Walker) (err error) { + typeHandler := p.getTypeHandler(w) + dispHandler := p.getDispHandler(w) + defaultHandler := w.defaultHandler + + switch { + case dispHandler != nil && typeHandler != nil: + return dispHandler(p, typeHandler) + + case dispHandler != nil && typeHandler == nil: + return dispHandler(p, defaultHandler) + + case dispHandler == nil && typeHandler != nil: + return typeHandler(p) + + default: + return defaultHandler(p) + } +} + +func (p *Part) write(writer *message.Writer, w *Writer) (err error) { + if len(p.children) > 0 { + for _, child := range p.children { + if err = child.writeAsChild(writer, w); err != nil { + return + } + } + } + + if _, err = writer.Write(p.Body); err != nil { + return + } + + return +} + +func (p *Part) writeAsChild(writer *message.Writer, w *Writer) (err error) { + if !w.shouldWrite(p) { + return + } + + childWriter, err := writer.CreatePart(p.Header) + if err != nil { + return + } + + if err = p.write(childWriter, w); err != nil { + return + } + + return childWriter.Close() +} diff --git a/pkg/message/parser/testdata/text_html.eml b/pkg/message/parser/testdata/text_html.eml new file mode 100644 index 00000000..7b7a1fe8 --- /dev/null +++ b/pkg/message/parser/testdata/text_html.eml @@ -0,0 +1,9 @@ +From: Sender +To: Receiver +Content-Type: multipart/mixed; boundary=longrandomstring + +--longrandomstring +Content-Type: text/html + +This is body of HTML mail with attachment +--longrandomstring-- diff --git a/pkg/message/parser/testdata/text_html_octet_attachment.eml b/pkg/message/parser/testdata/text_html_octet_attachment.eml new file mode 100644 index 00000000..7491ee83 --- /dev/null +++ b/pkg/message/parser/testdata/text_html_octet_attachment.eml @@ -0,0 +1,15 @@ +From: Sender +To: Receiver +Content-Type: multipart/mixed; boundary=longrandomstring + +--longrandomstring +Content-Type: text/html + +This is body of HTML mail with attachment +--longrandomstring +Content-Type: application/octet-stream +Content-Transfer-Encoding: base64 +Content-Disposition: attachment + +aWYgeW91IGFyZSByZWFkaW5nIHRoaXMsIGhpIQ== +--longrandomstring-- diff --git a/pkg/message/parser/walker.go b/pkg/message/parser/walker.go new file mode 100644 index 00000000..4e842c7c --- /dev/null +++ b/pkg/message/parser/walker.go @@ -0,0 +1,39 @@ +package parser + +type Walker struct { + root *Part + + defaultHandler PartHandler + typeHandlers map[string]PartHandler + dispHandlers map[string]DispHandler +} + +type PartHandler func(*Part) error +type DispHandler func(*Part, PartHandler) error + +func newWalker(root *Part) *Walker { + return &Walker{ + root: root, + defaultHandler: func(*Part) (err error) { return }, + typeHandlers: make(map[string]PartHandler), + dispHandlers: make(map[string]DispHandler), + } +} + +func (w *Walker) Walk() (err error) { + return w.root.visit(w) +} + +func (w *Walker) WithDefaultHandler(handler PartHandler) *Walker { + w.defaultHandler = handler + return w +} +func (w *Walker) WithContentTypeHandler(contType string, handler PartHandler) *Walker { + w.typeHandlers[contType] = handler + return w +} + +func (w *Walker) WithContentDispositionHandler(contDisp string, handler DispHandler) *Walker { + w.dispHandlers[contDisp] = handler + return w +} diff --git a/pkg/message/parser/walker_test.go b/pkg/message/parser/walker_test.go new file mode 100644 index 00000000..5ab1385d --- /dev/null +++ b/pkg/message/parser/walker_test.go @@ -0,0 +1,64 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWalker(t *testing.T) { + p := newTestParser(t, "text_html_octet_attachment.eml") + + allBodies := [][]byte{} + + walker := p. + NewWalker(). + WithDefaultHandler(func(p *Part) (err error) { + if p.Body != nil { + allBodies = append(allBodies, p.Body) + } + return + }) + + assert.NoError(t, walker.Walk()) + assert.ElementsMatch(t, [][]byte{ + []byte("This is body of HTML mail with attachment"), + []byte("if you are reading this, hi!"), + }, allBodies) +} + +func TestWalkerTypeHandler(t *testing.T) { + p := newTestParser(t, "text_html_octet_attachment.eml") + + html := [][]byte{} + + walker := p. + NewWalker(). + WithContentTypeHandler("text/html", func(p *Part) (err error) { + html = append(html, p.Body) + return + }) + + assert.NoError(t, walker.Walk()) + assert.ElementsMatch(t, [][]byte{ + []byte("This is body of HTML mail with attachment"), + }, html) +} + +func TestWalkerDispositionHandler(t *testing.T) { + p := newTestParser(t, "text_html_octet_attachment.eml") + + attachments := [][]byte{} + + walker := p. + NewWalker(). + WithContentDispositionHandler("attachment", func(p *Part, hdl PartHandler) (err error) { + attachments = append(attachments, p.Body) + return + }) + + assert.NoError(t, walker.Walk()) + assert.ElementsMatch(t, [][]byte{ + []byte("if you are reading this, hi!"), + }, attachments) +} diff --git a/pkg/message/parser/writer.go b/pkg/message/parser/writer.go new file mode 100644 index 00000000..f4ba3463 --- /dev/null +++ b/pkg/message/parser/writer.go @@ -0,0 +1,48 @@ +package parser + +import ( + "io" + + "github.com/emersion/go-message" +) + +type Writer struct { + root *Part + cond []Condition +} + +type Condition func(p *Part) bool + +func newWriter(root *Part) *Writer { + return &Writer{ + root: root, + } +} + +func (w *Writer) WithCondition(cond Condition) *Writer { + w.cond = append(w.cond, cond) + return w +} + +func (w *Writer) Write(ww io.Writer) (err error) { + msgWriter, err := message.CreateWriter(ww, w.root.Header) + if err != nil { + return + } + + if err = w.root.write(msgWriter, w); err != nil { + return + } + + return msgWriter.Close() +} + +func (w *Writer) shouldWrite(p *Part) bool { + for _, cond := range w.cond { + if !cond(p) { + return false + } + } + + return true +} diff --git a/pkg/message/parser/writer_test.go b/pkg/message/parser/writer_test.go new file mode 100644 index 00000000..e9ccd07c --- /dev/null +++ b/pkg/message/parser/writer_test.go @@ -0,0 +1,39 @@ +package parser + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParserWrite(t *testing.T) { + p := newTestParser(t, "text_html_octet_attachment.eml") + + w := p.NewWriter() + + buf := new(bytes.Buffer) + + assert.NoError(t, w.Write(buf)) + assert.Equal(t, s("text_html_octet_attachment.eml"), buf.String()) +} + +func TestParserWriteNoAttachments(t *testing.T) { + p := newTestParser(t, "text_html_octet_attachment.eml") + + w := p. + NewWriter(). + WithCondition(func(p *Part) bool { + // We don't write if the content disposition says it's an attachment. + if disp, _, err := p.Header.ContentDisposition(); err == nil && disp == "attachment" { + return false + } + + return true + }) + + buf := new(bytes.Buffer) + + assert.NoError(t, w.Write(buf)) + assert.Equal(t, s("text_html.eml"), buf.String()) +}