feat: better handling of multipart messages

This commit is contained in:
James Houlahan
2020-07-30 14:51:34 +02:00
parent a7da66ccbc
commit c6b18b45b5
10 changed files with 502 additions and 214 deletions

View File

@ -63,7 +63,7 @@ func (p *Parser) parse(r io.Reader) (err error) {
}
// enter appends a newly allocated Part to the parser's stack.
func (p *Parser) enter() {
p.stack = append(p.stack, &Part{parent: p.top()})
p.stack = append(p.stack, &Part{})
}
func (p *Parser) exit() {

View File

@ -6,11 +6,12 @@ import (
"github.com/emersion/go-message"
)
// Parts is a list of message parts.
type Parts []*Part
// Part is one node of a parsed message: it carries the part's header, its
// body bytes, and the parts nested directly beneath it.
type Part struct {
Header message.Header
Body []byte
parent *Part
children []*Part
children Parts
}
func (p *Part) Part(n int) (part *Part, err error) {
@ -21,58 +22,14 @@ func (p *Part) Part(n int) (part *Part, err error) {
return p.children[n-1], nil
}
// Parts reports how many direct children p has.
func (p *Part) Parts() (n int) {
	n = len(p.children)
	return n
}
// Parent returns the value of p's parent field, which is nil when no parent
// has been set.
func (p *Part) Parent() *Part {
return p.parent
}
// Siblings returns the other children of p's parent, excluding p itself.
// It returns nil when p has no parent, and an empty (non-nil) slice when p is
// its parent's only child.
func (p *Part) Siblings() []*Part {
if p.parent == nil {
return nil
}
siblings := []*Part{}
for _, sibling := range p.parent.children {
if sibling != p {
siblings = append(siblings, sibling)
}
}
return siblings
// Children returns the parts nested directly beneath p. The returned value is
// p's own slice, not a copy.
func (p *Part) Children() Parts {
return p.children
}
// AddChild appends child to the end of p's list of children.
func (p *Part) AddChild(child *Part) {
p.children = append(p.children, child)
}
// visit walks the subtree rooted at p depth-first. The handler selected for p
// has its enter hook called before any child is visited and its exit hook
// called after all of them; the first error from a hook or a child aborts the
// walk and is returned.
func (p *Part) visit(w *Walker) (err error) {
	h := p.getHandler(w)

	if enterErr := h.handleEnter(w, p); enterErr != nil {
		return enterErr
	}

	for _, c := range p.children {
		if childErr := c.visit(w); childErr != nil {
			return childErr
		}
	}

	return h.handleExit(w, p)
}
// getHandler selects the handler for p: the walker's disposition handler when
// one is returned for p, otherwise the walker's type handler.
func (p *Part) getHandler(w *Walker) handler {
	byDisposition := w.getDispHandler(p)
	if byDisposition == nil {
		return w.getTypeHandler(p)
	}

	return byDisposition
}
func (p *Part) write(writer *message.Writer, w *Writer) (err error) {
if len(p.children) > 0 {
for _, child := range p.children {

View File

@ -0,0 +1,36 @@
To: pmbridgeietest@outlook.com
From: schizofrenic <schizofrenic@pm.me>
Subject: aoeuaoeu
Message-ID: <7dc32b61-b9cf-f2d3-8ec5-10e5b4a33ec1@pm.me>
Date: Thu, 30 Jul 2020 13:35:24 +0200
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:68.0)
Gecko/20100101 Thunderbird/68.11.0
MIME-Version: 1.0
Content-Type: multipart/alternative;
boundary="------------22BC647264E52252E386881A"
Content-Language: en-US
This is a multi-part message in MIME format.
--------------22BC647264E52252E386881A
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 7bit
*aoeuaoeu*
--------------22BC647264E52252E386881A
Content-Type: text/html; charset=utf-8
Content-Transfer-Encoding: 7bit
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<p><b>aoeuaoeu</b><br>
</p>
</body>
</html>
--------------22BC647264E52252E386881A--

View File

@ -0,0 +1,55 @@
package parser
import "regexp"
// Visitor dispatches parts to rules selected by the part's content type.
// Rules are consulted in the order they were registered; when none matches,
// the fallback rule is used.
type Visitor struct {
rules []*rule
fallback Rule
}
// NewVisitor creates a Visitor with no registered rules that hands every part
// to fallback until more specific rules are added.
func NewVisitor(fallback Rule) *Visitor {
	v := new(Visitor)
	v.fallback = fallback

	return v
}
// Visit processes a single part and returns its result. A Visitor passes its
// own Visit method to every rule it invokes, so a rule can use it to process
// further parts (for example the part's children).
type Visit func(*Part) (interface{}, error)
// Rule handles one part. It receives the part and a Visit callback, and
// returns an arbitrary result value or an error.
type Rule func(*Part, Visit) (interface{}, error)
// rule pairs a content-type pattern with the Rule to run for matching parts.
type rule struct {
	// re is the compiled content-type pattern; it is nil when err is set.
	re *regexp.Regexp
	// err is the error from compiling the pattern, if any. It is reported by
	// Visit when the rule is consulted rather than panicking.
	err error
	// fn is the Rule invoked for parts whose content type matches re.
	fn Rule
}

// RegisterRule adds fn as the rule for parts whose content type matches the
// regular expression contentTypeRegex. Rules are tried in registration order
// and the first match wins. It returns v so registrations can be chained.
//
// The pattern is compiled once, here, instead of on every lookup. An invalid
// pattern does not panic: the compile error is stored with the rule and is
// returned by Visit when that rule is reached.
func (v *Visitor) RegisterRule(contentTypeRegex string, fn Rule) *Visitor {
	re, err := regexp.Compile(contentTypeRegex)

	v.rules = append(v.rules, &rule{
		re:  re,
		err: err,
		fn:  fn,
	})

	return v
}

// Visit dispatches p to the first registered rule matching its content type,
// or to the visitor's fallback rule when no rule matches, and returns that
// rule's result.
//
// It returns an error when the part's Content-Type header cannot be parsed or
// when the rule reached during lookup was registered with an invalid pattern.
// The fallback must be non-nil; calling a nil Rule panics.
func (v *Visitor) Visit(p *Part) (interface{}, error) {
	t, _, err := p.Header.ContentType()
	if err != nil {
		return nil, err
	}

	if rule := v.getRuleForContentType(t); rule != nil {
		if rule.err != nil {
			return nil, rule.err
		}

		return rule.fn(p, v.Visit)
	}

	return v.fallback(p, v.Visit)
}

// getRuleForContentType returns the first rule, in registration order, whose
// pattern matches contentType, or nil when none does.
//
// A rule whose pattern failed to compile is returned as soon as it is reached
// (with its err field set) so that the caller can report the problem; callers
// must check err before using the rule.
func (v *Visitor) getRuleForContentType(contentType string) *rule {
	for _, r := range v.rules {
		if r.err != nil || r.re.MatchString(contentType) {
			return r
		}
	}

	return nil
}

View File

@ -23,7 +23,23 @@ func newWalker(root *Part) *Walker {
}
// Walk visits the walker's root part and returns the error produced by that
// visit, if any.
func (w *Walker) Walk() (err error) {
return w.root.visit(w)
return w.visitPart(w.root)
}
// visitPart walks the subtree rooted at p depth-first. The handler chosen for
// p has its enter hook called first, then every child of p is visited in
// order, and finally the handler's exit hook runs. The first error from a
// hook or a child stops the walk and is returned.
func (w *Walker) visitPart(p *Part) (err error) {
	h := w.getHandler(p)

	if enterErr := h.handleEnter(w, p); enterErr != nil {
		return enterErr
	}

	for _, c := range p.children {
		if childErr := w.visitPart(c); childErr != nil {
			return childErr
		}
	}

	return h.handleExit(w, p)
}
func (w *Walker) WithDefaultHandler(handler handler) *Walker {
@ -46,6 +62,14 @@ func (w *Walker) RegisterContentDispositionHandler(contDisp string) *DispHandler
return hdl
}
// getHandler selects the handler for p: the disposition handler when
// getDispHandler returns one for p, otherwise the type handler.
func (w *Walker) getHandler(p *Part) handler {
	byDisposition := w.getDispHandler(p)
	if byDisposition == nil {
		return w.getTypeHandler(p)
	}

	return byDisposition
}
// getTypeHandler returns the appropriate PartHandler to handle the given part.
// If no specialised handler exists, it returns the default handler.
func (w *Walker) getTypeHandler(p *Part) handler {

View File

@ -34,8 +34,7 @@ func TestWalkerTypeHandler(t *testing.T) {
walker := p.NewWalker()
walker.
RegisterContentTypeHandler("text/html").
walker.RegisterContentTypeHandler("text/html").
OnEnter(func(p *Part) (err error) {
html = append(html, p.Body)
return
@ -54,8 +53,7 @@ func TestWalkerDispositionHandler(t *testing.T) {
walker := p.NewWalker()
walker.
RegisterContentDispositionHandler("attachment").
walker.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) {
attachments = append(attachments, p.Body)
return
@ -74,13 +72,11 @@ func TestWalkerDispositionAndTypeHandler(t *testing.T) {
var enter, exit int
walker.
RegisterContentTypeHandler("application/octet-stream").
walker.RegisterContentTypeHandler("application/octet-stream").
OnEnter(func(p *Part) (err error) { enter++; return }).
OnExit(func(p *Part) (err error) { exit--; return })
walker.
RegisterContentDispositionHandler("attachment").
walker.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return }).
OnExit(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return })