feat: pull out most things as attachments

This commit is contained in:
James Houlahan
2020-08-03 14:44:12 +02:00
parent c6b18b45b5
commit e01a523ae3
8 changed files with 1287 additions and 184 deletions

View File

@ -68,19 +68,46 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeMessage, pla
} }
func collectAttachments(p *parser.Parser) (atts []*pmapi.Attachment, data []io.Reader, err error) { func collectAttachments(p *parser.Parser) (atts []*pmapi.Attachment, data []io.Reader, err error) {
w := p.NewWalker() w := p.NewWalker().
RegisterContentDispositionHandler("attachment", func(p *parser.Part) error {
w.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *parser.Part, _ parser.PartHandlerFunc) (err error) {
att, err := parseAttachment(p.Header) att, err := parseAttachment(p.Header)
if err != nil { if err != nil {
return return err
} }
atts = append(atts, att) atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body)) data = append(data, bytes.NewReader(p.Body))
return return nil
}).
RegisterContentTypeHandler("text/calendar", func(p *parser.Part) error {
att, err := parseAttachment(p.Header)
if err != nil {
return err
}
atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body))
return nil
}).
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
return nil
}).
RegisterDefaultHandler(func(p *parser.Part) error {
if len(p.Children()) > 0 {
return nil
}
att, err := parseAttachment(p.Header)
if err != nil {
return err
}
atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body))
return nil
}) })
if err = w.Walk(); err != nil { if err = w.Walk(); err != nil {
@ -118,7 +145,7 @@ func buildBodies(p *parser.Parser) (richBody, plainBody string, err error) {
// collectBodyParts collects all body parts in the parse tree, preferring // collectBodyParts collects all body parts in the parse tree, preferring
// parts of the given content type if alternatives exist. // parts of the given content type if alternatives exist.
func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Parts, error) { func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Parts, error) {
v := parser. v := p.
NewVisitor(func(p *parser.Part, visit parser.Visit) (interface{}, error) { NewVisitor(func(p *parser.Part, visit parser.Visit) (interface{}, error) {
childParts, err := collectChildParts(p, visit) childParts, err := collectChildParts(p, visit)
if err != nil { if err != nil {
@ -142,7 +169,7 @@ func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Par
return parser.Parts{p}, nil return parser.Parts{p}, nil
}) })
res, err := v.Visit(p.Root()) res, err := v.Visit()
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -203,12 +230,10 @@ func allHaveContentType(parts parser.Parts, contentType string) bool {
} }
func determineMIMEType(p *parser.Parser) (string, error) { func determineMIMEType(p *parser.Parser) (string, error) {
w := p.NewWalker()
var isHTML bool var isHTML bool
w.RegisterContentTypeHandler("text/html"). w := p.NewWalker().
OnEnter(func(p *parser.Part) (err error) { RegisterContentTypeHandler("text/html", func(p *parser.Part) (err error) {
isHTML = true isHTML = true
return return
}) })
@ -349,6 +374,8 @@ func parseAttachment(h message.Header) (att *pmapi.Attachment, err error) {
att.Name = dispParams["filename"] att.Name = dispParams["filename"]
} }
att.ContentID = strings.Trim(h.Get("Content-Id"), " <>")
// TODO: Set att.Header // TODO: Set att.Header
return return

View File

@ -0,0 +1,40 @@
package parser
import "regexp"
// HandlerFunc is the callback signature stored by handler: it receives a part
// and returns an error.
type HandlerFunc func(*Part) error
// handler associates a callback with the regular expressions that select it.
// matchType and matchDisp treat an empty pattern as "never matches", so a
// handler may set just one of the two patterns.
type handler struct {
// typeRegExp is matched against a part's content type and dispRegExp against
// its content disposition; both hold regular-expression source text.
typeRegExp, dispRegExp string
// fn is the callback associated with these patterns.
fn HandlerFunc
}
// matchPart reports whether this handler applies to p, i.e. whether its
// content-type pattern or its content-disposition pattern matches the part.
// The content type is checked first; the disposition is only consulted when
// the type did not match.
func (h *handler) matchPart(p *Part) bool {
	if h.matchType(p) {
		return true
	}
	return h.matchDisp(p)
}
// matchType reports whether the content type of p matches h.typeRegExp.
//
// An empty pattern never matches. If the part's content type cannot be read,
// the pattern is tested against the empty string instead. Matching follows
// regexp.MatchString semantics, so the pattern may match anywhere in the
// content type unless it is anchored.
//
// NOTE(review): the pattern is compiled on every call and regexp.MustCompile
// panics on an invalid pattern; consider compiling once at registration.
func (h *handler) matchType(p *Part) bool {
	pattern := h.typeRegExp
	if pattern == "" {
		return false
	}

	// Fall back to the empty string when the header cannot be parsed.
	var contentType string
	if parsed, _, err := p.Header.ContentType(); err == nil {
		contentType = parsed
	}

	return regexp.MustCompile(pattern).MatchString(contentType)
}
// matchDisp reports whether the content disposition of p matches h.dispRegExp.
//
// An empty pattern never matches. If the part's content disposition cannot be
// read, the pattern is tested against the empty string instead. Matching
// follows regexp.MatchString semantics, so the pattern may match anywhere in
// the disposition unless it is anchored.
//
// NOTE(review): the pattern is compiled on every call and regexp.MustCompile
// panics on an invalid pattern; consider compiling once at registration.
func (h *handler) matchDisp(p *Part) bool {
	pattern := h.dispRegExp
	if pattern == "" {
		return false
	}

	// Fall back to the empty string when the header cannot be parsed.
	var disposition string
	if parsed, _, err := p.Header.ContentDisposition(); err == nil {
		disposition = parsed
	}

	return regexp.MustCompile(pattern).MatchString(disposition)
}

View File

@ -1,72 +0,0 @@
package parser
// PartHandlerFunc is a callback that receives a part and returns an error.
type PartHandlerFunc func(*Part) error
// DispHandlerFunc is a callback that receives a part together with a
// PartHandlerFunc it may call; DispHandler supplies a function that forwards
// to the walker's content-type handler for that part.
type DispHandlerFunc func(*Part, PartHandlerFunc) error
// PartHandler holds the pair of callbacks run for a part: enter is invoked
// through handleEnter and exit through handleExit.
type PartHandler struct {
	enter PartHandlerFunc
	exit  PartHandlerFunc
}

// NewPartHandler returns a PartHandler whose enter and exit callbacks are both
// partNoop, so it is usable before OnEnter or OnExit has been called.
func NewPartHandler() *PartHandler {
	h := new(PartHandler)
	h.enter = partNoop
	h.exit = partNoop
	return h
}

// OnEnter replaces the enter callback with fn and returns h so that calls can
// be chained.
func (h *PartHandler) OnEnter(fn PartHandlerFunc) *PartHandler {
	h.enter = fn

	return h
}

// OnExit replaces the exit callback with fn and returns h so that calls can be
// chained.
func (h *PartHandler) OnExit(fn PartHandlerFunc) *PartHandler {
	h.exit = fn

	return h
}

// handleEnter runs the enter callback on p. The walker argument is unused.
func (h *PartHandler) handleEnter(_ *Walker, p *Part) error {
	onEnter := h.enter
	return onEnter(p)
}

// handleExit runs the exit callback on p. The walker argument is unused.
func (h *PartHandler) handleExit(_ *Walker, p *Part) error {
	onExit := h.exit
	return onExit(p)
}
// DispHandler holds the pair of disposition callbacks run for a part: enter is
// invoked through handleEnter and exit through handleExit.
type DispHandler struct {
	enter DispHandlerFunc
	exit  DispHandlerFunc
}

// NewDispHandler returns a DispHandler whose enter and exit callbacks are both
// dispNoop, so it is usable before OnEnter or OnExit has been called.
func NewDispHandler() *DispHandler {
	h := new(DispHandler)
	h.enter = dispNoop
	h.exit = dispNoop
	return h
}

// OnEnter replaces the enter callback with fn and returns h so that calls can
// be chained.
func (h *DispHandler) OnEnter(fn DispHandlerFunc) *DispHandler {
	h.enter = fn

	return h
}

// OnExit replaces the exit callback with fn and returns h so that calls can be
// chained.
func (h *DispHandler) OnExit(fn DispHandlerFunc) *DispHandler {
	h.exit = fn

	return h
}

// handleEnter runs the enter callback on p, handing it a function that
// forwards to the enter step of the walker's content-type handler.
func (h *DispHandler) handleEnter(w *Walker, p *Part) error {
	// NOTE: This is hacky -- is there a better solution?
	enterByType := func(part *Part) error {
		return w.getTypeHandler(part).handleEnter(w, part)
	}
	return h.enter(p, enterByType)
}

// handleExit runs the exit callback on p, handing it a function that forwards
// to the exit step of the walker's content-type handler.
func (h *DispHandler) handleExit(w *Walker, p *Part) error {
	// NOTE: This is hacky -- is there a better solution?
	exitByType := func(part *Part) error {
		return w.getTypeHandler(part).handleExit(w, part)
	}
	return h.exit(p, exitByType)
}
// partNoop is a PartHandlerFunc-shaped callback that ignores its argument and
// reports no error.
func partNoop(_ *Part) error {
	return nil
}
// dispNoop is a DispHandlerFunc-shaped callback that ignores both arguments
// and reports no error.
func dispNoop(_ *Part, _ PartHandlerFunc) error {
	return nil
}

View File

@ -33,6 +33,10 @@ func (p *Parser) NewWalker() *Walker {
return newWalker(p.root) return newWalker(p.root)
} }
func (p *Parser) NewVisitor(defaultRule VisitorRule) *Visitor {
return newVisitor(p.root, defaultRule)
}
func (p *Parser) NewWriter() *Writer { func (p *Parser) NewWriter() *Writer {
return newWriter(p.root) return newWriter(p.root)
} }

View File

@ -3,27 +3,29 @@ package parser
import "regexp" import "regexp"
type Visitor struct { type Visitor struct {
rules []*rule root *Part
fallback Rule rules []*visitorRule
defaultRule VisitorRule
} }
func NewVisitor(fallback Rule) *Visitor { func newVisitor(root *Part, defaultRule VisitorRule) *Visitor {
return &Visitor{ return &Visitor{
fallback: fallback, root: root,
defaultRule: defaultRule,
} }
} }
type Visit func(*Part) (interface{}, error) type Visit func(*Part) (interface{}, error)
type Rule func(*Part, Visit) (interface{}, error) type VisitorRule func(*Part, Visit) (interface{}, error)
type rule struct { type visitorRule struct {
re string re string
fn Rule fn VisitorRule
} }
func (v *Visitor) RegisterRule(contentTypeRegex string, fn Rule) *Visitor { func (v *Visitor) RegisterRule(contentTypeRegex string, fn VisitorRule) *Visitor {
v.rules = append(v.rules, &rule{ v.rules = append(v.rules, &visitorRule{
re: contentTypeRegex, re: contentTypeRegex,
fn: fn, fn: fn,
}) })
@ -31,20 +33,24 @@ func (v *Visitor) RegisterRule(contentTypeRegex string, fn Rule) *Visitor {
return v return v
} }
func (v *Visitor) Visit(p *Part) (interface{}, error) { func (v *Visitor) Visit() (interface{}, error) {
return v.visit(v.root)
}
func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType() t, _, err := p.Header.ContentType()
if err != nil { if err != nil {
return nil, err return nil, err
} }
if rule := v.getRuleForContentType(t); rule != nil { if rule := v.getRuleForContentType(t); rule != nil {
return rule.fn(p, v.Visit) return rule.fn(p, v.visit)
} }
return v.fallback(p, v.Visit) return v.defaultRule(p, v.visit)
} }
func (v *Visitor) getRuleForContentType(contentType string) *rule { func (v *Visitor) getRuleForContentType(contentType string) *visitorRule {
for _, rule := range v.rules { for _, rule := range v.rules {
if regexp.MustCompile(rule.re).MatchString(contentType) { if regexp.MustCompile(rule.re).MatchString(contentType) {
return rule return rule

View File

@ -3,96 +3,64 @@ package parser
type Walker struct { type Walker struct {
root *Part root *Part
defaultHandler handler handlers []*handler
typeHandlers map[string]handler defaultHandler HandlerFunc
dispHandlers map[string]handler
}
type handler interface {
handleEnter(*Walker, *Part) error
handleExit(*Walker, *Part) error
} }
func newWalker(root *Part) *Walker { func newWalker(root *Part) *Walker {
return &Walker{ return &Walker{
root: root, root: root,
defaultHandler: NewPartHandler(), defaultHandler: func(*Part) error { return nil },
typeHandlers: make(map[string]handler),
dispHandlers: make(map[string]handler),
} }
} }
func (w *Walker) Walk() (err error) { func (w *Walker) Walk() (err error) {
return w.visitPart(w.root) return w.walkOverPart(w.root)
} }
func (w *Walker) visitPart(p *Part) (err error) { func (w *Walker) walkOverPart(p *Part) error {
hdl := w.getHandler(p) if err := w.getHandlerFunc(p)(p); err != nil {
return err
if err = hdl.handleEnter(w, p); err != nil {
return
} }
for _, child := range p.children { for _, child := range p.children {
if err = w.visitPart(child); err != nil { if err := w.walkOverPart(child); err != nil {
return return err
} }
} }
return hdl.handleExit(w, p)
}
func (w *Walker) WithDefaultHandler(handler handler) *Walker {
w.defaultHandler = handler
return w
}
func (w *Walker) RegisterContentTypeHandler(contType string) *PartHandler {
hdl := NewPartHandler()
w.typeHandlers[contType] = hdl
return hdl
}
func (w *Walker) RegisterContentDispositionHandler(contDisp string) *DispHandler {
hdl := NewDispHandler()
w.dispHandlers[contDisp] = hdl
return hdl
}
func (w *Walker) getHandler(p *Part) handler {
if dispHandler := w.getDispHandler(p); dispHandler != nil {
return dispHandler
}
return w.getTypeHandler(p)
}
// getTypeHandler returns the appropriate PartHandler to handle the given part.
// If no specialised handler exists, it returns the default handler.
func (w *Walker) getTypeHandler(p *Part) handler {
t, _, err := p.Header.ContentType()
if err != nil {
return w.defaultHandler
}
hdl, ok := w.typeHandlers[t]
if !ok {
return w.defaultHandler
}
return hdl
}
// getDispHandler returns the appropriate DispHandler to handle the given part.
// If no specialised handler exists, it returns nil.
func (w *Walker) getDispHandler(p *Part) handler {
t, _, err := p.Header.ContentDisposition()
if err != nil {
return nil return nil
} }
return w.dispHandlers[t] func (w *Walker) RegisterDefaultHandler(fn HandlerFunc) *Walker {
w.defaultHandler = fn
return w
}
func (w *Walker) RegisterContentTypeHandler(typeRegExp string, fn HandlerFunc) *Walker {
w.handlers = append(w.handlers, &handler{
typeRegExp: typeRegExp,
fn: fn,
})
return w
}
func (w *Walker) RegisterContentDispositionHandler(dispRegExp string, fn HandlerFunc) *Walker {
w.handlers = append(w.handlers, &handler{
dispRegExp: dispRegExp,
fn: fn,
})
return w
}
func (w *Walker) getHandlerFunc(p *Part) HandlerFunc {
for _, hdl := range w.handlers {
if hdl.matchPart(p) {
return hdl.fn
}
}
return w.defaultHandler
} }

View File

@ -11,14 +11,13 @@ func TestWalker(t *testing.T) {
allBodies := [][]byte{} allBodies := [][]byte{}
walker := p. walker := p.NewWalker().
NewWalker(). RegisterDefaultHandler(func(p *Part) (err error) {
WithDefaultHandler(NewPartHandler().OnEnter(func(p *Part) (err error) {
if p.Body != nil { if p.Body != nil {
allBodies = append(allBodies, p.Body) allBodies = append(allBodies, p.Body)
} }
return return
})) })
assert.NoError(t, walker.Walk()) assert.NoError(t, walker.Walk())
assert.ElementsMatch(t, [][]byte{ assert.ElementsMatch(t, [][]byte{
@ -32,10 +31,8 @@ func TestWalkerTypeHandler(t *testing.T) {
html := [][]byte{} html := [][]byte{}
walker := p.NewWalker() walker := p.NewWalker().
RegisterContentTypeHandler("text/html", func(p *Part) (err error) {
walker.RegisterContentTypeHandler("text/html").
OnEnter(func(p *Part) (err error) {
html = append(html, p.Body) html = append(html, p.Body)
return return
}) })
@ -51,10 +48,8 @@ func TestWalkerDispositionHandler(t *testing.T) {
attachments := [][]byte{} attachments := [][]byte{}
walker := p.NewWalker() walker := p.NewWalker().
RegisterContentDispositionHandler("attachment", func(p *Part) (err error) {
walker.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) {
attachments = append(attachments, p.Body) attachments = append(attachments, p.Body)
return return
}) })
@ -65,22 +60,22 @@ func TestWalkerDispositionHandler(t *testing.T) {
}, attachments) }, attachments)
} }
func TestWalkerDispositionAndTypeHandler(t *testing.T) { func TestWalkerDispositionAndTypeHandler_TypeDefinedFirst(t *testing.T) {
p := newTestParser(t, "text_html_octet_attachment.eml") p := newTestParser(t, "text_html_octet_attachment.eml")
walker := p.NewWalker() var typeCalled, dispCalled bool
var enter, exit int walker := p.NewWalker().
RegisterContentTypeHandler("application/octet-stream", func(p *Part) (err error) {
walker.RegisterContentTypeHandler("application/octet-stream"). typeCalled = true
OnEnter(func(p *Part) (err error) { enter++; return }). return
OnExit(func(p *Part) (err error) { exit--; return }) }).
RegisterContentDispositionHandler("attachment", func(p *Part) (err error) {
walker.RegisterContentDispositionHandler("attachment"). dispCalled = true
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return }). return
OnExit(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return }) })
assert.NoError(t, walker.Walk()) assert.NoError(t, walker.Walk())
assert.Equal(t, 2, enter) assert.True(t, typeCalled)
assert.Equal(t, -2, exit) assert.False(t, dispCalled)
} }

File diff suppressed because it is too large Load Diff