feat: pull out most things as attachments

This commit is contained in:
James Houlahan
2020-08-03 14:44:12 +02:00
parent c6b18b45b5
commit e01a523ae3
8 changed files with 1287 additions and 184 deletions

View File

@ -68,19 +68,46 @@ func Parse(r io.Reader, key, keyName string) (m *pmapi.Message, mimeMessage, pla
}
func collectAttachments(p *parser.Parser) (atts []*pmapi.Attachment, data []io.Reader, err error) {
w := p.NewWalker()
w.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *parser.Part, _ parser.PartHandlerFunc) (err error) {
w := p.NewWalker().
RegisterContentDispositionHandler("attachment", func(p *parser.Part) error {
att, err := parseAttachment(p.Header)
if err != nil {
return
return err
}
atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body))
return
return nil
}).
RegisterContentTypeHandler("text/calendar", func(p *parser.Part) error {
att, err := parseAttachment(p.Header)
if err != nil {
return err
}
atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body))
return nil
}).
RegisterContentTypeHandler("text/.*", func(p *parser.Part) error {
return nil
}).
RegisterDefaultHandler(func(p *parser.Part) error {
if len(p.Children()) > 0 {
return nil
}
att, err := parseAttachment(p.Header)
if err != nil {
return err
}
atts = append(atts, att)
data = append(data, bytes.NewReader(p.Body))
return nil
})
if err = w.Walk(); err != nil {
@ -118,7 +145,7 @@ func buildBodies(p *parser.Parser) (richBody, plainBody string, err error) {
// collectBodyParts collects all body parts in the parse tree, preferring
// parts of the given content type if alternatives exist.
func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Parts, error) {
v := parser.
v := p.
NewVisitor(func(p *parser.Part, visit parser.Visit) (interface{}, error) {
childParts, err := collectChildParts(p, visit)
if err != nil {
@ -142,7 +169,7 @@ func collectBodyParts(p *parser.Parser, preferredContentType string) (parser.Par
return parser.Parts{p}, nil
})
res, err := v.Visit(p.Root())
res, err := v.Visit()
if err != nil {
return nil, err
}
@ -203,12 +230,10 @@ func allHaveContentType(parts parser.Parts, contentType string) bool {
}
func determineMIMEType(p *parser.Parser) (string, error) {
w := p.NewWalker()
var isHTML bool
w.RegisterContentTypeHandler("text/html").
OnEnter(func(p *parser.Part) (err error) {
w := p.NewWalker().
RegisterContentTypeHandler("text/html", func(p *parser.Part) (err error) {
isHTML = true
return
})
@ -349,6 +374,8 @@ func parseAttachment(h message.Header) (att *pmapi.Attachment, err error) {
att.Name = dispParams["filename"]
}
att.ContentID = strings.Trim(h.Get("Content-Id"), " <>")
// TODO: Set att.Header
return

View File

@ -0,0 +1,40 @@
package parser
import "regexp"
// HandlerFunc is a callback that receives a message part and returns any
// error encountered while handling it.
type HandlerFunc func(*Part) error

// handler couples a callback with the patterns that select it.
//
// typeRegExp is a regular expression tested against a part's content type and
// dispRegExp is one tested against its content disposition. An empty pattern
// never matches.
type handler struct {
	typeRegExp string
	dispRegExp string
	fn         HandlerFunc
}
// matchPart reports whether the handler applies to the given part, i.e.
// whether either its content-type pattern or its content-disposition pattern
// matches. The content type is checked first; the disposition is only
// consulted when the content type does not match.
func (h *handler) matchPart(p *Part) bool {
	if h.matchType(p) {
		return true
	}

	return h.matchDisp(p)
}
// matchType reports whether the part's content type matches the handler's
// content-type pattern.
//
// A handler without a content-type pattern never matches. If the part's
// content type cannot be parsed, the pattern is tested against the empty
// string instead. The pattern is compiled with regexp.MustCompile on every
// call, so an invalid pattern panics here.
func (h *handler) matchType(p *Part) bool {
	pattern := h.typeRegExp
	if pattern == "" {
		return false
	}

	// Fall back to the empty string when the header is unparseable.
	var contentType string
	if parsed, _, err := p.Header.ContentType(); err == nil {
		contentType = parsed
	}

	re := regexp.MustCompile(pattern)

	return re.MatchString(contentType)
}
// matchDisp reports whether the part's content disposition matches the
// handler's content-disposition pattern.
//
// A handler without a disposition pattern never matches. If the part's
// content disposition cannot be parsed, the pattern is tested against the
// empty string instead. The pattern is compiled with regexp.MustCompile on
// every call, so an invalid pattern panics here.
func (h *handler) matchDisp(p *Part) bool {
	pattern := h.dispRegExp
	if pattern == "" {
		return false
	}

	// Fall back to the empty string when the header is unparseable.
	var disposition string
	if parsed, _, err := p.Header.ContentDisposition(); err == nil {
		disposition = parsed
	}

	re := regexp.MustCompile(pattern)

	return re.MatchString(disposition)
}

View File

@ -1,72 +0,0 @@
package parser
type (
	// PartHandlerFunc is a callback that receives a message part and returns
	// any error encountered while handling it.
	PartHandlerFunc func(*Part) error

	// DispHandlerFunc is a callback for parts selected by content
	// disposition. Besides the part it receives a PartHandlerFunc which it
	// may invoke, any number of times, to run the part's content-type
	// handler.
	DispHandlerFunc func(*Part, PartHandlerFunc) error
)
// PartHandler holds the pair of callbacks a Walker runs for a part: one
// before the part's children are walked and one after.
type PartHandler struct {
	enter PartHandlerFunc
	exit  PartHandlerFunc
}

// NewPartHandler returns a PartHandler whose enter and exit callbacks both
// do nothing.
func NewPartHandler() *PartHandler {
	h := new(PartHandler)
	h.enter = partNoop
	h.exit = partNoop

	return h
}

// OnEnter replaces the enter callback and returns the handler so that calls
// can be chained.
func (h *PartHandler) OnEnter(fn PartHandlerFunc) *PartHandler {
	h.enter = fn

	return h
}

// OnExit replaces the exit callback and returns the handler so that calls
// can be chained.
func (h *PartHandler) OnExit(fn PartHandlerFunc) *PartHandler {
	h.exit = fn

	return h
}

// handleEnter runs the enter callback on the part. The walker is not used.
func (h *PartHandler) handleEnter(_ *Walker, p *Part) error {
	onEnter := h.enter

	return onEnter(p)
}

// handleExit runs the exit callback on the part. The walker is not used.
func (h *PartHandler) handleExit(_ *Walker, p *Part) error {
	onExit := h.exit

	return onExit(p)
}
// DispHandler holds the pair of callbacks a Walker runs for a part selected
// by content disposition: one before the part's children are walked and one
// after.
type DispHandler struct {
	enter DispHandlerFunc
	exit  DispHandlerFunc
}

// NewDispHandler returns a DispHandler whose enter and exit callbacks both
// do nothing.
func NewDispHandler() *DispHandler {
	h := new(DispHandler)
	h.enter = dispNoop
	h.exit = dispNoop

	return h
}

// OnEnter replaces the enter callback and returns the handler so that calls
// can be chained.
func (h *DispHandler) OnEnter(fn DispHandlerFunc) *DispHandler {
	h.enter = fn

	return h
}

// OnExit replaces the exit callback and returns the handler so that calls
// can be chained.
func (h *DispHandler) OnExit(fn DispHandlerFunc) *DispHandler {
	h.exit = fn

	return h
}

// handleEnter runs the enter callback on the part, handing it a function
// that forwards to the enter step of the walker's content-type handler for
// whichever part it is called with.
func (h *DispHandler) handleEnter(w *Walker, p *Part) error {
	// NOTE: Reaching back into the walker like this is hacky -- is there a
	// better solution?
	delegate := func(part *Part) error {
		return w.getTypeHandler(part).handleEnter(w, part)
	}

	return h.enter(p, delegate)
}

// handleExit runs the exit callback on the part, handing it a function that
// forwards to the exit step of the walker's content-type handler for
// whichever part it is called with.
func (h *DispHandler) handleExit(w *Walker, p *Part) error {
	// NOTE: Reaching back into the walker like this is hacky -- is there a
	// better solution?
	delegate := func(part *Part) error {
		return w.getTypeHandler(part).handleExit(w, part)
	}

	return h.exit(p, delegate)
}
// partNoop is a PartHandlerFunc that ignores its part and always succeeds.
func partNoop(_ *Part) error {
	return nil
}

// dispNoop is a DispHandlerFunc that ignores its arguments and always
// succeeds; in particular it never invokes the supplied part handler.
func dispNoop(_ *Part, _ PartHandlerFunc) error {
	return nil
}

View File

@ -33,6 +33,10 @@ func (p *Parser) NewWalker() *Walker {
return newWalker(p.root)
}
// NewVisitor creates a Visitor over the parser's root part. defaultRule is
// the rule the visitor applies to parts that no registered rule matches.
func (p *Parser) NewVisitor(defaultRule VisitorRule) *Visitor {
return newVisitor(p.root, defaultRule)
}
// NewWriter returns the Writer that newWriter builds from the parser's root
// part.
func (p *Parser) NewWriter() *Writer {
return newWriter(p.root)
}

View File

@ -3,27 +3,29 @@ package parser
import "regexp"
type Visitor struct {
rules []*rule
fallback Rule
root *Part
rules []*visitorRule
defaultRule VisitorRule
}
func NewVisitor(fallback Rule) *Visitor {
func newVisitor(root *Part, defaultRule VisitorRule) *Visitor {
return &Visitor{
fallback: fallback,
root: root,
defaultRule: defaultRule,
}
}
type Visit func(*Part) (interface{}, error)
type Rule func(*Part, Visit) (interface{}, error)
type VisitorRule func(*Part, Visit) (interface{}, error)
type rule struct {
type visitorRule struct {
re string
fn Rule
fn VisitorRule
}
func (v *Visitor) RegisterRule(contentTypeRegex string, fn Rule) *Visitor {
v.rules = append(v.rules, &rule{
func (v *Visitor) RegisterRule(contentTypeRegex string, fn VisitorRule) *Visitor {
v.rules = append(v.rules, &visitorRule{
re: contentTypeRegex,
fn: fn,
})
@ -31,20 +33,24 @@ func (v *Visitor) RegisterRule(contentTypeRegex string, fn Rule) *Visitor {
return v
}
func (v *Visitor) Visit(p *Part) (interface{}, error) {
func (v *Visitor) Visit() (interface{}, error) {
return v.visit(v.root)
}
func (v *Visitor) visit(p *Part) (interface{}, error) {
t, _, err := p.Header.ContentType()
if err != nil {
return nil, err
}
if rule := v.getRuleForContentType(t); rule != nil {
return rule.fn(p, v.Visit)
return rule.fn(p, v.visit)
}
return v.fallback(p, v.Visit)
return v.defaultRule(p, v.visit)
}
func (v *Visitor) getRuleForContentType(contentType string) *rule {
func (v *Visitor) getRuleForContentType(contentType string) *visitorRule {
for _, rule := range v.rules {
if regexp.MustCompile(rule.re).MatchString(contentType) {
return rule

View File

@ -3,96 +3,64 @@ package parser
type Walker struct {
root *Part
defaultHandler handler
typeHandlers map[string]handler
dispHandlers map[string]handler
}
type handler interface {
handleEnter(*Walker, *Part) error
handleExit(*Walker, *Part) error
handlers []*handler
defaultHandler HandlerFunc
}
func newWalker(root *Part) *Walker {
return &Walker{
root: root,
defaultHandler: NewPartHandler(),
typeHandlers: make(map[string]handler),
dispHandlers: make(map[string]handler),
defaultHandler: func(*Part) error { return nil },
}
}
func (w *Walker) Walk() (err error) {
return w.visitPart(w.root)
return w.walkOverPart(w.root)
}
func (w *Walker) visitPart(p *Part) (err error) {
hdl := w.getHandler(p)
if err = hdl.handleEnter(w, p); err != nil {
return
func (w *Walker) walkOverPart(p *Part) error {
if err := w.getHandlerFunc(p)(p); err != nil {
return err
}
for _, child := range p.children {
if err = w.visitPart(child); err != nil {
return
if err := w.walkOverPart(child); err != nil {
return err
}
}
return hdl.handleExit(w, p)
return nil
}
func (w *Walker) WithDefaultHandler(handler handler) *Walker {
w.defaultHandler = handler
func (w *Walker) RegisterDefaultHandler(fn HandlerFunc) *Walker {
w.defaultHandler = fn
return w
}
func (w *Walker) RegisterContentTypeHandler(contType string) *PartHandler {
hdl := NewPartHandler()
w.typeHandlers[contType] = hdl
func (w *Walker) RegisterContentTypeHandler(typeRegExp string, fn HandlerFunc) *Walker {
w.handlers = append(w.handlers, &handler{
typeRegExp: typeRegExp,
fn: fn,
})
return hdl
return w
}
func (w *Walker) RegisterContentDispositionHandler(contDisp string) *DispHandler {
hdl := NewDispHandler()
func (w *Walker) RegisterContentDispositionHandler(dispRegExp string, fn HandlerFunc) *Walker {
w.handlers = append(w.handlers, &handler{
dispRegExp: dispRegExp,
fn: fn,
})
w.dispHandlers[contDisp] = hdl
return hdl
return w
}
func (w *Walker) getHandler(p *Part) handler {
if dispHandler := w.getDispHandler(p); dispHandler != nil {
return dispHandler
func (w *Walker) getHandlerFunc(p *Part) HandlerFunc {
for _, hdl := range w.handlers {
if hdl.matchPart(p) {
return hdl.fn
}
}
return w.getTypeHandler(p)
}
// getTypeHandler returns the appropriate PartHandler to handle the given part.
// If no specialised handler exists, it returns the default handler.
func (w *Walker) getTypeHandler(p *Part) handler {
t, _, err := p.Header.ContentType()
if err != nil {
return w.defaultHandler
}
hdl, ok := w.typeHandlers[t]
if !ok {
return w.defaultHandler
}
return hdl
}
// getDispHandler returns the appropriate DispHandler to handle the given part.
// If no specialised handler exists, it returns nil.
func (w *Walker) getDispHandler(p *Part) handler {
t, _, err := p.Header.ContentDisposition()
if err != nil {
return nil
}
return w.dispHandlers[t]
return w.defaultHandler
}

View File

@ -11,14 +11,13 @@ func TestWalker(t *testing.T) {
allBodies := [][]byte{}
walker := p.
NewWalker().
WithDefaultHandler(NewPartHandler().OnEnter(func(p *Part) (err error) {
walker := p.NewWalker().
RegisterDefaultHandler(func(p *Part) (err error) {
if p.Body != nil {
allBodies = append(allBodies, p.Body)
}
return
}))
})
assert.NoError(t, walker.Walk())
assert.ElementsMatch(t, [][]byte{
@ -32,10 +31,8 @@ func TestWalkerTypeHandler(t *testing.T) {
html := [][]byte{}
walker := p.NewWalker()
walker.RegisterContentTypeHandler("text/html").
OnEnter(func(p *Part) (err error) {
walker := p.NewWalker().
RegisterContentTypeHandler("text/html", func(p *Part) (err error) {
html = append(html, p.Body)
return
})
@ -51,10 +48,8 @@ func TestWalkerDispositionHandler(t *testing.T) {
attachments := [][]byte{}
walker := p.NewWalker()
walker.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) {
walker := p.NewWalker().
RegisterContentDispositionHandler("attachment", func(p *Part) (err error) {
attachments = append(attachments, p.Body)
return
})
@ -65,22 +60,22 @@ func TestWalkerDispositionHandler(t *testing.T) {
}, attachments)
}
func TestWalkerDispositionAndTypeHandler(t *testing.T) {
func TestWalkerDispositionAndTypeHandler_TypeDefinedFirst(t *testing.T) {
p := newTestParser(t, "text_html_octet_attachment.eml")
walker := p.NewWalker()
var typeCalled, dispCalled bool
var enter, exit int
walker.RegisterContentTypeHandler("application/octet-stream").
OnEnter(func(p *Part) (err error) { enter++; return }).
OnExit(func(p *Part) (err error) { exit--; return })
walker.RegisterContentDispositionHandler("attachment").
OnEnter(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return }).
OnExit(func(p *Part, hdl PartHandlerFunc) (err error) { _ = hdl(p); _ = hdl(p); return })
walker := p.NewWalker().
RegisterContentTypeHandler("application/octet-stream", func(p *Part) (err error) {
typeCalled = true
return
}).
RegisterContentDispositionHandler("attachment", func(p *Part) (err error) {
dispCalled = true
return
})
assert.NoError(t, walker.Walk())
assert.Equal(t, 2, enter)
assert.Equal(t, -2, exit)
assert.True(t, typeCalled)
assert.False(t, dispCalled)
}

File diff suppressed because it is too large Load Diff