mirror of
https://github.com/ProtonMail/proton-bridge.git
synced 2025-12-24 10:56:43 +00:00
feat: custom address/date parser based on rfc5322 abnf
This commit is contained in:
530
pkg/message/rfc5322/RFC5322Parser.g4
Normal file
530
pkg/message/rfc5322/RFC5322Parser.g4
Normal file
@ -0,0 +1,530 @@
|
||||
// Copyright (c) 2020 Proton Technologies AG
|
||||
//
|
||||
// This file is part of ProtonMail Bridge.
|
||||
//
|
||||
// ProtonMail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// ProtonMail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with ProtonMail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
parser grammar RFC5322Parser;
|
||||
|
||||
options { tokenVocab=RFC5322Lexer; }
|
||||
|
||||
|
||||
// -------------------
|
||||
// 3.2. Lexical tokens
|
||||
// -------------------
|
||||
|
||||
quotedChar: vchar | wsp;
|
||||
|
||||
quotedPair
|
||||
: Backslash quotedChar
|
||||
| obsQP
|
||||
;
|
||||
|
||||
fws
|
||||
: (wsp* crlf)? wsp+
|
||||
| obsFWS
|
||||
;
|
||||
|
||||
ctext
|
||||
: alpha
|
||||
| Exclamation
|
||||
| DQuote
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Comma
|
||||
| Minus
|
||||
| Period
|
||||
| Slash
|
||||
| Digit
|
||||
| Colon
|
||||
| Semicolon
|
||||
| Less
|
||||
| Equal
|
||||
| Greater
|
||||
| Question
|
||||
| At
|
||||
| LBracket
|
||||
| RBracket
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
| obsCtext
|
||||
| UTF8NonAscii
|
||||
;
|
||||
|
||||
ccontent
|
||||
: ctext
|
||||
| quotedPair
|
||||
| comment
|
||||
;
|
||||
|
||||
comment: LParens (fws? ccontent)* fws? RParens;
|
||||
|
||||
cfws
|
||||
: (fws? comment)+ fws?
|
||||
| fws
|
||||
;
|
||||
|
||||
atext
|
||||
: alpha
|
||||
| Digit
|
||||
| Exclamation
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Minus
|
||||
| Slash
|
||||
| Equal
|
||||
| Question
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
| UTF8NonAscii
|
||||
;
|
||||
|
||||
atom: atext+;
|
||||
|
||||
// Allow dotAtom to have a trailing period; some messages in the wild look like this.
|
||||
dotAtom: atext+ (Period atext+)* Period?;
|
||||
|
||||
qtext
|
||||
: alpha
|
||||
| Exclamation
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| LParens
|
||||
| RParens
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Comma
|
||||
| Minus
|
||||
| Period
|
||||
| Slash
|
||||
| Digit
|
||||
| Colon
|
||||
| Semicolon
|
||||
| Less
|
||||
| Equal
|
||||
| Greater
|
||||
| Question
|
||||
| At
|
||||
| LBracket
|
||||
| RBracket
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
| obsQtext
|
||||
| UTF8NonAscii
|
||||
;
|
||||
|
||||
quotedContent
|
||||
: qtext
|
||||
| quotedPair
|
||||
;
|
||||
|
||||
quotedValue: (fws? quotedContent)*;
|
||||
|
||||
quotedString: DQuote quotedValue fws? DQuote;
|
||||
|
||||
// Allow word to consist of the @ token.
|
||||
word
|
||||
: cfws? encodedWord cfws?
|
||||
| cfws? atom cfws?
|
||||
| cfws? quotedString cfws?
|
||||
| At
|
||||
;
|
||||
|
||||
|
||||
// --------------------------------
|
||||
// 3.3. Date and Time Specification
|
||||
// --------------------------------
|
||||
|
||||
dateTime: (dayOfweek Comma)? day month year hour Colon minute (Colon second)? zone? cfws? EOF;
|
||||
|
||||
dayOfweek
|
||||
: fws? dayName
|
||||
| cfws? dayName cfws?
|
||||
;
|
||||
|
||||
dayName
|
||||
: M O N
|
||||
| T U E
|
||||
| W E D
|
||||
| T H U
|
||||
| F R I
|
||||
| S A T
|
||||
| S U N
|
||||
;
|
||||
|
||||
day
|
||||
: fws? Digit Digit? fws
|
||||
| cfws? Digit Digit? cfws?
|
||||
;
|
||||
|
||||
month
|
||||
: J A N
|
||||
| F E B
|
||||
| M A R
|
||||
| A P R
|
||||
| M A Y
|
||||
| J U N
|
||||
| J U L
|
||||
| A U G
|
||||
| S E P
|
||||
| O C T
|
||||
| N O V
|
||||
| D E C
|
||||
;
|
||||
|
||||
year
|
||||
: fws Digit Digit Digit Digit fws
|
||||
| cfws? Digit Digit cfws?
|
||||
;
|
||||
|
||||
// NOTE: RFC5322 requires two digits for the hour, but we
|
||||
// relax that requirement a bit, allowing single digits.
|
||||
hour
|
||||
: Digit? Digit
|
||||
| cfws? Digit? Digit cfws?
|
||||
;
|
||||
|
||||
minute
|
||||
: Digit Digit
|
||||
| cfws? Digit Digit cfws?
|
||||
;
|
||||
|
||||
second
|
||||
: Digit Digit
|
||||
| cfws? Digit Digit cfws?
|
||||
;
|
||||
|
||||
offset: (Plus | Minus)? Digit Digit Digit Digit;
|
||||
|
||||
zone
|
||||
: fws offset
|
||||
| obsZone
|
||||
;
|
||||
|
||||
|
||||
// --------------------------
|
||||
// 3.4. Address Specification
|
||||
// --------------------------
|
||||
|
||||
address
|
||||
: mailbox
|
||||
| group
|
||||
;
|
||||
|
||||
mailbox
|
||||
: nameAddr
|
||||
| addrSpec
|
||||
;
|
||||
|
||||
nameAddr: displayName? angleAddr;
|
||||
|
||||
angleAddr
|
||||
: cfws? Less addrSpec? Greater cfws?
|
||||
| obsAngleAddr
|
||||
;
|
||||
|
||||
group: displayName Colon groupList? Semicolon cfws?;
|
||||
|
||||
displayName
|
||||
: word+
|
||||
| word (word | Period | cfws)*
|
||||
;
|
||||
|
||||
mailboxList
|
||||
: mailbox (Comma mailbox)*
|
||||
| obsMboxList
|
||||
;
|
||||
|
||||
addressList
|
||||
: address (Comma address)* EOF
|
||||
| obsAddrList EOF
|
||||
;
|
||||
|
||||
groupList
|
||||
: mailboxList
|
||||
| cfws
|
||||
| obsGroupList
|
||||
;
|
||||
|
||||
// Allow addrSpec contain a port.
|
||||
addrSpec: localPart At domain (Colon port)?;
|
||||
|
||||
localPart
|
||||
: cfws? dotAtom cfws?
|
||||
| cfws? quotedString cfws?
|
||||
| obsLocalPart
|
||||
;
|
||||
|
||||
port: Digit+;
|
||||
|
||||
domain
|
||||
: cfws? dotAtom cfws?
|
||||
| cfws? domainLiteral cfws?
|
||||
| cfws? obsDomain cfws?
|
||||
;
|
||||
|
||||
domainLiteral: LBracket (fws? dtext)* fws? RBracket;
|
||||
|
||||
dtext
|
||||
: alpha
|
||||
| Exclamation
|
||||
| DQuote
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| LParens
|
||||
| RParens
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Comma
|
||||
| Minus
|
||||
| Period
|
||||
| Slash
|
||||
| Digit
|
||||
| Colon
|
||||
| Semicolon
|
||||
| Less
|
||||
| Equal
|
||||
| Greater
|
||||
| Question
|
||||
| At
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
//| obsDtext
|
||||
| UTF8NonAscii
|
||||
;
|
||||
|
||||
|
||||
// ----------------------------------
|
||||
// 4.1. Miscellaneous Obsolete Tokens
|
||||
// ----------------------------------
|
||||
|
||||
obsNoWSCTL
|
||||
: U_01_08
|
||||
| U_0B
|
||||
| U_0C
|
||||
| U_0E_1F
|
||||
| Delete
|
||||
;
|
||||
|
||||
obsCtext: obsNoWSCTL;
|
||||
|
||||
obsQtext: obsNoWSCTL;
|
||||
|
||||
obsQP: Backslash (U_00 | obsNoWSCTL | LF | CR);
|
||||
|
||||
|
||||
// ---------------------------------
|
||||
// 4.2. Obsolete Folding White Space
|
||||
// ---------------------------------
|
||||
|
||||
obsFWS: wsp+ (crlf wsp+);
|
||||
|
||||
|
||||
// ---------------------------
|
||||
// 4.3. Obsolete Date and Time
|
||||
// ---------------------------
|
||||
|
||||
obsZone
|
||||
: U T
|
||||
| U T C
|
||||
| G M T
|
||||
| E S T
|
||||
| E D T
|
||||
| C S T
|
||||
| C D T
|
||||
| M S T
|
||||
| M D T
|
||||
| P S T
|
||||
| P D T
|
||||
//| obsZoneMilitary
|
||||
;
|
||||
|
||||
|
||||
// ------------------------
|
||||
// 4.4. Obsolete Addressing
|
||||
// ------------------------
|
||||
|
||||
obsAngleAddr: cfws? Less obsRoute addrSpec Greater cfws?;
|
||||
|
||||
obsRoute: obsDomainList Colon;
|
||||
|
||||
obsDomainList: (cfws | Comma)* At domain (Comma cfws? (At domain)?)*;
|
||||
|
||||
obsMboxList: (cfws? Comma)* mailbox (Comma (mailbox | cfws)?)*;
|
||||
|
||||
obsAddrList: (cfws? Comma)* address (Comma (address | cfws)?)*;
|
||||
|
||||
obsGroupList: (cfws? Comma)+ cfws?;
|
||||
|
||||
obsLocalPart: word (Period word)*;
|
||||
|
||||
obsDomain: atom (Period atom)*;
|
||||
|
||||
|
||||
// ------------------------------------
|
||||
// 2. Syntax of encoded-words (RFC2047)
|
||||
// ------------------------------------
|
||||
|
||||
encodedWord: Equal Question charset Question encoding Question encodedText Question Equal;
|
||||
|
||||
charset: token;
|
||||
|
||||
encoding: token;
|
||||
|
||||
token: tokenChar+;
|
||||
|
||||
tokenChar
|
||||
: alpha
|
||||
| Exclamation
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Minus
|
||||
| Digit
|
||||
| Backslash
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
;
|
||||
|
||||
encodedText: encodedChar+;
|
||||
|
||||
encodedChar
|
||||
: alpha
|
||||
| Exclamation
|
||||
| DQuote
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| LParens
|
||||
| RParens
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Comma
|
||||
| Minus
|
||||
| Period
|
||||
| Slash
|
||||
| Digit
|
||||
| Colon
|
||||
| Semicolon
|
||||
| Less
|
||||
| Equal
|
||||
| Greater
|
||||
| At
|
||||
| LBracket
|
||||
| Backslash
|
||||
| RBracket
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
;
|
||||
|
||||
|
||||
// -------------------------
|
||||
// B.1. Core Rules (RFC5234)
|
||||
// -------------------------
|
||||
|
||||
crlf: CR LF;
|
||||
|
||||
wsp: SP | TAB;
|
||||
|
||||
vchar
|
||||
: alpha
|
||||
| Exclamation
|
||||
| DQuote
|
||||
| Hash
|
||||
| Dollar
|
||||
| Percent
|
||||
| Ampersand
|
||||
| SQuote
|
||||
| LParens
|
||||
| RParens
|
||||
| Asterisk
|
||||
| Plus
|
||||
| Comma
|
||||
| Minus
|
||||
| Period
|
||||
| Slash
|
||||
| Digit
|
||||
| Colon
|
||||
| Semicolon
|
||||
| Less
|
||||
| Equal
|
||||
| Greater
|
||||
| Question
|
||||
| At
|
||||
| LBracket
|
||||
| Backslash
|
||||
| RBracket
|
||||
| Caret
|
||||
| Underscore
|
||||
| Backtick
|
||||
| LCurly
|
||||
| Pipe
|
||||
| RCurly
|
||||
| Tilde
|
||||
| UTF8NonAscii
|
||||
;
|
||||
|
||||
alpha: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z ;
|
||||
Reference in New Issue
Block a user