git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Start support of MIME UTF8
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Jul 2024 12:43:27 +0000 (13:43 +0100)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 15 Jul 2024 12:43:27 +0000 (13:43 +0100)
src/ragel/content_disposition.rl
src/ragel/smtp_address.rl
src/ragel/smtp_base.rl

index 862015ea196364fc639639e0cd93708241470350..93d3c9d3d348abc297cc198b3e866427bfffa93d 100644 (file)
@@ -7,7 +7,7 @@
   balanced_ccontent := ccontent* ')' @{ fret; };
   comment        =   "(" (FWS? ccontent)* FWS? ")";
   CFWS           =   ((FWS? comment)+ FWS?) | FWS;
-  qcontent = qtextSMTP | quoted_pairSMTP | textUTF8;
+  qcontent = qtextSMTP | quoted_pairSMTP;
   quoted_string = CFWS?
                   (DQUOTE
                     (((FWS? qcontent)* FWS?) >Quoted_Str_Start %Quoted_Str_End)
index 0caf1a65e4fc0dc6121be47dea153051f724c95d..eb0fc2d9d1a5501e8e195b06ad190cfd49b716d7 100644 (file)
@@ -24,6 +24,7 @@
 
   # SMTP address spec
   # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2
+  # Additions from rfc6532 (smtputf8): https://tools.ietf.org/html/rfc6532#section-3.2
 
   QcontentSMTP   = qtextSMTP | quoted_pairSMTP %User_has_backslash;
   Quoted_string  = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr;
index cb4f066bc395da70bb4964b6dd82186c9fedd5c4..26999eb514dad9c039ef7714b87b96db4c66f3cf 100644 (file)
@@ -9,26 +9,27 @@
   CRLF            =   "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
   DQUOTE = '"';
 
+  utf8_cont = 0x80..0xbf;
+  utf8_2c   = 0xc0..0xdf utf8_cont;
+  utf8_3c   = 0xe0..0xef utf8_cont utf8_cont;
+  utf8_4c   = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
+  UTF8_non_ascii  =   utf8_2c | utf8_3c | utf8_4c;
+
   # Printable US-ASCII characters not including specials
   atext = alpha | digit | "!" | "#" | "$" | "%" | "&" |
           "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" |
-          "-" | "`" | "{" | "|" | "}" | "~";
+          "-" | "`" | "{" | "|" | "}" | "~" | UTF8_non_ascii;
   # Printable US-ASCII characters not including "[", "]", or "\"
-  dtext = 33..90 | 94..126;
+  dtext = 33..90 | 94..126 | UTF8_non_ascii;
   # Printable US-ASCII characters not including  "(", ")", or "\"
-  ctext = 33..39 | 42..91 | 93..126;
+  ctext = 33..39 | 42..91 | 93..126 | UTF8_non_ascii;
 
-  dcontent       = 33..90 | 94..126;
+  dcontent       = 33..90 | 94..126 | UTF8_non_ascii;
   Let_dig        = alpha | digit;
   Ldh_str        = ( alpha | digit | "_" | "-" )* Let_dig;
 
   quoted_pairSMTP  = "\\" 32..126;
-  qtextSMTP      = 32..33 | 35..91 | 93..126;
-  utf8_cont = 0x80..0xbf;
-  utf8_2c   = 0xc0..0xdf utf8_cont;
-  utf8_3c   = 0xe0..0xef utf8_cont utf8_cont;
-  utf8_4c   = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
-  textUTF8  = qtextSMTP | utf8_2c | utf8_3c | utf8_4c;
+  qtextSMTP      = 32..33 | 35..91 | 93..126 | UTF8_non_ascii;
   Atom           = atext+;
   Dot_string     = Atom ("."  Atom)*;
   dot_atom_text  = atext+ ("." atext+)*;