]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add rfc2047 grammar
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 19 Dec 2016 16:50:36 +0000 (16:50 +0000)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 19 Dec 2016 16:50:36 +0000 (16:50 +0000)
src/CMakeLists.txt
src/libmime/smtp_parsers.h
src/ragel/rfc2047_parser.rl [new file with mode: 0644]

index b73f88cc1da655b4edbd50f62d2b2667079dd5e7..7254eefc26031ccb8f4316e0cd5d01c642fdc994 100644 (file)
@@ -133,6 +133,11 @@ RAGEL_TARGET(ragel_content_disposition
        DEPENDS ${RAGEL_DEPENDS}
        COMPILE_FLAGS -G2
        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/content_disposition.rl.c)
+RAGEL_TARGET(ragel_rfc2047
+       INPUTS ${CMAKE_SOURCE_DIR}/src/ragel/rfc2047_parser.rl
+       DEPENDS ${RAGEL_DEPENDS}
+       COMPILE_FLAGS -G2
+       OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/rfc2047.rl.c)
 ######################### LINK SECTION ###############################
 
 ADD_LIBRARY(rspamd-server STATIC
@@ -148,7 +153,8 @@ ADD_LIBRARY(rspamd-server STATIC
                "${RAGEL_ragel_smtp_received_OUTPUTS}"
                "${RAGEL_ragel_newlines_strip_OUTPUTS}"
                "${RAGEL_ragel_content_type_OUTPUTS}"
-               "${RAGEL_ragel_content_disposition_OUTPUTS}")
+               "${RAGEL_ragel_content_disposition_OUTPUTS}"
+               "${RAGEL_ragel_rfc2047_OUTPUTS}")
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
 TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg)
index d0784c2cd0fe584e4dbf6a9aa246f708b821b70a..905a01f0c6719b21563fb10e25af68450c3cc4dc 100644 (file)
@@ -36,4 +36,9 @@ gboolean rspamd_content_type_parser (const char *data, size_t len,
 gboolean rspamd_content_disposition_parser (const char *data, size_t len,
                struct rspamd_content_disposition *cd, rspamd_mempool_t *pool);
 
+gboolean
+rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
+               const gchar **charset, gsize *charset_len,
+               const gchar **encoded, gsize *encoded_len);
+
 #endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
diff --git a/src/ragel/rfc2047_parser.rl b/src/ragel/rfc2047_parser.rl
new file mode 100644 (file)
index 0000000..9a863fc
--- /dev/null
@@ -0,0 +1,86 @@
+%%{
+  # Grammar for one encoded-word: rfc2047 plus the rfc2231 language
+  # suffix that may follow the charset
+  machine rfc2047_parser;
+
+  # Entering the charset token: remember where it begins
+  action Start_Charset {
+    charset_start = p;
+  }
+
+  # Leaving the charset token: record its end if anything was consumed
+  action End_Charset {
+    if (charset_start && p > charset_start) {
+      charset_end = p;
+    }
+  }
+
+  # Leaving the encoding letter: the character just before p selects
+  # base64 for B or b, anything else is treated as quoted-printable
+  action End_Encoding {
+    if (p > in) {
+      if (*(p - 1) == 'B' || *(p - 1) == 'b') {
+        encoding = RSPAMD_RFC2047_BASE64;
+      }
+      else {
+        encoding = RSPAMD_RFC2047_QP;
+      }
+    }
+  }
+
+  # Entering the encoded text: remember where it begins
+  action Start_Encoded {
+    encoded_start = p;
+  }
+
+  # Leaving the encoded text: record its end if anything was consumed
+  action End_Encoded {
+    if (encoded_start && p > encoded_start) {
+      encoded_end = p;
+    }
+  }
+
+  # Language tag: one or more alphabetic chunks of 1..8 letters joined by "-"
+  primary_tag = alpha{1,8};
+  subtag = alpha{1,8};
+  language = primary_tag ( "-" subtag )*;
+
+  # Characters that may not appear in a token; "*" is listed as well
+  # because it separates the charset from the language
+  especials = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" |
+    "\"" | "/" | "[" | "]" | "?" | "." | "=" | "*";
+  token = (graph - especials)+;
+  charset = token;
+
+  # Single letter selecting the transfer encoding
+  encoding = [QqBb];
+
+  # Printable characters except "?" and space
+  encoded_text = (print - ("?" | " "))+;
+
+  # =?charset[*language]?encoding?encoded_text?=
+  encoded_word = "=?" charset >Start_Charset %End_Charset
+    ("*" language)? "?"
+    encoding %End_Encoding "?"
+    encoded_text >Start_Encoded %End_Encoded
+    "?=";
+
+  main := encoded_word;
+}%%
+
+#include "smtp_parsers.h"
+#include "mime_headers.h"
+
+%% write data;
+
+/*
+ * Run the rfc2047_parser machine over the first `len` bytes of `in`.
+ *
+ * On success the encoding is stored in *pencoding, while *charset /
+ * *charset_len and *encoded / *encoded_len describe the charset token and
+ * the encoded text. The returned pointers refer to positions inside `in`;
+ * nothing is copied.
+ *
+ * Returns TRUE when a non-empty encoded text section has been recorded,
+ * FALSE otherwise; the output arguments are not written on FALSE.
+ *
+ * NOTE(review): success depends only on encoded_end being set, the final
+ * machine state (cs) is not examined here - confirm that this is intended.
+ */
+gboolean
+rspamd_rfc2047_parser (const gchar *in, gsize len, gint *pencoding,
+  const gchar **charset, gsize *charset_len,
+  const gchar **encoded, gsize *encoded_len)
+{
+  /* p, pe, eof and cs are the names the Ragel generated code works with */
+  const char *p = in;
+  const char *pe = in + len;
+  const char *eof = pe;
+  /* Boundaries filled in by the machine actions */
+  const char *charset_start = NULL;
+  const char *charset_end = NULL;
+  const char *encoded_start = NULL;
+  const char *encoded_end = NULL;
+  /* Quoted-printable unless the End_Encoding action selects base64 */
+  gint encoding = RSPAMD_RFC2047_QP;
+  gint cs = 0;
+
+  %% write init;
+  %% write exec;
+
+  if (!encoded_end) {
+    return FALSE;
+  }
+
+  *pencoding = encoding;
+
+  *charset = charset_start;
+  *charset_len = charset_end - charset_start;
+
+  *encoded = encoded_start;
+  *encoded_len = encoded_end - encoded_start;
+
+  return TRUE;
+}