git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Move regexp escape function to the public space
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Apr 2018 09:32:48 +0000 (10:32 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 23 Apr 2018 09:42:05 +0000 (10:42 +0100)
src/libutil/multipattern.c
src/libutil/str_util.c
src/libutil/str_util.h

index e55b5d0b5f14e2019cda7c47320df147cdeb6e52..94b5398b34f7a04bdae3e876c875e53f01334a6e 100644 (file)
@@ -133,117 +133,6 @@ rspamd_multipattern_escape_tld_hyperscan (const gchar *pattern, gsize slen,
        return res;
 }
 
-static gchar *
-rspamd_multipattern_escape_hyperscan (const gchar *pattern, gsize slen,
-               gsize *dst_len, gboolean allow_glob)
-{      /*
-        * Builds a newly allocated, zero terminated copy of `pattern` in which
-        * regexp metacharacters and ASCII whitespace are prefixed with a
-        * backslash and other non-printable bytes are written as \xHH.
-        * The length of the result (without the terminator) is stored in
-        * *dst_len, which is dereferenced unconditionally and so must not be
-        * NULL. With allow_glob set, '*', '?' and '+' are prefixed with '.'
-        * instead of a backslash.
-        */
-       const gchar *p, *end = pattern + slen; /* read cursor, one past the last input byte */
-       gchar *res, *d, t; /* output buffer, write cursor, current input byte */
-       gsize len; /* size of the escaped output without the terminator */
-       static const gchar hexdigests[16] = "0123456789abcdef"; /* nibble -> hex digit */
-
-       len = slen;
-       p = pattern;
-
-       /*
-        * First pass: compute how many bytes the escaped output needs.
-        * [-[\]{}()*+?.,\\^$|#\s] need to be escaped
-        */
-       while (p < end) {
-               t = *p ++;
-
-               switch (t) {
-               case '[':
-               case ']':
-               case '-':
-               case '\\':
-               case '{':
-               case '}':
-               case '(':
-               case ')':
-               case '*':
-               case '+':
-               case '?':
-               case '.':
-               case ',':
-               case '^':
-               case '$':
-               case '|':
-               case '#':
-                       len ++; /* one extra byte for the prefix character */
-                       break;
-               default:
-                       if (g_ascii_isspace (t)) {
-                               len ++; /* whitespace also gets a one byte prefix */
-                       }
-                       else if (!g_ascii_isprint (t)) {
-                               /* \\xHH -> 4 symbols, i.e. 3 more than the source byte */
-                               len += 3;
-                       }
-                       break;
-               }
-       }
-
-       if (slen == len) { /* nothing needs escaping: return a plain copy */
-               *dst_len = slen;
-               return g_strdup (pattern); /* NOTE(review): copies up to the first NUL, not slen bytes; assumes pattern is zero terminated at slen - confirm with callers */
-       }
-
-       res = g_malloc (len + 1); /* +1 for the terminating zero */
-       p = pattern;
-       d = res;
-
-       while (p < end) { /* second pass: write the escaped bytes */
-               t = *p ++;
-
-               switch (t) {
-               case '[':
-               case ']':
-               case '-':
-               case '\\':
-               case '{':
-               case '}':
-               case '(':
-               case ')':
-               case '.':
-               case ',':
-               case '^':
-               case '$':
-               case '|':
-               case '#':
-                       *d++ = '\\'; /* prefix only; the byte itself is appended after the switch */
-                       break;
-               case '*':
-               case '?':
-               case '+':
-                       if (allow_glob) {
-                               /* Treat * as .*, ? as .? and + as .+ */
-                               *d++ = '.';
-                       }
-                       else {
-                               *d++ = '\\';
-                       }
-                       break;
-               default:
-                       if (g_ascii_isspace (t)) {
-                               *d++ = '\\';
-                       }
-                       else if (!g_ascii_isgraph (t)) { /* the sizing pass tests isprint instead; the two differ only for ' ', which is presumably already handled by the isspace branch */
-                               *d++ = '\\';
-                               *d++ = 'x';
-                               *d++ = hexdigests[((t >> 4) & 0xF)]; /* high nibble */
-                               *d++ = hexdigests[((t) & 0xF)]; /* low nibble */
-                               continue; /* To avoid *d++ = t; the raw byte is replaced by \xHH */
-                       }
-                       break;
-               }
-
-               *d++ = t; /* the original byte, after any prefix written above */
-       }
-
-       *d = '\0';
-       *dst_len = d - res; /* number of bytes written, terminator excluded */
-
-       return res;
-}
-
 #endif
 static gchar *
 rspamd_multipattern_escape_tld_acism (const gchar *pattern, gsize len,
@@ -312,10 +201,10 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
                        *dst_len = rspamd_strlcpy (ret, pattern, len + 1);
                }
                else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
-                       ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, TRUE);
+                       ret = rspamd_str_regexp_escape (pattern, len, dst_len, TRUE);
                }
                else {
-                       ret = rspamd_multipattern_escape_hyperscan (pattern, len, dst_len, FALSE);
+                       ret = rspamd_str_regexp_escape (pattern, len, dst_len, FALSE);
                }
 
                return ret;
index 3b1f3c1e307a292a1e413f4aba025af5fa1c6b4a..186ce5d3823b71e244db6f976cf14a39b49ac58f 100644 (file)
@@ -2093,3 +2093,120 @@ rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
        return FALSE;
 #endif
 }
+
+/**
+ * Escape a raw byte string so that it matches literally when compiled as a
+ * regular expression.
+ *
+ * Regexp metacharacters and ASCII whitespace are prefixed with a backslash,
+ * bytes that are neither whitespace nor printable ASCII are written as \xHH.
+ *
+ * @param pattern input bytes; only the first `slen` bytes are read, so the
+ * input does not have to be zero terminated
+ * @param slen number of bytes in `pattern`
+ * @param dst_len if not NULL, receives the length of the result without the
+ * terminating zero
+ * @param allow_glob if TRUE, `*`, `?` and `+` are emitted as `.*`, `.?` and
+ * `.+` instead of being escaped
+ * @return newly allocated zero terminated string owned by the caller
+ */
+gchar *
+rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
+               gsize *dst_len, gboolean allow_glob)
+{
+       const gchar *p, *end = pattern + slen;
+       gchar *res, *d, t;
+       gsize len;
+       static const gchar hexdigests[16] = "0123456789abcdef";
+
+       len = slen;
+       p = pattern;
+
+       /*
+        * First pass: compute how many bytes the escaped output needs.
+        * [-[\]{}()*+?.,\\^$|#\s] need to be escaped
+        */
+       while (p < end) {
+               t = *p ++;
+
+               switch (t) {
+               case '[':
+               case ']':
+               case '-':
+               case '\\':
+               case '{':
+               case '}':
+               case '(':
+               case ')':
+               case '*':
+               case '+':
+               case '?':
+               case '.':
+               case ',':
+               case '^':
+               case '$':
+               case '|':
+               case '#':
+                       /* One extra byte for the prefix ('\\' or '.') */
+                       len ++;
+                       break;
+               default:
+                       if (g_ascii_isspace (t)) {
+                               len ++;
+                       }
+                       else if (!g_ascii_isprint (t)) {
+                               /* \\xHH -> 4 symbols, i.e. 3 more than the source byte */
+                               len += 3;
+                       }
+                       break;
+               }
+       }
+
+       if (slen == len) {
+               /*
+                * Nothing to escape. Copy exactly slen bytes: the input is
+                * described by an explicit length and is not required to be
+                * zero terminated, so g_strdup () could read past its end and
+                * return a string longer than the reported *dst_len.
+                */
+               if (dst_len) {
+                       *dst_len = slen;
+               }
+
+               return g_strndup (pattern, slen);
+       }
+
+       res = g_malloc (len + 1);
+       p = pattern;
+       d = res;
+
+       /* Second pass: write the escaped bytes */
+       while (p < end) {
+               t = *p ++;
+
+               switch (t) {
+               case '[':
+               case ']':
+               case '-':
+               case '\\':
+               case '{':
+               case '}':
+               case '(':
+               case ')':
+               case '.':
+               case ',':
+               case '^':
+               case '$':
+               case '|':
+               case '#':
+                       /* Prefix only, the byte itself is appended after the switch */
+                       *d++ = '\\';
+                       break;
+               case '*':
+               case '?':
+               case '+':
+                       if (allow_glob) {
+                               /* Treat * as .*, ? as .? and + as .+ */
+                               *d++ = '.';
+                       }
+                       else {
+                               *d++ = '\\';
+                       }
+                       break;
+               default:
+                       if (g_ascii_isspace (t)) {
+                               *d++ = '\\';
+                       }
+                       else if (!g_ascii_isprint (t)) {
+                               /*
+                                * Same predicate as in the sizing pass, so the
+                                * number of bytes written here always matches
+                                * the number of bytes that were allocated.
+                                */
+                               *d++ = '\\';
+                               *d++ = 'x';
+                               *d++ = hexdigests[((t >> 4) & 0xF)];
+                               *d++ = hexdigests[((t) & 0xF)];
+                               continue; /* To avoid *d++ = t; */
+                       }
+                       break;
+               }
+
+               *d++ = t;
+       }
+
+       *d = '\0';
+
+       if (dst_len) {
+               *dst_len = d - res;
+       }
+
+       return res;
+}
index 5f0695c2a025f8b5cade08efee5c108ed3faf306..45507e2be6e46d4c25bb92f1a4f224685baab2b0 100644 (file)
@@ -375,4 +375,16 @@ rspamd_str_has_8bit (const guchar *beg, gsize len)
 gboolean rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
                gchar *start, guint *len);
 
+/**
+ * Escapes special characters when reading plain data to be processed in pcre.
+ * Regexp metacharacters and ASCII whitespace are prefixed with a backslash,
+ * other non-printable bytes are written as \xHH.
+ * @param pattern pattern to process
+ * @param slen source length in bytes
+ * @param dst_len destination length pointer (can be NULL); when set, it
+ * receives the length of the result without the terminating zero
+ * @param allow_glob allow glob expressions to be translated into pcre: if
+ * TRUE, `*`, `?` and `+` are prefixed with `.` (giving `.*`, `.?`, `.+`)
+ * instead of being escaped
+ * @return newly allocated zero terminated escaped pattern; the caller owns
+ * it and releases it with g_free ()
+ */
+gchar *
+rspamd_str_regexp_escape (const gchar *pattern, gsize slen,
+               gsize *dst_len, gboolean allow_glob);
+
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */