]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Libserver migration process
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 12 Jul 2019 09:01:44 +0000 (10:01 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 12 Jul 2019 14:18:17 +0000 (15:18 +0100)
src/libmime/mime_headers.h
src/libserver/re_cache.c
src/libserver/url.c

index 81948d11b6686f1fa3239da0b5702ca99aebe60a..9b85e758fbca0bfa03ec857015e8d620686453a5 100644 (file)
@@ -51,12 +51,13 @@ enum rspamd_mime_header_flags {
 };
 
 struct rspamd_mime_header {
-       gchar *name; /* Also used for key */
-       gchar *value;
        const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
        gsize raw_len;
        guint order;
        int flags; /* see enum rspamd_mime_header_flags */
+       /* These are zero terminated (historically) */
+       gchar *name; /* Also used for key */
+       gchar *value;
        gchar *separator;
        gchar *decoded;
        struct rspamd_mime_header *prev, *next; /* Headers with the same name */
index 5517ad875cabc367626d2fba03a4212cbded863a..592cc31d4da56cb39f48cd61d25f80e147f4c6bb 100644 (file)
@@ -24,6 +24,7 @@
 #include "libutil/regexp.h"
 #include "lua/lua_common.h"
 #include "libstat/stat_api.h"
+#include "contrib/uthash/utlist.h"
 
 #include "khash.h"
 
@@ -950,6 +951,72 @@ rspamd_process_words_vector (GArray *words,
        return cnt;
 }
 
+static guint
+rspamd_re_cache_process_headers_list (struct rspamd_task *task,
+                                                                         struct rspamd_re_runtime *rt,
+                                                                         rspamd_regexp_t *re,
+                                                                         struct rspamd_re_class *re_class,
+                                                                         struct rspamd_mime_header *rh,
+                                                                         gboolean is_strong)
+{
+       const guchar **scvec, *in;
+       gboolean raw = FALSE;
+       guint *lenvec;
+       struct rspamd_mime_header *cur;
+       guint cnt = 0, i = 0, ret = 0;
+
+       DL_COUNT (rh, cur, cnt);
+
+       scvec = g_malloc (sizeof (*scvec) * cnt);
+       lenvec = g_malloc (sizeof (*lenvec) * cnt);
+
+       DL_FOREACH (rh, cur) {
+
+               if (is_strong && strcmp (cur->name, re_class->type_data) != 0) {
+                       /* Skip a different case */
+                       continue;
+               }
+
+               if (re_class->type == RSPAMD_RE_RAWHEADER) {
+                       in = (const guchar *)cur->value;
+                       lenvec[i] = strlen (cur->value);
+
+                       if (!g_utf8_validate (in, lenvec[i], NULL)) {
+                               raw = TRUE;
+                       }
+               }
+               else {
+                       in = (const guchar *)cur->decoded;
+                       /* Validate input^W^WNo need to validate as it is already valid */
+                       if (!in) {
+                               lenvec[i] = 0;
+                               scvec[i] = (guchar *)"";
+                               continue;
+                       }
+
+                       lenvec[i] = strlen (in);
+               }
+
+               scvec[i] = in;
+
+               i ++;
+       }
+
+       if (i > 0) {
+               ret = rspamd_re_cache_process_regexp_data (rt, re,
+                               task, scvec, lenvec, i, raw);
+               msg_debug_re_task ("checking header %s regexp: %s=%*s -> %d",
+                               re_class->type_data,
+                               rspamd_regexp_get_pattern (re),
+                               (int) lenvec[0], scvec[0], ret);
+       }
+
+       g_free (scvec);
+       g_free (lenvec);
+
+       return ret;
+}
+
 /*
  * Calculates the specified regexp for the specified class if it's not calculated
  */
@@ -961,14 +1028,14 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                gboolean is_strong)
 {
        guint ret = 0, i, re_id;
-       GPtrArray *headerlist;
        GHashTableIter it;
        struct rspamd_mime_header *rh;
-       const gchar *in, *end;
+       const gchar *in;
        const guchar **scvec;
        guint *lenvec;
        gboolean raw = FALSE;
-       struct rspamd_mime_text_part *part;
+       struct rspamd_mime_text_part *text_part;
+       struct rspamd_mime_part *mime_part;
        struct rspamd_url *url;
        gpointer k, v;
        guint len, cnt;
@@ -982,140 +1049,70 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
        case RSPAMD_RE_HEADER:
        case RSPAMD_RE_RAWHEADER:
                /* Get list of specified headers */
-               headerlist = rspamd_message_get_header_array (task,
-                               re_class->type_data,
-                               is_strong);
+               rh = rspamd_message_get_header_array (task,
+                               re_class->type_data);
 
-               if (headerlist && headerlist->len > 0) {
-                       scvec = g_malloc (sizeof (*scvec) * headerlist->len);
-                       lenvec = g_malloc (sizeof (*lenvec) * headerlist->len);
-
-                       for (i = 0; i < headerlist->len; i ++) {
-                               rh = g_ptr_array_index (headerlist, i);
-
-                               if (re_class->type == RSPAMD_RE_RAWHEADER) {
-                                       in = rh->value;
-                                       lenvec[i] = strlen (rh->value);
-
-                                       if (!g_utf8_validate (in, lenvec[i], NULL)) {
-                                               raw = TRUE;
-                                       }
-                               }
-                               else {
-                                       in = rh->decoded;
-                                       /* Validate input */
-                                       if (!in || !g_utf8_validate (in, -1, &end)) {
-                                               lenvec[i] = 0;
-                                               scvec[i] = (guchar *)"";
-                                               continue;
-                                       }
-                                       lenvec[i] = end - in;
-                               }
-
-                               scvec[i] = (guchar *)in;
-                       }
-
-                       ret = rspamd_re_cache_process_regexp_data (rt, re,
-                                       task, scvec, lenvec, headerlist->len, raw);
-                       msg_debug_re_task ("checking header %s regexp: %s=%*s -> %d",
-                                       re_class->type_data,
-                                       rspamd_regexp_get_pattern (re),
-                                       (int)lenvec[0], scvec[0], ret);
-                       g_free (scvec);
-                       g_free (lenvec);
+               if (rh) {
+                       ret = rspamd_re_cache_process_headers_list (task, rt, re,
+                                       re_class, rh, is_strong);
                }
                break;
        case RSPAMD_RE_ALLHEADER:
                raw = TRUE;
-               in = task->raw_headers_content.begin;
-               len = task->raw_headers_content.len;
+               in = MESSAGE_FIELD (task, raw_headers_content).begin;
+               len = MESSAGE_FIELD (task, raw_headers_content).len;
                ret = rspamd_re_cache_process_regexp_data (rt, re,
                                task, (const guchar **)&in, &len, 1, raw);
                msg_debug_re_task ("checking allheader regexp: %s -> %d",
                                rspamd_regexp_get_pattern (re), ret);
                break;
        case RSPAMD_RE_MIMEHEADER:
-               headerlist = rspamd_message_get_mime_header_array (task,
-                               re_class->type_data,
-                               is_strong);
-
-               if (headerlist && headerlist->len > 0) {
-                       scvec = g_malloc (sizeof (*scvec) * headerlist->len);
-                       lenvec = g_malloc (sizeof (*lenvec) * headerlist->len);
-
-                       for (i = 0; i < headerlist->len; i ++) {
-                               rh = g_ptr_array_index (headerlist, i);
-
-                               if (re_class->type == RSPAMD_RE_RAWHEADER) {
-                                       in = rh->value;
-                                       lenvec[i] = strlen (rh->value);
+               PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, mime_part) {
+                       rh = rspamd_message_get_header_from_hash (mime_part->raw_headers,
+                                       re_class->type_data);
 
-                                       if (!g_utf8_validate (in, lenvec[i], NULL)) {
-                                               raw = TRUE;
-                                       }
-                               }
-                               else {
-                                       in = rh->decoded;
-                                       /* Validate input */
-                                       if (!in || !g_utf8_validate (in, -1, &end)) {
-                                               lenvec[i] = 0;
-                                               scvec[i] = (guchar *)"";
-                                               continue;
-                                       }
-
-                                       lenvec[i] = end - in;
-                               }
-
-                               scvec[i] = (guchar *)in;
+                       if (rh) {
+                               ret += rspamd_re_cache_process_headers_list (task, rt, re,
+                                               re_class, rh, is_strong);
                        }
-
-                       ret = rspamd_re_cache_process_regexp_data (rt, re,
-                                       task, scvec, lenvec, headerlist->len, raw);
-                       msg_debug_re_task ("checking mime header %s regexp: %s -> %d",
-                                       re_class->type_data,
-                                       rspamd_regexp_get_pattern (re), ret);
-                       g_free (scvec);
-                       g_free (lenvec);
                }
                break;
        case RSPAMD_RE_MIME:
        case RSPAMD_RE_RAWMIME:
                /* Iterate through text parts */
-               if (task->text_parts->len > 0) {
-                       cnt = task->text_parts->len;
+               if (MESSAGE_FIELD (task, text_parts)->len > 0) {
+                       cnt = MESSAGE_FIELD (task, text_parts)->len;
                        scvec = g_malloc (sizeof (*scvec) * cnt);
                        lenvec = g_malloc (sizeof (*lenvec) * cnt);
 
-                       for (i = 0; i < task->text_parts->len; i++) {
-                               part = g_ptr_array_index (task->text_parts, i);
-
+                       PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) {
                                /* Select data for regexp */
                                if (re_class->type == RSPAMD_RE_RAWMIME) {
-                                       if (part->raw.len == 0) {
+                                       if (text_part->raw.len == 0) {
                                                len = 0;
                                                in = "";
                                        }
                                        else {
-                                               in = part->raw.begin;
-                                               len = part->raw.len;
+                                               in = text_part->raw.begin;
+                                               len = text_part->raw.len;
                                        }
 
                                        raw = TRUE;
                                }
                                else {
                                        /* Skip empty parts */
-                                       if (IS_PART_EMPTY (part)) {
+                                       if (IS_PART_EMPTY (text_part)) {
                                                len = 0;
                                                in = "";
                                        }
                                        else {
                                                /* Check raw flags */
-                                               if (!IS_PART_UTF (part)) {
+                                               if (!IS_PART_UTF (text_part)) {
                                                        raw = TRUE;
                                                }
 
-                                               in = part->utf_content->data;
-                                               len = part->utf_content->len;
+                                               in = text_part->utf_content->data;
+                                               len = text_part->utf_content->len;
                                        }
                                }
 
@@ -1132,12 +1129,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                }
                break;
        case RSPAMD_RE_URL:
-               cnt = g_hash_table_size (task->urls) + g_hash_table_size (task->emails);
+               cnt = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
+                               g_hash_table_size (MESSAGE_FIELD (task, emails));
 
                if (cnt > 0) {
                        scvec = g_malloc (sizeof (*scvec) * cnt);
                        lenvec = g_malloc (sizeof (*lenvec) * cnt);
-                       g_hash_table_iter_init (&it, task->urls);
+                       g_hash_table_iter_init (&it, MESSAGE_FIELD (task, urls));
                        i = 0;
 
                        while (g_hash_table_iter_next (&it, &k, &v)) {
@@ -1150,7 +1148,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                lenvec[i++] = len;
                        }
 
-                       g_hash_table_iter_init (&it, task->emails);
+                       g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
 
                        while (g_hash_table_iter_next (&it, &k, &v)) {
                                url = v;
@@ -1191,7 +1189,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                 * paragraph when running the rules. All HTML tags and line breaks will
                 * be removed before matching.
                 */
-               cnt = task->text_parts->len + 1;
+               cnt = MESSAGE_FIELD (task, text_parts)->len + 1;
                scvec = g_malloc (sizeof (*scvec) * cnt);
                lenvec = g_malloc (sizeof (*lenvec) * cnt);
 
@@ -1200,11 +1198,9 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                 * of the body content.
                 */
 
-               headerlist = rspamd_message_get_header_array (task, "Subject", FALSE);
-
-               if (headerlist && headerlist->len > 0) {
-                       rh = g_ptr_array_index (headerlist, 0);
+               rh = rspamd_message_get_header_array (task, "Subject");
 
+               if (rh) {
                        scvec[0] = (guchar *)rh->decoded;
                        lenvec[0] = strlen (rh->decoded);
                }
@@ -1212,14 +1208,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                        scvec[0] = (guchar *)"";
                        lenvec[0] = 0;
                }
-               for (i = 0; i < task->text_parts->len; i++) {
-                       part = g_ptr_array_index (task->text_parts, i);
 
-                       if (part->utf_stripped_content) {
-                               scvec[i + 1] = (guchar *)part->utf_stripped_content->data;
-                               lenvec[i + 1] = part->utf_stripped_content->len;
+               PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) {
+                       if (text_part->utf_stripped_content) {
+                               scvec[i + 1] = (guchar *)text_part->utf_stripped_content->data;
+                               lenvec[i + 1] = text_part->utf_stripped_content->len;
 
-                               if (!IS_PART_UTF (part)) {
+                               if (!IS_PART_UTF (text_part)) {
                                        raw = TRUE;
                                }
                        }
@@ -1244,19 +1239,19 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                 * Multiline expressions will need to be used to match strings that are
                 * broken by line breaks.
                 */
-               if (task->text_parts->len > 0) {
-                       cnt = task->text_parts->len;
+               if (MESSAGE_FIELD (task, text_parts)->len > 0) {
+                       cnt = MESSAGE_FIELD (task, text_parts)->len;
                        scvec = g_malloc (sizeof (*scvec) * cnt);
                        lenvec = g_malloc (sizeof (*lenvec) * cnt);
 
-                       for (i = 0; i < task->text_parts->len; i++) {
-                               part = g_ptr_array_index (task->text_parts, i);
+                       for (i = 0; i < cnt; i++) {
+                               text_part = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), i);
 
-                               if (part->parsed.len > 0) {
-                                       scvec[i] = (guchar *)part->parsed.begin;
-                                       lenvec[i] = part->parsed.len;
+                               if (text_part->parsed.len > 0) {
+                                       scvec[i] = (guchar *)text_part->parsed.begin;
+                                       lenvec[i] = text_part->parsed.len;
 
-                                       if (!IS_PART_UTF (part)) {
+                                       if (!IS_PART_UTF (text_part)) {
                                                raw = TRUE;
                                        }
                                }
@@ -1277,13 +1272,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
        case RSPAMD_RE_WORDS:
        case RSPAMD_RE_STEMWORDS:
        case RSPAMD_RE_RAWWORDS:
-               if (task->text_parts->len > 0) {
+               if (MESSAGE_FIELD (task, text_parts)->len > 0) {
                        cnt = 0;
                        raw = FALSE;
 
-                       PTR_ARRAY_FOREACH (task->text_parts, i, part) {
-                               if (part->utf_words) {
-                                       cnt += part->utf_words->len;
+                       PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) {
+                               if (text_part->utf_words) {
+                                       cnt += text_part->utf_words->len;
                                }
                        }
 
@@ -1297,9 +1292,9 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 
                                cnt = 0;
 
-                               PTR_ARRAY_FOREACH (task->text_parts, i, part) {
-                                       if (part->utf_words) {
-                                               cnt = rspamd_process_words_vector (part->utf_words,
+                               PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, text_part) {
+                                       if (text_part->utf_words) {
+                                               cnt = rspamd_process_words_vector (text_part->utf_words,
                                                                scvec, lenvec, re_class, cnt, &raw);
                                        }
                                }
@@ -1522,6 +1517,7 @@ rspamd_re_cache_type_to_string (enum rspamd_re_type type)
                ret = "stem_words";
                break;
        case RSPAMD_RE_MAX:
+       default:
                ret = "invalid class";
                break;
        }
index 0b31007bbe7076a17c6c2282229078cb4922bbf8..26e328a6d3a8a6499818d049967724a11ef0d690 100644 (file)
@@ -2949,11 +2949,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        if (url->protocol == PROTOCOL_MAILTO) {
                if (url->userlen > 0) {
-                       target_tbl = task->emails;
+                       target_tbl = MESSAGE_FIELD (task, emails);
                }
        }
        else {
-               target_tbl = task->urls;
+               target_tbl = MESSAGE_FIELD (task, urls);
        }
 
        if (target_tbl) {
@@ -2996,11 +2996,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
                                if (query_url->protocol == PROTOCOL_MAILTO) {
                                        if (query_url->userlen > 0) {
-                                               target_tbl = task->emails;
+                                               target_tbl = MESSAGE_FIELD (task, emails);
                                        }
                                }
                                else {
-                                       target_tbl = task->urls;
+                                       target_tbl = MESSAGE_FIELD (task, urls);
                                }
 
                                if (target_tbl) {
@@ -3115,9 +3115,10 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
        url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED|RSPAMD_URL_FLAG_SUBJECT;
 
        if (url->protocol == PROTOCOL_MAILTO) {
-               if (url->userlen > 0) {
-                       if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
-                               g_hash_table_insert (task->emails, url,
+               if (url->userlen > 0 && url->hostlen > 0) {
+                       if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, emails),
+                                       url)) == NULL) {
+                               g_hash_table_insert (MESSAGE_FIELD (task, emails), url,
                                                url);
                        }
                        else {
@@ -3126,8 +3127,9 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                }
        }
        else {
-               if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
-                       g_hash_table_insert (task->urls, url, url);
+               if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
+                               url)) == NULL) {
+                       g_hash_table_insert (MESSAGE_FIELD (task, urls), url, url);
                }
                else {
                        existing->count ++;
@@ -3156,9 +3158,9 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                                        query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
                                }
 
-                               if ((existing = g_hash_table_lookup (task->urls,
+                               if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
                                                query_url)) == NULL) {
-                                       g_hash_table_insert (task->urls,
+                                       g_hash_table_insert (MESSAGE_FIELD (task, urls),
                                                        query_url,
                                                        query_url);
                                }