]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] Urls: Improve query urls handling
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 22 Mar 2020 13:08:26 +0000 (13:08 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 22 Mar 2020 13:08:26 +0000 (13:08 +0000)
src/libserver/url.c
src/libserver/url.h

index e0f05c3b0f0f55e8050ac66e6516d490dfc11215..30872c38dbd800fe50d3f004c33efcde97c596be 100644 (file)
@@ -3213,11 +3213,44 @@ struct rspamd_url_mimepart_cbdata {
        gsize url_len;
 };
 
+/* Callback for URLs found inside another URL's query string: tags each as
+ * RSPAMD_URL_FLAG_QUERY and adds it to the task URL set. Returns FALSE for a
+ * mailto: URL with an empty user part or when cfg->max_urls is exceeded. */
+static gboolean
+rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
+                                                  gsize end_offset, gpointer ud)
+{
+       struct rspamd_url_mimepart_cbdata *cbdata =
+                       (struct rspamd_url_mimepart_cbdata *)ud;
+       struct rspamd_task *task = cbdata->task;
+
+       /* A mailto: URL with an empty user part is rejected */
+       if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
+               return FALSE;
+       }
+
+       /* The cap check is skipped when there is no cfg or max_urls is 0 */
+       if (task->cfg != NULL && task->cfg->max_urls > 0 &&
+                       kh_size (MESSAGE_FIELD (task, urls)) > task->cfg->max_urls) {
+               msg_err_task ("part has too many URLs, we cannot process more: "
+                                         "%d urls extracted ",
+                               (guint)kh_size (MESSAGE_FIELD (task, urls)));
+
+               return FALSE;
+       }
+
+       url->flags |= RSPAMD_URL_FLAG_QUERY;
+       rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+       return TRUE;
+}
+
 static gboolean
 rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
                gsize end_offset, gpointer ud)
 {
-       struct rspamd_url_mimepart_cbdata *cbd = ud;
+       struct rspamd_url_mimepart_cbdata *cbd =
+                       (struct rspamd_url_mimepart_cbdata *)ud;
        struct rspamd_process_exception *ex;
        struct rspamd_task *task;
        gchar *url_str = NULL;
@@ -3270,36 +3303,10 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        /* We also search the query for additional url inside */
        if (url->querylen > 0) {
-               if (rspamd_url_find (task->task_pool,
+               rspamd_url_find_multiple (task->task_pool,
                                rspamd_url_query_unsafe (url), url->querylen,
-                               &url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
-                       query_url = rspamd_mempool_alloc0 (task->task_pool,
-                                       sizeof (struct rspamd_url));
-                       rc = rspamd_url_parse (query_url,
-                                       url_str,
-                                       strlen (url_str),
-                                       task->task_pool,
-                                       RSPAMD_URL_PARSE_TEXT);
-
-                       if (rc == URI_ERRNO_OK &&
-                                       query_url->hostlen > 0) {
-                               msg_debug_task ("found url %s in query of url"
-                                               " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
-
-                               if (prefix_added) {
-                                       query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
-                               }
-
-                               if (query_url->protocol == PROTOCOL_MAILTO) {
-                                       if (query_url->userlen == 0) {
-                                               return TRUE;
-                                       }
-                               }
-
-                               query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-                               rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), query_url);
-                       }
-               }
+                               RSPAMD_URL_FIND_ALL, NULL,
+                               rspamd_url_query_callback, cbd);
        }
 
        return TRUE;
index bf8ba4b6376171aae3070a69f8edb21ea79687f2..bb9c57399ac7975a1f908f97bb84d2d743bd6747 100644 (file)
@@ -35,6 +35,7 @@ enum rspamd_url_flags {
        RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
        RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
        RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
+       RSPAMD_URL_FLAG_QUERY = 1u << 20u,
 };
 
 struct rspamd_url_tag {