git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] Urls: process query urls in HTML urls correctly
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 22 Mar 2020 17:25:32 +0000 (17:25 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 22 Mar 2020 17:25:32 +0000 (17:25 +0000)
src/libserver/html.c
src/libserver/url.c

index 71efe632aa7515f1563377be14812573f3c0ebb2..edcb0f2b2dc2b9243e9e12ef09660460f680ed8d 100644 (file)
@@ -1615,57 +1615,70 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
        return NULL;
 }
 
+/* Context handed to rspamd_html_url_query_callback through its `ud` argument */
+struct rspamd_html_url_query_cbd {
+       rspamd_mempool_t *pool;
+       khash_t (rspamd_url_hash) *url_set; /* set that receives urls found in the query */
+       struct rspamd_url *url; /* url whose query part is being scanned */
+};
+
+/*
+ * Callback given to rspamd_url_find_multiple for an url found in the query
+ * part of cbd->url: flags it with RSPAMD_URL_FLAG_QUERY and adds it to
+ * cbd->url_set. A mailto url with an empty user part is skipped: FALSE is
+ * returned and nothing is added.
+ */
+static gboolean
+rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
+                                                  gsize end_offset, gpointer ud)
+{
+       struct rspamd_html_url_query_cbd *cbd =
+                       (struct rspamd_html_url_query_cbd *)ud;
+       rspamd_mempool_t *pool;
+
+       pool = cbd->pool;
+
+       if (url->protocol == PROTOCOL_MAILTO) {
+               if (url->userlen == 0) {
+                       return FALSE;
+               }
+       }
+
+       msg_debug_html ("found url %s in query of url"
+                                       " %*s", url->string,
+                                       cbd->url->querylen, rspamd_url_query_unsafe (cbd->url));
+
+       url->flags |= RSPAMD_URL_FLAG_QUERY;
+       rspamd_url_set_add_or_increase (cbd->url_set, url);
+
+       return TRUE;
+}
+
+/*
+ * Post-processes an HTML url: an unnormalised url is also flagged as
+ * obscured, and a non-empty query part is searched for nested urls, which
+ * rspamd_html_url_query_callback adds to url_set.
+ */
 static void
 rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
                                                 khash_t (rspamd_url_hash) *url_set)
 {
-       struct rspamd_url *query_url;
-       gchar *url_str;
-       gint rc;
-       gboolean prefix_added;
-
        if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) {
                url->flags |= RSPAMD_URL_FLAG_OBSCURED;
        }
 
        if (url->querylen > 0) {
+               struct rspamd_html_url_query_cbd qcbd;
 
-               if (rspamd_url_find (pool, rspamd_url_query_unsafe (url), url->querylen, &url_str,
-                               RSPAMD_URL_FIND_ALL,
-                               NULL, &prefix_added)) {
-                       query_url = rspamd_mempool_alloc0 (pool,
-                                       sizeof (struct rspamd_url));
-
-                       rc = rspamd_url_parse (query_url,
-                                       url_str,
-                                       strlen (url_str),
-                                       pool,
-                                       RSPAMD_URL_PARSE_TEXT);
-
-                       if (rc == URI_ERRNO_OK &&
-                                       query_url->hostlen > 0) {
-                               msg_debug_html ("found url %s in query of url"
-                                               " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
-
-                               if (prefix_added) {
-                                       query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
-                               }
-
-                               if (query_url->flags
-                                               & (RSPAMD_URL_FLAG_UNNORMALISED|RSPAMD_URL_FLAG_OBSCURED|
-                                                       RSPAMD_URL_FLAG_NUMERIC)) {
-                                       /* Set obscured flag if query url is bad */
-                                       url->flags |= RSPAMD_URL_FLAG_OBSCURED;
-                               }
+               qcbd.pool = pool;
+               qcbd.url_set = url_set;
+               /* Read by the callback when it logs the enclosing url */
+               qcbd.url = url;
 
-                               /* And vice-versa */
-                               if (url->flags & RSPAMD_URL_FLAG_OBSCURED) {
-                                       query_url->flags |= RSPAMD_URL_FLAG_OBSCURED;
-                               }
-
-                               rspamd_url_set_add_or_increase (url_set, query_url);
-                       }
-               }
+               rspamd_url_find_multiple (pool,
+                               rspamd_url_query_unsafe (url), url->querylen,
+                               RSPAMD_URL_FIND_ALL, NULL,
+                               rspamd_html_url_query_callback, &qcbd);
        }
 }
 
index 30872c38dbd800fe50d3f004c33efcde97c596be..0669d932fd4d06a8602ff6d0c0480bcdbd855960 100644 (file)
@@ -3253,10 +3253,6 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
                        (struct rspamd_url_mimepart_cbdata *)ud;
        struct rspamd_process_exception *ex;
        struct rspamd_task *task;
-       gchar *url_str = NULL;
-       struct rspamd_url *query_url;
-       gint rc;
-       gboolean prefix_added;
 
        task = cbd->task;
        ex = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_process_exception));