git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add count to url structure
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 6 Jun 2017 12:50:38 +0000 (13:50 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 6 Jun 2017 12:50:38 +0000 (13:50 +0100)
src/libserver/html.c
src/libserver/url.c
src/libserver/url.h
src/plugins/surbl.c

index 40f8f9f64e43a5d6aaede54cd6b1e70e98abce01..186376567ea6235dd2e2c62d99641636533ff9f1 100644 (file)
@@ -1578,7 +1578,7 @@ static void
 rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
                GHashTable *target)
 {
-       struct rspamd_url *query_url;
+       struct rspamd_url *query_url, *existing;
        gchar *url_str;
        gint rc;
 
@@ -1599,12 +1599,15 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
                                msg_debug_html ("found url %s in query of url"
                                                " %*s", url_str, url->querylen, url->query);
 
-                               if (!g_hash_table_lookup (target,
-                                               query_url)) {
+                               if ((existing = g_hash_table_lookup (target,
+                                               query_url)) == NULL) {
                                        g_hash_table_insert (target,
                                                        query_url,
                                                        query_url);
                                }
+                               else {
+                                       existing->count ++;
+                               }
                        }
                }
        }
@@ -2102,6 +2105,8 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
                                        turl->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
                                        turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
                                }
+
+                               turl->count ++;
                        }
                        else {
                                g_hash_table_insert (target_tbl,
@@ -2504,6 +2509,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                                        g_hash_table_insert (target_tbl, url, url);
                                                                }
                                                                else {
+                                                                       turl->count ++;
                                                                        url = NULL;
                                                                }
 
index 9f377edb98151884834907ff5ac84458c1fca16c..8e0cb52eecb1f50c5b7c00702f2eb2dba60b500a 100644 (file)
@@ -1491,6 +1491,7 @@ rspamd_url_parse (struct rspamd_url *uri, gchar *uristring, gsize len,
 
        memset (uri, 0, sizeof (*uri));
        memset (&u, 0, sizeof (u));
+       uri->count = 1;
 
        if (*uristring == '\0') {
                return URI_ERRNO_EMPTY;
@@ -2350,7 +2351,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
        struct rspamd_process_exception *ex;
        struct rspamd_task *task;
        gchar *url_str = NULL;
-       struct rspamd_url *query_url;
+       struct rspamd_url *query_url, *existing;
        gint rc;
 
        task = cbd->task;
@@ -2362,18 +2363,24 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
        if (url->protocol == PROTOCOL_MAILTO) {
                if (url->userlen > 0) {
-                       if (!g_hash_table_lookup (task->emails, url)) {
+                       if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
                                url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
                                g_hash_table_insert (task->emails, url,
                                                url);
                        }
+                       else {
+                               existing->count ++;
+                       }
                }
        }
        else {
-               if (!g_hash_table_lookup (task->urls, url)) {
+               if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
                        url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
                        g_hash_table_insert (task->urls, url, url);
                }
+               else {
+                       existing->count ++;
+               }
        }
 
        cbd->part->exceptions = g_list_prepend (
@@ -2397,13 +2404,16 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
                                msg_debug_task ("found url %s in query of url"
                                                " %*s", url_str, url->querylen, url->query);
 
-                               if (!g_hash_table_lookup (task->urls,
-                                               query_url)) {
+                               if ((existing = g_hash_table_lookup (task->urls,
+                                               query_url)) == NULL) {
                                        query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
                                        g_hash_table_insert (task->urls,
                                                        query_url,
                                                        query_url);
                                }
+                               else {
+                                       existing->count ++;
+                               }
                        }
                }
        }
@@ -2492,7 +2502,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 {
        struct rspamd_task *task = ud;
        gchar *url_str = NULL;
-       struct rspamd_url *query_url;
+       struct rspamd_url *query_url, *existing;
        gint rc;
 
        /* It is just a displayed URL, we should not check it for certain things */
@@ -2500,16 +2510,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 
        if (url->protocol == PROTOCOL_MAILTO) {
                if (url->userlen > 0) {
-                       if (!g_hash_table_lookup (task->emails, url)) {
+                       if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) {
                                g_hash_table_insert (task->emails, url,
                                                url);
                        }
+                       else {
+                               existing->count ++;
+                       }
                }
        }
        else {
-               if (!g_hash_table_lookup (task->urls, url)) {
+               if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) {
                        g_hash_table_insert (task->urls, url, url);
                }
+               else {
+                       existing->count ++;
+               }
        }
 
        /* We also search the query for additional url inside */
@@ -2529,12 +2545,15 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
                                msg_debug_task ("found url %s in query of url"
                                                " %*s", url_str, url->querylen, url->query);
 
-                               if (!g_hash_table_lookup (task->urls,
-                                               query_url)) {
+                               if ((existing = g_hash_table_lookup (task->urls,
+                                               query_url)) == NULL) {
                                        g_hash_table_insert (task->urls,
                                                        query_url,
                                                        query_url);
                                }
+                               else {
+                                       existing->count ++;
+                               }
                        }
                }
        }
index 14c0c5b696654e6456af14d6c953d082ddd3dcc7..e4834d9bcfccf057c4e48f20a03418facf605080 100644 (file)
@@ -50,6 +50,7 @@ struct rspamd_url {
        guint urllen;
 
        enum rspamd_url_flags flags;
+       guint count;
        GHashTable *tags;
 };
 
index 9fef4d128c92eaa90e31e1304d248b6e550cb8ba..c84cfdc88c928f9f4c45824a2e7f04d9c94ee320 100644 (file)
@@ -1354,7 +1354,7 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
        struct redirector_param *param = (struct redirector_param *)conn->ud;
        struct rspamd_task *task;
        gint r, urllen;
-       struct rspamd_url *redirected_url;
+       struct rspamd_url *redirected_url, *existing;
        const rspamd_ftok_t *hdr;
        gchar *urlstr;
 
@@ -1378,12 +1378,15 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
                                        task->task_pool);
 
                        if (r == URI_ERRNO_OK) {
-                               if (!g_hash_table_lookup (task->urls, redirected_url)) {
+                               if ((existing = g_hash_table_lookup (task->urls, redirected_url)) == NULL) {
                                        g_hash_table_insert (task->urls, redirected_url,
                                                        redirected_url);
                                        redirected_url->phished_url = param->url;
                                        redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
                                }
+                               else {
+                                       existing->count ++;
+                               }
 
                                rspamd_url_add_tag (param->url, "redirector", urlstr,
                                                task->task_pool);