From: Vsevolod Stakhov Date: Fri, 16 Feb 2018 10:17:32 +0000 (+0000) Subject: [Fix] Fix urls/emails distinguishing found in queries X-Git-Tag: 1.7.0~177 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=fc5dc785c482b6706bcd8efc5e0cf70d4b1e6ad5;p=thirdparty%2Frspamd.git [Fix] Fix urls/emails distinguishing found in queries MFH: rspamd-1.6 --- diff --git a/src/libserver/html.c b/src/libserver/html.c index c21871a932..53c16708b3 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1614,8 +1614,9 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag) static void rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, - GHashTable *target) + GHashTable *tbl_urls, GHashTable *tbl_emails) { + GHashTable *target_tbl; struct rspamd_url *query_url, *existing; gchar *url_str; gint rc; @@ -1633,13 +1634,20 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url, pool); if (rc == URI_ERRNO_OK && - url->hostlen > 0) { + query_url->hostlen > 0) { msg_debug_html ("found url %s in query of url" " %*s", url_str, url->querylen, url->query); - if ((existing = g_hash_table_lookup (target, + if (query_url->protocol == PROTOCOL_MAILTO) { + target_tbl = tbl_emails; + } + else { + target_tbl = tbl_urls; + } + + if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) { - g_hash_table_insert (target, + g_hash_table_insert (target_tbl, query_url, query_url); } @@ -2558,7 +2566,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc, if (turl == NULL && url != NULL) { rspamd_process_html_url (pool, url, - target_tbl); + urls, emails); } } diff --git a/src/libserver/url.c b/src/libserver/url.c index 83ffd36d99..272511a1ca 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -2446,6 +2446,7 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, struct rspamd_task *task; gchar *url_str = NULL; struct rspamd_url *query_url, *existing; + GHashTable *target_tbl = NULL; gint rc; task = cbd->task; @@ -2457,26 +2458,25 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, if (url->protocol == PROTOCOL_MAILTO) { if (url->userlen > 0) { - if ((existing = g_hash_table_lookup (task->emails, url)) == NULL) { - url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; - g_hash_table_insert (task->emails, url, - url); - } - else { - existing->count ++; - } + target_tbl = task->emails; } } else { - if ((existing = g_hash_table_lookup (task->urls, url)) == NULL) { + target_tbl = task->urls; + } + + if (target_tbl) { + if ((existing = g_hash_table_lookup (target_tbl, url)) == NULL) { url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; - g_hash_table_insert (task->urls, url, url); + g_hash_table_insert (target_tbl, url, url); } else { - existing->count ++; + existing->count++; } } + target_tbl = NULL; + cbd->part->exceptions = g_list_prepend ( cbd->part->exceptions, ex); @@ -2494,19 +2494,27 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, task->task_pool); if (rc == URI_ERRNO_OK && - url->hostlen > 0) { + query_url->hostlen > 0) { msg_debug_task ("found url %s in query of url" " %*s", url_str, url->querylen, url->query); - if ((existing = g_hash_table_lookup (task->urls, - query_url)) == NULL) { - query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; - g_hash_table_insert (task->urls, - query_url, - query_url); + if (query_url->protocol == PROTOCOL_MAILTO) { + if (query_url->userlen > 0) { + target_tbl = task->emails; + } } else { - existing->count ++; + target_tbl = task->urls; + } + + if (target_tbl) { + if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) { + url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; + g_hash_table_insert (target_tbl, query_url, query_url); + } + else { + existing->count++; + } } } }