From: Vsevolod Stakhov Date: Sat, 6 Mar 2021 23:49:16 +0000 (+0000) Subject: [Fix] Urls: Fix processing of html urls when it comes to the flags X-Git-Tag: 3.0~606 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b7467f9d294faf54f25d2b2fc32255f613097416;p=thirdparty%2Frspamd.git [Fix] Urls: Fix processing of html urls when it comes to the flags Issue: #3664 --- diff --git a/src/libserver/html.c b/src/libserver/html.c index 974b59129f..aa1cdf6cc3 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -1764,7 +1764,7 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset, url->flags |= RSPAMD_URL_FLAG_QUERY; - if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) { + if (rspamd_url_set_add_or_increase(cbd->url_set, url, false) && cbd->part_urls) { g_ptr_array_add (cbd->part_urls, url); } @@ -1903,7 +1903,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag, if (img->url) { img->url->flags |= RSPAMD_URL_FLAG_IMAGE; - if (rspamd_url_set_add_or_increase (url_set, img->url) && + if (rspamd_url_set_add_or_increase(url_set, img->url, false) && part_urls) { g_ptr_array_add (part_urls, img->url); } @@ -3245,10 +3245,15 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, if (url != NULL) { if (url_set != NULL) { - if (rspamd_url_set_add_or_increase (url_set, url)) { + struct rspamd_url *maybe_existing = + rspamd_url_set_add_or_return (url_set, url); + if (maybe_existing == url) { rspamd_process_html_url (pool, url, url_set, part_urls); } + else { + url = maybe_existing; + } } href_offset = dest->len; diff --git a/src/libserver/url.c b/src/libserver/url.c index a5de7ebdfb..8183213b66 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -3377,7 +3377,7 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset, url->flags |= RSPAMD_URL_FLAG_QUERY; - if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) { + if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false)) { if (cbd->part && cbd->part->mime_part->urls) { g_ptr_array_add (cbd->part->mime_part->urls, url); } @@ -3433,8 +3433,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset, url->flags |= RSPAMD_URL_FLAG_FROM_TEXT; - if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) && - cbd->part->mime_part->urls) { + if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false) && + cbd->part->mime_part->urls) { g_ptr_array_add (cbd->part->mime_part->urls, url); } @@ -3592,7 +3592,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, } } - rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url); + rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false); /* We also search the query for additional url inside */ if (url->querylen > 0) { @@ -3622,8 +3622,8 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset, } } - rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), - query_url); + rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), + query_url, false); } } } @@ -4044,21 +4044,44 @@ rspamd_url_protocol_from_string (const gchar *str) bool -rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set, - struct rspamd_url *u) +rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set, + struct rspamd_url *u, + bool enforce_replace) { khiter_t k; gint r; - k = kh_put (rspamd_url_hash, set, u, &r); + k = kh_get (rspamd_url_hash, set, u); - if (r == 0) { + if (k != kh_end (set)) { + /* Existing url */ struct rspamd_url *ex = kh_key (set, k); - - ex->count ++; +#define SUSPICIOUS_URL_FLAGS (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_OBSCURED|RSPAMD_URL_FLAG_ZW_SPACES) + if (enforce_replace) { + kh_key (set, k) = u; + u->count++; + } + else { + if (u->flags & SUSPICIOUS_URL_FLAGS) { + if (!(ex->flags & SUSPICIOUS_URL_FLAGS)) { + /* Propagate new url to an old one */ + kh_key (set, k) = u; + u->count++; + } + else { + ex->count++; + } + } + else { + ex->count++; + } + } return false; } + else { + k = kh_put (rspamd_url_hash, set, u, &r); + } return true; } @@ -4071,12 +4094,15 @@ rspamd_url_set_add_or_return (khash_t (rspamd_url_hash) *set, gint r; if (set) { - k = kh_put (rspamd_url_hash, set, u, &r); + k = kh_get (rspamd_url_hash, set, u); - if (r == 0) { - struct rspamd_url *ex = kh_key (set, k); + if (k != kh_end (set)) { + return kh_key (set, k); + } + else { + k = kh_put (rspamd_url_hash, set, u, &r); - return ex; + return kh_key (set, k); } } diff --git a/src/libserver/url.h b/src/libserver/url.h index 567cdd137a..59485ab9a3 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -296,8 +296,9 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char); * @param u * @return true if a new url has been added */ -bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set, - struct rspamd_url *u); +bool rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set, + struct rspamd_url *u, + bool enforce_replace); /** * Same as rspamd_url_set_add_or_increase but returns the existing url if found diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index d2bd17aba5..579f04fb9b 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -2507,7 +2507,7 @@ lua_task_inject_url (lua_State * L) } if (task && task->message && url && url->url) { - if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url)) { + if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url->url, false)) { if (mpart && mpart->urls) { /* Also add url to the mime part */ g_ptr_array_add (mpart->urls, url->url);