From: Vsevolod Stakhov
Date: Wed, 23 Sep 2015 16:08:32 +0000 (+0100)
Subject: Search for urls in query parts of html urls as well.
X-Git-Tag: 1.0.2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=471e72e7a1290918c759a5576bcb8883122d460d;p=thirdparty%2Frspamd.git

Search for urls in query parts of html urls as well.
---

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 0a46cef02f..520c49d391 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1307,6 +1307,44 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
 	return NULL;
 }
 
+/*
+ * Search the query part of an html url for an embedded url and, if one is
+ * found and parsed successfully with a non-empty host, add it to target.
+ *
+ * @param pool memory pool passed to the url search/parse routines and used
+ * to allocate the new url structure
+ * @param url url whose query part (url->query, url->querylen) is examined
+ * @param target hash table where the embedded url is stored as key and value
+ */
+static void
+rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
+		GHashTable *target)
+{
+	gint nstate = 0;
+	struct rspamd_url *query_url;
+	gchar *url_str = NULL;
+	gint rc;
+
+	if (url->querylen > 0 &&
+			rspamd_url_find (pool, url->query, url->querylen, NULL, NULL,
+					&url_str, TRUE, &nstate) &&
+			url_str != NULL) {
+		query_url = rspamd_mempool_alloc0 (pool, sizeof (*query_url));
+		rc = rspamd_url_parse (query_url, url_str, strlen (url_str), pool);
+
+		/* Check the host of the url found in the query, not of the outer url */
+		if (rc == URI_ERRNO_OK && query_url->hostlen > 0) {
+			msg_debug_pool ("found url %s in query of url"
+					" %*s", url_str, url->querylen, url->query);
+
+			/* Do not replace an entry that is already in the table */
+			if (!g_hash_table_lookup (target, query_url)) {
+				g_hash_table_insert (target, query_url, query_url);
+			}
+		}
+	}
+}
+
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 		struct html_content *hc)
@@ -1923,6 +1961,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 							else {
 								url = NULL;
 							}
+
+							if (turl == NULL && url != NULL) {
+								rspamd_process_html_url (pool,
+										url,
+										target_tbl);
+							}
 						}
 
 						href_offset = dest->len;