git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Search for urls in query parts of html urls as well.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Sep 2015 16:08:32 +0000 (17:08 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 23 Sep 2015 16:08:32 +0000 (17:08 +0100)
src/libserver/html.c

index 0a46cef02ffbb0b858bf31d9c51f3949e4955df4..520c49d3910aeefaa31e3ef705294b1a2d7141e7 100644 (file)
@@ -1307,6 +1307,43 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
        return NULL;
 }
 
+/*
+ * Look for a URL embedded in the query part of `url`. rspamd_url_find is
+ * called once, so at most one URL per query is handled. If the match parses
+ * and has a non-empty host, store it in `target` unless already present.
+ */
+static void
+rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
+               GHashTable *target)
+{
+       gint nstate = 0;
+       struct rspamd_url *query_url;
+       gchar *url_str;
+       gint rc;
+
+       if (url->querylen <= 0) {
+               return;
+       }
+
+       if (!rspamd_url_find (pool, url->query, url->querylen, NULL, NULL,
+                       &url_str, TRUE, &nstate)) {
+               return;
+       }
+
+       query_url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url));
+       rc = rspamd_url_parse (query_url, url_str, strlen (url_str), pool);
+
+       /* Validate the host of the URL found in the query, not the outer one */
+       if (rc == URI_ERRNO_OK && query_url->hostlen > 0) {
+               msg_debug_pool ("found url %s in query of url"
+                               " %*s", url_str, url->querylen, url->query);
+
+               if (!g_hash_table_lookup (target, query_url)) {
+                       g_hash_table_insert (target, query_url, query_url);
+               }
+       }
+}
+
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                struct html_content *hc)
@@ -1923,6 +1960,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                                else {
                                                                        url = NULL;
                                                                }
+
+                                                               if (turl == NULL && url != NULL) {
+                                                                       rspamd_process_html_url (pool,
+                                                                                       url,
+                                                                                       target_tbl);
+                                                               }
                                                        }
 
                                                        href_offset = dest->len;