git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Improve tag_exists function.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 17 Jul 2015 12:51:19 +0000 (13:51 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 17 Jul 2015 12:51:19 +0000 (13:51 +0100)
src/libmime/mime_expressions.c
src/libserver/html.c
src/libserver/html.h

index c367ad07330452db8cd9da12df0d30845048f869..bff70c1b7b225fd06413d9b676a4ed8d062f0ecd 100644 (file)
@@ -1550,37 +1550,13 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
 
 }
 
-struct html_callback_data {
-       struct html_tag *tag;
-       gboolean *res;
-};
-
-static gboolean
-search_html_node_callback (GNode * node, gpointer data)
-{
-       struct html_callback_data *cd = data;
-       struct html_tag *nd;
-
-       nd = node->data;
-       if (nd) {
-               if (nd->id == cd->tag->id) {
-                       *cd->res = TRUE;
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
 gboolean
 rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
 {
        struct mime_text_part *p;
        struct expression_argument *arg;
-       struct html_tag *tag;
        guint i;
        gboolean res = FALSE;
-       struct html_callback_data cd;
 
        if (args == NULL) {
                msg_warn ("no parameters to function");
@@ -1593,27 +1569,11 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
                return FALSE;
        }
 
-       tag = get_tag_by_name (arg->data);
-       if (tag == NULL) {
-               msg_warn ("unknown tag type passed as argument: %s",
-                       (gchar *)arg->data);
-               return FALSE;
-       }
-
-       cd.res = &res;
-       cd.tag = tag;
-
        for (i = 0; i < task->text_parts->len && res; i ++) {
                p = g_ptr_array_index (task->text_parts, i);
 
                if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html) {
-                       /* TODO: too slow */
-                       g_node_traverse (p->html->html_tags,
-                               G_PRE_ORDER,
-                               G_TRAVERSE_ALL,
-                               -1,
-                               search_html_node_callback,
-                               &cd);
+                       res = rspamd_html_tag_seen (p->html, arg->data);
                }
        }
 
index 421a898291ecf0cefd3991c57fce0e278f66fe7c..cfab7a7d728299ace487f6d9820dd57654dfef4b 100644 (file)
@@ -714,9 +714,26 @@ rspamd_html_check_balance (GNode * node, GNode ** cur_level)
        return FALSE;
 }
 
-struct html_tag *
-get_tag_by_name (const gchar *name)
+/*
+ * Tell whether the tag named `tagname` is marked in hc->tags_seen.
+ *
+ * The name is looked up in tag_defs with bsearch () using tag_find as the
+ * comparator, and the id of the matching definition is used as the bit index
+ * into hc->tags_seen.
+ *
+ * hc and hc->tags_seen must be non-NULL (asserted).
+ * Returns TRUE if the tag is known and its bit is set; FALSE for a NULL or
+ * unknown name, or when the bit is clear.
+ */
+gboolean
+rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname)
 {
+       struct html_tag tag;
+       struct html_tag_def *found;
+
+       g_assert (hc != NULL);
+       g_assert (hc->tags_seen != NULL);
+
+       if (tagname == NULL) {
+               /* strlen (NULL) is undefined; a missing name matches nothing */
+               return FALSE;
+       }
+
+       /*
+        * Only the name of the search key is filled in; the other fields stay
+        * uninitialised (presumably tag_find reads the name only - confirm).
+        */
+       tag.name.start = tagname;
+       tag.name.len = strlen (tagname);
+
+       found = bsearch (&tag, tag_defs, G_N_ELEMENTS (tag_defs),
+                       sizeof (tag_defs[0]), tag_find);
+
+       if (found) {
+               /* isset () yields a bit mask, so normalise it to TRUE/FALSE */
+               return isset (hc->tags_seen, found->id) != 0;
+       }
+
+       return FALSE;
 }
 
 /* Decode HTML entitles in text */
@@ -1291,6 +1308,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                entities_sorted = 1;
        }
 
+       hc->tags_seen = rspamd_mempool_alloc0 (pool, NBYTES (G_N_ELEMENTS (tag_defs)));
+
        dest = g_byte_array_sized_new (in->len / 3 * 2);
 
        p = in->data;
@@ -1553,6 +1572,10 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                        state = content_ignore;
                                }
 
+                               if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
+                                       setbit (hc->tags_seen, cur_tag->id);
+                               }
+
                                if ((cur_tag->id == Tag_P || cur_tag->id == Tag_BR ||
                                                cur_tag->id == Tag_HR) && balanced) {
                                        /* Insert newline */
index c70d7d6ed2eff5e8b3e9db01ef8dc9e9028d49c2..4b17b5000f5fd9a57693118c48bc6fa931a0328e 100644 (file)
@@ -41,13 +41,9 @@ struct rspamd_task;
 struct html_content { /* HTML state of a text part; passed to rspamd_html_process_part_full () */
        GNode *html_tags; /* tag tree; presumably node->data is a struct html_tag - confirm */
        gint flags;
+       guchar *tags_seen; /* bit array indexed by tag id, queried by rspamd_html_tag_seen () */
 };
 
-/*
- * Get tag structure by its name (binary search is used)
- */
-struct html_tag * get_tag_by_name (const gchar *name);
-
 /*
  * Decode HTML entitles in text. Text is modified in place.
  */
@@ -61,4 +57,9 @@ GByteArray* rspamd_html_process_part_full (rspamd_mempool_t *pool,
                struct html_content *hc,
                GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails);
 
+/*
+ * Returns true if a specified tag has been seen in a part
+ */
+gboolean rspamd_html_tag_seen (struct html_content *hc, const gchar *tagname);
+
 #endif