]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Add HTML images concept.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Jul 2015 12:02:24 +0000 (13:02 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 23 Jul 2015 12:02:24 +0000 (13:02 +0100)
src/libserver/html.c
src/libserver/html.h

index 16d966c816e2331af6525185112e36d6d2fccf94..e9b7eab18b32306d75caf173c8fa79505c21d09a 100644 (file)
@@ -982,6 +982,25 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
                        ret = TRUE;
                }
        }
+       else if (tag->id == Tag_IMG) {
+               /*
+                * Register width and height components if presented. The length
+                * must equal the attribute name length exactly ("width" is 5
+                * characters, "height" is 6), otherwise a prefix would match.
+                * Components are created empty here (start = NULL, len = 0).
+                */
+               if (len == 5 && g_ascii_strncasecmp (begin, "width", len) == 0) {
+                       comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+                       comp->type = RSPAMD_HTML_COMPONENT_WIDTH;
+                       comp->start = NULL;
+                       comp->len = 0;
+                       tag->params = g_list_prepend (tag->params, comp);
+                       ret = TRUE;
+               }
+               else if (len == 6 && g_ascii_strncasecmp (begin, "height", len) == 0) {
+                       comp = rspamd_mempool_alloc (pool, sizeof (*comp));
+                       comp->type = RSPAMD_HTML_COMPONENT_HEIGHT;
+                       comp->start = NULL;
+                       comp->len = 0;
+                       tag->params = g_list_prepend (tag->params, comp);
+                       ret = TRUE;
+               }
+       }
 
        return ret;
 }
@@ -1282,6 +1301,57 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag)
        return NULL;
 }
 
+/**
+ * Build an html_image record for a single <img> tag and append it to
+ * hc->images.
+ *
+ * The tag parameters are scanned once: an HREF component with non-zero
+ * length becomes the image source (and decides between the embedded and
+ * external flags), WIDTH and HEIGHT components are parsed as unsigned
+ * numbers.
+ *
+ * @param pool memory pool used for the record, for the copy of the source
+ *             string and for registering the destructor of the images array
+ * @param tag  tag whose params list is scanned
+ * @param hc   HTML content holding the images array; the array is created
+ *             on first use
+ */
+static void
+rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
+               struct html_content *hc)
+{
+       struct html_tag_component *comp;
+       struct html_image *img;
+       rspamd_fstring_t fstr;
+       GList *cur;
+       gulong val;
+
+       img = rspamd_mempool_alloc0 (pool, sizeof (*img));
+
+       /*
+        * The cursor must be advanced on every iteration: without it the
+        * loop never terminates for a tag that has at least one parameter.
+        */
+       for (cur = tag->params; cur != NULL; cur = cur->next) {
+               comp = cur->data;
+
+               if (comp->type == RSPAMD_HTML_COMPONENT_HREF && comp->len > 0) {
+                       fstr.begin = (gchar *)comp->start;
+                       fstr.len = comp->len;
+                       img->src = rspamd_mempool_fstrdup (pool, &fstr);
+
+                       if (comp->len > sizeof ("cid:") - 1 && memcmp (comp->start,
+                                       "cid:", sizeof ("cid:") - 1) == 0) {
+                               /* We have an embedded image */
+                               img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
+                       }
+                       else {
+                               img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+                       }
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_HEIGHT) {
+                       /* Components may be empty (NULL start, zero len): skip them */
+                       if (comp->len > 0 &&
+                                       rspamd_strtoul (comp->start, comp->len, &val)) {
+                               img->height = val;
+                       }
+               }
+               else if (comp->type == RSPAMD_HTML_COMPONENT_WIDTH) {
+                       if (comp->len > 0 &&
+                                       rspamd_strtoul (comp->start, comp->len, &val)) {
+                               img->width = val;
+                       }
+               }
+       }
+
+       if (hc->images == NULL) {
+               /* Lazily create the array and let the pool release it */
+               hc->images = g_ptr_array_sized_new (4);
+               rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
+                               hc->images);
+       }
+
+       g_ptr_array_add (hc->images, img);
+}
+
 GByteArray*
 rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                GByteArray *in, GList **exceptions, GHashTable *urls,  GHashTable *emails)
@@ -1626,16 +1696,18 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                                target_tbl = urls;
                                                        }
 
-                                                       turl = g_hash_table_lookup (target_tbl, url);
-
-                                                       if (turl != NULL && turl->phished_url == NULL) {
-                                                               g_hash_table_insert (target_tbl, url, url);
-                                                       }
-                                                       else if (turl == NULL) {
-                                                               g_hash_table_insert (target_tbl, url, url);
-                                                       }
-                                                       else {
-                                                               url = NULL;
+                                                       if (target_tbl != NULL) {
+                                                               turl = g_hash_table_lookup (target_tbl, url);
+
+                                                               if (turl != NULL && turl->phished_url == NULL) {
+                                                                       g_hash_table_insert (target_tbl, url, url);
+                                                               }
+                                                               else if (turl == NULL) {
+                                                                       g_hash_table_insert (target_tbl, url, url);
+                                                               }
+                                                               else {
+                                                                       url = NULL;
+                                                               }
                                                        }
 
                                                        href_offset = dest->len;
@@ -1662,6 +1734,9 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
                                                url = NULL;
                                        }
                                }
+                               else if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
+                                       rspamd_html_process_img_tag (pool, cur_tag, hc);
+                               }
                        }
                        else {
                                state = content_write;
index 5516594e44ed08878c2e9700f6aae1fc4733c800..29716eb75b6af623fb08d5dc5651542d366fbef8 100644 (file)
@@ -8,6 +8,9 @@
 #include "config.h"
 #include "mem_pool.h"
 
+/*
+ * HTML content flags
+ */
 #define RSPAMD_HTML_FLAG_BAD_START (1 << 0)
 #define RSPAMD_HTML_FLAG_BAD_ELEMENTS (1 << 1)
 #define RSPAMD_HTML_FLAG_XML (1 << 2)
 #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
 #define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
 
+/*
+ * Image flags (stored in the flags field of struct html_image)
+ */
+#define RSPAMD_HTML_FLAG_IMAGE_EMBEDDED (1 << 0) /* src starts with "cid:" (case-sensitive) and is longer than it */
+#define RSPAMD_HTML_FLAG_IMAGE_EXTERNAL (1 << 1) /* any other non-empty src */
+
 enum html_component_type {
        RSPAMD_HTML_COMPONENT_NAME = 0,
        RSPAMD_HTML_COMPONENT_HREF,
@@ -29,6 +38,13 @@ struct html_tag_component {
        guint len;
 };
 
+struct html_image { /* one <img> tag, filled by rspamd_html_process_img_tag() */
+       guint height; /* set from the height component when rspamd_strtoul() succeeds */
+       guint width; /* set from the width component when rspamd_strtoul() succeeds */
+       guint flags; /* RSPAMD_HTML_FLAG_IMAGE_* bits */
+       gchar *src; /* copy of the source made with rspamd_mempool_fstrdup() */
+};
+
 struct html_tag {
        gint id;
        struct html_tag_component name;
@@ -43,6 +59,7 @@ struct html_content {
        GNode *html_tags;
        gint flags;
        guchar *tags_seen;
+       GPtrArray *images;
 };
 
 /*