git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] HTML: Specially treat data urls in HTML
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jan 2019 12:32:34 +0000 (12:32 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jan 2019 12:32:34 +0000 (12:32 +0000)
src/libserver/html.c
src/libserver/html.h
src/lua/lua_html.c

index e8856db356d7b002ad701d652298200326147427..cbc0fe7da6b319dc32eedd79be58cc168b70cc09 100644 (file)
@@ -1571,6 +1571,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                img->flags |=
                                                (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED|RSPAMD_HTML_FLAG_IMAGE_DATA);
                                rspamd_html_process_data_image (pool, img, comp);
+                               hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
                        }
                        else {
                                img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
index 0400672ac5fa8ed7080da3d4549429568d7f8f15..a2f3a0b1d52be8fd0d24ab3538702b9b187228e9 100644 (file)
@@ -18,6 +18,7 @@
 #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
 #define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
 #define RSPAMD_HTML_FLAG_TOO_MANY_TAGS (1 << 6)
+#define RSPAMD_HTML_FLAG_HAS_DATA_URLS (1 << 7)
 
 /*
  * Image flags
index 47b8c7dfdbd0de00df638fe410cb7a303c1c17af..71578e1a48506510f8ed6629e5f1a861b3d97935 100644 (file)
@@ -229,6 +229,7 @@ lua_html_has_property (lua_State *L)
                 * - `unknown_element`
                 * - `duplicate_element`
                 * - `unbalanced`
+                * - `data_urls`
                 */
                if (strcmp (propname, "no_html") == 0) {
                        ret = hc->flags & RSPAMD_HTML_FLAG_BAD_START;
@@ -248,6 +249,12 @@ lua_html_has_property (lua_State *L)
                else if (strcmp (propname, "unbalanced") == 0) {
                        ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
                }
+               else if (strcmp (propname, "unbalanced") == 0) {
+                       ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
+               }
+               else if (strcmp (propname, "data_urls") == 0) {
+                       ret = hc->flags & RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+               }
        }
 
        lua_pushboolean (L, ret);
@@ -266,7 +273,21 @@ lua_html_push_image (lua_State *L, struct html_image *img)
 
        if (img->src) {
                lua_pushstring (L, "src");
-               lua_pushstring (L, img->src);
+
+               if (img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA) {
+                       struct rspamd_lua_text *t;
+
+                       t = lua_newuserdata (L, sizeof (*t));
+                       t->start = img->src;
+                       t->len = strlen (img->src);
+                       t->flags = 0;
+
+                       rspamd_lua_setclass (L, "rspamd{text}", -1);
+               }
+               else {
+                       lua_pushstring (L, img->src);
+               }
+
                lua_settable (L, -3);
        }
 
@@ -294,6 +315,9 @@ lua_html_push_image (lua_State *L, struct html_image *img)
        lua_settable (L, -3);
        lua_pushstring (L, "embedded");
        lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED);
+       lua_settable (L, -3);
+       lua_pushstring (L, "data");
+       lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA);
        lua_settable (L, -3);
 }