]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Core: Process data urls for images
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jan 2019 11:26:20 +0000 (11:26 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 15 Jan 2019 11:26:20 +0000 (11:26 +0000)
src/libserver/html.c

index 9bf15e0a2c4d1d30c25e9a3c749a0e79d40d722e..e8856db356d7b002ad701d652298200326147427 100644 (file)
@@ -23,6 +23,7 @@
 #include "html_entities.h"
 #include "url.h"
 #include "contrib/libucl/khash.h"
+#include "libmime/images.h"
 
 #include <unicode/uversion.h>
 #include <unicode/ucnv.h>
@@ -1482,6 +1483,58 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
        }
 }
 
+static void
+rspamd_html_process_data_image (rspamd_mempool_t *pool,
+                                                               struct html_image *img,
+                                                               struct html_tag_component *src)
+{
+       /*
+        * Basic handling of `data:image/xxx;base64,yyyzzz==` URLs: only base64
+        * payloads are decoded and the declared content type is ignored. If the
+        * payload parses as an image, it is stored in img->embedded_image.
+        */
+       const gchar *semicolon_pos = src->start, *end = src->start + src->len;
+
+       while ((semicolon_pos = memchr (semicolon_pos, ';', end - semicolon_pos)) != NULL) {
+               struct rspamd_image *parsed_image;
+               const gchar *data_pos;
+               gchar *decoded;
+               gsize encoded_len, decoded_len;
+               rspamd_ftok_t inp;
+
+               if ((gsize)(end - semicolon_pos) <= sizeof ("base64,")) {
+                       /* No room for `;base64,` (sizeof's NUL counts the `;`) plus data */
+                       return;
+               }
+
+               if (memcmp (semicolon_pos + 1, "base64,", sizeof ("base64,") - 1) != 0) {
+                       /* Another parameter, e.g. `;charset=...`: try the next `;` */
+                       semicolon_pos ++;
+                       continue;
+               }
+
+               data_pos = semicolon_pos + sizeof ("base64,");
+               encoded_len = end - data_pos;
+               decoded_len = (encoded_len / 4 * 3) + 12;
+               decoded = rspamd_mempool_alloc (pool, decoded_len);
+               if (!rspamd_cryptobox_base64_decode (data_pos, encoded_len,
+                               decoded, &decoded_len)) {
+                       /* Broken base64: decoded_len cannot be trusted, do not parse */
+                       return;
+               }
+               inp.begin = decoded;
+               inp.len = decoded_len;
+               parsed_image = rspamd_maybe_process_image (pool, &inp);
+               if (parsed_image) {
+                       msg_debug_html ("detected %s image of size %ud x %ud in data url",
+                                       rspamd_image_type_str (parsed_image->type),
+                                       parsed_image->width, parsed_image->height);
+                       img->embedded_image = parsed_image;
+               }
+               return;
+       }
+}
+
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                struct html_content *hc)
@@ -1517,7 +1570,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                /* We have an embedded image in HTML tag */
                                img->flags |=
                                                (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED|RSPAMD_HTML_FLAG_IMAGE_DATA);
-
+                               rspamd_html_process_data_image (pool, img, comp);
                        }
                        else {
                                img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
@@ -1593,6 +1646,15 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
                                hc->images);
        }
 
+       if (img->embedded_image) {
+               if (!seen_height) {
+                       img->height = img->embedded_image->height;
+               }
+               if (!seen_width) {
+                       img->width = img->embedded_image->width;
+               }
+       }
+
        g_ptr_array_add (hc->images, img);
        tag->extra = img;
 }