git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] Html: Add images processing logic
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 1 Jun 2021 19:18:47 +0000 (20:18 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Wed, 2 Jun 2021 19:55:10 +0000 (20:55 +0100)
src/libmime/images.c
src/libserver/html/html.cxx
src/libserver/html/html.h

index 960036d786a6c8f1f98197c770f7a57e8b71f399..4e0872f389f5dd76ba1bf537bcc9e2f679c5c06a 100644 (file)
@@ -658,8 +658,8 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
        struct rspamd_mime_header *rh;
        struct rspamd_mime_text_part *tp;
        struct html_image *himg;
-       const gchar *cid, *html_cid;
-       guint cid_len, i, j;
+       const gchar *cid;
+       guint cid_len, i;
        struct rspamd_image *img;
 
        img = (struct rspamd_image *)part->specific.img;
@@ -684,35 +684,22 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
                                }
 
                                PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
-                                       if (IS_TEXT_PART_HTML (tp) && tp->html != NULL &&
-                                               tp->html->images != NULL) {
-                                               for (j = 0; j < tp->html->images->len; j ++) {
-                                                       himg = g_ptr_array_index (tp->html->images, j);
-
-                                                       if ((himg->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED) &&
-                                                               himg->src) {
-                                                               html_cid = himg->src;
-
-                                                               if (strncmp (html_cid, "cid:", 4) == 0) {
-                                                                       html_cid += 4;
-                                                               }
-
-                                                               if (strlen (html_cid) == cid_len &&
-                                                                       memcmp (html_cid, cid, cid_len) == 0) {
-                                                                       img->html_image = himg;
-                                                                       himg->embedded_image = img;
-
-                                                                       msg_debug_images ("found linked image by cid: <%s>",
-                                                                                       cid);
-
-                                                                       if (himg->height == 0) {
-                                                                               himg->height = img->height;
-                                                                       }
-
-                                                                       if (himg->width == 0) {
-                                                                               himg->width = img->width;
-                                                                       }
-                                                               }
+                                       if (IS_TEXT_PART_HTML (tp) && tp->html != NULL) {
+                                               himg = rspamd_html_find_embedded_image(tp->html, cid, cid_len);
+
+                                               if (himg != NULL) {
+                                                       img->html_image = himg;
+                                                       himg->embedded_image = img;
+
+                                                       msg_debug_images ("found linked image by cid: <%s>",
+                                                                       cid);
+
+                                                       if (himg->height == 0) {
+                                                               himg->height = img->height;
+                                                       }
+
+                                                       if (himg->width == 0) {
+                                                               himg->width = img->width;
                                                        }
                                                }
                                        }
index 45a9afa18e24744af50a16993039926186c441fc..00f1d331f38f4ca6e25e35eb3e42d270b6ba7bfb 100644 (file)
@@ -2277,6 +2277,23 @@ html_process_part_full (rspamd_mempool_t *pool,
        return hc;
 }
 
+static auto
+html_find_image_by_cid(const html_content &hc, std::string_view cid)
+       -> std::optional<const html_image *>
+{
+       for (const auto *html_image : hc.images) {
+               /* Filter embedded images */
+               if (html_image->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED &&
+                               html_image->src != nullptr) {
+                       if (cid == html_image->src) {
+                               return html_image;
+                       }
+               }
+       }
+
+       return std::nullopt;
+}
+
 }
 
 void *
@@ -2355,4 +2372,19 @@ rspamd_html_tag_name(void *p, gsize *len)
        }
 
        return tag->name.data();
+}
+
+struct html_image*
+rspamd_html_find_embedded_image(void *html_content,
+                                                               const char *cid, gsize cid_len)
+{
+       auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+       auto maybe_img = rspamd::html::html_find_image_by_cid(*hc, {cid, cid_len});
+
+       if (maybe_img) {
+               return (html_image *)maybe_img.value();
+       }
+
+       return nullptr;
 }
\ No newline at end of file
index 94063b9be51f36e4602275d99d6867b73a9003fb..1e71d0c2d0706489352ef46f0f6767ee2222fc7b 100644 (file)
@@ -144,6 +144,16 @@ gint rspamd_html_tag_by_name(const gchar *name);
  */
 const gchar *rspamd_html_tag_name(void *tag, gsize *len);
 
+/**
+ * Find HTML image by content id
+ * Only images marked as embedded that have a source set are considered.
+ * @param html_content opaque pointer to the parsed HTML content
+ * @param cid content id to look for; its length is taken from cid_len
+ * @param cid_len length of cid in bytes
+ * @return matching image or NULL if no image matches
+ */
+struct html_image* rspamd_html_find_embedded_image(void *html_content,
+               const char *cid, gsize cid_len);
+
 
 #ifdef  __cplusplus
 }