git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Css: Enable conditional css parsing support from the HTML parser
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 26 Mar 2021 20:54:37 +0000 (20:54 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 26 Mar 2021 20:55:10 +0000 (20:55 +0000)
src/libmime/message.c
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/html.c
src/libserver/html.h

index 8a9601fa771de8d5499864b87eeadc18c4beb267..9713a6bf545a558dfee1e6b9ee88b44f15e29e84 100644 (file)
@@ -769,7 +769,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
                        text_part->utf_raw_content,
                        &text_part->exceptions,
                        MESSAGE_FIELD (task, urls),
-                       text_part->mime_part->urls);
+                       text_part->mime_part->urls,
+                       task->cfg->enable_css_parser);
 
        if (text_part->utf_content->len == 0) {
                text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
index 9ef795d05377942bd07dca66f69b3a9d3db76259..67f18e1e99e9197efc825cc3aab7124ba619635d 100644 (file)
@@ -377,6 +377,7 @@ struct rspamd_config {
        gboolean soft_reject_on_timeout;                /**< If true emit soft reject on task timeout (if not reject) */
        gboolean public_groups_only;                    /**< Output merely public groups everywhere                             */
        gboolean enable_test_patterns;                  /**< Enable test patterns                                                               */
+       gboolean enable_css_parser;                     /**< Enable css parsing in HTML                                                 */
 
        gsize max_cores_size;                           /**< maximum size occupied by rspamd core files                 */
        gsize max_cores_count;                          /**< maximum number of core files                                               */
index ffdc5e5962ad8c7d166bc1145f2e75d8d2bf322f..4891c4194eb94d61777eb405cbdb453bcd9d2604 100644 (file)
@@ -1999,6 +1999,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
                                G_STRUCT_OFFSET (struct rspamd_config, enable_test_patterns),
                                0,
                                "Enable test GTUBE like patterns (not for production!)");
+               rspamd_rcl_add_default_handler (sub,
+                               "enable_css_parser",
+                               rspamd_rcl_parse_struct_boolean,
+                               G_STRUCT_OFFSET (struct rspamd_config, enable_css_parser),
+                               0,
+                               "Enable CSS parser (experimental)");
                rspamd_rcl_add_default_handler (sub,
                                "enable_experimental",
                                rspamd_rcl_parse_struct_boolean,
index 3d9d540f513acba4b706d507bf6b416f50956fc7..b56f3ef320fc48616d960e1ae0faaa30cd85830e 100644 (file)
@@ -24,6 +24,7 @@
 #include "url.h"
 #include "contrib/libucl/khash.h"
 #include "libmime/images.h"
+#include "css/css.h"
 
 #include <unicode/uversion.h>
 #include <unicode/ucnv.h>
@@ -2781,7 +2782,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                           GByteArray *in,
                                                           GList **exceptions,
                                                           khash_t (rspamd_url_hash) *url_set,
-                                                          GPtrArray *part_urls)
+                                                          GPtrArray *part_urls,
+                                                          bool allow_css)
 {
        const guchar *p, *c, *end, *savep = NULL;
        guchar t;
@@ -2809,6 +2811,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                xml_tag_end,
                content_ignore,
                content_write,
+               content_style,
                content_ignore_sp
        } state = parse_start;
 
@@ -3118,6 +3121,36 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                        p ++;
                        break;
 
+               case content_style: {
+
+                       /*
+                        * We just search for the first </s substring and then pass
+                        * the content to the parser (if needed)
+                        */
+                       goffset end_style = rspamd_substring_search (p, end - p,
+                                       "</", 2);
+                       if (end_style == -1 || g_ascii_tolower (p[end_style + 2]) != 's') {
+                               /* Invalid style */
+                               state = content_ignore;
+                       }
+                       else {
+
+                               if (allow_css) {
+                                       GError *err = NULL;
+                                       (void)rspamd_css_parse_style (pool, p, end_style, &err);
+
+                                       if (err) {
+                                               msg_info_pool ("cannot parse css: %e", err);
+                                               g_error_free (err);
+                                       }
+                               }
+
+                               p += end_style;
+                               state = tag_begin;
+                       }
+                       break;
+               }
+
                case content_ignore_sp:
                        if (!g_ascii_isspace (t)) {
                                c = p;
@@ -3173,7 +3206,12 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                        need_decode = FALSE;
                                }
                                else {
-                                       state = content_ignore;
+                                       if (cur_tag->id == Tag_STYLE) {
+                                               state = content_style;
+                                       }
+                                       else {
+                                               state = content_ignore;
+                                       }
                                }
 
                                if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
@@ -3387,5 +3425,6 @@ rspamd_html_process_part (rspamd_mempool_t *pool,
                struct html_content *hc,
                GByteArray *in)
 {
-       return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL);
+       return rspamd_html_process_part_full (pool, hc, in, NULL,
+                       NULL, NULL, FALSE);
 }
index fba412cb32bae3bd52493ce17ea53f454660bba7..f8a5e18e4e18cd555eadfa3053078f729e17577c 100644 (file)
@@ -147,7 +147,8 @@ GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
                                                                                   struct html_content *hc,
                                                                                   GByteArray *in, GList **exceptions,
                                                                                   khash_t (rspamd_url_hash) *url_set,
-                                                                                  GPtrArray *part_urls);
+                                                                                  GPtrArray *part_urls,
+                                                                                  bool allow_css);
 
 /*
  * Returns true if a specified tag has been seen in a part