text_part->utf_raw_content,
&text_part->exceptions,
MESSAGE_FIELD (task, urls),
- text_part->mime_part->urls);
+ text_part->mime_part->urls,
+ task->cfg->enable_css_parser);
if (text_part->utf_content->len == 0) {
text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
gboolean soft_reject_on_timeout; /**< If true emit soft reject on task timeout (if not reject) */
gboolean public_groups_only; /**< Output merely public groups everywhere */
gboolean enable_test_patterns; /**< Enable test patterns */
+ gboolean enable_css_parser; /**< Enable css parsing in HTML */
gsize max_cores_size; /**< maximum size occupied by rspamd core files */
gsize max_cores_count; /**< maximum number of core files */
G_STRUCT_OFFSET (struct rspamd_config, enable_test_patterns),
0,
"Enable test GTUBE like patterns (not for production!)");
+ rspamd_rcl_add_default_handler (sub,
+ "enable_css_parser",
+ rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct rspamd_config, enable_css_parser),
+ 0,
+ "Enable CSS parser (experimental)");
rspamd_rcl_add_default_handler (sub,
"enable_experimental",
rspamd_rcl_parse_struct_boolean,
#include "url.h"
#include "contrib/libucl/khash.h"
#include "libmime/images.h"
+#include "css/css.h"
#include <unicode/uversion.h>
#include <unicode/ucnv.h>
GByteArray *in,
GList **exceptions,
khash_t (rspamd_url_hash) *url_set,
- GPtrArray *part_urls)
+ GPtrArray *part_urls,
+ bool allow_css)
{
const guchar *p, *c, *end, *savep = NULL;
guchar t;
xml_tag_end,
content_ignore,
content_write,
+ content_style,
content_ignore_sp
} state = parse_start;
p ++;
break;
+ case content_style: {
+
+ /*
+ * Handle the body of a style element: find the first "</" in the
+ * remaining input and accept it as the end of the style only when
+ * it is followed by 's'/'S' (i.e. it looks like "</s..."). The bytes
+ * before it are passed to the CSS parser when allow_css is set.
+ */
+ goffset end_style = rspamd_substring_search (p, end - p,
+ "</", 2);
+ /*
+ * "</" may be the last two bytes of the input, so make sure the
+ * byte following it is still inside [p, end) before reading it;
+ * otherwise p[end_style + 2] would be read past the buffer.
+ */
+ if (end_style == -1 || end_style + 2 >= end - p ||
+ g_ascii_tolower (p[end_style + 2]) != 's') {
+ /* Invalid style */
+ state = content_ignore;
+ }
+ else {
+
+ if (allow_css) {
+ GError *err = NULL;
+ /* The parse result is discarded here; only errors are logged */
+ (void)rspamd_css_parse_style (pool, p, end_style, &err);
+
+ if (err) {
+ msg_info_pool ("cannot parse css: %e", err);
+ g_error_free (err);
+ }
+ }
+
+ /* Skip the style body: p now points at the '<' of the "</" found */
+ p += end_style;
+ state = tag_begin;
+ }
+ break;
+ }
+
case content_ignore_sp:
if (!g_ascii_isspace (t)) {
c = p;
need_decode = FALSE;
}
else {
- state = content_ignore;
+ if (cur_tag->id == Tag_STYLE) {
+ state = content_style;
+ }
+ else {
+ state = content_ignore;
+ }
}
if (cur_tag->id != -1 && cur_tag->id < N_TAGS) {
struct html_content *hc,
GByteArray *in)
{
- return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL);
+ return rspamd_html_process_part_full (pool, hc, in, NULL,
+ NULL, NULL, FALSE);
}
struct html_content *hc,
GByteArray *in, GList **exceptions,
khash_t (rspamd_url_hash) *url_set,
- GPtrArray *part_urls);
+ GPtrArray *part_urls,
+ bool allow_css);
/*
* Returns true if a specified tag has been seen in a part