[Minor] Fix rspamd_has_only_html_part function + refactor macro names

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)
diff --git a/src/libmime/images.c b/src/libmime/images.c

index c54f5845a8db1aa1905b384389118569034caa49..b3baa8e4c0f65ddc891da0924e1fe3db138f1a09 100644 (file)
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -684,7 +684,7 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
                                 }
  
                                 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
-                                       if (IS_PART_HTML (tp) && tp->html != NULL &&
+                                       if (IS_TEXT_PART_HTML (tp) && tp->html != NULL &&
                                                 tp->html->images != NULL) {
                                                 for (j = 0; j < tp->html->images->len; j ++) {
                                                         himg = g_ptr_array_index (tp->html->images, j);
diff --git a/src/libmime/message.c b/src/libmime/message.c

index 2702d0f514c6b537eb4655894b5be6bbdc8f8b3d..411b872c92520d2e4f2afe83d8e5e544483ac419 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -147,7 +147,7 @@ rspamd_mime_part_create_words (struct rspamd_task *task,
  {
         enum rspamd_tokenize_type tok_type;
  
-       if (IS_PART_UTF (part)) {
+       if (IS_TEXT_PART_UTF (part)) {
  
  #if U_ICU_VERSION_MAJOR_NUM < 50
                 /* Hack to prevent hang with Thai in old libicu */
@@ -209,8 +209,8 @@ rspamd_mime_part_detect_language (struct rspamd_task *task,
  {
         struct rspamd_lang_detector_res *lang;
  
-       if (!IS_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 &&
-                       task->lang_det) {
+       if (!IS_TEXT_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 &&
+               task->lang_det) {
                 if (rspamd_language_detector_detect (task, task->lang_det, part)) {
                         lang = g_ptr_array_index (part->languages, 0);
                         part->language = lang->lang;
@@ -240,7 +240,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task,
         } state = normal_char;
  
         while (p < pe) {
-               if (IS_PART_UTF (part)) {
+               if (IS_TEXT_PART_UTF (part)) {
                         gint32 off = p - begin;
                         U8_NEXT (begin, off, pe - begin, uc);
  
@@ -324,7 +324,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task,
  
                                 c = p + 1;
  
-                               if (IS_PART_HTML (part) || !url_open_bracket) {
+                               if (IS_TEXT_PART_HTML (part) || !url_open_bracket) {
                                         g_byte_array_append (part->utf_stripped_content,
                                                         (const guint8 *)" ", 1);
                                         g_ptr_array_add (part->newlines,
@@ -339,7 +339,7 @@ rspamd_strip_newlines_parse (struct rspamd_task *task,
                         case seen_cr:
                                 /* \r\n */
                                 if (!crlf_added) {
-                                       if (IS_PART_HTML (part) || !url_open_bracket) {
+                                       if (IS_TEXT_PART_HTML (part) || !url_open_bracket) {
                                                 g_byte_array_append (part->utf_stripped_content,
                                                                 (const guint8 *) " ", 1);
                                                 crlf_added = TRUE;
@@ -509,7 +509,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
  
         part->newlines = g_ptr_array_sized_new (128);
  
-       if (IS_PART_EMPTY (part)) {
+       if (IS_TEXT_PART_EMPTY (part)) {
                 part->utf_stripped_content = g_byte_array_new ();
         }
         else {
@@ -532,7 +532,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
                 }
         }
  
-       if (IS_PART_UTF (part)) {
+       if (IS_TEXT_PART_UTF (part)) {
                 utext_openUTF8 (&part->utf_stripped_text,
                                 part->utf_stripped_content->data,
                                 part->utf_stripped_content->len,
@@ -780,6 +780,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
         struct rspamd_mime_text_part *text_part;
         rspamd_ftok_t html_tok, xhtml_tok;
         gboolean found_html = FALSE, found_txt = FALSE;
+       guint flags = 0;
         enum rspamd_action_type act;
  
         if ((mime_part->ct && (mime_part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) ||
@@ -802,10 +803,14 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
  
         /* Skip attachments */
         if ((found_txt || found_html) &&
-                       (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT) &&
-                       (!task->cfg->check_text_attachements)) {
-               debug_task ("skip attachments for checking as text parts");
-               return FALSE;
+                       (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT)) {
+               if (!task->cfg->check_text_attachements) {
+                       debug_task ("skip attachments for checking as text parts");
+                       return FALSE;
+               }
+               else {
+                       flags |= RSPAMD_MIME_TEXT_PART_ATTACHMENT;
+               }
         }
         else if (!(found_txt || found_html)) {
                 /* Not a text part */
@@ -820,6 +825,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
         text_part->parsed.begin = mime_part->parsed_data.begin;
         text_part->parsed.len = mime_part->parsed_data.len;
         text_part->utf_stripped_text = (UText)UTEXT_INITIALIZER;
+       text_part->flags |= flags;
  
         if (found_html) {
                 if (!rspamd_message_process_html_text_part (task, text_part)) {
@@ -859,7 +865,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
         /* Post process part */
         rspamd_normalize_text_part (task, text_part);
  
-       if (!IS_PART_HTML (text_part)) {
+       if (!IS_TEXT_PART_HTML (text_part)) {
                 if (mime_part->parent_part) {
                         struct rspamd_mime_part *parent = mime_part->parent_part;
  
@@ -1509,7 +1515,7 @@ rspamd_message_process (struct rspamd_task *task)
                         srch.len = 11;
  
                         if (rspamd_ftok_cmp (&p1->mime_part->parent_part->ct->subtype, &srch) == 0) {
-                               if (!IS_PART_EMPTY (p1) && !IS_PART_EMPTY (p2) &&
+                               if (!IS_TEXT_PART_EMPTY (p1) && !IS_TEXT_PART_EMPTY (p2) &&
                                         p1->normalized_hashes && p2->normalized_hashes) {
                                         /*
                                          * We also detect language on one part and propagate it to
@@ -1518,10 +1524,10 @@ rspamd_message_process (struct rspamd_task *task)
                                         struct rspamd_mime_text_part *sel;
  
                                         /* Prefer HTML as text part is not displayed normally */
-                                       if (IS_PART_HTML (p1)) {
+                                       if (IS_TEXT_PART_HTML (p1)) {
                                                 sel = p1;
                                         }
-                                       else if (IS_PART_HTML (p2)) {
+                                       else if (IS_TEXT_PART_HTML (p2)) {
                                                 sel = p2;
                                         }
                                         else {
diff --git a/src/libmime/message.h b/src/libmime/message.h

index d6f1b76c0b5231047a10664e049461a7976a8657..13e40e2eff28150ef43ab34e411581b13a9dd4de 100644 (file)
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -120,11 +120,12 @@ struct rspamd_mime_part {
  #define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
  #define RSPAMD_MIME_TEXT_PART_HAS_SUBNORMAL (1 << 6)
  #define RSPAMD_MIME_TEXT_PART_NORMALISED (1 << 7)
+#define RSPAMD_MIME_TEXT_PART_ATTACHMENT (1 << 8)
  
-#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
-#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
-#define IS_PART_RAW(part) (!((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF))
-#define IS_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
+#define IS_TEXT_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
+#define IS_TEXT_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define IS_TEXT_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
+#define IS_TEXT_PART_ATTACHMENT(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_ATTACHMENT)
  
  
  struct rspamd_mime_text_part {
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c

index d35bc136f83dd9fc1e3dc7baacb27e1a5a930851..a06baffaf439f55fa5a9279781f74a64d8a3b1ed 100644 (file)
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -1455,20 +1455,23 @@ rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
         void *unused)
  {
         struct rspamd_mime_text_part *p;
-       gboolean res = FALSE;
+       guint i, cnt_html = 0, cnt_txt = 0;
  
-       if (MESSAGE_FIELD (task, text_parts)->len == 1) {
-               p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0);
-
-               if (IS_PART_HTML (p)) {
-                       res = TRUE;
-               }
-               else {
-                       res = FALSE;
+       /* Count HTML and plain text parts, ignoring text attachments */
+       PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
+               /* `p` is already set by PTR_ARRAY_FOREACH: do not re-read index 0 */
+               if (!IS_TEXT_PART_ATTACHMENT (p)) {
+                       if (IS_TEXT_PART_HTML (p)) {
+                               cnt_html++;
+                       }
+                       else {
+                               cnt_txt++;
+                       }
                 }
         }
  
-       return res;
+       /* Only HTML: at least one HTML part and no plain text parts */
+       return (cnt_html > 0 && cnt_txt == 0);
  }
  
  static gboolean
@@ -1565,7 +1568,7 @@ rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
         gboolean res = TRUE;
  
         PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
-               if (IS_PART_HTML (p)) {
+               if (IS_TEXT_PART_HTML (p)) {
                         if (p->flags & RSPAMD_MIME_TEXT_PART_FLAG_BALANCED) {
                                 res = TRUE;
                         }
@@ -1600,7 +1603,7 @@ rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
         }
  
         PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
-               if (IS_PART_HTML (p) && p->html) {
+               if (IS_TEXT_PART_HTML (p) && p->html) {
                         res = rspamd_html_tag_seen (p->html, arg->data);
                 }
  
@@ -1621,7 +1624,7 @@ rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
         gboolean res = FALSE;
  
         PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
-               if (IS_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) {
+               if (IS_TEXT_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) {
                         res = TRUE;
                 }
  
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c

index 73082bb2d09680e9f85a70cfd583c31f447f5c6e..078563103673d2cc9a1bb6e53992a46e594690da 100644 (file)
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -1198,13 +1198,13 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                 }
                                 else {
                                         /* Skip empty parts */
-                                       if (IS_PART_EMPTY (text_part)) {
+                                       if (IS_TEXT_PART_EMPTY (text_part)) {
                                                 len = 0;
                                                 in = "";
                                         }
                                         else {
                                                 /* Check raw flags */
-                                               if (!IS_PART_UTF (text_part)) {
+                                               if (!IS_TEXT_PART_UTF (text_part)) {
                                                         raw = TRUE;
                                                 }
  
@@ -1345,7 +1345,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                 scvec[i + 1] = (guchar *)text_part->utf_stripped_content->data;
                                 lenvec[i + 1] = text_part->utf_stripped_content->len;
  
-                               if (!IS_PART_UTF (text_part)) {
+                               if (!IS_TEXT_PART_UTF (text_part)) {
                                         raw = TRUE;
                                 }
                         }
@@ -1382,7 +1382,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
                                         scvec[i] = (guchar *)text_part->parsed.begin;
                                         lenvec[i] = text_part->parsed.len;
  
-                                       if (!IS_PART_UTF (text_part)) {
+                                       if (!IS_TEXT_PART_UTF (text_part)) {
                                                 raw = TRUE;
                                         }
                                 }
diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c

index 9cd425206d20b7a37a3fddf0b86c9491fa358eb7..0b22cbd11a672403dbd7add746087207bbe2d3af 100644 (file)
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -132,7 +132,7 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
         g_assert (st_ctx != NULL);
  
         PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
-               if (!IS_PART_EMPTY (part) && part->utf_words != NULL) {
+               if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) {
                         reserved_len += part->utf_words->len;
                 }
                 /* XXX: normal window size */
@@ -146,9 +146,9 @@ rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
         pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance");
  
         PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
-               if (!IS_PART_EMPTY (part) && part->utf_words != NULL) {
+               if (!IS_TEXT_PART_EMPTY (part) && part->utf_words != NULL) {
                         st_ctx->tokenizer->tokenize_func (st_ctx, task,
-                                       part->utf_words, IS_PART_UTF (part),
+                                       part->utf_words, IS_TEXT_PART_UTF (part),
                                         NULL, task->tokens);
                 }
  
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c

index 9748cfde33b65e2271c53612739c1e8d5dfa66b0..9cc1374bebff9e820fe186a6a2fa4d6b78733e57 100644 (file)
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -617,12 +617,12 @@ lua_textpart_is_utf (lua_State * L)
         LUA_TRACE_POINT;
         struct rspamd_mime_text_part *part = lua_check_textpart (L);
  
-       if (part == NULL || IS_PART_EMPTY (part)) {
+       if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
                 lua_pushboolean (L, FALSE);
                 return 1;
         }
  
-       lua_pushboolean (L, IS_PART_UTF (part));
+       lua_pushboolean (L, IS_TEXT_PART_UTF (part));
  
         return 1;
  }
@@ -690,7 +690,7 @@ lua_textpart_get_content (lua_State * L)
         }
  
         if (!type) {
-               if (IS_PART_EMPTY (part)) {
+               if (IS_TEXT_PART_EMPTY (part)) {
                         lua_pushnil (L);
                         return 1;
                 }
@@ -698,7 +698,7 @@ lua_textpart_get_content (lua_State * L)
                 len = part->utf_content->len;
         }
         else if (strcmp (type, "content") == 0) {
-               if (IS_PART_EMPTY (part)) {
+               if (IS_TEXT_PART_EMPTY (part)) {
                         lua_pushnil (L);
                         return 1;
                 }
@@ -707,7 +707,7 @@ lua_textpart_get_content (lua_State * L)
                 len = part->utf_content->len;
         }
         else if (strcmp (type, "content_oneline") == 0) {
-               if (IS_PART_EMPTY (part)) {
+               if (IS_TEXT_PART_EMPTY (part)) {
                         lua_pushnil (L);
                         return 1;
                 }
@@ -763,7 +763,7 @@ lua_textpart_get_raw_content (lua_State * L)
         struct rspamd_mime_text_part *part = lua_check_textpart (L);
         struct rspamd_lua_text *t;
  
-       if (part == NULL || IS_PART_EMPTY (part)) {
+       if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
                 lua_pushnil (L);
                 return 1;
         }
@@ -784,7 +784,7 @@ lua_textpart_get_content_oneline (lua_State * L)
         struct rspamd_mime_text_part *part = lua_check_textpart (L);
         struct rspamd_lua_text *t;
  
-       if (part == NULL || IS_PART_EMPTY (part)) {
+       if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
                 lua_pushnil (L);
                 return 1;
         }
@@ -809,7 +809,7 @@ lua_textpart_get_length (lua_State * L)
                 return 1;
         }
  
-       if (IS_PART_EMPTY (part) || part->utf_content == NULL) {
+       if (IS_TEXT_PART_EMPTY (part) || part->utf_content == NULL) {
                 lua_pushinteger (L, 0);
         }
         else {
@@ -873,7 +873,7 @@ lua_textpart_get_lines_count (lua_State * L)
                 return 1;
         }
  
-       if (IS_PART_EMPTY (part)) {
+       if (IS_TEXT_PART_EMPTY (part)) {
                 lua_pushinteger (L, 0);
         }
         else {
@@ -894,7 +894,7 @@ lua_textpart_get_words_count (lua_State *L)
                 return 1;
         }
  
-       if (IS_PART_EMPTY (part) || part->utf_words == NULL) {
+       if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
                 lua_pushinteger (L, 0);
         }
         else {
@@ -936,7 +936,7 @@ lua_textpart_get_words (lua_State *L)
                 return luaL_error (L, "invalid arguments");
         }
  
-       if (IS_PART_EMPTY (part) || part->utf_words == NULL) {
+       if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
                 lua_createtable (L, 0, 0);
         }
         else {
@@ -969,7 +969,7 @@ lua_textpart_filter_words (lua_State *L)
                 return luaL_error (L, "invalid arguments");
         }
  
-       if (IS_PART_EMPTY (part) || part->utf_words == NULL) {
+       if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
                 lua_createtable (L, 0, 0);
         }
         else {
@@ -1055,7 +1055,7 @@ lua_textpart_is_empty (lua_State * L)
                 return 1;
         }
  
-       lua_pushboolean (L, IS_PART_EMPTY (part));
+       lua_pushboolean (L, IS_TEXT_PART_EMPTY (part));
  
         return 1;
  }
@@ -1071,7 +1071,7 @@ lua_textpart_is_html (lua_State * L)
                 return 1;
         }
  
-       lua_pushboolean (L, IS_PART_HTML (part));
+       lua_pushboolean (L, IS_TEXT_PART_HTML (part));
  
         return 1;
  }
diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c

index 3941a5a8554e89a8ededbde743089fb398e53860..7c63fc6870e7c8f74feb8deff994840c36f77fcc 100644 (file)
--- a/src/lua/lua_trie.c
+++ b/src/lua/lua_trie.c
@@ -342,7 +342,7 @@ lua_trie_search_mime (lua_State *L)
  
         if (trie && task) {
                 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
-                       if (!IS_PART_EMPTY (part) && part->utf_content != NULL) {
+                       if (!IS_TEXT_PART_EMPTY (part) && part->utf_content != NULL) {
                                 text = part->utf_content->data;
                                 len = part->utf_content->len;
  
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c

index 640afcc32ef815ba0c9d36533e7bb1e92c263659..633ce50aedbce468f6d3bffea66a45c18c39e5df 100644 (file)
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -2399,7 +2399,7 @@ fuzzy_insert_metric_results (struct rspamd_task *task, struct fuzzy_rule *rule,
  
         if (task->message) {
                 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
-                       if (!IS_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
+                       if (!IS_TEXT_PART_EMPTY (tp) && tp->utf_words != NULL && tp->utf_words->len > 0) {
                                 seen_text_part = TRUE;
  
                                 if (tp->utf_stripped_text.magic == UTEXT_MAGIC) {
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Mon, 26 Oct 2020 14:49:37 +0000 (14:49 +0000)
src/libmime/images.c		patch \| blob \| blame \| history
src/libmime/message.c		patch \| blob \| blame \| history
src/libmime/message.h		patch \| blob \| blame \| history
src/libmime/mime_expressions.c		patch \| blob \| blame \| history
src/libserver/re_cache.c		patch \| blob \| blame \| history
src/libstat/stat_process.c		patch \| blob \| blame \| history
src/lua/lua_mimepart.c		patch \| blob \| blame \| history
src/lua/lua_trie.c		patch \| blob \| blame \| history
src/plugins/fuzzy_check.c		patch \| blob \| blame \| history