git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] R_PARTS_DIFFER: also handle parts without words
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 3 Feb 2026 10:14:26 +0000 (10:14 +0000)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 3 Feb 2026 10:14:26 +0000 (10:14 +0000)
The previous fix only handled truly empty parts. This also handles
the case where a part has content but no extractable words (e.g.,
6 bytes of whitespace in text/plain vs 142 words in text/html).

Now check if exactly one part has normalized_hashes with words,
regardless of whether parts are marked as empty.

src/libmime/message.c

index 8bd2c11b5c01f4bf0b3ea93621935b3502583266..93365480f41062e8bf1807716ef7a0c2ccc75556 100644 (file)
@@ -1909,19 +1909,26 @@ void rspamd_message_process(struct rspamd_task *task)
                                                                                                        NULL);
                                        }
                                }
-                               else if (IS_TEXT_PART_EMPTY(p1) != IS_TEXT_PART_EMPTY(p2)) {
+                               else {
                                        /*
-                                        * One part is empty, another is not - this is 100% difference
+                                        * Handle cases where parts differ significantly:
+                                        * - One part is empty, another is not
+                                        * - One part has words, another has none (but isn't empty)
+                                        * In both cases, treat as 100% difference
                                         */
-                                       struct rspamd_mime_text_part *non_empty =
-                                               IS_TEXT_PART_EMPTY(p1) ? p2 : p1;
+                                       gboolean p1_has_words = p1->normalized_hashes &&
+                                                                                       p1->normalized_hashes->len > 0;
+                                       gboolean p2_has_words = p2->normalized_hashes &&
+                                                                                       p2->normalized_hashes->len > 0;
+
+                                       if (p1_has_words != p2_has_words) {
+                                               struct rspamd_mime_text_part *non_empty =
+                                                       p1_has_words ? p1 : p2;
 
-                                       if (non_empty->normalized_hashes &&
-                                               non_empty->normalized_hashes->len > 0) {
                                                tw = non_empty->normalized_hashes->len;
 
                                                msg_debug_task(
-                                                       "one part is empty, another has %d words, "
+                                                       "one part has no words, another has %d words, "
                                                        "got diff between parts of 1.0",
                                                        tw);