git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Store text parts content with newlines stripped
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 15:58:13 +0000 (16:58 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 16:00:27 +0000 (17:00 +0100)
src/libmime/message.c
src/libmime/message.h

index 0daacc40b00fd0a737128477e9f6f2f073df530a..8b43aec8c8fe11436a2e482d92da9a28cae463b9 100644 (file)
@@ -983,7 +983,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
        struct sb_stemmer *stem = NULL;
 #endif
        rspamd_ftok_t *w;
-       const guchar *r;
+       const guchar *r, *p, *c, *end;
        gchar *temp_word;
        guint i, nlen;
 
@@ -996,6 +996,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
                }
        }
 #endif
+       /* Strip newlines */
+       part->stripped_content = g_byte_array_sized_new (part->content->len);
+       p = part->content->data;
+       c = p;
+       end = p + part->content->len;
+
+       while (p < end) {
+               if (*p == '\r' || *p == '\n') {
+                       if (p > c) {
+                               g_byte_array_append (part->stripped_content, c, p - c);
+                       }
+
+                       while (p < end && (*p == '\r' || *p == '\n')) {
+                               p ++;
+                       }
+                       c = p;
+               }
+               else {
+                       p ++;
+               }
+       }
+
+       if (p > c) {
+               g_byte_array_append (part->stripped_content, c, p - c);
+       }
+
+       rspamd_mempool_add_destructor (task->task_pool,
+                       (rspamd_mempool_destruct_t) free_byte_array_callback,
+                       part->stripped_content);
 
        /* Ugly workaround */
        part->normalized_words = rspamd_tokenize_text (part->content->data,
@@ -1124,7 +1153,7 @@ process_text_part (struct rspamd_task *task,
        const gchar *cd, *p, *c;
        guint remain;
 
-       /* Skip attachements */
+       /* Skip attachments */
 #ifndef GMIME24
        cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
        if (cd &&
index 4f8f27b0b6069b0dcdf4acb70e1bfe86e677938e..2c2365ef76e5aac0e8df7e97d9966cc1fbf3bc3e 100644 (file)
@@ -43,6 +43,7 @@ struct mime_text_part {
        const gchar *real_charset;
        GByteArray *orig;
        GByteArray *content;
+       GByteArray *stripped_content; /**< no newlines or html tags                     */
        struct html_content *html;
        GList *urls_offset;     /**< list of offsets of urls                                            */
        GMimeObject *parent;