]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Store text parts content with newlines stripped
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 15:58:13 +0000 (16:58 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 28 Apr 2016 15:58:13 +0000 (16:58 +0100)
src/libmime/message.c
src/libmime/message.h

index a52b43f36113f248d76c4b42940eb14e88f2847f..be773d48020f75666de68104310736c149b09356 100644 (file)
@@ -984,7 +984,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
        struct sb_stemmer *stem = NULL;
 #endif
        rspamd_ftok_t *w;
-       const guchar *r;
+       const guchar *r, *p, *c, *end;
        gchar *temp_word;
        guint i, nlen;
 
@@ -997,6 +997,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
                }
        }
 #endif
+       /* Strip newlines */
+       part->stripped_content = g_byte_array_sized_new (part->content->len);
+       p = part->content->data;
+       c = p;
+       end = p + part->content->len;
+
+       while (p < end) {
+               if (*p == '\r' || *p == '\n') {
+                       if (p > c) {
+                               g_byte_array_append (part->stripped_content, c, p - c);
+                       }
+
+                       while (p < end && (*p == '\r' || *p == '\n')) {
+                               p ++;
+                       }
+                       c = p;
+               }
+               else {
+                       p ++;
+               }
+       }
+
+       if (p > c) {
+               g_byte_array_append (part->stripped_content, c, p - c);
+       }
+
+       rspamd_mempool_add_destructor (task->task_pool,
+                       (rspamd_mempool_destruct_t) free_byte_array_callback,
+                       part->stripped_content);
 
        /* Ugly workaround */
        part->normalized_words = rspamd_tokenize_text (part->content->data,
@@ -1125,7 +1154,7 @@ process_text_part (struct rspamd_task *task,
        const gchar *cd, *p, *c;
        guint remain;
 
-       /* Skip attachements */
+       /* Skip attachments */
 #ifndef GMIME24
        cd = g_mime_part_get_content_disposition (GMIME_PART (mime_part->mime));
        if (cd &&
index 2f7539825b00fc971759506d0284d07f4ac31567..59fa0b73c49f1bf14ef57a7f54f838a0d957a428 100644 (file)
@@ -43,6 +43,7 @@ struct mime_text_part {
        const gchar *real_charset;
        GByteArray *orig;
        GByteArray *content;
+       GByteArray *stripped_content; /**< no newlines or html tags                     */
        struct html_content *html;
        GList *urls_offset;     /**< list of offsets of urls                                            */
        GMimeObject *parent;