[Feature] Try to guess line endings when folding headers

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)
diff --git a/src/client/rspamc.c b/src/client/rspamc.c

index ae3b3fe82ee80493650bc3036f64617a68ade797..3f038ed575a7e405e203ea531951a413d19bbddb 100644 (file)
--- a/src/client/rspamc.c
+++ b/src/client/rspamc.c
@@ -1129,7 +1129,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input,
  
                 folded_symbuf = rspamd_header_value_fold ("X-Spam-Symbols",
                                 symbuf->str,
-                               0);
+                               0, RSPAMD_TASK_NEWLINES_CRLF);
                 rspamd_printf_gstring (added_headers, "X-Spam-Symbols: %v\r\n",
                                 folded_symbuf);
  
@@ -1153,7 +1153,7 @@ rspamc_mime_output (FILE *out, ucl_object_t *result, GString *input,
                         }
  
                         json_header_encoded = rspamd_encode_base64_fold (json_header,
-                                       strlen (json_header), 60, NULL);
+                                       strlen (json_header), 60, NULL, RSPAMD_TASK_NEWLINES_CRLF);
                         free (json_header);
                         rspamd_printf_gstring (added_headers,
                                         "X-Spam-Result: %s\r\n",
diff --git a/src/libmime/message.c b/src/libmime/message.c

index 8f4417db4a84c1563fe3b7f026a06669a2b5bb93..346105438e0f160d808859cc3f8f4fc2484ec6c0 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -80,10 +80,12 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
         gchar *tmp, *tp;
         gint state = 0, l, next_state = 100, err_state = 100, t_state;
         gboolean valid_folding = FALSE;
+       guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
  
         p = in;
         end = p + len;
         c = p;
+       memset (nlines_count, 0, sizeof (nlines_count));
  
         while (p < end) {
                 /* FSM for processing headers */
@@ -140,6 +142,17 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
                                 p++;
                         }
                         else if (*p == '\n' || *p == '\r') {
+
+                               if (*p == '\n') { /* tally which newline style was just encountered */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+                               }
+                               else if (*(p + 1) == '\n') { /* here *p is '\r'; NOTE(review): p[1] is read without a check against `end` -- confirm the byte after the buffer is readable */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+                               }
+                               else { /* '\r' that is not followed by '\n' */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+                               }
+
                                 /* Process folding */
                                 state = 99;
                                 l = p - c;
@@ -167,6 +180,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
                 case 3:
                         if (*p == '\r' || *p == '\n') {
                                 /* Hold folding */
+                               if (*p == '\n') { /* tally which newline style was just encountered */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+                               }
+                               else if (*(p + 1) == '\n') { /* here *p is '\r'; NOTE(review): p[1] is read without a check against `end` -- confirm the byte after the buffer is readable */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
+                               }
+                               else { /* '\r' that is not followed by '\n' */
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CR] ++;
+                               }
                                 state = 99;
                                 next_state = 3;
                                 err_state = 4;
@@ -279,12 +301,15 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
  
                         if (*p == '\r') {
                                 if (*(p + 1) == '\n') {
+                                       nlines_count[RSPAMD_TASK_NEWLINES_CRLF] ++;
                                         p++;
                                 }
                                 p++;
                                 state = next_state;
                         }
                         else if (*p == '\n') {
+                               nlines_count[RSPAMD_TASK_NEWLINES_LF] ++;
+
                                 if (*(p + 1) == '\r') {
                                         p++;
                                 }
@@ -301,6 +326,18 @@ process_raw_headers (struct rspamd_task *task, GHashTable *target,
                         break;
                 }
         }
+
+       guint max_cnt = 0; /* highest per-style newline count seen so far */
+       gint sel = 0; /* NOTE(review): 0 is RSPAMD_TASK_NEWLINES_CR, so CR is the result when no newline was counted -- confirm CRLF is not the intended fallback */
+
+       for (gint i = 0; i < RSPAMD_TASK_NEWLINES_MAX; i ++) {
+               if (nlines_count[i] > max_cnt) { /* strict '>' keeps the lowest-indexed style on ties */
+                       max_cnt = nlines_count[i];
+                       sel = i;
+               }
+       }
+
+       task->nlines_type = sel; /* store the most frequent style on the task */
  }
  
  static void
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c

index 33ac2cb966e87cfe945c416c5d47b9c775410769..d545a78e41a4bc643b25bb67f7d4bc4288e55dab 100644 (file)
--- a/src/libserver/dkim.c
+++ b/src/libserver/dkim.c
@@ -2162,7 +2162,8 @@ rspamd_dkim_sign (struct rspamd_task *task,
                 return NULL;
         }
  
-       b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL);
+       b64_data = rspamd_encode_base64_fold (rsa_buf, rsa_len, 70, NULL,
+                       task->nlines_type);
         rspamd_printf_gstring (hdr, "%s", b64_data);
         g_free (b64_data);
  
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c

index 413d48bf2a9137bce00507c7e74090c225c539ef..3ec4c7eca1e4406d0a69bb24d4a3d4ba7322e8ea 100644 (file)
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1037,7 +1037,7 @@ rspamd_protocol_write_ucl (struct rspamd_task *task)
  
         if (dkim_sig) {
                 GString *folded_header = rspamd_header_value_fold ("DKIM-Signature",
-                               dkim_sig->str, 80);
+                               dkim_sig->str, 80, task->nlines_type);
                 ucl_object_insert_key (top,
                                 ucl_object_fromstring_common (folded_header->str,
                                                 folded_header->len, UCL_STRING_RAW),
diff --git a/src/libserver/task.h b/src/libserver/task.h

index aa1f52e45436d19665049f085ec7c3669a120238..915d58aa364591349ba1f36adaec5afe0d01d80e 100644 (file)
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -114,7 +114,7 @@ enum rspamd_task_stage {
  #define RSPAMD_TASK_IS_EMPTY(task) (((task)->flags & RSPAMD_TASK_FLAG_EMPTY))
  
  struct rspamd_email_address;
-
+enum rspamd_newlines_type;
  
  /**
   * Worker task structure
@@ -161,6 +161,7 @@ struct rspamd_task {
         GPtrArray *rcpt_envelope;                                               /**< array of rspamd_email_address                                      */
         InternetAddressList *from_mime;
         struct rspamd_email_address *from_envelope;
+       enum rspamd_newlines_type nlines_type;                  /**< type of newlines (the most frequent one in headers) */
  
         GList *messages;                                                                /**< list of messages that would be reported            */
         struct rspamd_re_runtime *re_rt;                                /**< regexp runtime                                                                     */
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c

index 4210adbe236f77209f7a7959d2e7228b0e312d5e..ca40c86e4b7fbdadc85d87737f9012f778b914e7 100644 (file)
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -701,14 +701,17 @@ rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
  
  static gchar *
  rspamd_encode_base64_common (const guchar *in, gsize inlen, gint str_len,
-               gsize *outlen, gboolean fold)
+               gsize *outlen, gboolean fold, enum rspamd_newlines_type how)
  {
+#define ADD_SPLIT do { /* write the line break selected by `how` at cursor `o`; any other `how` value writes no break */ \
+       if (how == RSPAMD_TASK_NEWLINES_CR || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\r'; /* CR and CRLF start with '\r' */ \
+       if (how == RSPAMD_TASK_NEWLINES_LF || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\n'; /* LF and CRLF end with '\n' */ \
+       if (fold) *o++ = '\t'; /* folded output continues the next line with a tab */ \
+} while (0)
  #define CHECK_SPLIT \
         do { if (str_len > 0 && cols >= str_len) { \
-                               *o++ = '\r'; \
-                               *o++ = '\n'; \
-                               if (fold) *o++ = '\t'; \
-                               cols = 0; \
+               ADD_SPLIT; \
+               cols = 0; \
         } } \
  while (0)
  
@@ -724,7 +727,28 @@ while (0)
  
         if (str_len > 0) {
                 g_assert (str_len > 8);
-               allocated_len += (allocated_len / str_len + 1) * (fold ? 3 : 2) + 1;
+               if (fold) {
+                       switch (how) {
+                       case RSPAMD_TASK_NEWLINES_CR:
+                       case RSPAMD_TASK_NEWLINES_LF:
+                               allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+                               break;
+                       default:
+                               allocated_len += (allocated_len / str_len + 1) * 3 + 1;
+                               break;
+                       }
+               }
+               else {
+                       switch (how) {
+                       case RSPAMD_TASK_NEWLINES_CR:
+                       case RSPAMD_TASK_NEWLINES_LF:
+                               allocated_len += (allocated_len / str_len + 1) * 1 + 1;
+                               break;
+                       default:
+                               allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+                               break;
+                       }
+               }
         }
  
         out = g_malloc (allocated_len);
@@ -755,11 +779,7 @@ while (0)
                                 cols --;
                         }
  
-                       *o++ = '\r';
-                       *o++ = '\n';
-                       if (fold) {
-                               *o ++ = '\t';
-                       }
+                       ADD_SPLIT;
  
                         /* Remaining bytes */
                         while (shift >= 16) {
@@ -851,14 +871,15 @@ gchar *
  rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
                 gsize *outlen)
  {
-       return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE);
+       return rspamd_encode_base64_common (in, inlen, str_len, outlen, FALSE,
+                       RSPAMD_TASK_NEWLINES_CRLF);
  }
  
  gchar *
  rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
-               gsize *outlen)
+               gsize *outlen, enum rspamd_newlines_type how)
  {
-       return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE);
+       return rspamd_encode_base64_common (in, inlen, str_len, outlen, TRUE, how);
  }
  
  gsize
@@ -1004,7 +1025,8 @@ rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
  GString *
  rspamd_header_value_fold (const gchar *name,
                 const gchar *value,
-               guint fold_max)
+               guint fold_max,
+               enum rspamd_newlines_type how)
  {
         GString *res;
         const guint default_fold_max = 76;
@@ -1066,7 +1088,7 @@ rspamd_header_value_fold (const gchar *name,
                                 c = p;
                                 state = read_quoted;
                         }
-                       else if (*p == '\r') {
+                       else if (*p == '\r' || *p == '\n') {
                                 /* Reset line length */
                                 cur_len = 0;
  
@@ -1105,7 +1127,19 @@ rspamd_header_value_fold (const gchar *name,
                         /* Here, we have token start at 'c' and token end at 'p' */
                         if (fold_type == fold_after) {
                                 g_string_append_len (res, c, p - c);
-                               g_string_append_len (res, "\r\n\t", 3);
+
+                               switch (how) {
+                               case RSPAMD_TASK_NEWLINES_LF:
+                                       g_string_append_len (res, "\n\t", 2);
+                                       break;
+                               case RSPAMD_TASK_NEWLINES_CR:
+                                       g_string_append_len (res, "\r\t", 2);
+                                       break;
+                               case RSPAMD_TASK_NEWLINES_CRLF:
+                               default:
+                                       g_string_append_len (res, "\r\n\t", 3);
+                                       break;
+                               }
  
                                 /* Skip space if needed */
                                 if (g_ascii_isspace (*p)) {
@@ -1118,7 +1152,19 @@ rspamd_header_value_fold (const gchar *name,
                                         c ++;
                                 }
  
-                               g_string_append_len (res, "\r\n\t", 3);
+                               switch (how) {
+                               case RSPAMD_TASK_NEWLINES_LF:
+                                       g_string_append_len (res, "\n\t", 2);
+                                       break;
+                               case RSPAMD_TASK_NEWLINES_CR:
+                                       g_string_append_len (res, "\r\t", 2);
+                                       break;
+                               case RSPAMD_TASK_NEWLINES_CRLF:
+                               default:
+                                       g_string_append_len (res, "\r\n\t", 3);
+                                       break;
+                               }
+
                                 g_string_append_len (res, c, p - c);
                         }
  
@@ -1155,7 +1201,18 @@ rspamd_header_value_fold (const gchar *name,
                         if (g_ascii_isspace (*c)) {
                                 c ++;
                         }
-                       g_string_append_len (res, "\r\n\t", 3);
+                       switch (how) {
+                       case RSPAMD_TASK_NEWLINES_LF:
+                               g_string_append_len (res, "\n\t", 2);
+                               break;
+                       case RSPAMD_TASK_NEWLINES_CR:
+                               g_string_append_len (res, "\r\t", 2);
+                               break;
+                       case RSPAMD_TASK_NEWLINES_CRLF:
+                       default:
+                               g_string_append_len (res, "\r\n\t", 3);
+                               break;
+                       }
                         g_string_append_len (res, c, p - c);
                 }
                 else {
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h

index 91c80ff5d6faa8de0ae8f1b1478e9504657a8dfb..9b9bbe0c12788f8e63dd3c25ea539f23c7d39c98 100644 (file)
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -20,6 +20,14 @@
  #include "ucl.h"
  #include "fstring.h"
  
+
+enum rspamd_newlines_type { /* line-ending style: counted while parsing headers, passed to the folding helpers */
+       RSPAMD_TASK_NEWLINES_CR, /* lone '\r' */
+       RSPAMD_TASK_NEWLINES_LF, /* lone '\n' */
+       RSPAMD_TASK_NEWLINES_CRLF, /* '\r' followed by '\n' */
+       RSPAMD_TASK_NEWLINES_MAX /* number of styles; used as counter-array size and loop bound, not a style itself */
+};
+
  /**
   * Compare two memory regions of size `l` using case insensitive matching
   */
@@ -193,7 +201,7 @@ gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
   * @return freshly allocated base64 encoded value or NULL if input is invalid
   */
  gchar * rspamd_encode_base64_fold (const guchar *in, gsize inlen, gint str_len,
-               gsize *outlen);
+               gsize *outlen, enum rspamd_newlines_type how);
  
  /**
   * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
@@ -227,7 +235,8 @@ gint rspamd_strings_levenshtein_distance (const gchar *s1, gsize s1len,
   */
  GString *rspamd_header_value_fold (const gchar *name,
                 const gchar *value,
-               guint fold_max);
+               guint fold_max,
+               enum rspamd_newlines_type how);
  
  /**
   * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c

index 81038ffdd17a3a1f2db26228529d60fa39d69081..b3c30ab296d7f5abb3eb15d52fd56f733f351c6f 100644 (file)
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1032,7 +1032,8 @@ lua_util_fold_header (lua_State *L)
         value = luaL_checkstring (L, 2);
  
         if (name && value) {
-               folded = rspamd_header_value_fold (name, value, 0);
+               folded = rspamd_header_value_fold (name, value, 0,
+                               RSPAMD_TASK_NEWLINES_CRLF);
  
                 if (folded) {
                         lua_pushlstring (L, folded->str, folded->len);
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Thu, 22 Sep 2016 17:10:43 +0000 (18:10 +0100)
src/client/rspamc.c		patch \| blob \| blame \| history
src/libmime/message.c		patch \| blob \| blame \| history
src/libserver/dkim.c		patch \| blob \| blame \| history
src/libserver/protocol.c		patch \| blob \| blame \| history
src/libserver/task.h		patch \| blob \| blame \| history
src/libutil/str_util.c		patch \| blob \| blame \| history
src/libutil/str_util.h		patch \| blob \| blame \| history
src/lua/lua_util.c		patch \| blob \| blame \| history