git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Filter non-utf chars from all decoded headers
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Dec 2016 13:07:01 +0000 (13:07 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 22 Dec 2016 13:07:01 +0000 (13:07 +0000)
src/libmime/mime_encoding.c
src/libmime/mime_encoding.h
src/libmime/mime_headers.c

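diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index b33312fafce7ada74826c0faf5710d4f89753f06..adde740be79d5c4ed4baf50e05a7798571bd24bb 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c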
@@ -274,13 +274,39 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in,
        return TRUE;
 }
 
-gboolean
-rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
-               gchar *in, gsize len)
+void
+rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
 {
        const gchar *end, *p;
        gsize remain = len;
 
+       /* Now we validate input and replace bad characters with '?' symbol */
+       p = in;
+
+       while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
+               gchar *valid;
+
+               valid = g_utf8_find_next_char (end, in + len);
+
+               if (!valid) {
+                       valid = in + len;
+               }
+
+               if (valid > end) {
+                       memset ((gchar *)end, '?', valid - end);
+                       p = valid;
+                       remain = (in + len) - p;
+               }
+               else {
+                       break;
+               }
+       }
+}
+
+gboolean
+rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+               gchar *in, gsize len)
+{
        if (utf_compatible_re == NULL) {
                utf_compatible_re = rspamd_regexp_new (
                                "^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi)|(?:us)|(?:ISO-8859-1)|"
@@ -290,27 +316,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 
        if (rspamd_regexp_match (utf_compatible_re, charset->begin, charset->len,
                        TRUE)) {
-               /* Now we validate input and replace bad characters with '?' symbol */
-               p = in;
-
-               while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
-                       gchar *valid;
-
-                       valid = g_utf8_find_next_char (end, in + len);
-
-                       if (!valid) {
-                               valid = in + len;
-                       }
-
-                       if (valid > end) {
-                               memset ((gchar *)end, '?', valid - end);
-                               p = valid;
-                               remain = (in + len) - p;
-                       }
-                       else {
-                               break;
-                       }
-               }
+               rspamd_mime_charset_utf_enforce (in, len);
 
                return TRUE;
        }
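diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index a4999266f6f55d4ce7856e2c2d2e64509767d7f4..c1a24eecad7f0eba355dfcc2192d657263ad9a62 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h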
@@ -78,4 +78,12 @@ GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
                gchar *in, gsize len);
 
+/**
+ * Ensure that all characters in string are valid utf8 chars or replace them
+ * with '?'
+ * @param in
+ * @param len
+ */
+void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);
+
 #endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
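diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index f2f2c2270b4346712f7247fb5e22f1055ab54405..9b65f1ecfe8ba6c0cb656b37fc70143d42fdf33d 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c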
@@ -227,6 +227,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
                                new->decoded = "";
                        }
 
+                       /* We also validate utf8 and replace all non-valid utf8 chars */
+                       rspamd_mime_charset_utf_enforce (new->decoded, strlen (new->decoded));
                        rspamd_mime_header_add (task, target, new);
                        state = 0;
                        break;