]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Do not try to detect utf8 using heuristic
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 9 Jan 2020 15:22:14 +0000 (15:22 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 9 Jan 2020 15:22:14 +0000 (15:22 +0000)
src/libmime/mime_encoding.c

index 0ba0e0edd6d0a6f31e1e180c0582149990b7c256..1f130325e704c4767033a72f1a898b160fa2694e 100644 (file)
@@ -36,7 +36,7 @@
 #define RSPAMD_CHARSET_FLAG_ASCII (1 << 1)
 
 #define RSPAMD_CHARSET_CACHE_SIZE 32
-#define RSPAMD_CHARSET_MAX_CONTENT 128
+#define RSPAMD_CHARSET_MAX_CONTENT 512
 
 #define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_TEXT_PART_FLAG_UTF)
 #define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF)
@@ -625,28 +625,30 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
                 * corner cases
                 */
                if (content_check) {
-                       real_charset = rspamd_mime_charset_find_by_content (in,
-                                       MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
+                       if (rspamd_fast_utf8_validate (in, len) != 0) {
+                               real_charset = rspamd_mime_charset_find_by_content (in,
+                                               MIN (RSPAMD_CHARSET_MAX_CONTENT, len));
 
-                       if (real_charset) {
+                               if (real_charset) {
 
-                               if (rspamd_regexp_match (utf_compatible_re,
-                                               real_charset, strlen (real_charset), TRUE)) {
-                                       RSPAMD_FTOK_ASSIGN (charset, UTF8_CHARSET);
+                                       if (rspamd_regexp_match (utf_compatible_re,
+                                                       real_charset, strlen (real_charset), TRUE)) {
+                                               RSPAMD_FTOK_ASSIGN (charset, UTF8_CHARSET);
 
-                                       return TRUE;
-                               }
-                               else {
-                                       charset->begin = real_charset;
-                                       charset->len = strlen (real_charset);
+                                               return TRUE;
+                                       }
+                                       else {
+                                               charset->begin = real_charset;
+                                               charset->len = strlen (real_charset);
 
-                                       return FALSE;
+                                               return FALSE;
+                                       }
                                }
+
+                               rspamd_mime_charset_utf_enforce (in, len);
                        }
                }
 
-               rspamd_mime_charset_utf_enforce (in, len);
-
                return TRUE;
        }