git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
str_sanitize(): Don't break UTF-8 input.
author Timo Sirainen <tss@iki.fi>
Wed, 28 Oct 2009 17:50:55 +0000 (13:50 -0400)
committer Timo Sirainen <tss@iki.fi>
Wed, 28 Oct 2009 17:50:55 +0000 (13:50 -0400)
--HG--
branch : HEAD

src/lib/str-sanitize.c
src/lib/test-str-sanitize.c

index 67e888ef72cd74302592746628e04dd3a96445cf..4c435b5df5157f86ee90a9536e54972e03051fee 100644 (file)
@@ -1,32 +1,52 @@
 /* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "unichar.h"
 #include "str.h"
 #include "str-sanitize.h"
 
 static size_t str_sanitize_skip_start(const char *src, size_t max_len)
 {
+       unsigned int len; /* value of uni_utf8_char_bytes() for the byte at src[i]; used as the step size */
+       unichar_t chr; /* output of uni_utf8_get_char(); only that call's return value is inspected here */
        size_t i;
 
-       for (i = 0; i < max_len; i++) {
-               if (((unsigned char)src[i] & 0x7f) < 32)
+       for (i = 0; i < max_len; ) { /* no i++ here: the index is advanced by len at the end of the body */
+               len = uni_utf8_char_bytes(src[i]);
+               if (uni_utf8_get_char(src+i, &chr) <= 0) /* stop when decoding fails (presumably invalid or truncated UTF-8 - confirm in unichar.h) */
                        break;
+               if ((unsigned char)src[i] < 32) /* stop at bytes below 32; the old 0x7f mask is gone, so bytes >= 0x80 no longer stop the scan */
+                       break;
+               i += len; /* NOTE(review): not clamped, so i can end up > max_len if len > max_len - i - confirm callers cope */
        }
        return i;
 }
 
 void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
 {
+       unsigned int len;
+       unichar_t chr;
        size_t i;
+       int ret;
 
-       i = str_sanitize_skip_start(src, max_len);
-       str_append_n(dest, src, i);
-
-       for (; i < max_len && src[i] != '\0'; i++) {
-               if (((unsigned char)src[i] & 0x7f) < 32)
+       for (i = 0; i < max_len && src[i] != '\0'; ) {
+               len = uni_utf8_char_bytes(src[i]);
+               ret = uni_utf8_get_char(src+i, &chr);
+               if (ret <= 0) {
+                       /* invalid UTF-8 */
+                       str_append_c(dest, '?');
+                       if (ret == 0) {
+                               /* input ended too early */
+                               return;
+                       }
+                       i++;
+                       continue;
+               }
+               if ((unsigned char)src[i] < 32)
                        str_append_c(dest, '?');
                else
                        str_append_c(dest, src[i]);
+               i += len;
        }
 
        if (src[i] != '\0') {
index 8a5033ca4a619da116f9c220911fa3795e181730..6afec2f6b1992c95c384fcbd9fcf92bd35f33b79 100644 (file)
@@ -17,7 +17,10 @@ void test_str_sanitize(void)
                { "ab", 2 },
                { "abc", 2 },
                { "abcd", 3 },
-               { "abcde", 4 }
+               { "abcde", 4 },
+               { "с", 10 },
+               { "с", 1 },
+               { "\001x\x1fy\x81", 10 }
        };
        static const char *output[] = {
                NULL,
@@ -26,15 +29,18 @@ void test_str_sanitize(void)
                "ab",
                "...",
                "...",
-               "a..."
+               "a...",
+               "с",
+               "с",
+               "?x?y?"
        };
        const char *str;
        unsigned int i;
-       bool success;
 
+       test_begin("str_sanitize");
        for (i = 0; i < N_ELEMENTS(input); i++) {
                str = str_sanitize(input[i].str, input[i].max_len);
-               success = null_strcmp(output[i], str) == 0;
-               test_out(t_strdup_printf("str_sanitize(%d)", i), success);
+               test_assert(null_strcmp(output[i], str) == 0);
        }
+       test_end();
 }