From: Timo Sirainen
Date: Wed, 14 Jan 2015 23:03:58 +0000 (+0200)
Subject: lib: Fixed NUL-handling in uni_utf8_*strlen*()
X-Git-Tag: 2.2.16.rc1~158
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=32ae620015da6ab2ec28e04d3cdcdb4420f1fa6b;p=thirdparty%2Fdovecot%2Fcore.git

lib: Fixed NUL-handling in uni_utf8_*strlen*()

uni_utf8_strlen() could have skipped over the ending NUL byte and caused
read buffer overflows with invalid input.

uni_utf8_strlen_n() and uni_utf8_partial_strlen_n() now allow NUL bytes in
the input and they're treated as regular control characters. Previously the
size was actually treated as max_size with early NUL byte termination.
Technically this is an API change, but I'm not aware of anything using these
functions in an incompatible way.
---

diff --git a/src/lib/test-unichar.c b/src/lib/test-unichar.c
index 6607809616..563787455b 100644
--- a/src/lib/test-unichar.c
+++ b/src/lib/test-unichar.c
@@ -5,9 +5,25 @@
 #include "buffer.h"
 #include "unichar.h"
 
+static void test_unichar_uni_utf8_strlen(void)
+{
+	static const char input[] = "\xC3\xA4\xC3\xA4\0a";
+
+	test_begin("uni_utf8_strlen()");
+	test_assert(uni_utf8_strlen(input) == 2);
+	test_end();
+
+	test_begin("uni_utf8_strlen_n()");
+	test_assert(uni_utf8_strlen_n(input, 1) == 0);
+	test_assert(uni_utf8_strlen_n(input, 2) == 1);
+	test_assert(uni_utf8_strlen_n(input, 3) == 1);
+	test_assert(uni_utf8_strlen_n(input, 4) == 2);
+	test_end();
+}
+
 static void test_unichar_uni_utf8_partial_strlen_n(void)
 {
-	static const char input[] = "\xC3\xA4\xC3\xA4";
+	static const char input[] = "\xC3\xA4\xC3\xA4\0a";
 	size_t pos;
 
 	test_begin("uni_utf8_partial_strlen_n()");
@@ -15,7 +31,8 @@ static void test_unichar_uni_utf8_partial_strlen_n(void)
 	test_assert(uni_utf8_partial_strlen_n(input, 2, &pos) == 1 && pos == 2);
 	test_assert(uni_utf8_partial_strlen_n(input, 3, &pos) == 1 && pos == 2);
 	test_assert(uni_utf8_partial_strlen_n(input, 4, &pos) == 2 && pos == 4);
-	test_assert(uni_utf8_partial_strlen_n(input, (size_t)-1, &pos) == 2 && pos == 4);
+	test_assert(uni_utf8_partial_strlen_n(input, 5, &pos) == 3 && pos == 5);
+	test_assert(uni_utf8_partial_strlen_n(input, 6, &pos) == 4 && pos == 6);
 	test_end();
 }
 
@@ -47,5 +64,6 @@ void test_unichar(void)
 	test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
 	test_end();
 
+	test_unichar_uni_utf8_strlen();
 	test_unichar_uni_utf8_partial_strlen_n();
 }
diff --git a/src/lib/unichar.c b/src/lib/unichar.c
index e0f5f25a9f..7c8b46b90b 100644
--- a/src/lib/unichar.c
+++ b/src/lib/unichar.c
@@ -189,7 +189,7 @@ void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output)
 
 unsigned int uni_utf8_strlen(const char *input)
 {
-	return uni_utf8_strlen_n(input, (size_t)-1);
+	return uni_utf8_strlen_n(input, strlen(input));
 }
 
 unsigned int uni_utf8_strlen_n(const void *input, size_t size)
@@ -206,7 +206,7 @@ unsigned int uni_utf8_partial_strlen_n(const void *_input, size_t size,
 	unsigned int count, len = 0;
 	size_t i;
 
-	for (i = 0; i < size && input[i] != '\0'; ) {
+	for (i = 0; i < size; ) {
 		count = uni_utf8_char_bytes(input[i]);
 		if (i + count > size)
 			break;
diff --git a/src/lib/unichar.h b/src/lib/unichar.h
index dbc3bd8d45..8c37b395eb 100644
--- a/src/lib/unichar.h
+++ b/src/lib/unichar.h
@@ -51,9 +51,9 @@ void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
    -1 for invalid input. */
 int uni_utf8_get_char(const char *input, unichar_t *chr_r);
 int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
-/* Returns UTF-8 string length. */
+/* Returns number of characters in UTF-8 string. */
 unsigned int uni_utf8_strlen(const char *input) ATTR_PURE;
-/* Returns UTF-8 string length with maximum input size. */
+/* Returns number of characters in UTF-8 input of specified size. */
 unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE;
 /* Same as uni_utf8_strlen_n(), but if input ends with a partial UTF-8
    character, don't include it in the return value and set partial_pos_r to