void test_str_find(void);
void test_str_sanitize(void);
void test_time_util(void);
+void test_unichar(void);
void test_utc_mktime(void);
void test_var_expand(void);
--- /dev/null
+/* Copyright (c) 2007-2012 Dovecot authors, see the included COPYING file */
+
+#include "test-lib.h"
+#include "str.h"
+#include "unichar.h"
+
+/* Unit test for the UTF-8 helpers. Every value from 0 to 0x10ffff is encoded
+   with uni_ucs4_to_utf8_c(); the result must pass uni_utf8_str_is_valid() and
+   decode back to the same value with uni_utf8_get_char(). Finally a known
+   overlong sequence must be rejected by both the validator and the decoder. */
+void test_unichar(void)
+{
+ /* lead byte 0xf8 followed by four continuation bytes; used below as an
+    overlong encoding that must not be accepted */
+ static const char *overlong_utf8 = "\xf8\x80\x95\x81\xa1";
+ unichar_t chr, chr2;
+ string_t *str = t_str_new(16);
+
+ test_begin("unichars");
+ for (chr = 0; chr <= 0x10ffff; chr++) {
+ /* reuse the same buffer for each character */
+ str_truncate(str, 0);
+ uni_ucs4_to_utf8_c(chr, str);
+ test_assert(uni_utf8_str_is_valid(str_c(str)));
+ /* round trip: decoding must succeed and give back chr */
+ test_assert(uni_utf8_get_char(str_c(str), &chr2) > 0);
+ test_assert(chr2 == chr);
+ }
+ /* overlong input: invalid as a string and a negative decoder result */
+ test_assert(!uni_utf8_str_is_valid(overlong_utf8));
+ test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
+ test_end();
+}
+/* Decode one UTF-8 sequence from _input, looking at no more than max_len
+   bytes. As far as the visible hunks show: returns -1 for invalid or overlong
+   input, 1 when the whole sequence fit within max_len, and 0 when the
+   sequence is longer than max_len (the bytes that are available are still
+   checked for validity first). The decoded value is written to *chr_r. */
int uni_utf8_get_char_n(const void *_input, size_t max_len, unichar_t *chr_r)
{
+ /* lowest_valid_chr_table[len] is the smallest value that really needs a
+    len-byte sequence; anything smaller encoded in len bytes is overlong.
+    A 4-byte sequence carries 21 payload bits and a 5-byte sequence 26,
+    so the 5- and 6-byte minimums are 0x200000 (1 << 21) and
+    0x4000000 (1 << 26). */
+ static const unichar_t lowest_valid_chr_table[] =
+ { 0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
const unsigned char *input = _input;
- unichar_t chr;
+ unichar_t chr, lowest_valid_chr;
unsigned int i, len;
int ret;
return -1;
}
- if (len <= max_len)
+ if (len <= max_len) {
+ lowest_valid_chr = lowest_valid_chr_table[len];
ret = 1;
- else {
+ } else {
/* check first if the input is invalid before returning 0 */
+ /* the value is incomplete here, so a minimum of 0 disables the
+    overlong check below */
+ lowest_valid_chr = 0;
ret = 0;
len = max_len;
}
chr <<= 6;
chr |= input[i] & 0x3f;
}
+ if (chr < lowest_valid_chr) {
+ /* overlong encoding */
+ return -1;
+ }
*chr_r = chr;
return ret;
+/* Returns the byte length of the UTF-8 sequence starting at input[0], as
+   given by uni_utf8_char_bytes(), or 0 when uni_utf8_get_char_n() does not
+   decode a complete character from the first size bytes (result <= 0). */
static inline unsigned int
is_valid_utf8_seq(const unsigned char *input, unsigned int size)
{
- unsigned int i, len;
+ unichar_t chr;
- len = uni_utf8_char_bytes(input[0]);
- if (unlikely(len > size || len == 1))
+ /* the decoder also rejects overlong encodings, which the removed
+    continuation-byte range loop did not check */
+ if (uni_utf8_get_char_n(input, size, &chr) <= 0)
return 0;
-
- /* the rest of the chars should be in 0x80..0xbf range.
- anything else is start of a sequence or invalid */
- for (i = 1; i < len; i++) {
- if (unlikely(input[i] < 0x80 || input[i] > 0xbf))
- return 0;
- }
- return len;
+ return uni_utf8_char_bytes(input[0]);
}
static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,