From e7e451b7d9339883db29bad79df5256f9b410652 Mon Sep 17 00:00:00 2001
From: Aki Tuomi
Date: Thu, 19 Oct 2017 11:55:01 +0300
Subject: [PATCH] test-unichar: Update test to conform RFC3629

---
 src/lib/test-unichar.c | 80 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/src/lib/test-unichar.c b/src/lib/test-unichar.c
index 21826ebf97..d16040ebbb 100644
--- a/src/lib/test-unichar.c
+++ b/src/lib/test-unichar.c
@@ -36,6 +36,82 @@ static void test_unichar_uni_utf8_partial_strlen_n(void)
 	test_end();
 }
 
+/* Table-driven check of uni_utf8_get_char(): entries marked valid must
+   return > 0 and decode to `expected'; entries marked invalid (malformed
+   octets, surrogate code points, 5/6-octet forms) must return < 1.
+   The assertion index reported on failure is the table index. */
+static void test_unichar_valid_unicode(void)
+{
+	struct {
+		const char *input;
+		bool valid;
+		unichar_t expected;
+	} test_cases[] = {
+		{ "a", TRUE, 'a' },
+		{ "\xc3\xb1", TRUE, 0x00F1 }, /* U+00F1 */
+		{ "\xc3\x28", FALSE, 0x0 }, /* has invalid 2nd octet */
+		{ "\xa0\xa1", FALSE, 0x0 }, /* invalid sequence identifier */
+		{ "\xed\xb2\x80", FALSE, 0x0 }, /* UTF-8B */
+		{ "\xed\xa0\x80", FALSE, 0x0 }, /* surrogate halves, U+D800 .. */
+		{ "\xed\xa1\x80", FALSE, 0x0 },
+		{ "\xed\xa2\x80", FALSE, 0x0 },
+		{ "\xed\xa3\x80", FALSE, 0x0 },
+		{ "\xed\xa4\x80", FALSE, 0x0 },
+		{ "\xed\xa5\x80", FALSE, 0x0 },
+		{ "\xed\xa6\x80", FALSE, 0x0 },
+		{ "\xed\xa7\x80", FALSE, 0x0 },
+		{ "\xed\xa8\x80", FALSE, 0x0 },
+		{ "\xed\xa9\x80", FALSE, 0x0 },
+		{ "\xed\xaa\x80", FALSE, 0x0 },
+		{ "\xed\xab\x80", FALSE, 0x0 },
+		{ "\xed\xac\x80", FALSE, 0x0 },
+		{ "\xed\xad\x80", FALSE, 0x0 },
+		{ "\xed\xae\x80", FALSE, 0x0 },
+		{ "\xed\xaf\x80", FALSE, 0x0 },
+		{ "\xed\xb0\x80", FALSE, 0x0 },
+		{ "\xed\xb1\x80", FALSE, 0x0 },
+		{ "\xed\xb2\x80", FALSE, 0x0 },
+		{ "\xed\xb3\x80", FALSE, 0x0 },
+		{ "\xed\xb4\x80", FALSE, 0x0 },
+		{ "\xed\xb5\x80", FALSE, 0x0 },
+		{ "\xed\xb6\x80", FALSE, 0x0 },
+		{ "\xed\xb7\x80", FALSE, 0x0 },
+		{ "\xed\xb8\x80", FALSE, 0x0 },
+		{ "\xed\xb9\x80", FALSE, 0x0 },
+		{ "\xed\xba\x80", FALSE, 0x0 },
+		{ "\xed\xbb\x80", FALSE, 0x0 },
+		{ "\xed\xbc\x80", FALSE, 0x0 },
+		{ "\xed\xbd\x80", FALSE, 0x0 },
+		{ "\xed\xbe\x80", FALSE, 0x0 },
+		{ "\xed\xbf\x80", FALSE, 0x0 }, /* .. U+DFC0, last block up to U+DFFF */
+		{ "\xe2\x82\xa1", TRUE, 0x20A1 }, /* U+20A1 */
+		{ "\xe2\x28\xa1", FALSE, 0x0 }, /* invalid 2nd octet */
+		{ "\xe2\x82\x28", FALSE, 0x0 }, /* invalid 3rd octet */
+		{ "\xf0\x90\x8c\xbc", TRUE, 0x1033C }, /* U+1033C */
+		{ "\xf0\x28\x8c\xbc", FALSE, 0x0 }, /* invalid 2nd octet */
+		{ "\xf0\x90\x28\xbc", FALSE, 0x0 }, /* invalid 3rd octet */
+		{ "\xf0\x90\x8c\x28", FALSE, 0x0 }, /* invalid 4th octet */
+		{ "\xf4\x80\x80\x80", TRUE, 0x100000 }, /* U+100000, supplementary plane start */
+		{ "\xf4\x8f\xbf\xbf", TRUE, 0x10FFFF }, /* U+10FFFF, maximum value */
+		{ "\xf8\xa1\xa1\xa1\xa1", FALSE, 0x0 }, /* invalid unicode */
+		{ "\xfc\xa1\xa1\xa1\xa1\xa1", FALSE, 0x0 }, /* invalid unicode */
+	};
+
+	test_begin("unichar valid unicode");
+
+	for(size_t i = 0; i < N_ELEMENTS(test_cases); i++) {
+		unichar_t chr = 0; /* defined value even if decoding fails */
+		if (test_cases[i].valid) {
+			test_assert_idx(uni_utf8_get_char(test_cases[i].input, &chr) > 0, i);
+			test_assert_idx(test_cases[i].expected == chr, i);
+		} else {
+			test_assert_idx(uni_utf8_get_char(test_cases[i].input, &chr) < 1, i);
+		}
+	}
+
+	test_end();
+}
+
 void test_unichar(void)
 {
 	static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
@@ -47,6 +123,9 @@ void test_unichar(void)
 
 	test_begin("unichars encode/decode");
 	for (chr = 0; chr <= 0x10ffff; chr++) {
+		/* skip surrogates */
+		if ((chr & 0xfff800) == 0xd800)
+			continue;
 		/* The bottom 6 bits should be irrelevant to code coverage,
 		   only test 000000, 111111, and something in between. */
 		if ((chr & 63) == 1)
@@ -92,4 +171,5 @@ void test_unichar(void)
 
 	test_unichar_uni_utf8_strlen();
 	test_unichar_uni_utf8_partial_strlen_n();
+	test_unichar_valid_unicode();
 }
-- 
2.47.3