From: dota17 Date: Fri, 17 Jan 2020 07:33:44 +0000 (+0800) Subject: update code X-Git-Tag: json-c-0.14-20200419~59^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F531%2Fhead;p=thirdparty%2Fjson-c.git update code --- diff --git a/json_tokener.c b/json_tokener.c index 2a8451df..246d5bd7 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -223,7 +223,7 @@ struct json_object* json_tokener_parse_verbose(const char *str, : \ (((tok)->err = json_tokener_continue), 0) \ ) : \ - (((tok->flags & JSON_TOKENER_STRICT) && \ + (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \ (!json_tokener_validate_utf8(*str, nBytesp)))? \ ((tok->err = json_tokener_error_parse_utf8_string), 0) \ : \ @@ -956,7 +956,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, } /* while(PEEK_CHAR) */ out: - if ((tok->flags & JSON_TOKENER_STRICT) && (nBytes != 0)) + if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) { tok->err = json_tokener_error_parse_utf8_string; } @@ -1004,19 +1004,14 @@ json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) { if (chr >= 0x80) { - if(chr >= 0xFC && chr <= 0xFd) - *nBytes = 6; - else if (chr >= 0xF8) - *nBytes = 5; - else if (chr >= 0xF0) - *nBytes = 4; - else if (chr >= 0xE0) - *nBytes = 3; - else if (chr >= 0xC0) + if ((chr & 0xe0) == 0xc0) + *nBytes = 1; + else if ((chr & 0xf0) == 0xe0) *nBytes = 2; + else if ((chr & 0xf8) == 0xf0) + *nBytes = 3; else return 0; - (*nBytes)--; } } else diff --git a/json_tokener.h b/json_tokener.h index 061f81bc..274e5487 100644 --- a/json_tokener.h +++ b/json_tokener.h @@ -137,6 +137,17 @@ typedef struct json_tokener json_tokener; */ #define JSON_TOKENER_STRICT 0x01 +/** + * Allow json_tokener_parse_ex() validate utf-8 char. + * The json_tokener_validate_utf8() validate one utf8 char + * after get one char, then begin to parse it. + * + * This flag is not set by default. + * + * @see json_tokener_set_flags() + */ +#define JSON_TOKENER_VALIDATE_UTF8 0x10 + /** * Given an error previously returned by json_tokener_get_error(), * return a human readable description of the error. diff --git a/tests/test_parse.c b/tests/test_parse.c index 14d4b113..bd7768f8 100644 --- a/tests/test_parse.c +++ b/tests/test_parse.c @@ -357,38 +357,36 @@ struct incremental_step { // utf-8 test // acsll encoding - { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 3 }, + { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 5 }, { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 }, // utf-8 encoding - { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 3 }, - { "\x22\xe4\xb8",-1, -1, json_tokener_error_parse_utf8_string, 2 }, - { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 3 }, + { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 5 }, + { "\x22\xe4\xb8",-1, 3, json_tokener_error_parse_utf8_string, 4 }, + { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 5 }, { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 }, - { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 3 }, - { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, - { "\x22\xf8\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, - { "\x22\xfd\xa5\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, + { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 5 }, + { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 5 }, // wrong utf-8 encoding - { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 3 }, + { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 5 }, { "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 }, // GBK encoding - { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 }, + { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 }, { "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 }, // char after space - { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 3 }, - { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 }, - { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 3 }, + { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 5 }, + { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 5 }, + { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 5 }, // char in state inf { "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 }, - { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 3 }, + { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 5 }, // char in escape unicode - { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 3 }, - { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 3 }, - { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 3 }, + { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 5 }, + { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 5 }, + { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 5 }, // char in number - { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 3 }, + { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 5 }, // char in object - { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 3 }, + { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 5 }, { NULL, -1, -1, json_tokener_success, 0 }, }; @@ -424,9 +422,19 @@ static void test_incremental_parse() size_t expected_char_offset; if (step->reset_tokener & 2) - json_tokener_set_flags(tok, JSON_TOKENER_STRICT); + { + if (step->reset_tokener & 4) + json_tokener_set_flags(tok, 3); + else + json_tokener_set_flags(tok, JSON_TOKENER_STRICT); + } else - json_tokener_set_flags(tok, 0); + { + if (step->reset_tokener & 4) + json_tokener_set_flags(tok, JSON_TOKENER_VALIDATE_UTF8); + else + json_tokener_set_flags(tok, 0); + } if (length == -1) length = strlen(step->string_to_parse); diff --git a/tests/test_parse.expected b/tests/test_parse.expected index a5c2454a..68e55b1b 100644 --- a/tests/test_parse.expected +++ b/tests/test_parse.expected @@ -191,8 +191,6 @@ json_tokener_parse_ex(tok, json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界" json_tokener_parse_ex(tok, "πφ" , 6) ... OK: got object of type [string]: "πφ" json_tokener_parse_ex(tok, "𥑕" , 6) ... OK: got object of type [string]: "𥑕" -json_tokener_parse_ex(tok, "ø¥¥‘•" , 7) ... OK: got object of type [string]: "ø¥¥‘•" -json_tokener_parse_ex(tok, "ý¥¥¥‘•" , 8) ... OK: got object of type [string]: "ý¥¥¥‘•" json_tokener_parse_ex(tok, "æN" , 5) ... OK: got correct error: invalid utf-8 string json_tokener_parse_ex(tok, "æN" , 5) ... OK: got object of type [string]: "æN" json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got correct error: invalid utf-8 string @@ -207,5 +205,5 @@ json_tokener_parse_ex(tok, "\ud855 json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string json_tokener_parse_ex(tok, 1111 , 5) ... OK: got correct error: invalid utf-8 string json_tokener_parse_ex(tok, {"1":1} , 8) ... OK: got correct error: invalid utf-8 string -End Incremental Tests OK=129 ERROR=0 +End Incremental Tests OK=127 ERROR=0 ==================================