"object value separator ',' expected",
"invalid string sequence",
"expected comment",
+ "invalid utf-8 string",
"buffer size overflow"
};
: \
(((tok)->err = json_tokener_continue), 0) \
) : \
- (((dest) = *str), 1) \
- )
+ (((tok->flags & JSON_TOKENER_STRICT) && \
+ (!json_tokener_validate_utf8(*str, nBytesp)))? \
+ ((tok->err = json_tokener_error_parse_utf8_string), 0) \
+ : \
+ (((dest) = *str), 1) \
+ ))
/* ADVANCE_CHAR() macro:
* Increments str & tok->char_offset.
{
struct json_object *obj = NULL;
char c = '\1';
+ unsigned int nBytes = 0;
+ unsigned int *nBytesp = &nBytes;
+
#ifdef HAVE_USELOCALE
locale_t oldlocale = uselocale(NULL);
locale_t newloc;
} /* while(PEEK_CHAR) */
out:
+ if ((tok->flags & JSON_TOKENER_STRICT) && (nBytes != 0))
+ {
+ tok->err = json_tokener_error_parse_utf8_string;
+ }
if (c &&
(state == json_tokener_state_finish) &&
(tok->depth == 0) &&
return NULL;
}
+/* Incrementally validate one byte of a UTF-8 byte stream.
+ *
+ * c      - the next input byte to examine.
+ * nBytes - in/out state owned by the caller: the number of continuation
+ *          bytes still expected for the current multi-byte character
+ *          (0 when positioned between characters).  The caller must
+ *          check that it is back to 0 at end of input, otherwise the
+ *          stream ended inside a multi-byte sequence (see the check at
+ *          the "out:" label in json_tokener_parse_ex).
+ *
+ * Returns 1 if the byte is acceptable so far, 0 if it is invalid.
+ *
+ * NOTE(review): lead bytes 0xF8-0xFD are accepted as starting 5- and
+ * 6-byte sequences (obsolete RFC 2279 forms), and overlong lead bytes
+ * 0xC0/0xC1 are not rejected; RFC 3629 limits UTF-8 to 4 bytes with
+ * lead <= 0xF4.  The accompanying tests expect this lenient behavior,
+ * so tightening it would be a behavior change -- confirm before fixing.
+ */
+json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
+{
+	/* Work on the byte as unsigned: plain char may be signed, which
+	 * would make the >= 0x80 range comparisons below misbehave. */
+	unsigned char chr = c;
+	if (*nBytes == 0)
+	{
+		/* Between characters: a byte < 0x80 is plain ASCII and needs
+		 * no further state; otherwise classify the lead byte. */
+		if (chr >= 0x80)
+		{
+			/* Map the lead byte to the total sequence length. */
+			if(chr >= 0xFC && chr <= 0xFd)
+				*nBytes = 6;
+			else if (chr >= 0xF8)
+				*nBytes = 5;
+			else if (chr >= 0xF0)
+				*nBytes = 4;
+			else if (chr >= 0xE0)
+				*nBytes = 3;
+			else if (chr >= 0xC0)
+				*nBytes = 2;
+			else
+				/* 0x80-0xBF: a continuation byte with no preceding
+				 * lead byte is invalid here. */
+				return 0;
+			/* The lead byte itself counts as one of the sequence. */
+			(*nBytes)--;
+		}
+	}
+	else
+	{
+		/* Inside a sequence: every continuation byte must match the
+		 * 10xxxxxx bit pattern. */
+		if ((chr & 0xC0) != 0x80)
+			return 0;
+		(*nBytes)--;
+	}
+	return 1;
+}
+
void json_tokener_set_flags(struct json_tokener *tok, int flags)
{
tok->flags = flags;
json_tokener_error_parse_object_value_sep,
json_tokener_error_parse_string,
json_tokener_error_parse_comment,
+ json_tokener_error_parse_utf8_string,
json_tokener_error_size
};
JSON_EXPORT struct json_object* json_tokener_parse(const char *str);
JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error);
+/**
+ * Validate a UTF-8 string in strict mode.
+ * If the input is not valid UTF-8, an error is reported.
+ */
+json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
/**
* Set flags that control how parsing will be done.
*/
{ "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 },
{ "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 },
+ // utf-8 test
+ // ASCII encoding
+ { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 3 },
+ { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 },
+ // utf-8 encoding
+ { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 3 },
+ { "\x22\xe4\xb8",-1, -1, json_tokener_error_parse_utf8_string, 2 },
+ { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 3 },
+ { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 },
+ { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 3 },
+ { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
+ { "\x22\xf8\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
+ { "\x22\xfd\xa5\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 },
+ // wrong utf-8 encoding
+ { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+ { "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 },
+ // GBK encoding
+ { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+ { "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 },
+ // char after space
+ { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 3 },
+ { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+ { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+ // char in state inf
+ { "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 },
+ { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+ // char in escape unicode
+ { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 3 },
+ { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 3 },
+ { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 3 },
+ // char in number
+ { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 3 },
+ // char in object
+ { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 3 },
+
{ NULL, -1, -1, json_tokener_success, 0 },
};
json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character
json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character
json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character
-End Incremental Tests OK=105 ERROR=0
+json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&"
+json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&"
+json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界"
+json_tokener_parse_ex(tok, "ä¸ , 3) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, \96ç\95\8c" , 5) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界"
+json_tokener_parse_ex(tok, "πφ" , 6) ... OK: got object of type [string]: "πφ"
+json_tokener_parse_ex(tok, "𥑕" , 6) ... OK: got object of type [string]: "𥑕"
+json_tokener_parse_ex(tok, "ø¥¥‘•" , 7) ... OK: got object of type [string]: "ø¥¥‘•"
+json_tokener_parse_ex(tok, "ý¥¥¥‘•" , 8) ... OK: got object of type [string]: "ý¥¥¥‘•"
+json_tokener_parse_ex(tok, "æ\9dN" , 5) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, "æ\9dN" , 5) ... OK: got object of type [string]: "æ\9dN"
+json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got object of type [string]: "ÀîÅô"
+json_tokener_parse_ex(tok, "世" , 7) ... OK: got object of type [string]: "世"
+json_tokener_parse_ex(tok, \81"ä¸\96" , 8) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, [ \811] , 5) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, Infinity , 9) ... OK: got object of type [double]: Infinity
+json_tokener_parse_ex(tok, Inf\81nity , 8) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, "\ud855\udc55", 15) ... OK: got object of type [string]: "𥑕"
+json_tokener_parse_ex(tok, "\ud855Àudc55", 14) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, 11\8111 , 5) ... OK: got correct error: invalid utf-8 string
+json_tokener_parse_ex(tok, {"1\81":1} , 8) ... OK: got correct error: invalid utf-8 string
+End Incremental Tests OK=129 ERROR=0
==================================