From: Eric Haszlakiewicz Date: Sun, 2 Aug 2020 02:54:36 +0000 (+0000) Subject: Merge the is_ws_char() and is_hex_char() changes to json_tokener from branch 'ramirop... X-Git-Tag: json-c-0.16-20220414~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8b43ff0c220d4615e0b6fe124fb248fd87c1c2ce;p=thirdparty%2Fjson-c.git Merge the is_ws_char() and is_hex_char() changes to json_tokener from branch 'ramiropolla/for_upstream' (PR #464) --- 8b43ff0c220d4615e0b6fe124fb248fd87c1c2ce diff --cc json_tokener.c index 6527270d,6fc4937f..ea61ce0a --- a/json_tokener.c +++ b/json_tokener.c @@@ -40,19 -40,49 +40,42 @@@ #ifdef HAVE_XLOCALE_H #include #endif +#ifdef HAVE_STRINGS_H +#include +#endif /* HAVE_STRINGS_H */ -#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x) & 7) + 9) +#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9) #if !HAVE_STRNCASECMP && defined(_MSC_VER) - /* MSC has the version as _strnicmp */ -# define strncasecmp _strnicmp +/* MSC has the version as _strnicmp */ +#define strncasecmp _strnicmp #elif !HAVE_STRNCASECMP -# error You do not have strncasecmp on your system. +#error You do not have strncasecmp on your system. #endif /* HAVE_STRNCASECMP */ + /* The following helper functions are used to speed up parsing. They + * are faster than their ctype counterparts because they assume that + * the input is in ASCII and that the locale is set to "C". The + * compiler will also inline these functions, providing an additional + * speedup by saving on function calls. + */ + static int is_ws_char(char c) + { + return c == ' ' + || c == '\t' + || c == '\n' + || c == '\v' + || c == '\f' + || c == '\r'; + } + + static int is_hex_char(char c) + { + return (c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f'); + } + -static int is_number_char(char c) -{ - return (c >= '0' && c <= '9') - || c == '.' - || c == '+' - || c == '-' - || c == 'e' - || c == 'E'; -} - /* Use C99 NAN by default; if not available, nan("") should work too. 
*/ #ifndef NAN #define NAN nan("") @@@ -257,994 -268,760 +280,994 @@@ struct json_object *json_tokener_parse_ /* End optimization macro defs */ - -struct json_object* json_tokener_parse_ex(struct json_tokener *tok, - const char *str, int len) +struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) { - struct json_object *obj = NULL; - char c = '\1'; -#ifdef HAVE_USELOCALE - locale_t oldlocale = uselocale(NULL); - locale_t newloc; -#elif defined(HAVE_SETLOCALE) - char *oldlocale = NULL; -#endif - - tok->char_offset = 0; - tok->err = json_tokener_success; - - /* this interface is presently not 64-bit clean due to the int len argument - and the internal printbuf interface that takes 32-bit int len arguments - so the function limits the maximum string size to INT32_MAX (2GB). - If the function is called with len == -1 then strlen is called to check - the string length is less than INT32_MAX (2GB) */ - if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) { - tok->err = json_tokener_error_size; - return NULL; - } + struct json_object *obj = NULL; + char c = '\1'; + unsigned int nBytes = 0; + unsigned int *nBytesp = &nBytes; #ifdef HAVE_USELOCALE - { - locale_t duploc = duplocale(oldlocale); - newloc = newlocale(LC_NUMERIC, "C", duploc); - // XXX at least Debian 8.4 has a bug in newlocale where it doesn't - // change the decimal separator unless you set LC_TIME! 
- if (newloc) - { - duploc = newloc; // original duploc has been freed by newlocale() - newloc = newlocale(LC_TIME, "C", duploc); - } - if (newloc == NULL) - { - freelocale(duploc); - return NULL; - } - uselocale(newloc); - } + locale_t oldlocale = uselocale(NULL); + locale_t newloc; #elif defined(HAVE_SETLOCALE) - { - char *tmplocale; - tmplocale = setlocale(LC_NUMERIC, NULL); - if (tmplocale) oldlocale = strdup(tmplocale); - setlocale(LC_NUMERIC, "C"); - } + char *oldlocale = NULL; #endif - while (PEEK_CHAR(c, tok)) { - - redo_char: - switch(state) { - - case json_tokener_state_eatws: - /* Advance until we change state */ - while (is_ws_char(c)) { - if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) - goto out; - } - if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) { - printbuf_reset(tok->pb); - printbuf_memappend_fast(tok->pb, &c, 1); - state = json_tokener_state_comment_start; - } else { - state = saved_state; - goto redo_char; - } - break; - - case json_tokener_state_start: - switch(c) { - case '{': - state = json_tokener_state_eatws; - saved_state = json_tokener_state_object_field_start; - current = json_object_new_object(); - if(current == NULL) - goto out; - break; - case '[': - state = json_tokener_state_eatws; - saved_state = json_tokener_state_array; - current = json_object_new_array(); - if(current == NULL) - goto out; - break; - case 'I': - case 'i': - state = json_tokener_state_inf; - printbuf_reset(tok->pb); - tok->st_pos = 0; - goto redo_char; - case 'N': - case 'n': - state = json_tokener_state_null; // or NaN - printbuf_reset(tok->pb); - tok->st_pos = 0; - goto redo_char; - case '\'': - if (tok->flags & JSON_TOKENER_STRICT) { - /* in STRICT mode only double-quote are allowed */ - tok->err = json_tokener_error_parse_unexpected; - goto out; - } - /* FALLTHRU */ - case '"': - state = json_tokener_state_string; - printbuf_reset(tok->pb); - tok->quote_char = c; - break; - case 'T': - case 't': - case 'F': - case 'f': - state = 
json_tokener_state_boolean; - printbuf_reset(tok->pb); - tok->st_pos = 0; - goto redo_char; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '-': - state = json_tokener_state_number; - printbuf_reset(tok->pb); - tok->is_double = 0; - goto redo_char; - default: - tok->err = json_tokener_error_parse_unexpected; - goto out; - } - break; - - case json_tokener_state_finish: - if(tok->depth == 0) goto out; - obj = json_object_get(current); - json_tokener_reset_level(tok, tok->depth); - tok->depth--; - goto redo_char; - - case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ - { - /* If we were guaranteed to have len set, then we could (usually) handle - * the entire "Infinity" check in a single strncmp (strncasecmp), but - * since len might be -1 (i.e. "read until \0"), we need to check it - * a character at a time. - * Trying to handle it both ways would make this code considerably more - * complicated with likely little performance benefit. + tok->char_offset = 0; + tok->err = json_tokener_success; + + /* this interface is presently not 64-bit clean due to the int len argument + * and the internal printbuf interface that takes 32-bit int len arguments + * so the function limits the maximum string size to INT32_MAX (2GB). 
+ * If the function is called with len == -1 then strlen is called to check + * the string length is less than INT32_MAX (2GB) */ - int is_negative = 0; - const char *_json_inf_str = json_inf_str; - if (!(tok->flags & JSON_TOKENER_STRICT)) - _json_inf_str = json_inf_str_lower; + if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) + { + tok->err = json_tokener_error_size; + return NULL; + } - /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ - while (tok->st_pos < (int)json_inf_str_len) +#ifdef HAVE_USELOCALE { - char inf_char = *str; - if (!(tok->flags & JSON_TOKENER_STRICT)) - inf_char = tolower((int)*str); - if (inf_char != _json_inf_str[tok->st_pos]) - { - tok->err = json_tokener_error_parse_unexpected; - goto out; - } - tok->st_pos++; - (void)ADVANCE_CHAR(str, tok); - if (!PEEK_CHAR(c, tok)) + locale_t duploc = duplocale(oldlocale); + newloc = newlocale(LC_NUMERIC_MASK, "C", duploc); + if (newloc == NULL) { - /* out of input chars, for now at least */ - goto out; + freelocale(duploc); + return NULL; } + uselocale(newloc); } - /* We checked the full length of "Infinity", so create the object. - * When handling -Infinity, the number parsing code will have dropped - * the "-" into tok->pb for us, so check it now. - */ - if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') +#elif defined(HAVE_SETLOCALE) { - is_negative = 1; + char *tmplocale; + tmplocale = setlocale(LC_NUMERIC, NULL); + if (tmplocale) + oldlocale = strdup(tmplocale); + setlocale(LC_NUMERIC, "C"); } - current = json_object_new_double(is_negative - ? 
-INFINITY : INFINITY); - if (current == NULL) - goto out; - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - goto redo_char; - - } - break; - case json_tokener_state_null: /* aka starts with 'n' */ - { - int size; - int size_nan; - printbuf_memappend_fast(tok->pb, &c, 1); - size = json_min(tok->st_pos+1, json_null_str_len); - size_nan = json_min(tok->st_pos+1, json_nan_str_len); - if((!(tok->flags & JSON_TOKENER_STRICT) && - strncasecmp(json_null_str, tok->pb->buf, size) == 0) - || (strncmp(json_null_str, tok->pb->buf, size) == 0) - ) { - if (tok->st_pos == json_null_str_len) { - current = NULL; - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - goto redo_char; - } - } - else if ((!(tok->flags & JSON_TOKENER_STRICT) && - strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || - (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0) - ) +#endif + + while (PEEK_CHAR(c, tok)) // Note: c might be '\0' ! { - if (tok->st_pos == json_nan_str_len) + + redo_char: + switch (state) { - current = json_object_new_double(NAN); + + case json_tokener_state_eatws: + /* Advance until we change state */ - while (isspace((unsigned char)c)) ++ while (is_ws_char(c)) + { + if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) + goto out; + } + if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) + { + printbuf_reset(tok->pb); + printbuf_memappend_fast(tok->pb, &c, 1); + state = json_tokener_state_comment_start; + } + else + { + state = saved_state; + goto redo_char; + } + break; + + case json_tokener_state_start: + switch (c) + { + case '{': + state = json_tokener_state_eatws; + saved_state = json_tokener_state_object_field_start; + current = json_object_new_object(); + if (current == NULL) + goto out; + break; + case '[': + state = json_tokener_state_eatws; + saved_state = json_tokener_state_array; + current = json_object_new_array(); + if (current == NULL) + goto out; + break; + case 'I': + case 'i': + state = 
json_tokener_state_inf; + printbuf_reset(tok->pb); + tok->st_pos = 0; + goto redo_char; + case 'N': + case 'n': + state = json_tokener_state_null; // or NaN + printbuf_reset(tok->pb); + tok->st_pos = 0; + goto redo_char; + case '\'': + if (tok->flags & JSON_TOKENER_STRICT) + { + /* in STRICT mode only double-quote are allowed */ + tok->err = json_tokener_error_parse_unexpected; + goto out; + } + /* FALLTHRU */ + case '"': + state = json_tokener_state_string; + printbuf_reset(tok->pb); + tok->quote_char = c; + break; + case 'T': + case 't': + case 'F': + case 'f': + state = json_tokener_state_boolean; + printbuf_reset(tok->pb); + tok->st_pos = 0; + goto redo_char; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + state = json_tokener_state_number; + printbuf_reset(tok->pb); + tok->is_double = 0; + goto redo_char; + default: tok->err = json_tokener_error_parse_unexpected; goto out; + } + break; + + case json_tokener_state_finish: + if (tok->depth == 0) + goto out; + obj = json_object_get(current); + json_tokener_reset_level(tok, tok->depth); + tok->depth--; + goto redo_char; + + case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */ + { + /* If we were guaranteed to have len set, then we could (usually) handle + * the entire "Infinity" check in a single strncmp (strncasecmp), but + * since len might be -1 (i.e. "read until \0"), we need to check it + * a character at a time. + * Trying to handle it both ways would make this code considerably more + * complicated with likely little performance benefit. 
+ */ + int is_negative = 0; + const char *_json_inf_str = json_inf_str; + if (!(tok->flags & JSON_TOKENER_STRICT)) + _json_inf_str = json_inf_str_lower; + + /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */ + while (tok->st_pos < (int)json_inf_str_len) + { + char inf_char = *str; + if (!(tok->flags & JSON_TOKENER_STRICT)) + inf_char = tolower((unsigned char)*str); + if (inf_char != _json_inf_str[tok->st_pos]) + { + tok->err = json_tokener_error_parse_unexpected; + goto out; + } + tok->st_pos++; + (void)ADVANCE_CHAR(str, tok); + if (!PEEK_CHAR(c, tok)) + { + /* out of input chars, for now at least */ + goto out; + } + } + /* We checked the full length of "Infinity", so create the object. + * When handling -Infinity, the number parsing code will have dropped + * the "-" into tok->pb for us, so check it now. + */ + if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-') + { + is_negative = 1; + } + current = json_object_new_double(is_negative ? -INFINITY : INFINITY); if (current == NULL) - goto out; + goto out; saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; goto redo_char; } - } else { - tok->err = json_tokener_error_parse_null; - goto out; - } - tok->st_pos++; - } - break; - - case json_tokener_state_comment_start: - if(c == '*') { - state = json_tokener_state_comment; - } else if(c == '/') { - state = json_tokener_state_comment_eol; - } else { - tok->err = json_tokener_error_parse_comment; - goto out; - } - printbuf_memappend_fast(tok->pb, &c, 1); - break; - - case json_tokener_state_comment: - { - /* Advance until we change state */ - const char *case_start = str; - while(c != '*') { - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - goto out; - } - } - printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start); - state = json_tokener_state_comment_end; - } - break; - - case json_tokener_state_comment_eol: - { - /* Advance until we 
change state */ - const char *case_start = str; - while(c != '\n') { - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - goto out; - } - } - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); - state = json_tokener_state_eatws; - } - break; - - case json_tokener_state_comment_end: - printbuf_memappend_fast(tok->pb, &c, 1); - if(c == '/') { - MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); - state = json_tokener_state_eatws; - } else { - state = json_tokener_state_comment; - } - break; - - case json_tokener_state_string: - { - /* Advance until we change state */ - const char *case_start = str; - while(1) { - if(c == tok->quote_char) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos); - if(current == NULL) - goto out; - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - break; - } else if(c == '\\') { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - saved_state = json_tokener_state_string; - state = json_tokener_state_string_escape; - break; - } - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - goto out; - } - } - } - break; - - case json_tokener_state_string_escape: - switch(c) { - case '"': - case '\\': - case '/': - printbuf_memappend_fast(tok->pb, &c, 1); - state = saved_state; - break; - case 'b': - case 'n': - case 'r': - case 't': - case 'f': - if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1); - else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1); - else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1); - else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1); - else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1); - state = saved_state; - break; - case 'u': - tok->ucs_char = 0; - tok->st_pos = 
0; - state = json_tokener_state_escape_unicode; - break; - default: - tok->err = json_tokener_error_parse_string; - goto out; - } - break; - - case json_tokener_state_escape_unicode: - { - unsigned int got_hi_surrogate = 0; - - /* Handle a 4-byte sequence, or two sequences if a surrogate pair */ - while(1) { - if (c && is_hex_char(c)) { - tok->ucs_char += ((unsigned int)jt_hexdigit(c) << ((3-tok->st_pos++)*4)); - if(tok->st_pos == 4) { - unsigned char unescaped_utf[4]; - - if (got_hi_surrogate) { - if (IS_LOW_SURROGATE(tok->ucs_char)) { - /* Recalculate the ucs_char, then fall thru to process normally */ - tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char); - } else { - /* Hi surrogate was not followed by a low surrogate */ - /* Replace the hi and process the rest normally */ - printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); - } - got_hi_surrogate = 0; - } - - if (tok->ucs_char < 0x80) { - unescaped_utf[0] = tok->ucs_char; - printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1); - } else if (tok->ucs_char < 0x800) { - unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); - unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2); - } else if (IS_HIGH_SURROGATE(tok->ucs_char)) { - /* Got a high surrogate. Remember it and look for the - * the beginning of another sequence, which should be the - * low surrogate. - */ - got_hi_surrogate = tok->ucs_char; - /* Not at end, and the next two chars should be "\u" */ - if ((len == -1 || len > (tok->char_offset + 2)) && - // str[0] != '0' && // implied by is_hex_char, above. - (str[1] == '\\') && - (str[2] == 'u')) - { - /* Advance through the 16 bit surrogate, and move on to the - * next sequence. The next step is to process the following - * characters. 
- */ - if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) { - printbuf_memappend_fast(tok->pb, - (char*) utf8_replacement_char, 3); - } - /* Advance to the first char of the next sequence and - * continue processing with the next sequence. - */ - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, - (char*) utf8_replacement_char, 3); - goto out; - } - tok->ucs_char = 0; - tok->st_pos = 0; - continue; /* other json_tokener_state_escape_unicode */ - } else { - /* Got a high surrogate without another sequence following - * it. Put a replacement char in for the hi surrogate - * and pretend we finished. - */ - printbuf_memappend_fast(tok->pb, - (char*) utf8_replacement_char, 3); - } - } else if (IS_LOW_SURROGATE(tok->ucs_char)) { - /* Got a low surrogate not preceded by a high */ - printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); - } else if (tok->ucs_char < 0x10000) { - unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); - unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); - unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3); - } else if (tok->ucs_char < 0x110000) { - unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); - unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); - unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); - unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4); - } else { - /* Don't know what we got--insert the replacement char */ - printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); - } - state = saved_state; break; - } - } else { - tok->err = json_tokener_error_parse_string; - goto out; - } - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - if (got_hi_surrogate) /* Clean up any pending chars */ - printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); - goto out; - } - } - } - break; - - case json_tokener_state_boolean: - { 
- int size1, size2; - printbuf_memappend_fast(tok->pb, &c, 1); - size1 = json_min(tok->st_pos+1, json_true_str_len); - size2 = json_min(tok->st_pos+1, json_false_str_len); - if((!(tok->flags & JSON_TOKENER_STRICT) && - strncasecmp(json_true_str, tok->pb->buf, size1) == 0) - || (strncmp(json_true_str, tok->pb->buf, size1) == 0) - ) { - if(tok->st_pos == json_true_str_len) { - current = json_object_new_boolean(1); - if(current == NULL) - goto out; - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - goto redo_char; - } - } else if((!(tok->flags & JSON_TOKENER_STRICT) && - strncasecmp(json_false_str, tok->pb->buf, size2) == 0) - || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) { - if(tok->st_pos == json_false_str_len) { - current = json_object_new_boolean(0); - if(current == NULL) - goto out; - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - goto redo_char; - } - } else { - tok->err = json_tokener_error_parse_boolean; - goto out; - } - tok->st_pos++; - } - break; - - case json_tokener_state_number: - { - /* Advance until we change state */ - const char *case_start = str; - int case_len=0; - int is_exponent=0; - int negativesign_next_possible_location=1; - while(c && is_number_char(c)) { - ++case_len; - - /* non-digit characters checks */ - /* note: since the main loop condition to get here was - an input starting with 0-9 or '-', we are - protected from input starting with '.' or - e/E. */ - if (c == '.') { - if (tok->is_double != 0) { - /* '.' can only be found once, and out of the exponent part. - Thus, if the input is already flagged as double, it - is invalid. 
*/ - tok->err = json_tokener_error_parse_number; - goto out; - } - tok->is_double = 1; - } - if (c == 'e' || c == 'E') { - if (is_exponent != 0) { - /* only one exponent possible */ - tok->err = json_tokener_error_parse_number; - goto out; - } - is_exponent = 1; - tok->is_double = 1; - /* the exponent part can begin with a negative sign */ - negativesign_next_possible_location = case_len + 1; - } - if (c == '-' && case_len != negativesign_next_possible_location) { - /* If the negative sign is not where expected (ie - start of input or start of exponent part), the - input is invalid. */ - tok->err = json_tokener_error_parse_number; - goto out; - } - - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, case_len); - goto out; - } - } - if (case_len>0) - printbuf_memappend_fast(tok->pb, case_start, case_len); + case json_tokener_state_null: /* aka starts with 'n' */ + { + int size; + int size_nan; + printbuf_memappend_fast(tok->pb, &c, 1); + size = json_min(tok->st_pos + 1, json_null_str_len); + size_nan = json_min(tok->st_pos + 1, json_nan_str_len); + if ((!(tok->flags & JSON_TOKENER_STRICT) && + strncasecmp(json_null_str, tok->pb->buf, size) == 0) || + (strncmp(json_null_str, tok->pb->buf, size) == 0)) + { + if (tok->st_pos == json_null_str_len) + { + current = NULL; + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + goto redo_char; + } + } + else if ((!(tok->flags & JSON_TOKENER_STRICT) && + strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || + (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0)) + { + if (tok->st_pos == json_nan_str_len) + { + current = json_object_new_double(NAN); + if (current == NULL) + goto out; + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + goto redo_char; + } + } + else + { + tok->err = json_tokener_error_parse_null; + goto out; + } + tok->st_pos++; + } + break; - // Check for -Infinity - if (tok->pb->buf[0] == '-' && 
case_len <= 1 && - (c == 'i' || c == 'I')) - { - state = json_tokener_state_inf; - tok->st_pos = 0; - goto redo_char; - } - } - { - int64_t num64; - double numd; - if (!tok->is_double && json_parse_sanitized_int64(tok->pb->buf, tok->pb->bpos, &num64) == 0) { - if (num64 && tok->pb->buf[0]=='0' && - (tok->flags & JSON_TOKENER_STRICT)) { - /* in strict mode, number must not start with 0 */ - tok->err = json_tokener_error_parse_number; - goto out; + case json_tokener_state_comment_start: + if (c == '*') + { + state = json_tokener_state_comment; + } + else if (c == '/') + { + state = json_tokener_state_comment_eol; + } + else + { + tok->err = json_tokener_error_parse_comment; + goto out; + } + printbuf_memappend_fast(tok->pb, &c, 1); + break; + + case json_tokener_state_comment: + { + /* Advance until we change state */ + const char *case_start = str; + while (c != '*') + { + if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + goto out; + } + } + printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start); + state = json_tokener_state_comment_end; + } + break; + + case json_tokener_state_comment_eol: + { + /* Advance until we change state */ + const char *case_start = str; + while (c != '\n') + { + if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + goto out; + } + } + printbuf_memappend_fast(tok->pb, case_start, str - case_start); + MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); + state = json_tokener_state_eatws; + } + break; + + case json_tokener_state_comment_end: + printbuf_memappend_fast(tok->pb, &c, 1); + if (c == '/') + { + MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); + state = json_tokener_state_eatws; + } + else + { + state = json_tokener_state_comment; + } + break; + + case json_tokener_state_string: + { + /* Advance until we change state */ + const char *case_start = str; + while (1) + { + 
if (c == tok->quote_char) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + current = + json_object_new_string_len(tok->pb->buf, tok->pb->bpos); + if (current == NULL) + goto out; + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + break; + } + else if (c == '\\') + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + saved_state = json_tokener_state_string; + state = json_tokener_state_string_escape; + break; + } + if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + goto out; + } + } + } + break; + + case json_tokener_state_string_escape: + switch (c) + { + case '"': + case '\\': + case '/': + printbuf_memappend_fast(tok->pb, &c, 1); + state = saved_state; + break; + case 'b': + case 'n': + case 'r': + case 't': + case 'f': + if (c == 'b') + printbuf_memappend_fast(tok->pb, "\b", 1); + else if (c == 'n') + printbuf_memappend_fast(tok->pb, "\n", 1); + else if (c == 'r') + printbuf_memappend_fast(tok->pb, "\r", 1); + else if (c == 't') + printbuf_memappend_fast(tok->pb, "\t", 1); + else if (c == 'f') + printbuf_memappend_fast(tok->pb, "\f", 1); + state = saved_state; + break; + case 'u': + tok->ucs_char = 0; + tok->st_pos = 0; + state = json_tokener_state_escape_unicode; + break; + default: tok->err = json_tokener_error_parse_string; goto out; + } + break; + + // =================================================== + + case json_tokener_state_escape_unicode: + { + /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */ + while (1) + { - if (!c || !strchr(json_hex_chars, c)) ++ if (!c || !is_hex_char(c)) + { + tok->err = json_tokener_error_parse_string; + goto out; + } + tok->ucs_char |= + ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4)); + tok->st_pos++; + if (tok->st_pos >= 4) + break; + + (void)ADVANCE_CHAR(str, tok); + if (!PEEK_CHAR(c, tok)) + { + /* + * We're out of characters in the current 
call to + * json_tokener_parse(), but a subsequent call might + * provide us with more, so leave our current state + * as-is (including tok->high_surrogate) and return. + */ + goto out; + } + } + tok->st_pos = 0; + + /* Now, we have a full \uNNNN sequence in tok->ucs_char */ + + /* If the *previous* sequence was a high surrogate ... */ + if (tok->high_surrogate) + { + if (IS_LOW_SURROGATE(tok->ucs_char)) + { + /* Recalculate the ucs_char, then fall thru to process normally */ + tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate, + tok->ucs_char); + } + else + { + /* High surrogate was not followed by a low surrogate + * Replace the high and process the rest normally + */ + printbuf_memappend_fast(tok->pb, + (char *)utf8_replacement_char, 3); + } + tok->high_surrogate = 0; + } + + if (tok->ucs_char < 0x80) + { + unsigned char unescaped_utf[1]; + unescaped_utf[0] = tok->ucs_char; + printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1); + } + else if (tok->ucs_char < 0x800) + { + unsigned char unescaped_utf[2]; + unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); + unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); + printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2); + } + else if (IS_HIGH_SURROGATE(tok->ucs_char)) + { + /* + * The next two characters should be \u, HOWEVER, + * we can't simply peek ahead here, because the + * characters we need might not be passed to us + * until a subsequent call to json_tokener_parse. + * Instead, transition through a couple of states. + * (now): + * _escape_unicode => _unicode_need_escape + * (see a '\\' char): + * _unicode_need_escape => _unicode_need_u + * (see a 'u' char): + * _unicode_need_u => _escape_unicode + * ...and we'll end up back around here. 
+ */ + tok->high_surrogate = tok->ucs_char; + tok->ucs_char = 0; + state = json_tokener_state_escape_unicode_need_escape; + break; + } + else if (IS_LOW_SURROGATE(tok->ucs_char)) + { + /* Got a low surrogate not preceded by a high */ + printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + } + else if (tok->ucs_char < 0x10000) + { + unsigned char unescaped_utf[3]; + unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); + unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); + unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); + printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3); + } + else if (tok->ucs_char < 0x110000) + { + unsigned char unescaped_utf[4]; + unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); + unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); + unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); + unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); + printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4); + } + else + { + /* Don't know what we got--insert the replacement char */ + printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + } + state = saved_state; // i.e. _state_string or _state_object_field } - current = json_object_new_int64(num64); - if(current == NULL) - goto out; + break; + + case json_tokener_state_escape_unicode_need_escape: + // We get here after processing a high_surrogate + // require a '\\' char + if (!c || c != '\\') + { + /* Got a high surrogate without another sequence following + * it. Put a replacement char in for the high surrogate + * and pop back up to _state_string or _state_object_field. 
+ */ + printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + tok->high_surrogate = 0; + tok->ucs_char = 0; + tok->st_pos = 0; + state = saved_state; + goto redo_char; + } + state = json_tokener_state_escape_unicode_need_u; + break; + + case json_tokener_state_escape_unicode_need_u: + /* We already had a \ char, check that it's \u */ + if (!c || c != 'u') + { + /* Got a high surrogate with some non-unicode escape + * sequence following it. + * Put a replacement char in for the high surrogate + * and handle the escape sequence normally. + */ + printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + tok->high_surrogate = 0; + tok->ucs_char = 0; + tok->st_pos = 0; + state = json_tokener_state_string_escape; + goto redo_char; + } + state = json_tokener_state_escape_unicode; + break; + + // =================================================== + + case json_tokener_state_boolean: + { + int size1, size2; + printbuf_memappend_fast(tok->pb, &c, 1); + size1 = json_min(tok->st_pos + 1, json_true_str_len); + size2 = json_min(tok->st_pos + 1, json_false_str_len); + if ((!(tok->flags & JSON_TOKENER_STRICT) && + strncasecmp(json_true_str, tok->pb->buf, size1) == 0) || + (strncmp(json_true_str, tok->pb->buf, size1) == 0)) + { + if (tok->st_pos == json_true_str_len) + { + current = json_object_new_boolean(1); + if (current == NULL) + goto out; + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + goto redo_char; + } + } + else if ((!(tok->flags & JSON_TOKENER_STRICT) && + strncasecmp(json_false_str, tok->pb->buf, size2) == 0) || + (strncmp(json_false_str, tok->pb->buf, size2) == 0)) + { + if (tok->st_pos == json_false_str_len) + { + current = json_object_new_boolean(0); + if (current == NULL) + goto out; + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + goto redo_char; + } + } + else + { + tok->err = json_tokener_error_parse_boolean; + goto out; + } + tok->st_pos++; + } + break; + + case 
json_tokener_state_number: + { + /* Advance until we change state */ + const char *case_start = str; + int case_len = 0; + int is_exponent = 0; + int neg_sign_ok = 1; + int pos_sign_ok = 0; + if (printbuf_length(tok->pb) > 0) + { + /* We don't save all state from the previous incremental parse + so we need to re-generate it based on the saved string so far. + */ + char *e_loc = strchr(tok->pb->buf, 'e'); + if (!e_loc) + e_loc = strchr(tok->pb->buf, 'E'); + if (e_loc) + { + char *last_saved_char = + &tok->pb->buf[printbuf_length(tok->pb) - 1]; + is_exponent = 1; + pos_sign_ok = neg_sign_ok = 1; + /* If the "e" isn't at the end, we can't start with a '-' */ + if (e_loc != last_saved_char) + { + neg_sign_ok = 0; + pos_sign_ok = 0; + } + // else leave it set to 1, i.e. start of the new input + } + } + + while (c && ((c >= '0' && c <= '9') || + (!is_exponent && (c == 'e' || c == 'E')) || + (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') || + (!tok->is_double && c == '.'))) + { + pos_sign_ok = neg_sign_ok = 0; + ++case_len; + + /* non-digit characters checks */ + /* note: since the main loop condition to get here was + * an input starting with 0-9 or '-', we are + * protected from input starting with '.' or + * e/E. + */ + switch (c) + { + case '.': + tok->is_double = 1; + pos_sign_ok = 1; + neg_sign_ok = 1; + break; + case 'e': /* FALLTHRU */ + case 'E': + is_exponent = 1; + tok->is_double = 1; + /* the exponent part can begin with a negative sign */ + pos_sign_ok = neg_sign_ok = 1; + break; + default: break; + } + + if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) + { + printbuf_memappend_fast(tok->pb, case_start, case_len); + goto out; + } + } + /* + Now we know c isn't a valid number char, but check whether + it might have been intended to be, and return a potentially + more understandable error right away. + However, if we're at the top-level, use the number as-is + because c can be part of a new object to parse on the + next call to json_tokener_parse(). 
+ */ + if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && - c != 'I' && c != 'i' && !isspace((unsigned char)c)) ++ c != 'I' && c != 'i' && !is_ws_char(c)) + { + tok->err = json_tokener_error_parse_number; + goto out; + } + if (case_len > 0) + printbuf_memappend_fast(tok->pb, case_start, case_len); + + // Check for -Infinity + if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) + { + state = json_tokener_state_inf; + tok->st_pos = 0; + goto redo_char; + } + if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT)) + { + /* Trim some chars off the end, to allow things + like "123e+" to parse ok. */ + while (printbuf_length(tok->pb) > 1) + { + char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1]; + if (last_char != 'e' && last_char != 'E' && + last_char != '-' && last_char != '+') + { + break; + } + tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0'; + printbuf_length(tok->pb)--; + } + } + } + { + int64_t num64; + uint64_t numuint64; + double numd; + if (!tok->is_double && tok->pb->buf[0] == '-' && + json_parse_int64(tok->pb->buf, &num64) == 0) + { + current = json_object_new_int64(num64); + if (current == NULL) + goto out; + } + else if (!tok->is_double && tok->pb->buf[0] != '-' && + json_parse_uint64(tok->pb->buf, &numuint64) == 0) + { + if (numuint64 && tok->pb->buf[0] == '0' && + (tok->flags & JSON_TOKENER_STRICT)) + { + tok->err = json_tokener_error_parse_number; + goto out; + } + if (numuint64 <= INT64_MAX) + { + num64 = (uint64_t)numuint64; + current = json_object_new_int64(num64); + if (current == NULL) + goto out; + } + else + { + current = json_object_new_uint64(numuint64); + if (current == NULL) + goto out; + } + } + else if (tok->is_double && + json_tokener_parse_double( + tok->pb->buf, printbuf_length(tok->pb), &numd) == 0) + { + current = json_object_new_double_s(numd, tok->pb->buf); + if (current == NULL) + goto out; + } + else + { + tok->err = json_tokener_error_parse_number; + goto out; + } + saved_state = 
json_tokener_state_finish; + state = json_tokener_state_eatws; + goto redo_char; + } + break; + + case json_tokener_state_array_after_sep: + case json_tokener_state_array: + if (c == ']') + { + // Minimize memory usage; assume parsed objs are unlikely to be changed + json_object_array_shrink(current, 0); + + if (state == json_tokener_state_array_after_sep && + (tok->flags & JSON_TOKENER_STRICT)) + { + tok->err = json_tokener_error_parse_unexpected; + goto out; + } + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + } + else + { + if (tok->depth >= tok->max_depth - 1) + { + tok->err = json_tokener_error_depth; + goto out; + } + state = json_tokener_state_array_add; + tok->depth++; + json_tokener_reset_level(tok, tok->depth); + goto redo_char; + } + break; + + case json_tokener_state_array_add: + if (json_object_array_add(current, obj) != 0) + goto out; + saved_state = json_tokener_state_array_sep; + state = json_tokener_state_eatws; + goto redo_char; + + case json_tokener_state_array_sep: + if (c == ']') + { + // Minimize memory usage; assume parsed objs are unlikely to be changed + json_object_array_shrink(current, 0); + + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + } + else if (c == ',') + { + saved_state = json_tokener_state_array_after_sep; + state = json_tokener_state_eatws; + } + else + { + tok->err = json_tokener_error_parse_array; + goto out; + } + break; + + case json_tokener_state_object_field_start: + case json_tokener_state_object_field_start_after_sep: + if (c == '}') + { + if (state == json_tokener_state_object_field_start_after_sep && + (tok->flags & JSON_TOKENER_STRICT)) + { + tok->err = json_tokener_error_parse_unexpected; + goto out; + } + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + } + else if (c == '"' || c == '\'') + { + tok->quote_char = c; + printbuf_reset(tok->pb); + state = json_tokener_state_object_field; + } + else + { + tok->err = 
json_tokener_error_parse_object_key_name; + goto out; + } + break; + + case json_tokener_state_object_field: + { + /* Advance until we change state */ + const char *case_start = str; + while (1) + { + if (c == tok->quote_char) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + obj_field_name = strdup(tok->pb->buf); + saved_state = json_tokener_state_object_field_end; + state = json_tokener_state_eatws; + break; + } + else if (c == '\\') + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + saved_state = json_tokener_state_object_field; + state = json_tokener_state_string_escape; + break; + } + if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) + { + printbuf_memappend_fast(tok->pb, case_start, + str - case_start); + goto out; + } + } + } + break; + + case json_tokener_state_object_field_end: + if (c == ':') + { + saved_state = json_tokener_state_object_value; + state = json_tokener_state_eatws; + } + else + { + tok->err = json_tokener_error_parse_object_key_sep; + goto out; + } + break; + + case json_tokener_state_object_value: + if (tok->depth >= tok->max_depth - 1) + { + tok->err = json_tokener_error_depth; + goto out; + } + state = json_tokener_state_object_value_add; + tok->depth++; + json_tokener_reset_level(tok, tok->depth); + goto redo_char; + + case json_tokener_state_object_value_add: + json_object_object_add(current, obj_field_name, obj); + free(obj_field_name); + obj_field_name = NULL; + saved_state = json_tokener_state_object_sep; + state = json_tokener_state_eatws; + goto redo_char; + + case json_tokener_state_object_sep: + /* { */ + if (c == '}') + { + saved_state = json_tokener_state_finish; + state = json_tokener_state_eatws; + } + else if (c == ',') + { + saved_state = json_tokener_state_object_field_start_after_sep; + state = json_tokener_state_eatws; + } + else + { + tok->err = json_tokener_error_parse_object_value_sep; + goto out; + } + break; + } + (void)ADVANCE_CHAR(str, tok); + if (!c) // This is the 
char *before* advancing + break; + } /* while(PEEK_CHAR) */ + +out: + if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0)) + { + tok->err = json_tokener_error_parse_utf8_string; } - else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) + if (c && (state == json_tokener_state_finish) && (tok->depth == 0) && + (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) == + JSON_TOKENER_STRICT) { - current = json_object_new_double_s(numd, tok->pb->buf); - if(current == NULL) - goto out; - } else { - tok->err = json_tokener_error_parse_number; - goto out; - } - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - goto redo_char; - } - break; - - case json_tokener_state_array_after_sep: - case json_tokener_state_array: - if(c == ']') { - if (state == json_tokener_state_array_after_sep && - (tok->flags & JSON_TOKENER_STRICT)) - { - tok->err = json_tokener_error_parse_unexpected; - goto out; - } - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - } else { - if(tok->depth >= tok->max_depth-1) { - tok->err = json_tokener_error_depth; - goto out; + /* unexpected char after JSON data */ + tok->err = json_tokener_error_parse_unexpected; } - state = json_tokener_state_array_add; - tok->depth++; - json_tokener_reset_level(tok, tok->depth); - goto redo_char; - } - break; - - case json_tokener_state_array_add: - if( json_object_array_add(current, obj) != 0 ) - goto out; - saved_state = json_tokener_state_array_sep; - state = json_tokener_state_eatws; - goto redo_char; - - case json_tokener_state_array_sep: - if(c == ']') { - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - } else if(c == ',') { - saved_state = json_tokener_state_array_after_sep; - state = json_tokener_state_eatws; - } else { - tok->err = json_tokener_error_parse_array; - goto out; - } - break; - - case json_tokener_state_object_field_start: - case 
json_tokener_state_object_field_start_after_sep: - if(c == '}') { - if (state == json_tokener_state_object_field_start_after_sep && - (tok->flags & JSON_TOKENER_STRICT)) - { - tok->err = json_tokener_error_parse_unexpected; - goto out; - } - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - } else if (c == '"' || c == '\'') { - tok->quote_char = c; - printbuf_reset(tok->pb); - state = json_tokener_state_object_field; - } else { - tok->err = json_tokener_error_parse_object_key_name; - goto out; - } - break; - - case json_tokener_state_object_field: - { - /* Advance until we change state */ - const char *case_start = str; - while(1) { - if(c == tok->quote_char) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - obj_field_name = strdup(tok->pb->buf); - saved_state = json_tokener_state_object_field_end; - state = json_tokener_state_eatws; - break; - } else if(c == '\\') { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - saved_state = json_tokener_state_object_field; - state = json_tokener_state_string_escape; - break; - } - if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, str-case_start); - goto out; - } + if (!c) + { + /* We hit an eof char (0) */ + if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish) + tok->err = json_tokener_error_parse_eof; } - } - break; - - case json_tokener_state_object_field_end: - if(c == ':') { - saved_state = json_tokener_state_object_value; - state = json_tokener_state_eatws; - } else { - tok->err = json_tokener_error_parse_object_key_sep; - goto out; - } - break; - - case json_tokener_state_object_value: - if(tok->depth >= tok->max_depth-1) { - tok->err = json_tokener_error_depth; - goto out; - } - state = json_tokener_state_object_value_add; - tok->depth++; - json_tokener_reset_level(tok, tok->depth); - goto redo_char; - - case json_tokener_state_object_value_add: - 
json_object_object_add(current, obj_field_name, obj); - free(obj_field_name); - obj_field_name = NULL; - saved_state = json_tokener_state_object_sep; - state = json_tokener_state_eatws; - goto redo_char; - - case json_tokener_state_object_sep: - /* { */ - if(c == '}') { - saved_state = json_tokener_state_finish; - state = json_tokener_state_eatws; - } else if(c == ',') { - saved_state = json_tokener_state_object_field_start_after_sep; - state = json_tokener_state_eatws; - } else { - tok->err = json_tokener_error_parse_object_value_sep; - goto out; - } - break; - - } - if (!ADVANCE_CHAR(str, tok)) - goto out; - } /* while(PEEK_CHAR) */ - - out: - if (c && - (state == json_tokener_state_finish) && - (tok->depth == 0) && - (tok->flags & JSON_TOKENER_STRICT)) { - /* unexpected char after JSON data */ - tok->err = json_tokener_error_parse_unexpected; - } - if (!c) { /* We hit an eof char (0) */ - if(state != json_tokener_state_finish && - saved_state != json_tokener_state_finish) - tok->err = json_tokener_error_parse_eof; - } #ifdef HAVE_USELOCALE - uselocale(oldlocale); - freelocale(newloc); + uselocale(oldlocale); + freelocale(newloc); #elif defined(HAVE_SETLOCALE) - setlocale(LC_NUMERIC, oldlocale); - free(oldlocale); + setlocale(LC_NUMERIC, oldlocale); + free(oldlocale); #endif - if (tok->err == json_tokener_success) - { - json_object *ret = json_object_get(current); - int ii; + if (tok->err == json_tokener_success) + { + json_object *ret = json_object_get(current); + int ii; - /* Partially reset, so we parse additional objects on subsequent calls. */ - for(ii = tok->depth; ii >= 0; ii--) - json_tokener_reset_level(tok, ii); - return ret; - } + /* Partially reset, so we parse additional objects on subsequent calls. 
*/ + for (ii = tok->depth; ii >= 0; ii--) + json_tokener_reset_level(tok, ii); + return ret; + } - MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", - json_tokener_errors[tok->err], tok->char_offset); - return NULL; + MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err], + tok->char_offset); + return NULL; +} + /* Checks a single input byte against the UTF-8 lead/continuation bit patterns. *nBytes is the caller-held count of continuation bytes still expected for the character in progress; 0 means c must begin a new character. On a lead byte the count is set to 1, 2 or 3; on a continuation byte it is decremented. Returns 1 if the byte fits the expected pattern, 0 otherwise. Only the top bits of each byte are inspected: overlong encodings, encoded surrogates and lead bytes 0xf5-0xf7 are not rejected here. NOTE(review): the parser's exit path appears to flag an error when its nBytes is still non-zero, which would be how a truncated sequence is caught - confirm against the caller. */ +static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) +{ + unsigned char chr = c; /* plain char may be signed; use the unsigned value for the comparisons and masks below */ + if (*nBytes == 0) /* no multi-byte character in progress */ + { + if (chr >= 0x80) /* bytes below 0x80 are accepted without touching *nBytes */ + { + if ((chr & 0xe0) == 0xc0) /* 110xxxxx: one continuation byte to come */ + *nBytes = 1; + else if ((chr & 0xf0) == 0xe0) /* 1110xxxx: two continuation bytes to come */ + *nBytes = 2; + else if ((chr & 0xf8) == 0xf0) /* 11110xxx: three continuation bytes to come */ + *nBytes = 3; + else /* 10xxxxxx with nothing pending, or 11111xxx */ + return 0; + } + } + else /* inside a multi-byte character */ + { + if ((chr & 0xC0) != 0x80) /* anything other than 10xxxxxx is rejected */ + return 0; + (*nBytes)--; /* one fewer continuation byte outstanding */ + } + return 1; } void json_tokener_set_flags(struct json_tokener *tok, int flags)