From f23486a3217ed85ed4a0d7f95a10c4013250c987 Mon Sep 17 00:00:00 2001 From: Eric Haszlakiewicz Date: Mon, 29 Jun 2020 02:14:26 +0000 Subject: [PATCH] In the json_tokener_state_number case, explicitly adjust what "number" characters are allowed based on the exact micro-state that we're in, and check for invalid following characters in a different way, to allow a valid json_type_number object to be returned at the top level. This causes previously failing strings like "123-456" to return a valid json_object with the appropriate value. If you care about the trailing content, call json_tokener_parse_ex() and check the parse end point with json_tokener_get_parse_end(). --- ChangeLog | 3 ++ json_object.c | 1 - json_object_private.h | 1 - json_tokener.c | 77 +++++++++++++++++++++++---------------- tests/test_parse.c | 42 ++++++++++++++------- tests/test_parse.expected | 18 +++++++-- 6 files changed, 92 insertions(+), 50 deletions(-) diff --git a/ChangeLog b/ChangeLog index 32c28699..6e46cdab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,6 +30,9 @@ Other changes * Fix incremental parsing of numbers, especially those with exponents, e.g. so parsing "[0", "e+", "-]" now properly returns an error. Strict mode now rejects missing exponents ("0e"). +* Successfully return number objects at the top level even when they are + followed by a "-", "." or "e". This makes parsing things like "123-45" + behave consistently with things like "123xyz". *** diff --git a/json_object.c b/json_object.c index 89439aba..1dffd005 100644 --- a/json_object.c +++ b/json_object.c @@ -54,7 +54,6 @@ // Don't define this. It's not thread-safe. /* #define REFCOUNT_DEBUG 1 */ -const char *json_number_chars = "0123456789.+-eE"; const char *json_hex_chars = "0123456789abcdefABCDEF"; static void json_object_generic_delete(struct json_object *jso); diff --git a/json_object_private.h b/json_object_private.h index d1d782e2..e143b464 100644 --- a/json_object_private.h +++ b/json_object_private.h @@ -98,7 +98,6 @@ struct json_object_string void _json_c_set_last_err(const char *err_fmt, ...); -extern const char *json_number_chars; extern const char *json_hex_chars; #ifdef __cplusplus diff --git a/json_tokener.c b/json_tokener.c index b949d105..0d01d2b2 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -193,7 +193,17 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene return NULL; obj = json_tokener_parse_ex(tok, str, -1); *error = tok->err; - if (tok->err != json_tokener_success) + if (tok->err != json_tokener_success +#if 0 + /* This would be a more sensible default, and cause parsing + * things like "null123" to fail when the caller can't know + * where the parsing left off, but starting to fail would + * be a notable behaviour change. Save for a 1.0 release. + */ + || json_tokener_get_parse_end(tok) != strlen(str) +#endif + ) + { if (obj != NULL) json_object_put(obj); @@ -838,7 +848,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * const char *case_start = str; int case_len = 0; int is_exponent = 0; - int negativesign_next_possible_location = 1; + int neg_sign_ok = 1; + int pos_sign_ok = 0; if (printbuf_length(tok->pb) > 0) { /* We don't save all state from the previous incremental parse @@ -852,14 +863,26 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * char *last_saved_char = &tok->pb->buf[printbuf_length(tok->pb) - 1]; is_exponent = 1; + pos_sign_ok = neg_sign_ok = 1; /* If the "e" isn't at the end, we can't start with a '-' */ if (e_loc != last_saved_char) - negativesign_next_possible_location = -1; + { + neg_sign_ok = 0; + pos_sign_ok = 0; + } // else leave it set to 1, i.e. start of the new input } } - while (c && strchr(json_number_chars, c)) + + while (c && + ((c >= '0' && c <= '9') || + (!is_exponent && (c=='e' || c=='E')) || + (neg_sign_ok && c=='-') || + (pos_sign_ok && c=='+') || + (!tok->is_double && c=='.') + )) { + pos_sign_ok = neg_sign_ok = 0; ++case_len; /* non-digit characters checks */ @@ -871,40 +894,16 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * switch (c) { case '.': - if (tok->is_double != 0) - { - /* '.' can only be found once, and out of the exponent part. - * Thus, if the input is already flagged as double, it - * is invalid. - */ - tok->err = json_tokener_error_parse_number; - goto out; - } tok->is_double = 1; + pos_sign_ok = 1; + neg_sign_ok = 1; break; case 'e': /* FALLTHRU */ case 'E': - if (is_exponent != 0) - { - /* only one exponent possible */ - tok->err = json_tokener_error_parse_number; - goto out; - } is_exponent = 1; tok->is_double = 1; /* the exponent part can begin with a negative sign */ - negativesign_next_possible_location = case_len + 1; - break; - case '-': - if (case_len != negativesign_next_possible_location) - { - /* If the negative sign is not where expected (ie - * start of input or start of exponent part), the - * input is invalid. - */ - tok->err = json_tokener_error_parse_number; - goto out; - } + pos_sign_ok = neg_sign_ok = 1; break; default: break; } @@ -915,6 +914,22 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * goto out; } } + /* + Now we know c isn't a valid number char, but check whether + it might have been intended to be, and return a potentially + more understandable error right away. + However, if we're at the top-level, use the number as-is + because c can be part of a new object to parse on the + next call to json_tokener_parse(). + */ + if (tok->depth > 0 && + c != ',' && c != ']' && c != '}' && c != '/' && + c != 'I' && c != 'i' && + !isspace((unsigned char)c)) + { + tok->err = json_tokener_error_parse_number; + goto out; + } if (case_len > 0) printbuf_memappend_fast(tok->pb, case_start, case_len); diff --git a/tests/test_parse.c b/tests/test_parse.c index 2e172151..3c9ac82a 100644 --- a/tests/test_parse.c +++ b/tests/test_parse.c @@ -141,16 +141,18 @@ static void test_basic_parse() single_basic_parse("12", 0); single_basic_parse("12.3", 0); - single_basic_parse("12.3.4", 0); /* non-sensical, returns null */ - /* was returning (int)2015 before patch, should return null */ - single_basic_parse("2015-01-15", 0); - /* ...but this works. It's rather inconsistent, and a future major release - * should change the behavior so it either always returns null when extra - * bytes are present (preferred), or always return object created from as much - * as was able to be parsed. + /* Even though, when using json_tokener_parse() there's no way to + * know when there is more data after the parsed object, + * an object is successfully returned anyway (in some cases) */ + + single_basic_parse("12.3.4", 0); + single_basic_parse("2015-01-15", 0); single_basic_parse("12.3xxx", 0); + single_basic_parse("12.3{\"a\":123}", 0); + single_basic_parse("12.3\n", 0); + single_basic_parse("12.3 ", 0); single_basic_parse("{\"FoO\" : -12.3E512}", 0); single_basic_parse("{\"FoO\" : -12.3e512}", 0); @@ -368,7 +370,10 @@ struct incremental_step {"[0e-]", -1, -1, json_tokener_success, 1}, {"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT}, - {"0e+-", 5, 3, json_tokener_error_parse_number, 1}, + /* You might expect this to fail, but it won't because + it's a valid partial parse; note the char_offset: */ + {"0e+-", 5, 3, json_tokener_success, 1}, + {"0e+-", 5, 3, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT}, {"[0e+-]", -1, 4, json_tokener_error_parse_number, 1}, /* Similar tests for other kinds of objects: */ @@ -447,11 +452,22 @@ struct incremental_step {"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0}, {&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1}, - /* Some bad formatting. Check we get the correct error status - * XXX this means we can't have two numbers in the incremental parse - * XXX stream with the second one being a negative number! - */ - {"2015-01-15", 10, 4, json_tokener_error_parse_number, 1}, + /* + * Though this may seem invalid at first glance, it + * parses as three separate numbers, 2015, -1 and -15 + * Of course, simply pasting together a stream of arbitrary + * positive numbers won't work, since there'll be no way to + * tell where in e.g. "2015015" the next number stats, so + * a reliably parsable stream must not include json_type_int + * or json_type_double objects without some other delimiter. + * e.g. whitespace + */ + {&"2015-01-15"[0], 11, 4, json_tokener_success, 1}, + {&"2015-01-15"[4], 7, 3, json_tokener_success, 1}, + {&"2015-01-15"[7], 4, 3, json_tokener_success, 1}, + {&"2015 01 15"[0], 11, 5, json_tokener_success, 1}, + {&"2015 01 15"[4], 7, 4, json_tokener_success, 1}, + {&"2015 01 15"[7], 4, 3, json_tokener_success, 1}, /* Strings have a well defined end point, so we can stop at the quote */ {"\"blue\"", -1, -1, json_tokener_success, 0}, diff --git a/tests/test_parse.expected b/tests/test_parse.expected index 48539fcd..37db44d8 100644 --- a/tests/test_parse.expected +++ b/tests/test_parse.expected @@ -40,9 +40,13 @@ new_obj.to_string(nAn)=NaN new_obj.to_string(iNfinity)=Infinity new_obj.to_string(12)=12 new_obj.to_string(12.3)=12.3 -new_obj.to_string(12.3.4)=null -new_obj.to_string(2015-01-15)=null +new_obj.to_string(12.3.4)=12.3 +new_obj.to_string(2015-01-15)=2015 new_obj.to_string(12.3xxx)=12.3 +new_obj.to_string(12.3{"a":123})=12.3 +new_obj.to_string(12.3 +)=12.3 +new_obj.to_string(12.3 )=12.3 new_obj.to_string({"FoO" : -12.3E512})={ "FoO": -12.3E512 } new_obj.to_string({"FoO" : -12.3e512})={ "FoO": -12.3e512 } new_obj.to_string({"FoO" : -12.3E51.2})=null @@ -162,6 +166,7 @@ json_tokener_parse_ex(tok, 0e- , 4) ... OK: got object of type [double json_tokener_parse_ex(tok, 0e- , 4) ... OK: got correct error: unexpected end of data json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got object of type [array]: [ 0 ] json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got correct error: number expected +json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got object of type [double]: 0 json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got correct error: number expected json_tokener_parse_ex(tok, [0e+-] , 6) ... OK: got correct error: number expected json_tokener_parse_ex(tok, false , 5) ... OK: got correct error: continue @@ -215,7 +220,12 @@ json_tokener_parse_ex(tok, nullx , 6) ... OK: got object of type [null]: json_tokener_parse_ex(tok, x , 2) ... OK: got correct error: unexpected character json_tokener_parse_ex(tok, {"a":1}{"b":2}, 15) ... OK: got object of type [object]: { "a": 1 } json_tokener_parse_ex(tok, {"b":2} , 8) ... OK: got object of type [object]: { "b": 2 } -json_tokener_parse_ex(tok, 2015-01-15 , 10) ... OK: got correct error: number expected +json_tokener_parse_ex(tok, 2015-01-15 , 11) ... OK: got object of type [int]: 2015 +json_tokener_parse_ex(tok, -01-15 , 7) ... OK: got object of type [int]: -1 +json_tokener_parse_ex(tok, -15 , 4) ... OK: got object of type [int]: -15 +json_tokener_parse_ex(tok, 2015 01 15 , 11) ... OK: got object of type [int]: 2015 +json_tokener_parse_ex(tok, 01 15 , 7) ... OK: got object of type [int]: 1 +json_tokener_parse_ex(tok, 15 , 4) ... OK: got object of type [int]: 15 json_tokener_parse_ex(tok, "blue" , 6) ... OK: got object of type [string]: "blue" json_tokener_parse_ex(tok, "\"" , 4) ... OK: got object of type [string]: "\"" json_tokener_parse_ex(tok, "\\" , 4) ... OK: got object of type [string]: "\\" @@ -265,5 +275,5 @@ json_tokener_parse_ex(tok, "\ud855 json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string json_tokener_parse_ex(tok, 1111 , 5) ... OK: got correct error: invalid utf-8 string json_tokener_parse_ex(tok, {"1":1} , 8) ... OK: got correct error: invalid utf-8 string -End Incremental Tests OK=179 ERROR=0 +End Incremental Tests OK=185 ERROR=0 ================================== -- 2.47.2