In the json_tokener_state_number case, explicitly adjust what "number" characters...

author Eric Haszlakiewicz <erh+git@nimenees.com>

Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)

committer Eric Haszlakiewicz <erh+git@nimenees.com>

Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)
author Eric Haszlakiewicz <erh+git@nimenees.com>
Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)
committer Eric Haszlakiewicz <erh+git@nimenees.com>
Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)
diff --git a/ChangeLog b/ChangeLog

index 32c2869935e84cf0dc3d362c44313446acaef9c1..6e46cdabd6835f3f6f5f3894d97944a2fb05f9eb 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -30,6 +30,9 @@ Other changes
  * Fix incremental parsing of numbers, especially those with exponents, e.g.
     so parsing "[0", "e+", "-]" now properly returns an error.
    Strict mode now rejects missing exponents ("0e").
+* Successfully return number objects at the top level even when they are
+   followed by a "-", "." or "e".  This makes parsing things like "123-45"
+   behave consistently with things like "123xyz".
  
  
  ***
diff --git a/json_object.c b/json_object.c

index 89439aba5b446ca678487e061c8523ea41833767..1dffd005d6596ca0c3dca3a4b831b418e9376864 100644 (file)
--- a/json_object.c
+++ b/json_object.c
@@ -54,7 +54,6 @@
  // Don't define this.  It's not thread-safe.
  /* #define REFCOUNT_DEBUG 1 */
  
-const char *json_number_chars = "0123456789.+-eE";
  const char *json_hex_chars = "0123456789abcdefABCDEF";
  
  static void json_object_generic_delete(struct json_object *jso);
diff --git a/json_object_private.h b/json_object_private.h

index d1d782e28c16b380294676d07915436d925643fb..e143b4649acdd8e8815aab0fbed5e36506c95a9d 100644 (file)
--- a/json_object_private.h
+++ b/json_object_private.h
@@ -98,7 +98,6 @@ struct json_object_string
  
  void _json_c_set_last_err(const char *err_fmt, ...);
  
-extern const char *json_number_chars;
  extern const char *json_hex_chars;
  
  #ifdef __cplusplus
diff --git a/json_tokener.c b/json_tokener.c

index b949d105decc00ed8a8bdc18381118b585ff8022..0d01d2b2a571af3e14083dd014499f78085d720f 100644 (file)
--- a/json_tokener.c
+++ b/json_tokener.c
@@ -193,7 +193,17 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene
                 return NULL;
         obj = json_tokener_parse_ex(tok, str, -1);
         *error = tok->err;
-       if (tok->err != json_tokener_success)
+       if (tok->err != json_tokener_success
+#if 0
+               /* This would be a more sensible default, and cause parsing
+                * things like "null123" to fail when the caller can't know
+                * where the parsing left off, but starting to fail would
+                * be a notable behaviour change.  Save for a 1.0 release.
+                */
+           || json_tokener_get_parse_end(tok) != strlen(str)
+#endif
+          )
+
         {
                 if (obj != NULL)
                         json_object_put(obj);
@@ -838,7 +848,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
                         const char *case_start = str;
                         int case_len = 0;
                         int is_exponent = 0;
-                       int negativesign_next_possible_location = 1;
+                       int neg_sign_ok = 1;
+                       int pos_sign_ok = 0;
                         if (printbuf_length(tok->pb) > 0)
                         {
                                 /* We don't save all state from the previous incremental parse
@@ -852,14 +863,26 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
                                         char *last_saved_char =
                                             &tok->pb->buf[printbuf_length(tok->pb) - 1];
                                         is_exponent = 1;
+                                       pos_sign_ok = neg_sign_ok = 1;
                                         /* If the "e" isn't at the end, we can't start with a '-' */
                                         if (e_loc != last_saved_char)
-                                               negativesign_next_possible_location = -1;
+                                       {
+                                               neg_sign_ok = 0;
+                                               pos_sign_ok = 0;
+                                       }
                                         // else leave it set to 1, i.e. start of the new input
                                 }
                         }
-                       while (c && strchr(json_number_chars, c))
+
+                       while (c &&
+                           ((c >= '0' && c <= '9') ||
+                                (!is_exponent && (c=='e' || c=='E')) ||
+                                (neg_sign_ok && c=='-') ||
+                                (pos_sign_ok && c=='+') ||
+                                (!tok->is_double && c=='.')
+                        ))
                         {
+                               pos_sign_ok = neg_sign_ok = 0;
                                 ++case_len;
  
                                 /* non-digit characters checks */
@@ -871,40 +894,16 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
                                 switch (c)
                                 {
                                 case '.':
-                                       if (tok->is_double != 0)
-                                       {
-                                               /* '.' can only be found once, and out of the exponent part.
-                                                * Thus, if the input is already flagged as double, it
-                                                * is invalid.
-                                                */
-                                               tok->err = json_tokener_error_parse_number;
-                                               goto out;
-                                       }
                                         tok->is_double = 1;
+                                       pos_sign_ok = 1;
+                                       neg_sign_ok = 1;
                                         break;
                                 case 'e': /* FALLTHRU */
                                 case 'E':
-                                       if (is_exponent != 0)
-                                       {
-                                               /* only one exponent possible */
-                                               tok->err = json_tokener_error_parse_number;
-                                               goto out;
-                                       }
                                         is_exponent = 1;
                                         tok->is_double = 1;
                                         /* the exponent part can begin with a negative sign */
-                                       negativesign_next_possible_location = case_len + 1;
-                                       break;
-                               case '-':
-                                       if (case_len != negativesign_next_possible_location)
-                                       {
-                                               /* If the negative sign is not where expected (ie
-                                                * start of input or start of exponent part), the
-                                                * input is invalid.
-                                                */
-                                               tok->err = json_tokener_error_parse_number;
-                                               goto out;
-                                       }
+                                       pos_sign_ok = neg_sign_ok = 1;
                                         break;
                                 default: break;
                                 }
@@ -915,6 +914,22 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
                                         goto out;
                                 }
                         }
+                       /*
+                               Now we know c isn't a valid number char, but check whether
+                               it might have been intended to be, and return a potentially
+                               more understandable error right away.
+                               However, if we're at the top-level, use the number as-is
+                           because c can be part of a new object to parse on the
+                               next call to json_tokener_parse().
+                        */
+                       if (tok->depth > 0 &&
+                           c != ',' && c != ']' && c != '}' && c != '/' &&
+                               c != 'I' && c != 'i' &&
+                           !isspace((unsigned char)c))
+                       {
+                               tok->err = json_tokener_error_parse_number;
+                               goto out;
+                       }
                         if (case_len > 0)
                                 printbuf_memappend_fast(tok->pb, case_start, case_len);
  
diff --git a/tests/test_parse.c b/tests/test_parse.c

index 2e1721516d29dc20273b994404c84f81cb2d8b53..3c9ac82a0af2e9e65fc118e8d838396cfa3f3383 100644 (file)
--- a/tests/test_parse.c
+++ b/tests/test_parse.c
@@ -141,16 +141,18 @@ static void test_basic_parse()
  
         single_basic_parse("12", 0);
         single_basic_parse("12.3", 0);
-       single_basic_parse("12.3.4", 0); /* non-sensical, returns null */
-       /* was returning (int)2015 before patch, should return null */
-       single_basic_parse("2015-01-15", 0);
  
-       /* ...but this works.  It's rather inconsistent, and a future major release
-        * should change the behavior so it either always returns null when extra
-        * bytes are present (preferred), or always return object created from as much
-        * as was able to be parsed.
+       /* Even though, when using json_tokener_parse() there's no way to
+        *  know when there is more data after the parsed object,
+        *  an object is successfully returned anyway (in some cases)
          */
+
+       single_basic_parse("12.3.4", 0);
+       single_basic_parse("2015-01-15", 0);
         single_basic_parse("12.3xxx", 0);
+       single_basic_parse("12.3{\"a\":123}", 0);
+       single_basic_parse("12.3\n", 0);
+       single_basic_parse("12.3 ", 0);
  
         single_basic_parse("{\"FoO\"  :   -12.3E512}", 0);
         single_basic_parse("{\"FoO\"  :   -12.3e512}", 0);
@@ -368,7 +370,10 @@ struct incremental_step
      {"[0e-]", -1, -1, json_tokener_success, 1},
      {"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
  
-    {"0e+-", 5, 3, json_tokener_error_parse_number, 1},
+       /* You might expect this to fail, but it won't because
+          it's a valid partial parse; note the char_offset: */
+    {"0e+-", 5, 3, json_tokener_success, 1},
+    {"0e+-", 5, 3, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT},
      {"[0e+-]", -1, 4, json_tokener_error_parse_number, 1},
  
      /* Similar tests for other kinds of objects: */
@@ -447,11 +452,22 @@ struct incremental_step
      {"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0},
      {&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1},
  
-    /* Some bad formatting. Check we get the correct error status
-     * XXX this means we can't have two numbers in the incremental parse
-     * XXX stream with the second one being a negative number!
-     */
-    {"2015-01-15", 10, 4, json_tokener_error_parse_number, 1},
+       /*
+        * Though this may seem invalid at first glance, it
+        * parses as three separate numbers, 2015, -1 and -15
+        * Of course, simply pasting together a stream of arbitrary
+        * positive numbers won't work, since there'll be no way to
+     * tell where in e.g. "2015015" the next number stats, so
+        * a reliably parsable stream must not include json_type_int
+        * or json_type_double objects without some other delimiter.
+        * e.g. whitespace
+        */
+    {&"2015-01-15"[0], 11, 4, json_tokener_success, 1},
+    {&"2015-01-15"[4], 7, 3, json_tokener_success, 1},
+    {&"2015-01-15"[7], 4, 3, json_tokener_success, 1},
+    {&"2015 01 15"[0], 11, 5, json_tokener_success, 1},
+    {&"2015 01 15"[4], 7, 4, json_tokener_success, 1},
+    {&"2015 01 15"[7], 4, 3, json_tokener_success, 1},
  
      /* Strings have a well defined end point, so we can stop at the quote */
      {"\"blue\"", -1, -1, json_tokener_success, 0},
diff --git a/tests/test_parse.expected b/tests/test_parse.expected

index 48539fcdf8bd1b523d8150077cf0b7cbe425c361..37db44d86bd70cfb7d0d14d4e39c9b455589641c 100644 (file)
--- a/tests/test_parse.expected
+++ b/tests/test_parse.expected
@@ -40,9 +40,13 @@ new_obj.to_string(nAn)=NaN
  new_obj.to_string(iNfinity)=Infinity
  new_obj.to_string(12)=12
  new_obj.to_string(12.3)=12.3
-new_obj.to_string(12.3.4)=null
-new_obj.to_string(2015-01-15)=null
+new_obj.to_string(12.3.4)=12.3
+new_obj.to_string(2015-01-15)=2015
  new_obj.to_string(12.3xxx)=12.3
+new_obj.to_string(12.3{"a":123})=12.3
+new_obj.to_string(12.3
+)=12.3
+new_obj.to_string(12.3 )=12.3
  new_obj.to_string({"FoO"  :   -12.3E512})={ "FoO": -12.3E512 }
  new_obj.to_string({"FoO"  :   -12.3e512})={ "FoO": -12.3e512 }
  new_obj.to_string({"FoO"  :   -12.3E51.2})=null
@@ -162,6 +166,7 @@ json_tokener_parse_ex(tok, 0e-         ,   4) ... OK: got object of type [double
  json_tokener_parse_ex(tok, 0e-         ,   4) ... OK: got correct error: unexpected end of data
  json_tokener_parse_ex(tok, [0e-]       ,   5) ... OK: got object of type [array]: [ 0 ]
  json_tokener_parse_ex(tok, [0e-]       ,   5) ... OK: got correct error: number expected
+json_tokener_parse_ex(tok, 0e+-        ,   5) ... OK: got object of type [double]: 0
  json_tokener_parse_ex(tok, 0e+-        ,   5) ... OK: got correct error: number expected
  json_tokener_parse_ex(tok, [0e+-]      ,   6) ... OK: got correct error: number expected
  json_tokener_parse_ex(tok, false       ,   5) ... OK: got correct error: continue
@@ -215,7 +220,12 @@ json_tokener_parse_ex(tok, nullx       ,   6) ... OK: got object of type [null]:
  json_tokener_parse_ex(tok, x           ,   2) ... OK: got correct error: unexpected character
  json_tokener_parse_ex(tok, {"a":1}{"b":2},  15) ... OK: got object of type [object]: { "a": 1 }
  json_tokener_parse_ex(tok, {"b":2}     ,   8) ... OK: got object of type [object]: { "b": 2 }
-json_tokener_parse_ex(tok, 2015-01-15  ,  10) ... OK: got correct error: number expected
+json_tokener_parse_ex(tok, 2015-01-15  ,  11) ... OK: got object of type [int]: 2015
+json_tokener_parse_ex(tok, -01-15      ,   7) ... OK: got object of type [int]: -1
+json_tokener_parse_ex(tok, -15         ,   4) ... OK: got object of type [int]: -15
+json_tokener_parse_ex(tok, 2015 01 15  ,  11) ... OK: got object of type [int]: 2015
+json_tokener_parse_ex(tok,  01 15      ,   7) ... OK: got object of type [int]: 1
+json_tokener_parse_ex(tok,  15         ,   4) ... OK: got object of type [int]: 15
  json_tokener_parse_ex(tok, "blue"      ,   6) ... OK: got object of type [string]: "blue"
  json_tokener_parse_ex(tok, "\""        ,   4) ... OK: got object of type [string]: "\""
  json_tokener_parse_ex(tok, "\\"        ,   4) ... OK: got object of type [string]: "\\"
@@ -265,5 +275,5 @@ json_tokener_parse_ex(tok, "\ud855
  json_tokener_parse_ex(tok, "\ud0031À"  ,  10) ... OK: got correct error: invalid utf-8 string
  json_tokener_parse_ex(tok, 11\8111       ,   5) ... OK: got correct error: invalid utf-8 string
  json_tokener_parse_ex(tok, {"1\81":1}    ,   8) ... OK: got correct error: invalid utf-8 string
-End Incremental Tests OK=179 ERROR=0
+End Incremental Tests OK=185 ERROR=0
  ==================================
author	Eric Haszlakiewicz <erh+git@nimenees.com>
	Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)
committer	Eric Haszlakiewicz <erh+git@nimenees.com>
	Mon, 29 Jun 2020 02:14:26 +0000 (02:14 +0000)
ChangeLog		patch \| blob \| blame \| history
json_object.c		patch \| blob \| blame \| history
json_object_private.h		patch \| blob \| blame \| history
json_tokener.c		patch \| blob \| blame \| history
tests/test_parse.c		patch \| blob \| blame \| history
tests/test_parse.expected		patch \| blob \| blame \| history