git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix incremental JSON parser numeric token reassembly across chunks.
author: Andrew Dunstan <andrew@dunslane.net>
Thu, 9 Apr 2026 11:57:07 +0000 (07:57 -0400)
committer: Andrew Dunstan <andrew@dunslane.net>
Fri, 10 Apr 2026 11:13:08 +0000 (07:13 -0400)
When the incremental JSON parser splits a numeric token across chunk
boundaries, it accumulates continuation characters into the partial
token buffer.  The accumulator's switch statement unconditionally
accepted '+', '-', '.', 'e', and 'E' as valid numeric continuations
regardless of position, which violated JSON number grammar
(-? int [frac] [exp]).  For example, input "4-" fed in single-byte
chunks would accumulate the '-' into the numeric token, producing an
invalid token that later triggered an assertion failure during
re-lexing.

Fix by tracking parser state (seen_dot, seen_exp, prev character)
across the existing partial token and incoming bytes, so that each
character class is accepted only in its grammatically valid position.

src/common/jsonapi.c

index 1145d93945f3ac49a5ca82a37a69335921287b8c..12e40f2d564fda0b5c4695e0804815bd746dad77 100644 (file)
@@ -1670,9 +1670,31 @@ json_lex(JsonLexContext *lex)
 
                        if (c == '-' || (c >= '0' && c <= '9'))
                        {
-                               /* for numbers look for possible numeric continuations */
-
+                               /*
+                                * Accumulate numeric continuations, respecting JSON number
+                                * grammar: -? int [frac] [exp]
+                                *
+                                * We must track what parts of the number we've already seen
+                                * so we don't over-consume.  '.' is valid only once and not
+                                * after 'e'/'E'; 'e'/'E' is valid only once; '+'/'-' are
+                                * valid only immediately after 'e'/'E'.
+                                */
                                bool            numend = false;
+                               bool            seen_dot = false;
+                               bool            seen_exp = false;
+                               char            prev;
+
+                               /* Scan existing partial token for state */
+                               for (int j = 0; j < ptok->len; j++)
+                               {
+                                       char            pc = ptok->data[j];
+
+                                       if (pc == '.')
+                                               seen_dot = true;
+                                       else if (pc == 'e' || pc == 'E')
+                                               seen_exp = true;
+                               }
+                               prev = ptok->data[ptok->len - 1];
 
                                for (size_t i = 0; i < lex->input_length && !numend; i++)
                                {
@@ -1682,8 +1704,35 @@ json_lex(JsonLexContext *lex)
                                        {
                                                case '+':
                                                case '-':
+                                                       if (prev != 'e' && prev != 'E')
+                                                       {
+                                                               numend = true;
+                                                               break;
+                                                       }
+                                                       jsonapi_appendStringInfoCharMacro(ptok, cc);
+                                                       added++;
+                                                       break;
+                                               case '.':
+                                                       if (seen_dot || seen_exp)
+                                                       {
+                                                               numend = true;
+                                                               break;
+                                                       }
+                                                       seen_dot = true;
+                                                       jsonapi_appendStringInfoCharMacro(ptok, cc);
+                                                       added++;
+                                                       break;
                                                case 'e':
                                                case 'E':
+                                                       if (seen_exp)
+                                                       {
+                                                               numend = true;
+                                                               break;
+                                                       }
+                                                       seen_exp = true;
+                                                       jsonapi_appendStringInfoCharMacro(ptok, cc);
+                                                       added++;
+                                                       break;
                                                case '0':
                                                case '1':
                                                case '2':
@@ -1694,14 +1743,14 @@ json_lex(JsonLexContext *lex)
                                                case '7':
                                                case '8':
                                                case '9':
-                                                       {
-                                                               jsonapi_appendStringInfoCharMacro(ptok, cc);
-                                                               added++;
-                                                       }
+                                                       jsonapi_appendStringInfoCharMacro(ptok, cc);
+                                                       added++;
                                                        break;
                                                default:
                                                        numend = true;
                                        }
+                                       if (!numend)
+                                               prev = cc;
                                }
                        }