From: Andrew Dunstan <andrew@dunslane.net>
Date: Thu, 9 Apr 2026 11:57:07 +0000 (-0400)
Subject: Fix incremental JSON parser numeric token reassembly across chunks.
X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=2478bd5db0aad3599802636201af7adc170ba280;p=thirdparty%2Fpostgresql.git

Fix incremental JSON parser numeric token reassembly across chunks.

When a numeric token is split across chunk boundaries, the incremental
JSON parser accumulates continuation characters into the partial token
buffer.  The accumulator's switch statement unconditionally accepted
'+', '-', '.', 'e', and 'E' as valid numeric continuations regardless
of position, which violated the JSON number grammar
(-? int [frac] [exp]).  For example, input "4-" fed in single-byte
chunks would accumulate the '-' into the numeric token, producing an
invalid token that later triggered an assertion failure during
re-lexing.

Fix by tracking parser state (seen_dot, seen_exp, prev character)
across the existing partial token and incoming bytes, so that each
character class is accepted only in its grammatically valid position.
---

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 1145d93945f..12e40f2d564 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -1670,9 +1670,31 @@ json_lex(JsonLexContext *lex)
 
 			if (c == '-' || (c >= '0' && c <= '9'))
 			{
-				/* for numbers look for possible numeric continuations */
-
+				/*
+				 * Accumulate numeric continuations, respecting JSON number
+				 * grammar: -? int [frac] [exp]
+				 *
+				 * We must track what parts of the number we've already seen
+				 * so we don't over-consume.  '.' is valid only once and not
+				 * after 'e'/'E'; 'e'/'E' is valid only once; '+'/'-' are
+				 * valid only immediately after 'e'/'E'.
+				 */
 				bool		numend = false;
+				bool		seen_dot = false;
+				bool		seen_exp = false;
+				char		prev;
+
+				/* Scan existing partial token for state */
+				for (int j = 0; j < ptok->len; j++)
+				{
+					char		pc = ptok->data[j];
+
+					if (pc == '.')
+						seen_dot = true;
+					else if (pc == 'e' || pc == 'E')
+						seen_exp = true;
+				}
+				prev = ptok->data[ptok->len - 1];
 
 				for (size_t i = 0; i < lex->input_length && !numend; i++)
 				{
@@ -1682,8 +1704,35 @@ json_lex(JsonLexContext *lex)
 					{
 						case '+':
 						case '-':
+							if (prev != 'e' && prev != 'E')
+							{
+								numend = true;
+								break;
+							}
+							jsonapi_appendStringInfoCharMacro(ptok, cc);
+							added++;
+							break;
+						case '.':
+							if (seen_dot || seen_exp)
+							{
+								numend = true;
+								break;
+							}
+							seen_dot = true;
+							jsonapi_appendStringInfoCharMacro(ptok, cc);
+							added++;
+							break;
 						case 'e':
 						case 'E':
+							if (seen_exp)
+							{
+								numend = true;
+								break;
+							}
+							seen_exp = true;
+							jsonapi_appendStringInfoCharMacro(ptok, cc);
+							added++;
+							break;
 						case '0':
 						case '1':
 						case '2':
@@ -1694,14 +1743,14 @@ json_lex(JsonLexContext *lex)
 						case '7':
 						case '8':
 						case '9':
-							{
-								jsonapi_appendStringInfoCharMacro(ptok, cc);
-								added++;
-							}
+							jsonapi_appendStringInfoCharMacro(ptok, cc);
+							added++;
 							break;
 						default:
 							numend = true;
 					}
+					if (!numend)
+						prev = cc;
 				}
 			}