git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
psql: Tighten heuristics for BEGIN/END within CREATE SCHEMA.
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Jun 2026 18:12:03 +0000 (14:12 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Jun 2026 18:12:03 +0000 (14:12 -0400)
Since d51697484, psql's scanner treats CREATE SCHEMA as a command that
may contain SQL-standard routine bodies, so that semicolons inside
BEGIN ATOMIC ... END blocks do not terminate the command too early.
However, the code counted BEGIN/END throughout CREATE SCHEMA, so that
it could be fooled by valid (and previously accepted) code such as

    CREATE SCHEMA s CREATE VIEW begin AS SELECT 1;

Improve this by explicitly checking whether each CREATE sub-clause is
CREATE [OR REPLACE] {FUNCTION|PROCEDURE}, and only counting BEGIN/END
within those clauses.  Since CREATE FUNCTION/PROCEDURE wasn't allowed
in CREATE SCHEMA before d51697484, this will not risk failure on any
cases that worked before v19.

There remain cases that fool the top-level CREATE FUNCTION/PROCEDURE
heuristic and thus also the CREATE SCHEMA case, for example

    CREATE FUNCTION begin () ...

But that's been true all along with no field complaints, so we'll
leave that issue for another day.

In the name of keeping things readable, move the logic supporting
this out of the {identifier} flex rule and into some small new
subroutines.  Also rename existing related PsqlScanState fields
to help distinguish them from the added fields.

This patch also fixes what seems to me (tgl) a small bug: \;
would reset BEGIN/END detection even when inside parens or BEGIN.
That's unlike what a plain semicolon would do, and no such effect
is suggested by the documentation.

Author: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/8E03BB8D-003D-4850-9772-5F8015A5A0C7@gmail.com

src/fe_utils/psqlscan.l
src/include/fe_utils/psqlscan_int.h
src/test/regress/expected/create_schema.out
src/test/regress/sql/create_schema.sql

index d29dda4d8e1e93432005506e523a7aad2a2e7e26..bbfafbc5223da4a6d95dc44488556db6c9520111 100644 (file)
@@ -61,6 +61,9 @@ typedef int YYSTYPE;
 
 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
 
+static void psqlscan_track_identifier(PsqlScanState state,
+                                                                         const char *identifier);
+
 %}
 
 %option reentrant
@@ -677,11 +680,12 @@ other                     .
 
 ";"                            {
                                        ECHO;
-                                       if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
+                                       if (cur_state->paren_depth == 0 &&
+                                               cur_state->begin_depth == 0)
                                        {
                                                /* Terminate lexing temporarily */
                                                cur_state->start_state = YY_START;
-                                               cur_state->identifier_count = 0;
+                                               cur_state->init_idents_count = 0;
                                                return LEXRES_SEMI;
                                        }
                                }
@@ -694,8 +698,11 @@ other                      .
 "\\"[;:]               {
                                        /* Force a semi-colon or colon into the query buffer */
                                        psqlscan_emit(cur_state, yytext + 1, 1);
-                                       if (yytext[1] == ';')
-                                               cur_state->identifier_count = 0;
+                                       /* Reset BEGIN/END tracking if semi at outer level */
+                                       if (yytext[1] == ';' &&
+                                               cur_state->paren_depth == 0 &&
+                                               cur_state->begin_depth == 0)
+                                               cur_state->init_idents_count = 0;
                                }
 
 "\\"                   {
@@ -921,61 +928,7 @@ other                      .
 
 
 {identifier}   {
-                                       /*
-                                        * We need to track if we are inside a BEGIN .. END block
-                                        * in a function definition, so that semicolons contained
-                                        * therein don't terminate the whole statement.  Short of
-                                        * writing a full parser here, the following heuristic
-                                        * should work.  First, we track whether the beginning of
-                                        * the statement matches CREATE [OR REPLACE]
-                                        * {FUNCTION|PROCEDURE|SCHEMA}.  (Allowing this in
-                                        * CREATE SCHEMA, without tracking whether we're within a
-                                        * CREATE FUNCTION/PROCEDURE subcommand, is a bit shaky
-                                        * but should be okay with the present set of valid
-                                        * subcommands.)
-                                        */
-
-                                       if (cur_state->identifier_count == 0)
-                                               memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
-
-                                       if (cur_state->identifier_count < sizeof(cur_state->identifiers))
-                                       {
-                                               if (pg_strcasecmp(yytext, "create") == 0 ||
-                                                       pg_strcasecmp(yytext, "function") == 0 ||
-                                                       pg_strcasecmp(yytext, "procedure") == 0 ||
-                                                       pg_strcasecmp(yytext, "or") == 0 ||
-                                                       pg_strcasecmp(yytext, "replace") == 0 ||
-                                                       pg_strcasecmp(yytext, "schema") == 0)
-                                                       cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
-                                       }
-
-                                       cur_state->identifier_count++;
-
-                                       if (cur_state->identifiers[0] == 'c' &&
-                                               (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
-                                                (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
-                                                 (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p')) ||
-                                                cur_state->identifiers[1] == 's') &&
-                                               cur_state->paren_depth == 0)
-                                       {
-                                               if (pg_strcasecmp(yytext, "begin") == 0)
-                                                       cur_state->begin_depth++;
-                                               else if (pg_strcasecmp(yytext, "case") == 0)
-                                               {
-                                                       /*
-                                                        * CASE also ends with END.  We only need to track
-                                                        * this if we are already inside a BEGIN.
-                                                        */
-                                                       if (cur_state->begin_depth >= 1)
-                                                               cur_state->begin_depth++;
-                                               }
-                                               else if (pg_strcasecmp(yytext, "end") == 0)
-                                               {
-                                                       if (cur_state->begin_depth > 0)
-                                                               cur_state->begin_depth--;
-                                               }
-                                       }
-
+                                       psqlscan_track_identifier(cur_state, yytext);
                                        ECHO;
                                }
 
@@ -1002,6 +955,135 @@ other                    .
 
 /* LCOV_EXCL_STOP */
 
+/*
+ * Record the first few keywords/identifiers of a statement or CREATE
+ * SCHEMA sub-statement in the idents[] array, of length idents_size.
+ * *idents_count is the number of entries filled so far.
+ *
+ * We record the interesting keywords using their first character, which
+ * works so long as those are all different.  We could switch to an enum
+ * if that stops being true, but for now this is easy and compact.
+ */
+static void
+psqlscan_record_initial_keyword(const char *identifier,
+                                                               char *idents,
+                                                               int idents_size,
+                                                               int *idents_count)
+{
+       if (*idents_count < idents_size)
+       {
+               /*
+                * What we need to recognize is CREATE [OR REPLACE] FUNCTION/PROCEDURE
+                * and CREATE SCHEMA.  Checking for SCHEMA is useless but not harmful
+                * in the CREATE SCHEMA sub-statement case.
+                */
+               if (pg_strcasecmp(identifier, "create") == 0 ||
+                       pg_strcasecmp(identifier, "function") == 0 ||
+                       pg_strcasecmp(identifier, "procedure") == 0 ||
+                       pg_strcasecmp(identifier, "or") == 0 ||
+                       pg_strcasecmp(identifier, "replace") == 0 ||
+                       pg_strcasecmp(identifier, "schema") == 0)
+                       idents[*idents_count] = pg_tolower((unsigned char) identifier[0]);
+               /* For other keywords or identifiers, leave '\0' in the array entry */
+               (*idents_count)++;
+       }
+}
+
+/*
+ * Does the current input match CREATE [OR REPLACE] {FUNCTION|PROCEDURE}?
+ */
+static bool
+psqlscan_is_create_routine(const char *idents)
+{
+       return idents[0] == 'c' &&
+               (idents[1] == 'f' || idents[1] == 'p' ||
+                (idents[1] == 'o' && idents[2] == 'r' &&
+                 (idents[3] == 'f' || idents[3] == 'p')));
+}
+
+/*
+ * Track whether we are inside a BEGIN .. END block in a function definition,
+ * so that semicolons contained therein don't terminate the whole statement.
+ * Short of writing a full parser here, the following heuristic should work.
+ *
+ * We track whether the beginning of the statement matches CREATE [OR REPLACE]
+ * {FUNCTION|PROCEDURE}.  For CREATE SCHEMA, track BEGIN .. END blocks only
+ * after recognizing an embedded CREATE [OR REPLACE] {FUNCTION|PROCEDURE}
+ * subcommand.  Once one of these conditions holds, count BEGIN and END
+ * pairs.  We also have to account for CASE ... END.
+ */
+static void
+psqlscan_track_identifier(PsqlScanState state, const char *identifier)
+{
+       bool            is_create_schema;
+
+       /* None of this needs to happen when we're inside parentheses */
+       if (state->paren_depth != 0)
+               return;
+
+       /* Reset all my state at the start of each new statement */
+       if (state->init_idents_count == 0)
+       {
+               memset(state->init_idents, 0, sizeof(state->init_idents));
+               state->sub_idents_count = 0;
+               memset(state->sub_idents, 0, sizeof(state->sub_idents));
+       }
+
+       /* Record initial keywords if init_idents_count is small enough */
+       psqlscan_record_initial_keyword(identifier,
+                                                                       state->init_idents,
+                                                                       lengthof(state->init_idents),
+                                                                       &state->init_idents_count);
+
+       /*
+        * In CREATE SCHEMA, track identifiers from each top-level CREATE schema
+        * element separately, so that BEGIN/END tracking is enabled only within
+        * CREATE [OR REPLACE] {FUNCTION|PROCEDURE} clauses.
+        */
+       is_create_schema = (state->init_idents[0] == 'c' &&
+                                               state->init_idents[1] == 's');
+       if (is_create_schema &&
+               state->begin_depth == 0)
+       {
+               /* Reset sub-clause state at each top-level CREATE keyword */
+               if (pg_strcasecmp(identifier, "create") == 0)
+               {
+                       state->sub_idents_count = 0;
+                       memset(state->sub_idents, 0, sizeof(state->sub_idents));
+               }
+               /* ... and record the first few keywords following that */
+               psqlscan_record_initial_keyword(identifier,
+                                                                               state->sub_idents,
+                                                                               lengthof(state->sub_idents),
+                                                                               &state->sub_idents_count);
+       }
+
+       /*
+        * Track BEGIN/CASE/END only when within an appropriate (sub) statement.
+        */
+       if (psqlscan_is_create_routine(state->init_idents) ||
+               (is_create_schema &&
+                psqlscan_is_create_routine(state->sub_idents)))
+       {
+               if (pg_strcasecmp(identifier, "begin") == 0)
+                       state->begin_depth++;
+               else if (pg_strcasecmp(identifier, "case") == 0)
+               {
+                       /*
+                        * CASE also ends with END.  We only need to track this if we are
+                        * already inside a BEGIN.
+                        */
+                       if (state->begin_depth >= 1)
+                               state->begin_depth++;
+               }
+               else if (pg_strcasecmp(identifier, "end") == 0)
+               {
+                       if (state->begin_depth > 0)
+                               state->begin_depth--;
+               }
+       }
+}
+
 /*
  * Create a lexer working state struct.
  *
@@ -1292,8 +1374,8 @@ psql_scan_reset(PsqlScanState state)
        if (state->dolqstart)
                free(state->dolqstart);
        state->dolqstart = NULL;
-       state->identifier_count = 0;
        state->begin_depth = 0;
+       state->init_idents_count = 0;
 }
 
 /*
index 488f416f0e551933123e6ec33b0205efaea0cccf..8b0d153261b35c0989d8e4f609bb3dcbb0816260 100644 (file)
@@ -117,9 +117,12 @@ typedef struct PsqlScanStateData
         * State to track boundaries of BEGIN ... END blocks in function
         * definitions, so that semicolons do not send query too early.
         */
-       int                     identifier_count;       /* identifiers since start of statement */
-       char            identifiers[4]; /* records the first few identifiers */
        int                     begin_depth;    /* depth of begin/end pairs */
+       int                     init_idents_count;      /* # identifiers since start of statement */
+       char            init_idents[4]; /* records the first few identifiers */
+       int                     sub_idents_count;       /* # identifiers since start of a CREATE
+                                                                        * SCHEMA element */
+       char            sub_idents[4];  /* records the first few of those identifiers */
 
        /*
         * Callback functions provided by the program making use of the lexer,
index bfe211338abe8c1c8b6896ea39e5ee0c530bc6b7..b9ae4c402fd1a748fa665b9416046b294f5efc7f 100644 (file)
@@ -195,7 +195,11 @@ CREATE SCHEMA regress_schema_misc
     as 'select $1 + $2'
   CREATE OPERATOR + (function = cs_add, leftarg = int4, rightarg = int4)
   CREATE PROCEDURE cs_proc(int4, int4)
-    BEGIN ATOMIC SELECT cs_add($1,$2); END
+    BEGIN ATOMIC
+      SELECT cs_add($1,$2);
+    END
+  -- this checks that psql is not fooled by an irrelevant BEGIN
+  CREATE VIEW begin AS SELECT 1 AS one
   CREATE TEXT SEARCH CONFIGURATION cs_ts_conf (copy=english)
   CREATE TEXT SEARCH DICTIONARY cs_ts_dict (template=simple)
   CREATE TEXT SEARCH PARSER cs_ts_prs
@@ -222,7 +226,7 @@ CREATE SCHEMA regress_schema_misc
 ;
 NOTICE:  return type cs_type is only a shell
 NOTICE:  argument type cs_type is only a shell
-LINE 29:   CREATE FUNCTION cs_type_out(cs_type)
+LINE 33:   CREATE FUNCTION cs_type_out(cs_type)
                                        ^
 \df regress_schema_misc.cs_add
                               List of functions
@@ -300,13 +304,14 @@ LINE 29:   CREATE FUNCTION cs_type_out(cs_type)
 (1 row)
 
 DROP SCHEMA regress_schema_misc CASCADE;
-NOTICE:  drop cascades to 16 other objects
+NOTICE:  drop cascades to 17 other objects
 DETAIL:  drop cascades to function regress_schema_misc.cs_sum(integer)
 drop cascades to collation regress_schema_misc.cs_builtin_c
 drop cascades to type regress_schema_misc.cs_positive
 drop cascades to function regress_schema_misc.cs_add(integer,integer)
 drop cascades to operator regress_schema_misc.+(integer,integer)
 drop cascades to function regress_schema_misc.cs_proc(integer,integer)
+drop cascades to view regress_schema_misc.begin
 drop cascades to text search configuration regress_schema_misc.cs_ts_conf
 drop cascades to text search dictionary regress_schema_misc.cs_ts_dict
 drop cascades to text search parser regress_schema_misc.cs_ts_prs
index ebe05d5110ecfff713bcd21550bd64cd202b3da0..526bb3cb065d06bd26655d3fecbcc8210b4b54f5 100644 (file)
@@ -120,7 +120,12 @@ CREATE SCHEMA regress_schema_misc
   CREATE OPERATOR + (function = cs_add, leftarg = int4, rightarg = int4)
 
   CREATE PROCEDURE cs_proc(int4, int4)
-    BEGIN ATOMIC SELECT cs_add($1,$2); END
+    BEGIN ATOMIC
+      SELECT cs_add($1,$2);
+    END
+
+  -- this checks that psql is not fooled by an irrelevant BEGIN
+  CREATE VIEW begin AS SELECT 1 AS one
 
   CREATE TEXT SEARCH CONFIGURATION cs_ts_conf (copy=english)