]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
run sentinel server side fns outside of VALUES
authorMike Bayer <mike_mp@zzzcomputing.com>
Mon, 1 Dec 2025 20:11:50 +0000 (15:11 -0500)
committerMike Bayer <mike_mp@zzzcomputing.com>
Wed, 3 Dec 2025 03:52:57 +0000 (22:52 -0500)
Fixed the structure of the SQL string used for the
:ref:`engine_insertmanyvalues` feature when an explicit sequence with
``nextval()`` is used. The SQL function invocation for the sequence has
been moved from being rendered inline within each tuple inside of VALUES to
being rendered once in the SELECT that reads from VALUES. This change
ensures the function is invoked in the correct order as rows are processed,
rather than assuming PostgreSQL will execute inline function calls within
VALUES in a particular order. While current PostgreSQL versions appear to
handle the previous approach correctly, the database does not guarantee
this behavior for future versions.

Fixes: #13015
Change-Id: Ia0a2a4e8f89e21852d7cb550dfa5d9ea9447b590

doc/build/changelog/unreleased_20/13015.rst [new file with mode: 0644]
lib/sqlalchemy/engine/base.py
lib/sqlalchemy/sql/compiler.py
lib/sqlalchemy/sql/crud.py
lib/sqlalchemy/testing/assertsql.py
test/dialect/postgresql/test_query.py

diff --git a/doc/build/changelog/unreleased_20/13015.rst b/doc/build/changelog/unreleased_20/13015.rst
new file mode 100644 (file)
index 0000000..afadb62
--- /dev/null
@@ -0,0 +1,14 @@
+.. change::
+    :tags: bug, postgresql
+    :tickets: 13015
+
+    Fixed the structure of the SQL string used for the
+    :ref:`engine_insertmanyvalues` feature when an explicit sequence with
+    ``nextval()`` is used. The SQL function invocation for the sequence has
+    been moved from being rendered inline within each tuple inside of VALUES to
+    being rendered once in the SELECT that reads from VALUES. This change
+    ensures the function is invoked in the correct order as rows are processed,
+    rather than assuming PostgreSQL will execute inline function calls within
+    VALUES in a particular order. While current PostgreSQL versions appear to
+    handle the previous approach correctly, the database does not guarantee
+    this behavior for future versions.
index 57cb0fc2a95e06ed30dd84e76d6fd6804fca85ff..4e2d6d878d04c4ba36dd231928d48b16ca1951ee 100644 (file)
@@ -1984,6 +1984,13 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]):
         else:
             do_execute_dispatch = ()
 
+        if engine_events:
+            _WORKAROUND_ISSUE_13018 = getattr(
+                self, "_WORKAROUND_ISSUE_13018", False
+            )
+        else:
+            _WORKAROUND_ISSUE_13018 = False
+
         if self._echo:
             stats = context._get_cache_stats() + " (insertmanyvalues)"
 
@@ -2098,8 +2105,9 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]):
                 self.dispatch.after_cursor_execute(
                     self,
                     cursor,
-                    str_statement,
-                    effective_parameters,
+                    # TODO: this will be fixed by #13018
+                    sub_stmt if _WORKAROUND_ISSUE_13018 else str_statement,
+                    sub_params if _WORKAROUND_ISSUE_13018 else parameters,
                     context,
                     context.executemany,
                 )
index bda6b8166668ee283edceee15fa6ed925dffc8a1..371d872b44201463082c49467aaa343f268d5603 100644 (file)
@@ -6263,9 +6263,7 @@ class SQLCompiler(Compiled):
                 self._insertmanyvalues = _InsertManyValues(
                     True,
                     self.dialect.default_metavalue_token,
-                    cast(
-                        "List[crud._CrudParamElementStr]", crud_params_single
-                    ),
+                    crud_params_single,
                     counted_bindparam,
                     sort_by_parameter_order=(
                         insert_stmt._sort_by_parameter_order
@@ -6287,100 +6285,123 @@ class SQLCompiler(Compiled):
                     for crud_param_set in crud_params_struct.all_multi_params
                 ),
             )
-        else:
-            insert_single_values_expr = ", ".join(
-                [
-                    value
-                    for _, _, value, _ in cast(
-                        "List[crud._CrudParamElementStr]",
-                        crud_params_single,
-                    )
-                ]
-            )
+        elif use_insertmanyvalues:
+            if (
+                implicit_sentinel
+                and (
+                    self.dialect.insertmanyvalues_implicit_sentinel
+                    & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
+                )
+                # this is checking if we have
+                # INSERT INTO table (id) VALUES (DEFAULT).
+                and not (crud_params_struct.is_default_metavalue_only)
+            ):
+                # if we have a sentinel column that is server generated,
+                # then for selected backends render the VALUES list as a
+                # subquery.  This is the orderable form supported by
+                # PostgreSQL and in fewer cases SQL Server
+                embed_sentinel_value = True
+
+                render_bind_casts = (
+                    self.dialect.insertmanyvalues_implicit_sentinel
+                    & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS
+                )
 
-            if use_insertmanyvalues:
-                if (
-                    implicit_sentinel
-                    and (
-                        self.dialect.insertmanyvalues_implicit_sentinel
-                        & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
-                    )
-                    # this is checking if we have
-                    # INSERT INTO table (id) VALUES (DEFAULT).
-                    and not (crud_params_struct.is_default_metavalue_only)
-                ):
-                    # if we have a sentinel column that is server generated,
-                    # then for selected backends render the VALUES list as a
-                    # subquery.  This is the orderable form supported by
-                    # PostgreSQL and SQL Server.
-                    embed_sentinel_value = True
+                add_sentinel_set = add_sentinel_cols or ()
 
-                    render_bind_casts = (
-                        self.dialect.insertmanyvalues_implicit_sentinel
-                        & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS
-                    )
+                insert_single_values_expr = ", ".join(
+                    [
+                        value
+                        for col, _, value, _ in crud_params_single
+                        if col not in add_sentinel_set
+                    ]
+                )
 
-                    colnames = ", ".join(
-                        f"p{i}" for i, _ in enumerate(crud_params_single)
-                    )
+                colnames = ", ".join(
+                    f"p{i}"
+                    for i, cp in enumerate(crud_params_single)
+                    if cp[0] not in add_sentinel_set
+                )
 
-                    if render_bind_casts:
-                        # render casts for the SELECT list.  For PG, we are
-                        # already rendering bind casts in the parameter list,
-                        # selectively for the more "tricky" types like ARRAY.
-                        # however, even for the "easy" types, if the parameter
-                        # is NULL for every entry, PG gives up and says
-                        # "it must be TEXT", which fails for other easy types
-                        # like ints.  So we cast on this side too.
-                        colnames_w_cast = ", ".join(
+                if render_bind_casts:
+                    # render casts for the SELECT list.  For PG, we are
+                    # already rendering bind casts in the parameter list,
+                    # selectively for the more "tricky" types like ARRAY.
+                    # however, even for the "easy" types, if the parameter
+                    # is NULL for every entry, PG gives up and says
+                    # "it must be TEXT", which fails for other easy types
+                    # like ints.  So we cast on this side too.
+                    colnames_w_cast = ", ".join(
+                        (
                             self.render_bind_cast(
                                 col.type,
                                 col.type._unwrapped_dialect_impl(self.dialect),
                                 f"p{i}",
                             )
-                            for i, (col, *_) in enumerate(crud_params_single)
+                            if col not in add_sentinel_set
+                            else expr
+                        )
+                        for i, (col, _, expr, _) in enumerate(
+                            crud_params_single
                         )
-                    else:
-                        colnames_w_cast = colnames
-
-                    text += (
-                        f" SELECT {colnames_w_cast} FROM "
-                        f"(VALUES ({insert_single_values_expr})) "
-                        f"AS imp_sen({colnames}, sen_counter) "
-                        "ORDER BY sen_counter"
                     )
                 else:
-                    # otherwise, if no sentinel or backend doesn't support
-                    # orderable subquery form, use a plain VALUES list
-                    embed_sentinel_value = False
-                    text += f" VALUES ({insert_single_values_expr})"
+                    colnames_w_cast = ", ".join(
+                        (f"p{i}" if col not in add_sentinel_set else expr)
+                        for i, (col, _, expr, _) in enumerate(
+                            crud_params_single
+                        )
+                    )
 
-                self._insertmanyvalues = _InsertManyValues(
-                    is_default_expr=False,
-                    single_values_expr=insert_single_values_expr,
-                    insert_crud_params=cast(
-                        "List[crud._CrudParamElementStr]",
-                        crud_params_single,
-                    ),
-                    num_positional_params_counted=counted_bindparam,
-                    sort_by_parameter_order=(
-                        insert_stmt._sort_by_parameter_order
-                    ),
-                    includes_upsert_behaviors=(
-                        insert_stmt._post_values_clause is not None
-                    ),
-                    sentinel_columns=add_sentinel_cols,
-                    num_sentinel_columns=(
-                        len(add_sentinel_cols) if add_sentinel_cols else 0
-                    ),
-                    sentinel_param_keys=named_sentinel_params,
-                    implicit_sentinel=implicit_sentinel,
-                    embed_values_counter=embed_sentinel_value,
+                insert_crud_params = [
+                    elem
+                    for elem in crud_params_single
+                    if elem[0] not in add_sentinel_set
+                ]
+
+                text += (
+                    f" SELECT {colnames_w_cast} FROM "
+                    f"(VALUES ({insert_single_values_expr})) "
+                    f"AS imp_sen({colnames}, sen_counter) "
+                    "ORDER BY sen_counter"
                 )
 
             else:
+                # otherwise, if no sentinel or backend doesn't support
+                # orderable subquery form, use a plain VALUES list
+                embed_sentinel_value = False
+                insert_crud_params = crud_params_single
+                insert_single_values_expr = ", ".join(
+                    [value for _, _, value, _ in crud_params_single]
+                )
+
                 text += f" VALUES ({insert_single_values_expr})"
 
+            self._insertmanyvalues = _InsertManyValues(
+                is_default_expr=False,
+                single_values_expr=insert_single_values_expr,
+                insert_crud_params=insert_crud_params,
+                num_positional_params_counted=counted_bindparam,
+                sort_by_parameter_order=(insert_stmt._sort_by_parameter_order),
+                includes_upsert_behaviors=(
+                    insert_stmt._post_values_clause is not None
+                ),
+                sentinel_columns=add_sentinel_cols,
+                num_sentinel_columns=(
+                    len(add_sentinel_cols) if add_sentinel_cols else 0
+                ),
+                sentinel_param_keys=named_sentinel_params,
+                implicit_sentinel=implicit_sentinel,
+                embed_values_counter=embed_sentinel_value,
+            )
+
+        else:
+            insert_single_values_expr = ", ".join(
+                [value for _, _, value, _ in crud_params_single]
+            )
+
+            text += f" VALUES ({insert_single_values_expr})"
+
         if insert_stmt._post_values_clause is not None:
             post_values_clause = self.process(
                 insert_stmt._post_values_clause, **kw
index bb63a41e728f78d74567412aacd9d0735f15f131..3d44b5c5f7e0175ff2bc8dcfd86432c02a0513b7 100644 (file)
@@ -106,7 +106,7 @@ _CrudParamSequence = List[_CrudParamElement]
 
 
 class _CrudParams(NamedTuple):
-    single_params: _CrudParamSequence
+    single_params: List[_CrudParamElementStr]
     all_multi_params: List[Sequence[_CrudParamElementStr]]
     is_default_metavalue_only: bool = False
     use_insertmanyvalues: bool = False
@@ -260,7 +260,7 @@ def _get_crud_params(
         }
 
     # create a list of column assignment clauses as tuples
-    values: List[_CrudParamElement] = []
+    values: List[_CrudParamElementStr] = []
 
     if stmt_parameter_tuples is not None:
         _get_stmt_parameter_tuples_params(
index 81c7138c4b5ed21f884d330a6ce3903ec19c0302..603955a8b5196981d7149d51b62e04bbdc547696 100644 (file)
@@ -275,7 +275,9 @@ class DialectSQL(CompiledSQL):
 
         # TODO: why do we need this part?
         for real_stmt in execute_observed.statements:
-            if self._compare_no_space(real_stmt.statement, received_stmt):
+            if self._compare_no_space(
+                real_stmt.context.statement, received_stmt
+            ):
                 break
         else:
             raise AssertionError(
@@ -482,6 +484,7 @@ def assert_engine(engine):
     def connection_execute(
         conn, clauseelement, multiparams, params, execution_options
     ):
+        conn._WORKAROUND_ISSUE_13018 = True
         # grab the original statement + params before any cursor
         # execution
         orig[:] = clauseelement, multiparams, params
@@ -502,6 +505,7 @@ def assert_engine(engine):
         else:
             obs = SQLExecuteObserved(context, orig[0], orig[1], orig[2])
             asserter.accumulated.append(obs)
+
         obs.statements.append(
             SQLCursorExecuteObserved(
                 statement, parameters, context, executemany
index 12383a5a04ea3034b3b389f02a0b0fbe17d58f40..8a4be4cf2d32423ede3b2b07bd9d11fbc900cf49 100644 (file)
@@ -162,6 +162,104 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults):
         metadata.create_all(connection)
         self._assert_data_noautoincrement(connection, table)
 
+    def test_full_cursor_insertmanyvalues_sql(self, metadata, connection):
+        """test compilation/ execution of the subquery form including
+        the fix for #13015
+
+        The specific form in question for #13015 is only supported by the
+        PostgreSQL dialect right now.   MSSQL would also use it for a server
+        side function that produces monotonic values, but we have no support
+        for that outside of sequence next right now, where SQL Server doesn't
+        support invokving the sequence outside of the VALUES tuples.
+
+        """
+
+        my_table = Table(
+            "my_table",
+            metadata,
+            Column("data1", String(50)),
+            Column(
+                "id",
+                Integer,
+                Sequence("foo_id_seq", start=1, data_type=Integer),
+                primary_key=True,
+            ),
+            Column("data2", String(50)),
+        )
+
+        my_table.create(connection)
+        with self.sql_execution_asserter(connection) as assert_:
+            connection.execute(
+                my_table.insert().returning(
+                    my_table.c.data1,
+                    my_table.c.id,
+                    sort_by_parameter_order=True,
+                ),
+                [
+                    {"data1": f"d1 row {i}", "data2": f"d2 row {i}"}
+                    for i in range(10)
+                ],
+            )
+
+        render_bind_casts = (
+            String().dialect_impl(connection.dialect).render_bind_cast
+        )
+
+        if render_bind_casts:
+            varchar_cast = "::VARCHAR"
+        else:
+            varchar_cast = ""
+
+        if connection.dialect.paramstyle == "pyformat":
+            params = ", ".join(
+                f"(%(data1__{i})s{varchar_cast}, %(data2__{i})s"
+                f"{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = {}
+            for i in range(10):
+                parameters[f"data1__{i}"] = f"d1 row {i}"
+                parameters[f"data2__{i}"] = f"d2 row {i}"
+
+        elif connection.dialect.paramstyle == "numeric_dollar":
+            params = ", ".join(
+                f"(${i * 2 + 1}{varchar_cast}, "
+                f"${i * 2 + 2}{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        elif connection.dialect.paramstyle == "format":
+            params = ", ".join(
+                f"(%s{varchar_cast}, %s{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        elif connection.dialect.paramstyle == "qmark":
+            params = ", ".join(
+                f"(?{varchar_cast}, ?{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        else:
+            assert False
+
+        assert_.assert_(
+            CursorSQL(
+                "INSERT INTO my_table (data1, id, data2) "
+                f"SELECT p0::VARCHAR, nextval('foo_id_seq'), p2::VARCHAR "
+                f"FROM (VALUES {params}) "
+                "AS imp_sen(p0, p2, sen_counter) ORDER BY sen_counter "
+                "RETURNING my_table.data1, my_table.id, my_table.id AS id__1",
+                parameters,
+            )
+        )
+
     def _ints_and_strs_setinputsizes(self, connection):
         return (
             connection.dialect._bind_typing_render_casts