]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
run sentinel server side fns outside of VALUES
authorMike Bayer <mike_mp@zzzcomputing.com>
Mon, 1 Dec 2025 20:11:50 +0000 (15:11 -0500)
committerMike Bayer <mike_mp@zzzcomputing.com>
Wed, 3 Dec 2025 03:53:17 +0000 (22:53 -0500)
Fixed the structure of the SQL string used for the
:ref:`engine_insertmanyvalues` feature when an explicit sequence with
``nextval()`` is used. The SQL function invocation for the sequence has
been moved from being rendered inline within each tuple inside of VALUES to
being rendered once in the SELECT that reads from VALUES. This change
ensures the function is invoked in the correct order as rows are processed,
rather than assuming PostgreSQL will execute inline function calls within
VALUES in a particular order. While current PostgreSQL versions appear to
handle the previous approach correctly, the database does not guarantee
this behavior for future versions.

Fixes: #13015
Change-Id: Ia0a2a4e8f89e21852d7cb550dfa5d9ea9447b590
(cherry picked from commit c5d09f5ed4b4e37cfdd033026e2f67382ee9fcb3)

doc/build/changelog/unreleased_20/13015.rst [new file with mode: 0644]
lib/sqlalchemy/engine/base.py
lib/sqlalchemy/sql/compiler.py
lib/sqlalchemy/sql/crud.py
lib/sqlalchemy/testing/assertsql.py
test/dialect/postgresql/test_query.py

diff --git a/doc/build/changelog/unreleased_20/13015.rst b/doc/build/changelog/unreleased_20/13015.rst
new file mode 100644 (file)
index 0000000..afadb62
--- /dev/null
@@ -0,0 +1,14 @@
+.. change::
+    :tags: bug, postgresql
+    :tickets: 13015
+
+    Fixed the structure of the SQL string used for the
+    :ref:`engine_insertmanyvalues` feature when an explicit sequence with
+    ``nextval()`` is used. The SQL function invocation for the sequence has
+    been moved from being rendered inline within each tuple inside of VALUES to
+    being rendered once in the SELECT that reads from VALUES. This change
+    ensures the function is invoked in the correct order as rows are processed,
+    rather than assuming PostgreSQL will execute inline function calls within
+    VALUES in a particular order. While current PostgreSQL versions appear to
+    handle the previous approach correctly, the database does not guarantee
+    this behavior for future versions.
index 82729ee5798f3c6bfc3990c28b5ac532d0096245..9f1f76d9e1759456259327899bb3c0d3252b6ed9 100644 (file)
@@ -2022,6 +2022,13 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]):
         else:
             do_execute_dispatch = ()
 
+        if engine_events:
+            _WORKAROUND_ISSUE_13018 = getattr(
+                self, "_WORKAROUND_ISSUE_13018", False
+            )
+        else:
+            _WORKAROUND_ISSUE_13018 = False
+
         if self._echo:
             stats = context._get_cache_stats() + " (insertmanyvalues)"
 
@@ -2136,8 +2143,9 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]):
                 self.dispatch.after_cursor_execute(
                     self,
                     cursor,
-                    str_statement,
-                    effective_parameters,
+                    # TODO: this will be fixed by #13018
+                    sub_stmt if _WORKAROUND_ISSUE_13018 else str_statement,
+                    sub_params if _WORKAROUND_ISSUE_13018 else parameters,
                     context,
                     context.executemany,
                 )
index 3f20c93c4bb8e8cc63283295dd8c4a9037f866f3..0cd866b96ffc2ecee83e00b54129fd1a8ad687b9 100644 (file)
@@ -6074,9 +6074,7 @@ class SQLCompiler(Compiled):
                 self._insertmanyvalues = _InsertManyValues(
                     True,
                     self.dialect.default_metavalue_token,
-                    cast(
-                        "List[crud._CrudParamElementStr]", crud_params_single
-                    ),
+                    crud_params_single,
                     counted_bindparam,
                     sort_by_parameter_order=(
                         insert_stmt._sort_by_parameter_order
@@ -6098,100 +6096,123 @@ class SQLCompiler(Compiled):
                     for crud_param_set in crud_params_struct.all_multi_params
                 ),
             )
-        else:
-            insert_single_values_expr = ", ".join(
-                [
-                    value
-                    for _, _, value, _ in cast(
-                        "List[crud._CrudParamElementStr]",
-                        crud_params_single,
-                    )
-                ]
-            )
+        elif use_insertmanyvalues:
+            if (
+                implicit_sentinel
+                and (
+                    self.dialect.insertmanyvalues_implicit_sentinel
+                    & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
+                )
+                # this is checking if we have
+                # INSERT INTO table (id) VALUES (DEFAULT).
+                and not (crud_params_struct.is_default_metavalue_only)
+            ):
+                # if we have a sentinel column that is server generated,
+                # then for selected backends render the VALUES list as a
+                # subquery.  This is the orderable form supported by
+                # PostgreSQL and in fewer cases SQL Server
+                embed_sentinel_value = True
+
+                render_bind_casts = (
+                    self.dialect.insertmanyvalues_implicit_sentinel
+                    & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS
+                )
 
-            if use_insertmanyvalues:
-                if (
-                    implicit_sentinel
-                    and (
-                        self.dialect.insertmanyvalues_implicit_sentinel
-                        & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT
-                    )
-                    # this is checking if we have
-                    # INSERT INTO table (id) VALUES (DEFAULT).
-                    and not (crud_params_struct.is_default_metavalue_only)
-                ):
-                    # if we have a sentinel column that is server generated,
-                    # then for selected backends render the VALUES list as a
-                    # subquery.  This is the orderable form supported by
-                    # PostgreSQL and SQL Server.
-                    embed_sentinel_value = True
+                add_sentinel_set = add_sentinel_cols or ()
 
-                    render_bind_casts = (
-                        self.dialect.insertmanyvalues_implicit_sentinel
-                        & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS
-                    )
+                insert_single_values_expr = ", ".join(
+                    [
+                        value
+                        for col, _, value, _ in crud_params_single
+                        if col not in add_sentinel_set
+                    ]
+                )
 
-                    colnames = ", ".join(
-                        f"p{i}" for i, _ in enumerate(crud_params_single)
-                    )
+                colnames = ", ".join(
+                    f"p{i}"
+                    for i, cp in enumerate(crud_params_single)
+                    if cp[0] not in add_sentinel_set
+                )
 
-                    if render_bind_casts:
-                        # render casts for the SELECT list.  For PG, we are
-                        # already rendering bind casts in the parameter list,
-                        # selectively for the more "tricky" types like ARRAY.
-                        # however, even for the "easy" types, if the parameter
-                        # is NULL for every entry, PG gives up and says
-                        # "it must be TEXT", which fails for other easy types
-                        # like ints.  So we cast on this side too.
-                        colnames_w_cast = ", ".join(
+                if render_bind_casts:
+                    # render casts for the SELECT list.  For PG, we are
+                    # already rendering bind casts in the parameter list,
+                    # selectively for the more "tricky" types like ARRAY.
+                    # however, even for the "easy" types, if the parameter
+                    # is NULL for every entry, PG gives up and says
+                    # "it must be TEXT", which fails for other easy types
+                    # like ints.  So we cast on this side too.
+                    colnames_w_cast = ", ".join(
+                        (
                             self.render_bind_cast(
                                 col.type,
                                 col.type._unwrapped_dialect_impl(self.dialect),
                                 f"p{i}",
                             )
-                            for i, (col, *_) in enumerate(crud_params_single)
+                            if col not in add_sentinel_set
+                            else expr
+                        )
+                        for i, (col, _, expr, _) in enumerate(
+                            crud_params_single
                         )
-                    else:
-                        colnames_w_cast = colnames
-
-                    text += (
-                        f" SELECT {colnames_w_cast} FROM "
-                        f"(VALUES ({insert_single_values_expr})) "
-                        f"AS imp_sen({colnames}, sen_counter) "
-                        "ORDER BY sen_counter"
                     )
                 else:
-                    # otherwise, if no sentinel or backend doesn't support
-                    # orderable subquery form, use a plain VALUES list
-                    embed_sentinel_value = False
-                    text += f" VALUES ({insert_single_values_expr})"
+                    colnames_w_cast = ", ".join(
+                        (f"p{i}" if col not in add_sentinel_set else expr)
+                        for i, (col, _, expr, _) in enumerate(
+                            crud_params_single
+                        )
+                    )
 
-                self._insertmanyvalues = _InsertManyValues(
-                    is_default_expr=False,
-                    single_values_expr=insert_single_values_expr,
-                    insert_crud_params=cast(
-                        "List[crud._CrudParamElementStr]",
-                        crud_params_single,
-                    ),
-                    num_positional_params_counted=counted_bindparam,
-                    sort_by_parameter_order=(
-                        insert_stmt._sort_by_parameter_order
-                    ),
-                    includes_upsert_behaviors=(
-                        insert_stmt._post_values_clause is not None
-                    ),
-                    sentinel_columns=add_sentinel_cols,
-                    num_sentinel_columns=(
-                        len(add_sentinel_cols) if add_sentinel_cols else 0
-                    ),
-                    sentinel_param_keys=named_sentinel_params,
-                    implicit_sentinel=implicit_sentinel,
-                    embed_values_counter=embed_sentinel_value,
+                insert_crud_params = [
+                    elem
+                    for elem in crud_params_single
+                    if elem[0] not in add_sentinel_set
+                ]
+
+                text += (
+                    f" SELECT {colnames_w_cast} FROM "
+                    f"(VALUES ({insert_single_values_expr})) "
+                    f"AS imp_sen({colnames}, sen_counter) "
+                    "ORDER BY sen_counter"
                 )
 
             else:
+                # otherwise, if no sentinel or backend doesn't support
+                # orderable subquery form, use a plain VALUES list
+                embed_sentinel_value = False
+                insert_crud_params = crud_params_single
+                insert_single_values_expr = ", ".join(
+                    [value for _, _, value, _ in crud_params_single]
+                )
+
                 text += f" VALUES ({insert_single_values_expr})"
 
+            self._insertmanyvalues = _InsertManyValues(
+                is_default_expr=False,
+                single_values_expr=insert_single_values_expr,
+                insert_crud_params=insert_crud_params,
+                num_positional_params_counted=counted_bindparam,
+                sort_by_parameter_order=(insert_stmt._sort_by_parameter_order),
+                includes_upsert_behaviors=(
+                    insert_stmt._post_values_clause is not None
+                ),
+                sentinel_columns=add_sentinel_cols,
+                num_sentinel_columns=(
+                    len(add_sentinel_cols) if add_sentinel_cols else 0
+                ),
+                sentinel_param_keys=named_sentinel_params,
+                implicit_sentinel=implicit_sentinel,
+                embed_values_counter=embed_sentinel_value,
+            )
+
+        else:
+            insert_single_values_expr = ", ".join(
+                [value for _, _, value, _ in crud_params_single]
+            )
+
+            text += f" VALUES ({insert_single_values_expr})"
+
         if insert_stmt._post_values_clause is not None:
             post_values_clause = self.process(
                 insert_stmt._post_values_clause, **kw
index 597f33d799424260c97a510c2383b8531f012db7..cff9c2a5aa990779416c4d1bdaf64592d4a0bc10 100644 (file)
@@ -104,7 +104,7 @@ _CrudParamSequence = List[_CrudParamElement]
 
 
 class _CrudParams(NamedTuple):
-    single_params: _CrudParamSequence
+    single_params: List[_CrudParamElementStr]
     all_multi_params: List[Sequence[_CrudParamElementStr]]
     is_default_metavalue_only: bool = False
     use_insertmanyvalues: bool = False
@@ -260,7 +260,7 @@ def _get_crud_params(
         }
 
     # create a list of column assignment clauses as tuples
-    values: List[_CrudParamElement] = []
+    values: List[_CrudParamElementStr] = []
 
     if stmt_parameter_tuples is not None:
         _get_stmt_parameter_tuples_params(
index 81c7138c4b5ed21f884d330a6ce3903ec19c0302..603955a8b5196981d7149d51b62e04bbdc547696 100644 (file)
@@ -275,7 +275,9 @@ class DialectSQL(CompiledSQL):
 
         # TODO: why do we need this part?
         for real_stmt in execute_observed.statements:
-            if self._compare_no_space(real_stmt.statement, received_stmt):
+            if self._compare_no_space(
+                real_stmt.context.statement, received_stmt
+            ):
                 break
         else:
             raise AssertionError(
@@ -482,6 +484,7 @@ def assert_engine(engine):
     def connection_execute(
         conn, clauseelement, multiparams, params, execution_options
     ):
+        conn._WORKAROUND_ISSUE_13018 = True
         # grab the original statement + params before any cursor
         # execution
         orig[:] = clauseelement, multiparams, params
@@ -502,6 +505,7 @@ def assert_engine(engine):
         else:
             obs = SQLExecuteObserved(context, orig[0], orig[1], orig[2])
             asserter.accumulated.append(obs)
+
         obs.statements.append(
             SQLCursorExecuteObserved(
                 statement, parameters, context, executemany
index 585630b81417360b3d1e10e40be89e66acfbbc1f..127bf29b122e373a760b1aaeaa5089afe5d2ac24 100644 (file)
@@ -161,6 +161,104 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults):
         metadata.create_all(connection)
         self._assert_data_noautoincrement(connection, table)
 
+    def test_full_cursor_insertmanyvalues_sql(self, metadata, connection):
+        """test compilation/ execution of the subquery form including
+        the fix for #13015
+
+        The specific form in question for #13015 is only supported by the
+        PostgreSQL dialect right now.   MSSQL would also use it for a server
+        side function that produces monotonic values, but we have no support
+        for that outside of sequence next right now, where SQL Server doesn't
+        support invokving the sequence outside of the VALUES tuples.
+
+        """
+
+        my_table = Table(
+            "my_table",
+            metadata,
+            Column("data1", String(50)),
+            Column(
+                "id",
+                Integer,
+                Sequence("foo_id_seq", start=1, data_type=Integer),
+                primary_key=True,
+            ),
+            Column("data2", String(50)),
+        )
+
+        my_table.create(connection)
+        with self.sql_execution_asserter(connection) as assert_:
+            connection.execute(
+                my_table.insert().returning(
+                    my_table.c.data1,
+                    my_table.c.id,
+                    sort_by_parameter_order=True,
+                ),
+                [
+                    {"data1": f"d1 row {i}", "data2": f"d2 row {i}"}
+                    for i in range(10)
+                ],
+            )
+
+        render_bind_casts = (
+            String().dialect_impl(connection.dialect).render_bind_cast
+        )
+
+        if render_bind_casts:
+            varchar_cast = "::VARCHAR"
+        else:
+            varchar_cast = ""
+
+        if connection.dialect.paramstyle == "pyformat":
+            params = ", ".join(
+                f"(%(data1__{i})s{varchar_cast}, %(data2__{i})s"
+                f"{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = {}
+            for i in range(10):
+                parameters[f"data1__{i}"] = f"d1 row {i}"
+                parameters[f"data2__{i}"] = f"d2 row {i}"
+
+        elif connection.dialect.paramstyle == "numeric_dollar":
+            params = ", ".join(
+                f"(${i * 2 + 1}{varchar_cast}, "
+                f"${i * 2 + 2}{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        elif connection.dialect.paramstyle == "format":
+            params = ", ".join(
+                f"(%s{varchar_cast}, %s{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        elif connection.dialect.paramstyle == "qmark":
+            params = ", ".join(
+                f"(?{varchar_cast}, ?{varchar_cast}, {i})"
+                for i in range(0, 10)
+            )
+            parameters = ()
+            for i in range(10):
+                parameters += (f"d1 row {i}", f"d2 row {i}")
+        else:
+            assert False
+
+        assert_.assert_(
+            CursorSQL(
+                "INSERT INTO my_table (data1, id, data2) "
+                f"SELECT p0::VARCHAR, nextval('foo_id_seq'), p2::VARCHAR "
+                f"FROM (VALUES {params}) "
+                "AS imp_sen(p0, p2, sen_counter) ORDER BY sen_counter "
+                "RETURNING my_table.data1, my_table.id, my_table.id AS id__1",
+                parameters,
+            )
+        )
+
     def _ints_and_strs_setinputsizes(self, connection):
         return (
             connection.dialect._bind_typing_render_casts