From 437745e92e0cfe172cfb75a55f3476942fc1c81d Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 1 Dec 2025 11:07:24 -0500 Subject: [PATCH] support monotonic functions as sentinels Added support for monotonic server-side functions such as PostgreSQL 18's ``uuidv7()`` to work with the :ref:`engine_insertmanyvalues` feature. By passing ``monotonic=True`` to any :class:`.Function`, the function can be used as a sentinel for tracking row order in batched INSERT operations with RETURNING, allowing the ORM and Core to efficiently batch INSERT statements while maintaining deterministic row ordering. Fixes: #13014 Change-Id: I2fabf96c8fbdb6c1d255fd4781cbd31fed17e1e9 --- doc/build/changelog/migration_21.rst | 55 ++++++++++++ doc/build/changelog/unreleased_21/13014.rst | 18 ++++ doc/build/core/connections.rst | 96 +++++++++++++++++++-- lib/sqlalchemy/dialects/postgresql/base.py | 40 +++++++++ lib/sqlalchemy/sql/base.py | 1 + lib/sqlalchemy/sql/compiler.py | 10 ++- lib/sqlalchemy/sql/functions.py | 6 ++ lib/sqlalchemy/sql/schema.py | 60 ++++++++++--- test/dialect/postgresql/test_query.py | 91 ++++++++++++++++--- 9 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 doc/build/changelog/unreleased_21/13014.rst diff --git a/doc/build/changelog/migration_21.rst b/doc/build/changelog/migration_21.rst index 454a8562b2..bec10d8c19 100644 --- a/doc/build/changelog/migration_21.rst +++ b/doc/build/changelog/migration_21.rst @@ -1385,6 +1385,61 @@ To update an existing index: :ticket:`12948` +.. _change_13014_postgresql: + +Support for Server-Side Monotonic Functions such as uuidv7() in Batched INSERT Operations +------------------------------------------------------------------------------------------ + +SQLAlchemy 2.1 adds support for using monotonic server-side functions, such as +PostgreSQL 18's ``uuidv7()`` function, as sentinels in the +:ref:`engine_insertmanyvalues` feature. This allows these functions to work +efficiently with batched INSERT operations while maintaining deterministic row +ordering. + +When using a monotonic function as a default value, the ``monotonic=True`` +parameter must be passed to the function to indicate that it produces +monotonically increasing values. This enables SQLAlchemy to use the function's +values to correlate RETURNING results with input parameter sets:: + + from sqlalchemy import Table, Column, MetaData, UUID, Integer, func + + metadata = MetaData() + + t = Table( + "t", + metadata, + Column("id", UUID, server_default=func.uuidv7(monotonic=True), primary_key=True), + Column("x", Integer), + ) + +With the above configuration, when performing a batched INSERT with RETURNING +on PostgreSQL, SQLAlchemy will generate SQL that properly orders the rows +while allowing the server to generate the UUID values: + +.. sourcecode:: sql + + INSERT INTO t (x) SELECT p0::INTEGER FROM + (VALUES (%(x__0)s, 0), (%(x__1)s, 1), (%(x__2)s, 2), ...) + AS imp_sen(p0, sen_counter) ORDER BY sen_counter + RETURNING t.id, t.id AS id__1 + +The returned rows are then sorted by the monotonically increasing UUID values +to match the order of the input parameters, ensuring that ORM objects and +returned values are properly correlated. + +This feature works with both :paramref:`_schema.Column.server_default` (for +DDL-level defaults) and :paramref:`_schema.Column.default` (for ad-hoc +server-side function calls). + +.. seealso:: + + :ref:`engine_insertmanyvalues_monotonic_functions` - Complete documentation + on using monotonic functions + + :ref:`postgresql_monotonic_functions` - PostgreSQL-specific examples + +:ticket:`13014` + Microsoft SQL Server ==================== diff --git a/doc/build/changelog/unreleased_21/13014.rst b/doc/build/changelog/unreleased_21/13014.rst new file mode 100644 index 0000000000..1bc199b21f --- /dev/null +++ b/doc/build/changelog/unreleased_21/13014.rst @@ -0,0 +1,18 @@ +.. change:: + :tags: feature, postgresql, sql + :tickets: 13014 + + Added support for monotonic server-side functions such as PostgreSQL 18's + ``uuidv7()`` to work with the :ref:`engine_insertmanyvalues` feature. + By passing ``monotonic=True`` to any :class:`.Function`, the function can + be used as a sentinel for tracking row order in batched INSERT operations + with RETURNING, allowing the ORM and Core to efficiently batch INSERT + statements while maintaining deterministic row ordering. + + .. seealso:: + + :ref:`change_13014_postgresql` + + :ref:`engine_insertmanyvalues_monotonic_functions` + + :ref:`postgresql_monotonic_functions` diff --git a/doc/build/core/connections.rst b/doc/build/core/connections.rst index 54d6f4560e..b6ef42700a 100644 --- a/doc/build/core/connections.rst +++ b/doc/build/core/connections.rst @@ -1825,10 +1825,10 @@ performance example. including sample performance tests .. tip:: The :term:`insertmanyvalues` feature is a **transparently available** - performance feature which requires no end-user intervention in order for - it to take place as needed. This section describes the architecture - of the feature as well as how to measure its performance and tune its - behavior in order to optimize the speed of bulk INSERT statements, + performance feature which typically requires no end-user intervention in + order for it to take place as needed. This section describes the + architecture of the feature as well as how to measure its performance and + tune its behavior in order to optimize the speed of bulk INSERT statements, particularly as used by the ORM. As more databases have added support for INSERT..RETURNING, SQLAlchemy has @@ -2100,12 +2100,10 @@ also individually passed along to event listeners such as below). - - .. _engine_insertmanyvalues_sentinel_columns: Configuring Sentinel Columns -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In typical cases, the "insertmanyvalues" feature in order to provide INSERT..RETURNING with deterministic row order will automatically determine a @@ -2242,6 +2240,90 @@ In the example above, both "my_table" and "sub_table" will have an additional integer column named "_sentinel" that can be used by the "insertmanyvalues" feature to help optimize bulk inserts used by the ORM. +.. _engine_insertmanyvalues_monotonic_functions: + +Configuring Monotonic Functions such as UUIDV7 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using a monotonic function such as uuidv7 is supported by the "insertmanyvalues" +feature most easily by establishing the function as a client side callable, +e.g. using Python's built-in ``uuid.uuid7()`` call by providing the callable +to the :paramref:`_schema.Connection.default` parameter:: + + import uuid + + from sqlalchemy import UUID, Integer + + t = Table( + "t", + metadata, + Column("id", UUID, default=uuid.uuid7, primary_key=True), + Column("x", Integer), + ) + +In the above example, SQLAlchemy will invoke Python's ``uuid.uuid7()`` function +to create new primary key identifiers, which will be batchable by the +"insertmanyvalues" feature. + +However, some databases like PostgreSQL provide a server-side function for +uuid7 called ``uuidv7()``; in SQLAlchemy, this would be available from the +:data:`_sql.func` namespace as ``func.uuidv7()``, and may be configured on a +:class:`.Column` using either :paramref:`_schema.Connection.default` to allow +it to be called as needed, or :paramref:`_schema.Connection.server_default` to +establish it as part of the table's DDL. However, for full batched "insertmanyvalues" +behavior including support for sorted RETURNING (as would allow the ORM to +most effectively batch INSERT statements), an additional directive must be +included indicating that the function produces +monotonically increasing values, which is the ``monotonic=True`` directive. +This is illustrated below as a DDL server default using +:paramref:`_schema.Connection.server_default`:: + + from sqlalchemy import func, Integer + + t = Table( + "t", + metadata, + Column("id", UUID, server_default=func.uuidv7(monotonic=True), primary_key=True), + Column("x", Integer), + ) + +Using the above form, a batched INSERT...RETURNING on PostgreSQL with +:paramref:`.UpdateBase.returning.sort_by_parameter_order` set to True will +look like: + +.. sourcecode:: sql + + INSERT INTO t (x) SELECT p0::INTEGER FROM + (VALUES (%(x__0)s, 0), (%(x__1)s, 1), (%(x__2)s, 2), ...) + AS imp_sen(p0, sen_counter) ORDER BY sen_counter + RETURNING t.id, t.id AS id__1 + +Similarly if the function is configured as an ad-hoc server side function +using :paramref:`_schema.Connection.default`:: + + t = Table( + "t", + metadata, + Column("id", UUID, default=func.uuidv7(monotonic=True), primary_key=True), + Column("x", Integer), + ) + +The function will then be rendered in the SQL statement explicitly: + +.. sourcecode:: sql + + INSERT INTO t (id, x) SELECT uuidv7(), p1::INTEGER FROM + (VALUES (%(x__0)s, 0), (%(x__1)s, 1), (%(x__2)s, 2), ...) + AS imp_sen(p1, sen_counter) ORDER BY sen_counter + RETURNING t.id, t.id AS id__1 + +.. versionadded:: 2.1 Added support for explicit monotonic server side functions + using ``monotonic=True`` with any :class:`.Function`. + +.. seealso:: + + :ref:`postgresql_monotonic_functions` + .. _engine_insertmanyvalues_page_size: diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index e973d28fef..c4f166ad66 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -114,6 +114,46 @@ The CREATE TABLE for the above :class:`_schema.Table` object would be: PRIMARY KEY (id) ) +.. _postgresql_monotonic_functions: + +PostgreSQL 18 and above UUID with uuidv7 as a server default +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +PostgreSQL 18's ``uuidv7`` SQL function is available as any other +SQL function using the :data:`_sql.func` namespace:: + + >>> from sqlalchemy import select, func + >>> print(select(func.uuidv7())) + SELECT uuidv7() AS uuidv7_1 + +When using ``func.uuidv7()`` as a default on a :class:`.Column` using either +Core or ORM, an extra directive ``monotonic=True`` may be passed which +indicates this function produces monotonically increasing values; this in turn +allows Core and ORM to use a more efficient batched form of INSERT for large +insert operations:: + + import uuid + + + class MyClass(Base): + __tablename__ = "my_table" + + id: Mapped[uuid.UUID] = mapped_column( + server_default=func.uuidv7(monotonic=True) + ) + +With the above mapping, the ORM will be able to efficiently batch rows when +running bulk insert operations using the :ref:`engine_insertmanyvalues` +feature. + +.. versionadded:: 2.1 Added ``monotonic=True`` to allow functions like PostgreSQL's + ``uuidv7()`` to work with batched "insertmanyvalues" + +.. seealso:: + + :ref:`engine_insertmanyvalues_monotonic_functions` + + .. _postgresql_ss_cursors: Server Side Cursors diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 86b2662d8d..94d525ab64 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -1636,6 +1636,7 @@ class _SentinelDefaultCharacterization(Enum): SERVERSIDE = "serverside" IDENTITY = "identity" SEQUENCE = "sequence" + MONOTONIC_FUNCTION = "monotonic" class _SentinelColumnCharacterization(NamedTuple): diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 371d872b44..43e70d08b0 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -646,11 +646,14 @@ class InsertmanyvaluesSentinelOpts(FastIntFlag): AUTOINCREMENT = 2 IDENTITY = 4 SEQUENCE = 8 + MONOTONIC_FUNCTION = 16 - ANY_AUTOINCREMENT = AUTOINCREMENT | IDENTITY | SEQUENCE + ANY_AUTOINCREMENT = ( + AUTOINCREMENT | IDENTITY | SEQUENCE | MONOTONIC_FUNCTION + ) _SUPPORTED_OR_NOT = NOT_SUPPORTED | ANY_AUTOINCREMENT - USE_INSERT_FROM_SELECT = 16 + USE_INSERT_FROM_SELECT = 32 RENDER_SELECT_COL_CASTS = 64 @@ -5616,6 +5619,9 @@ class SQLCompiler(Compiled): _SentinelDefaultCharacterization.SEQUENCE: ( InsertmanyvaluesSentinelOpts.SEQUENCE ), + _SentinelDefaultCharacterization.MONOTONIC_FUNCTION: ( + InsertmanyvaluesSentinelOpts.MONOTONIC_FUNCTION + ), } ) _sentinel_col_autoinc_lookup = _sentinel_col_non_autoinc_lookup.union( diff --git a/lib/sqlalchemy/sql/functions.py b/lib/sqlalchemy/sql/functions.py index e5cf5f53aa..11d438bf77 100644 --- a/lib/sqlalchemy/sql/functions.py +++ b/lib/sqlalchemy/sql/functions.py @@ -146,6 +146,8 @@ class FunctionElement( packagenames: Tuple[str, ...] = () + monotonic: bool = False + _has_args = False _with_ordinality = False _table_value_type: Optional[TableValueType] = None @@ -1400,6 +1402,7 @@ class Function(FunctionElement[_T]): *clauses: _ColumnExpressionOrLiteralArgument[_T], type_: None = ..., packagenames: Optional[Tuple[str, ...]] = ..., + monotonic: bool = ..., ) -> None: ... @overload @@ -1409,6 +1412,7 @@ class Function(FunctionElement[_T]): *clauses: _ColumnExpressionOrLiteralArgument[Any], type_: _TypeEngineArgument[_T] = ..., packagenames: Optional[Tuple[str, ...]] = ..., + monotonic: bool = ..., ) -> None: ... def __init__( @@ -1417,6 +1421,7 @@ class Function(FunctionElement[_T]): *clauses: _ColumnExpressionOrLiteralArgument[Any], type_: Optional[_TypeEngineArgument[_T]] = None, packagenames: Optional[Tuple[str, ...]] = None, + monotonic: bool = False, ) -> None: """Construct a :class:`.Function`. @@ -1426,6 +1431,7 @@ class Function(FunctionElement[_T]): """ self.packagenames = packagenames or () self.name = name + self.monotonic = monotonic # if type is None, we get NULLTYPE, which is our _T. But I don't # know how to get the overloads to express that correctly diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index a63b21d670..7a9a5f8555 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -1147,6 +1147,10 @@ class Table( default_characterization = ( _SentinelDefaultCharacterization.SENTINEL_DEFAULT ) + elif the_sentinel_zero.default._is_monotonic_fn: + default_characterization = ( + _SentinelDefaultCharacterization.MONOTONIC_FUNCTION + ) elif default_is_sequence(the_sentinel_zero.default): if the_sentinel_zero.default._increment_is_negative: if sentinel_is_explicit: @@ -1169,24 +1173,38 @@ class Table( ) elif the_sentinel_zero.server_default is not None: if sentinel_is_explicit: - raise exc.InvalidRequestError( - f"Column {the_sentinel[0]} can't be a sentinel column " - "because it uses an explicit server side default " - "that's not the Identity() default." + if not the_sentinel_zero.server_default._is_monotonic_fn: + raise exc.InvalidRequestError( + f"Column {the_sentinel[0]} can't be a sentinel " + "column " + "because it uses an explicit server side default " + "that's not the Identity() default." + ) + else: + default_characterization = ( + _SentinelDefaultCharacterization.MONOTONIC_FUNCTION + ) + else: + default_characterization = ( + _SentinelDefaultCharacterization.SERVERSIDE ) - default_characterization = ( - _SentinelDefaultCharacterization.SERVERSIDE - ) - if the_sentinel is None and self.primary_key: assert autoinc_col is None # determine for non-autoincrement pk if all elements are # client side for _pkc in self.primary_key: - if _pkc.server_default is not None or ( - _pkc.default and not _pkc.default.is_callable + if ( + _pkc.server_default is not None + and not _pkc.server_default._is_monotonic_fn + ): + break + + if ( + _pkc.default + and not _pkc.default.is_callable + and not _pkc.default._is_monotonic_fn ): break else: @@ -3512,6 +3530,7 @@ class DefaultGenerator(Executable, SchemaItem): is_scalar = False has_arg = False is_sentinel = False + _is_monotonic_fn = False column: Optional[Column[Any]] def __init__(self, for_update: bool = False) -> None: @@ -3579,6 +3598,8 @@ class ColumnDefault(DefaultGenerator, ABC): arg: Any + _is_monotonic_fn = False + @overload def __new__( cls, arg: Callable[..., Any], for_update: bool = ... @@ -3726,6 +3747,15 @@ class ColumnElementColumnDefault(ColumnDefault): arg=self.arg, for_update=self.for_update ) + @util.memoized_property + @util.preload_module("sqlalchemy.sql.functions") + def _is_monotonic_fn(self) -> bool: + functions = util.preloaded.sql_functions + return ( + isinstance(self.arg, functions.FunctionElement) + and self.arg.monotonic + ) + @util.memoized_property @util.preload_module("sqlalchemy.sql.sqltypes") def _arg_is_typed(self) -> bool: @@ -4207,6 +4237,7 @@ class FetchedValue(SchemaEventTarget): has_argument = False is_clause_element = False is_identity = False + _is_monotonic_fn = False column: Optional[Column[Any]] @@ -4277,6 +4308,15 @@ class DefaultClause(FetchedValue): self.arg = arg self.reflected = _reflected + @util.memoized_property + @util.preload_module("sqlalchemy.sql.functions") + def _is_monotonic_fn(self) -> bool: + functions = util.preloaded.sql_functions + return ( + isinstance(self.arg, functions.FunctionElement) + and self.arg.monotonic + ) + def _copy(self) -> DefaultClause: return DefaultClause( arg=self.arg, for_update=self.for_update, _reflected=self.reflected diff --git a/test/dialect/postgresql/test_query.py b/test/dialect/postgresql/test_query.py index 8a4be4cf2d..f2d38e3a89 100644 --- a/test/dialect/postgresql/test_query.py +++ b/test/dialect/postgresql/test_query.py @@ -1,4 +1,5 @@ import datetime +from typing import Any from sqlalchemy import and_ from sqlalchemy import cast @@ -40,6 +41,7 @@ from sqlalchemy.testing import engines from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures +from sqlalchemy.testing import mock from sqlalchemy.testing.assertsql import CursorSQL from sqlalchemy.testing.assertsql import DialectSQL @@ -162,7 +164,22 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults): metadata.create_all(connection) self._assert_data_noautoincrement(connection, table) - def test_full_cursor_insertmanyvalues_sql(self, metadata, connection): + @testing.variation( + "default_type", + [ + "ss_sequence", + ("sd_uuidv7", testing.only_on("postgresql>=18")), + ("cd_uuidv7", testing.only_on("postgresql>=18")), + ], + ) + @testing.variation("set_sentinel", [True, False]) + def test_full_cursor_insertmanyvalues_sql( + self, + metadata, + connection, + default_type: testing.Variation, + set_sentinel: testing.Variation, + ): """test compilation/ execution of the subquery form including the fix for #13015 @@ -174,22 +191,49 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults): """ - my_table = Table( - "my_table", - metadata, - Column("data1", String(50)), - Column( + if set_sentinel: + col_kw: dict[str, Any] = {"insert_sentinel": True} + else: + col_kw = {} + + if default_type.ss_sequence: + col = Column( "id", Integer, Sequence("foo_id_seq", start=1, data_type=Integer), primary_key=True, - ), + **col_kw, + ) + elif default_type.sd_uuidv7: + col = Column( + "id", + Uuid(), + server_default=func.uuidv7(monotonic=True), + primary_key=True, + **col_kw, + ) + elif default_type.cd_uuidv7: + col = Column( + "id", + Uuid(), + default=func.uuidv7(monotonic=True), + primary_key=True, + **col_kw, + ) + else: + default_type.fail() + + my_table = Table( + "my_table", + metadata, + Column("data1", String(50)), + col, Column("data2", String(50)), ) my_table.create(connection) with self.sql_execution_asserter(connection) as assert_: - connection.execute( + result = connection.execute( my_table.insert().returning( my_table.c.data1, my_table.c.id, @@ -201,6 +245,16 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults): ], ) + rows = result.all() + if default_type.ss_sequence: + eq_(rows, [(f"d1 row {i}", i + 1) for i in range(10)]) + else: + # monotonic UUIDs are sorted + eq_( + list(sorted(rows, key=lambda row: row.id)), + [(f"d1 row {i}", mock.ANY) for i in range(10)], + ) + render_bind_casts = ( String().dialect_impl(connection.dialect).render_bind_cast ) @@ -249,12 +303,27 @@ class InsertTest(fixtures.TestBase, AssertsExecutionResults): else: assert False + if default_type.ss_sequence: + sqlfunc_sql = "nextval('foo_id_seq'), " + ins_cols = "id, " + p2 = "p2" + elif default_type.sd_uuidv7: + sqlfunc_sql = "" + ins_cols = "" + p2 = "p1" + elif default_type.cd_uuidv7: + sqlfunc_sql = "uuidv7(), " + ins_cols = "id, " + p2 = "p2" + else: + default_type.fail() + assert_.assert_( CursorSQL( - "INSERT INTO my_table (data1, id, data2) " - f"SELECT p0::VARCHAR, nextval('foo_id_seq'), p2::VARCHAR " + f"INSERT INTO my_table (data1, {ins_cols}data2) " + f"SELECT p0::VARCHAR, {sqlfunc_sql}{p2}::VARCHAR " f"FROM (VALUES {params}) " - "AS imp_sen(p0, p2, sen_counter) ORDER BY sen_counter " + f"AS imp_sen(p0, {p2}, sen_counter) ORDER BY sen_counter " "RETURNING my_table.data1, my_table.id, my_table.id AS id__1", parameters, ) -- 2.47.3