From: Mike Bayer Date: Wed, 1 Nov 2023 22:06:49 +0000 (-0400) Subject: implement render_nulls for new style bulk ORM INSERT X-Git-Tag: rel_2_0_23~1 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=392cd0edb4822202fbd7203d52f640a7cbafb63b;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git implement render_nulls for new style bulk ORM INSERT Implemented the :paramref:`_orm.Session.bulk_insert_mappings.render_nulls` parameter for new style bulk ORM inserts, allowing ``render_nulls=True`` as an execution option. This allows for bulk ORM inserts with a mixture of ``None`` values in the parameter dictionaries to use a single batch of rows for a given set of dicationary keys, rather than breaking up into batches that omit the NULL columns from each INSERT. Fixes: #10575 Change-Id: I47382bb586626e98d3964dfa8f55acb2c9c6d8d4 --- diff --git a/doc/build/changelog/unreleased_20/10575.rst b/doc/build/changelog/unreleased_20/10575.rst new file mode 100644 index 0000000000..5ae64f5200 --- /dev/null +++ b/doc/build/changelog/unreleased_20/10575.rst @@ -0,0 +1,14 @@ +.. change:: + :tags: usecase, orm + :tickets: 10575 + + Implemented the :paramref:`_orm.Session.bulk_insert_mappings.render_nulls` + parameter for new style bulk ORM inserts, allowing ``render_nulls=True`` as + an execution option. This allows for bulk ORM inserts with a mixture of + ``None`` values in the parameter dictionaries to use a single batch of rows + for a given set of dicationary keys, rather than breaking up into batches + that omit the NULL columns from each INSERT. + + .. seealso:: + + :ref:`orm_queryguide_insert_null_params` diff --git a/doc/build/orm/queryguide/dml.rst b/doc/build/orm/queryguide/dml.rst index 5c7acb2086..967397f1ae 100644 --- a/doc/build/orm/queryguide/dml.rst +++ b/doc/build/orm/queryguide/dml.rst @@ -257,6 +257,111 @@ three INSERT statements, grouped along the specific sets of keys in each dictionary while still maintaining row order, i.e. ``("name", "fullname", "species")``, ``("name", "species")``, ``("name","fullname", "species")``. +.. _orm_queryguide_insert_null_params: + +Sending NULL values in ORM bulk INSERT statements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bulk ORM insert feature draws upon a behavior that is also present +in the legacy "bulk" insert behavior, as well as in the ORM unit of work +overall, which is that rows which contain NULL values are INSERTed using +a statement that does not refer to those columns; the rationale here is so +that backends and schemas which contain server-side INSERT defaults that may +be sensitive to the presence of a NULL value vs. no value present will +produce a server side value as expected. This default behavior +has the effect of breaking up the bulk inserted batches into more +batches of fewer rows:: + + >>> session.execute( + ... insert(User), + ... [ + ... { + ... "name": "name_a", + ... "fullname": "Employee A", + ... "species": "Squid", + ... }, + ... { + ... "name": "name_b", + ... "fullname": "Employee B", + ... "species": "Squirrel", + ... }, + ... { + ... "name": "name_c", + ... "fullname": "Employee C", + ... "species": None, + ... }, + ... { + ... "name": "name_d", + ... "fullname": "Employee D", + ... "species": "Bluefish", + ... }, + ... ], + ... ) + {execsql}INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?) + [...] [('name_a', 'Employee A', 'Squid'), ('name_b', 'Employee B', 'Squirrel')] + INSERT INTO user_account (name, fullname) VALUES (?, ?) + [...] ('name_c', 'Employee C') + INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?) + [...] ('name_d', 'Employee D', 'Bluefish') + ... + +.. Setup code, not for display + + >>> session.rollback() + ROLLBACK... + >>> session.connection() + BEGIN (implicit)... + +Above, the bulk INSERT of four rows is broken into three separate statements, +the second statement reformatted to not refer to the NULL column for the single +parameter dictionary that contains a ``None`` value. This default +behavior may be undesirable when many rows in the dataset contain random NULL +values, as it causes the "executemany" operation to be broken into a larger +number of smaller operations; particularly when relying upon +:ref:`insertmanyvalues ` to reduce the overall number +of statements, this can have a bigger performance impact. + +To disable the handling of ``None`` values in the parameters into separate +batches, pass the execution option ``render_nulls=True``; this will cause +all parameter dictionaries to be treated equivalently, assuming the same +set of keys in each dictionary:: + + >>> session.execute( + ... insert(User).execution_options(render_nulls=True), + ... [ + ... { + ... "name": "name_a", + ... "fullname": "Employee A", + ... "species": "Squid", + ... }, + ... { + ... "name": "name_b", + ... "fullname": "Employee B", + ... "species": "Squirrel", + ... }, + ... { + ... "name": "name_c", + ... "fullname": "Employee C", + ... "species": None, + ... }, + ... { + ... "name": "name_d", + ... "fullname": "Employee D", + ... "species": "Bluefish", + ... }, + ... ], + ... ) + {execsql}INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?) + [...] [('name_a', 'Employee A', 'Squid'), ('name_b', 'Employee B', 'Squirrel'), ('name_c', 'Employee C', None), ('name_d', 'Employee D', 'Bluefish')] + ... + +Above, all parameter dictionaries are sent in a single INSERT batch, including +the ``None`` value present in the third parameter dictionary. + +.. versionadded:: 2.0.23 Added the ``render_nulls`` execution option which + mirrors the behavior of the legacy + :paramref:`_orm.Session.bulk_insert_mappings.render_nulls` parameter. + .. _orm_queryguide_insert_joined_table_inheritance: Bulk INSERT for Joined Table Inheritance diff --git a/lib/sqlalchemy/orm/_typing.py b/lib/sqlalchemy/orm/_typing.py index 60664283da..3085351ba3 100644 --- a/lib/sqlalchemy/orm/_typing.py +++ b/lib/sqlalchemy/orm/_typing.py @@ -93,6 +93,7 @@ class _OrmKnownExecutionOptions(_CoreKnownExecutionOptions, total=False): dml_strategy: DMLStrategyArgument is_delete_using: bool is_update_from: bool + render_nulls: bool OrmExecuteOptionsParameter = Union[ diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py index 5f9db51b56..31caedc378 100644 --- a/lib/sqlalchemy/orm/bulk_persistence.py +++ b/lib/sqlalchemy/orm/bulk_persistence.py @@ -1158,7 +1158,7 @@ class BulkORMInsert(ORMDMLState, InsertDMLState): execution_options, ) = BulkORMInsert.default_insert_options.from_execution_options( "_sa_orm_insert_options", - {"dml_strategy", "autoflush", "populate_existing"}, + {"dml_strategy", "autoflush", "populate_existing", "render_nulls"}, execution_options, statement._execution_options, ) diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index a1d16f4476..104c5958a0 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -1176,6 +1176,7 @@ class Executable(roles.StatementRole): autoflush: bool = False, synchronize_session: SynchronizeSessionArgument = ..., dml_strategy: DMLStrategyArgument = ..., + render_nulls: bool = ..., is_delete_using: bool = ..., is_update_from: bool = ..., **opt: Any, diff --git a/test/orm/dml/test_bulk_statements.py b/test/orm/dml/test_bulk_statements.py index 7246a9749f..7af47de818 100644 --- a/test/orm/dml/test_bulk_statements.py +++ b/test/orm/dml/test_bulk_statements.py @@ -141,6 +141,67 @@ class InsertStmtTest(testing.AssertsExecutionResults, fixtures.TestBase): ], ) + @testing.variation("render_nulls", [True, False]) + def test_render_nulls(self, decl_base, render_nulls): + """test #10575""" + + class A(decl_base): + __tablename__ = "a" + id: Mapped[int] = mapped_column(Identity(), primary_key=True) + data: Mapped[str] + x: Mapped[Optional[int]] + + decl_base.metadata.create_all(testing.db) + s = fixture_session() + + with self.sql_execution_asserter() as asserter: + stmt = insert(A) + if render_nulls: + stmt = stmt.execution_options(render_nulls=True) + + s.execute( + stmt, + [ + {"data": "d3", "x": 5}, + {"data": "d4", "x": 6}, + {"data": "d5", "x": 6}, + {"data": "d6", "x": None}, + {"data": "d7", "x": 6}, + ], + ) + + if render_nulls: + asserter.assert_( + CompiledSQL( + "INSERT INTO a (data, x) VALUES (:data, :x)", + [ + {"data": "d3", "x": 5}, + {"data": "d4", "x": 6}, + {"data": "d5", "x": 6}, + {"data": "d6", "x": None}, + {"data": "d7", "x": 6}, + ], + ), + ) + else: + asserter.assert_( + CompiledSQL( + "INSERT INTO a (data, x) VALUES (:data, :x)", + [ + {"data": "d3", "x": 5}, + {"data": "d4", "x": 6}, + {"data": "d5", "x": 6}, + ], + ), + CompiledSQL( + "INSERT INTO a (data) VALUES (:data)", [{"data": "d6"}] + ), + CompiledSQL( + "INSERT INTO a (data, x) VALUES (:data, :x)", + [{"data": "d7", "x": 6}], + ), + ) + def test_omit_returning_ok(self, decl_base): class A(decl_base): __tablename__ = "a" diff --git a/test/typing/test_overloads.py b/test/typing/test_overloads.py index 4a258a00bf..968b60d926 100644 --- a/test/typing/test_overloads.py +++ b/test/typing/test_overloads.py @@ -37,6 +37,7 @@ orm_dml_execution_options = { "dml_strategy": "DMLStrategyArgument", "is_delete_using": "bool", "is_update_from": "bool", + "render_nulls": "bool", } orm_execution_options = {