]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
implement render_nulls for new style bulk ORM INSERT
authorMike Bayer <mike_mp@zzzcomputing.com>
Wed, 1 Nov 2023 22:06:49 +0000 (18:06 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Thu, 2 Nov 2023 00:16:11 +0000 (20:16 -0400)
Implemented the :paramref:`_orm.Session.bulk_insert_mappings.render_nulls`
parameter for new style bulk ORM inserts, allowing ``render_nulls=True`` as
an execution option.   This allows for bulk ORM inserts with a mixture of
``None`` values in the parameter dictionaries to use a single batch of rows
for a given set of dicationary keys, rather than breaking up into batches
that omit the NULL columns from each INSERT.

Fixes: #10575
Change-Id: I47382bb586626e98d3964dfa8f55acb2c9c6d8d4

doc/build/changelog/unreleased_20/10575.rst [new file with mode: 0644]
doc/build/orm/queryguide/dml.rst
lib/sqlalchemy/orm/_typing.py
lib/sqlalchemy/orm/bulk_persistence.py
lib/sqlalchemy/sql/base.py
test/orm/dml/test_bulk_statements.py
test/typing/test_overloads.py

diff --git a/doc/build/changelog/unreleased_20/10575.rst b/doc/build/changelog/unreleased_20/10575.rst
new file mode 100644 (file)
index 0000000..5ae64f5
--- /dev/null
@@ -0,0 +1,14 @@
+.. change::
+    :tags: usecase, orm
+    :tickets: 10575
+
+    Implemented the :paramref:`_orm.Session.bulk_insert_mappings.render_nulls`
+    parameter for new style bulk ORM inserts, allowing ``render_nulls=True`` as
+    an execution option.   This allows for bulk ORM inserts with a mixture of
+    ``None`` values in the parameter dictionaries to use a single batch of rows
+    for a given set of dicationary keys, rather than breaking up into batches
+    that omit the NULL columns from each INSERT.
+
+    .. seealso::
+
+        :ref:`orm_queryguide_insert_null_params`
index 5c7acb20867cec4372628ff47ffa636da0fd2217..967397f1ae9b35918b52e923c6fb97b95a25695f 100644 (file)
@@ -257,6 +257,111 @@ three INSERT statements, grouped along the specific sets of keys
 in each dictionary while still maintaining row order, i.e.
 ``("name", "fullname", "species")``, ``("name", "species")``, ``("name","fullname", "species")``.
 
+.. _orm_queryguide_insert_null_params:
+
+Sending NULL values in ORM bulk INSERT statements
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bulk ORM insert feature draws upon a behavior that is also present
+in the legacy "bulk" insert behavior, as well as in the ORM unit of work
+overall, which is that rows which contain NULL values are INSERTed using
+a statement that does not refer to those columns; the rationale here is so
+that backends and schemas which contain server-side INSERT defaults that may
+be sensitive to the presence of a NULL value vs. no value present will
+produce a server side value as expected.  This default behavior
+has the effect of breaking up the bulk inserted batches into more
+batches of fewer rows::
+
+    >>> session.execute(
+    ...     insert(User),
+    ...     [
+    ...         {
+    ...             "name": "name_a",
+    ...             "fullname": "Employee A",
+    ...             "species": "Squid",
+    ...         },
+    ...         {
+    ...             "name": "name_b",
+    ...             "fullname": "Employee B",
+    ...             "species": "Squirrel",
+    ...         },
+    ...         {
+    ...             "name": "name_c",
+    ...             "fullname": "Employee C",
+    ...             "species": None,
+    ...         },
+    ...         {
+    ...             "name": "name_d",
+    ...             "fullname": "Employee D",
+    ...             "species": "Bluefish",
+    ...         },
+    ...     ],
+    ... )
+    {execsql}INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?)
+    [...] [('name_a', 'Employee A', 'Squid'), ('name_b', 'Employee B', 'Squirrel')]
+    INSERT INTO user_account (name, fullname) VALUES (?, ?)
+    [...] ('name_c', 'Employee C')
+    INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?)
+    [...] ('name_d', 'Employee D', 'Bluefish')
+    ...
+
+..  Setup code, not for display
+
+  >>> session.rollback()
+  ROLLBACK...
+  >>> session.connection()
+  BEGIN (implicit)...
+
+Above, the bulk INSERT of four rows is broken into three separate statements,
+the second statement reformatted to not refer to the NULL column for the single
+parameter dictionary that contains a ``None`` value.    This default
+behavior may be undesirable when many rows in the dataset contain random NULL
+values, as it causes the "executemany" operation to be broken into a larger
+number of smaller operations; particularly when relying upon
+:ref:`insertmanyvalues <engine_insertmanyvalues>` to reduce the overall number
+of statements, this can have a bigger performance impact.
+
+To disable the handling of ``None`` values in the parameters into separate
+batches, pass the execution option ``render_nulls=True``; this will cause
+all parameter dictionaries to be treated equivalently, assuming the same
+set of keys in each dictionary::
+
+    >>> session.execute(
+    ...     insert(User).execution_options(render_nulls=True),
+    ...     [
+    ...         {
+    ...             "name": "name_a",
+    ...             "fullname": "Employee A",
+    ...             "species": "Squid",
+    ...         },
+    ...         {
+    ...             "name": "name_b",
+    ...             "fullname": "Employee B",
+    ...             "species": "Squirrel",
+    ...         },
+    ...         {
+    ...             "name": "name_c",
+    ...             "fullname": "Employee C",
+    ...             "species": None,
+    ...         },
+    ...         {
+    ...             "name": "name_d",
+    ...             "fullname": "Employee D",
+    ...             "species": "Bluefish",
+    ...         },
+    ...     ],
+    ... )
+    {execsql}INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?)
+    [...] [('name_a', 'Employee A', 'Squid'), ('name_b', 'Employee B', 'Squirrel'), ('name_c', 'Employee C', None), ('name_d', 'Employee D', 'Bluefish')]
+    ...
+
+Above, all parameter dictionaries are sent in a single INSERT batch, including
+the ``None`` value present in the third parameter dictionary.
+
+.. versionadded:: 2.0.23  Added the ``render_nulls`` execution option which
+   mirrors the behavior of the legacy
+   :paramref:`_orm.Session.bulk_insert_mappings.render_nulls` parameter.
+
 .. _orm_queryguide_insert_joined_table_inheritance:
 
 Bulk INSERT for Joined Table Inheritance
index 60664283dae03eac7167489f3ce9976dc4958dc9..3085351ba3b372e6275f53fe26ccd12bb7a8803f 100644 (file)
@@ -93,6 +93,7 @@ class _OrmKnownExecutionOptions(_CoreKnownExecutionOptions, total=False):
     dml_strategy: DMLStrategyArgument
     is_delete_using: bool
     is_update_from: bool
+    render_nulls: bool
 
 
 OrmExecuteOptionsParameter = Union[
index 5f9db51b56f011ada8d396d5c976c9244715a392..31caedc378577301ef8304bd998878eb3948842a 100644 (file)
@@ -1158,7 +1158,7 @@ class BulkORMInsert(ORMDMLState, InsertDMLState):
             execution_options,
         ) = BulkORMInsert.default_insert_options.from_execution_options(
             "_sa_orm_insert_options",
-            {"dml_strategy", "autoflush", "populate_existing"},
+            {"dml_strategy", "autoflush", "populate_existing", "render_nulls"},
             execution_options,
             statement._execution_options,
         )
index a1d16f44768d04f7a095846920666369c461d455..104c5958a07583810ca3214ab438f9e5d2781a09 100644 (file)
@@ -1176,6 +1176,7 @@ class Executable(roles.StatementRole):
         autoflush: bool = False,
         synchronize_session: SynchronizeSessionArgument = ...,
         dml_strategy: DMLStrategyArgument = ...,
+        render_nulls: bool = ...,
         is_delete_using: bool = ...,
         is_update_from: bool = ...,
         **opt: Any,
index 7246a9749f21e29c0195809110e86e6fa468262d..7af47de818681927e1383e946d1c57049a47e026 100644 (file)
@@ -141,6 +141,67 @@ class InsertStmtTest(testing.AssertsExecutionResults, fixtures.TestBase):
                     ],
                 )
 
+    @testing.variation("render_nulls", [True, False])
+    def test_render_nulls(self, decl_base, render_nulls):
+        """test #10575"""
+
+        class A(decl_base):
+            __tablename__ = "a"
+            id: Mapped[int] = mapped_column(Identity(), primary_key=True)
+            data: Mapped[str]
+            x: Mapped[Optional[int]]
+
+        decl_base.metadata.create_all(testing.db)
+        s = fixture_session()
+
+        with self.sql_execution_asserter() as asserter:
+            stmt = insert(A)
+            if render_nulls:
+                stmt = stmt.execution_options(render_nulls=True)
+
+            s.execute(
+                stmt,
+                [
+                    {"data": "d3", "x": 5},
+                    {"data": "d4", "x": 6},
+                    {"data": "d5", "x": 6},
+                    {"data": "d6", "x": None},
+                    {"data": "d7", "x": 6},
+                ],
+            )
+
+        if render_nulls:
+            asserter.assert_(
+                CompiledSQL(
+                    "INSERT INTO a (data, x) VALUES (:data, :x)",
+                    [
+                        {"data": "d3", "x": 5},
+                        {"data": "d4", "x": 6},
+                        {"data": "d5", "x": 6},
+                        {"data": "d6", "x": None},
+                        {"data": "d7", "x": 6},
+                    ],
+                ),
+            )
+        else:
+            asserter.assert_(
+                CompiledSQL(
+                    "INSERT INTO a (data, x) VALUES (:data, :x)",
+                    [
+                        {"data": "d3", "x": 5},
+                        {"data": "d4", "x": 6},
+                        {"data": "d5", "x": 6},
+                    ],
+                ),
+                CompiledSQL(
+                    "INSERT INTO a (data) VALUES (:data)", [{"data": "d6"}]
+                ),
+                CompiledSQL(
+                    "INSERT INTO a (data, x) VALUES (:data, :x)",
+                    [{"data": "d7", "x": 6}],
+                ),
+            )
+
     def test_omit_returning_ok(self, decl_base):
         class A(decl_base):
             __tablename__ = "a"
index 4a258a00bf527a4c6ec9053e112f1eabf710b0c0..968b60d926473e9e84896371f4d900ec5bce14f8 100644 (file)
@@ -37,6 +37,7 @@ orm_dml_execution_options = {
     "dml_strategy": "DMLStrategyArgument",
     "is_delete_using": "bool",
     "is_update_from": "bool",
+    "render_nulls": "bool",
 }
 
 orm_execution_options = {