From 3f1477e2ecf3b2e95a26383490d0e8c363f4d0cc Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 8 Dec 2014 01:10:30 -0500 Subject: [PATCH] - A new series of :class:`.Session` methods which provide hooks directly into the unit of work's facility for emitting INSERT and UPDATE statements has been created. When used correctly, this expert-oriented system can allow ORM-mappings to be used to generate bulk insert and update statements batched into executemany groups, allowing the statements to proceed at speeds that rival direct use of the Core. fixes #3100 --- doc/build/changelog/changelog_10.rst | 29 +++++++++ doc/build/changelog/migration_10.rst | 35 ++++++++++- doc/build/core/tutorial.rst | 2 + doc/build/faq.rst | 29 ++++----- doc/build/orm/session.rst | 92 +++++++++++++++++++++++++++- examples/performance/__init__.py | 9 +++ examples/performance/__main__.py | 2 + lib/sqlalchemy/orm/session.py | 59 +++++++++++------- 8 files changed, 217 insertions(+), 40 deletions(-) diff --git a/doc/build/changelog/changelog_10.rst b/doc/build/changelog/changelog_10.rst index 6d99095d9e..d6f36e97ea 100644 --- a/doc/build/changelog/changelog_10.rst +++ b/doc/build/changelog/changelog_10.rst @@ -22,6 +22,35 @@ series as well. For changes that are specific to 1.0 with an emphasis on compatibility concerns, see :doc:`/changelog/migration_10`. + .. change:: + :tags: feature, examples + + A new suite of examples dedicated to providing a detailed study + into performance of SQLAlchemy ORM and Core, as well as the DBAPI, + from multiple perspectives. The suite runs within a container + that provides built in profiling displays both through console + output as well as graphically via the RunSnake tool. + + .. seealso:: + + :ref:`examples_performance` + + .. change:: + :tags: feature, orm + :tickets: 3100 + + A new series of :class:`.Session` methods which provide hooks + directly into the unit of work's facility for emitting INSERT + and UPDATE statements has been created. When used correctly, + this expert-oriented system can allow ORM-mappings to be used + to generate bulk insert and update statements batched into + executemany groups, allowing the statements to proceed at + speeds that rival direct use of the Core. + + .. seealso:: + + :ref:`bulk_operations` + .. change:: :tags: feature, mssql :tickets: 3039 diff --git a/doc/build/changelog/migration_10.rst b/doc/build/changelog/migration_10.rst index 562bb9f1b7..cd5d420e53 100644 --- a/doc/build/changelog/migration_10.rst +++ b/doc/build/changelog/migration_10.rst @@ -8,7 +8,7 @@ What's New in SQLAlchemy 1.0? undergoing maintenance releases as of May, 2014, and SQLAlchemy version 1.0, as of yet unreleased. - Document last updated: October 23, 2014 + Document last updated: December 8, 2014 Introduction ============ @@ -230,6 +230,39 @@ the :class:`.Table` construct. :ticket:`2051` +New Session Bulk INSERT/UPDATE API +---------------------------------- + +A new series of :class:`.Session` methods which provide hooks directly +into the unit of work's facility for emitting INSERT and UPDATE +statements has been created. When used correctly, this expert-oriented system +can allow ORM-mappings to be used to generate bulk insert and update +statements batched into executemany groups, allowing the statements +to proceed at speeds that rival direct use of the Core. + +.. seealso:: + + :ref:`bulk_operations` - introduction and full documentation + +:ticket:`3100` + +New Performance Example Suite +------------------------------ + +Inspired by the benchmarking done for the :ref:`bulk_operations` feature +as well as for the :ref:`faq_how_to_profile` section of the FAQ, a new +example section has been added which features several scripts designed +to illustrate the relative performance profile of various Core and ORM +techniques. The scripts are organized into use cases, and are packaged +under a single console interface such that any combination of demonstrations +can be run, dumping out timings, Python profile results and/or RunSnake profile +displays. + +.. seealso:: + + :ref:`examples_performance` + + .. _feature_get_enums: New get_enums() method with Postgresql Dialect diff --git a/doc/build/core/tutorial.rst b/doc/build/core/tutorial.rst index 04a25b174e..b6c07bdc08 100644 --- a/doc/build/core/tutorial.rst +++ b/doc/build/core/tutorial.rst @@ -307,6 +307,8 @@ them is different across different databases; each database's determine the correct value (or values; note that ``inserted_primary_key`` returns a list so that it supports composite primary keys). +.. _execute_multiple: + Executing Multiple Statements ============================== diff --git a/doc/build/faq.rst b/doc/build/faq.rst index 555fdc9e11..8c3bd24f4d 100644 --- a/doc/build/faq.rst +++ b/doc/build/faq.rst @@ -705,9 +705,13 @@ main query. :ref:`subqueryload_ordering` +.. _faq_performance: + Performance =========== +.. _faq_how_to_profile: + How can I profile a SQLAlchemy powered application? --------------------------------------------------- @@ -961,18 +965,10 @@ Common strategies to mitigate this include: The output of a profile can be a little daunting but after some practice they are very easy to read. -If you're feeling ambitious, there's also a more involved example of -SQLAlchemy profiling within the SQLAlchemy unit tests in the -``tests/aaa_profiling`` section. Tests in this area -use decorators that assert a -maximum number of method calls being used for particular operations, -so that if something inefficient gets checked in, the tests will -reveal it (it is important to note that in cPython, function calls have -the highest overhead of any operation, and the count of calls is more -often than not nearly proportional to time spent). Of note are the -the "zoomark" tests which use a fancy "SQL capturing" scheme which -cuts out the overhead of the DBAPI from the equation - although that -technique isn't really necessary for garden-variety profiling. +.. seealso:: + + :ref:`examples_performance` - a suite of performance demonstrations + with bundled profiling capabilities. I'm inserting 400,000 rows with the ORM and it's really slow! -------------------------------------------------------------- @@ -1001,10 +997,15 @@ ORM as a first-class component. For the use case of fast bulk inserts, the SQL generation and execution system that the ORM builds on top of -is part of the Core. Using this system directly, we can produce an INSERT that +is part of the :doc:`Core `. Using this system directly, we can produce an INSERT that is competitive with using the raw database API directly. -The example below illustrates time-based tests for four different +Alternatively, the SQLAlchemy ORM offers the :ref:`bulk_operations` +suite of methods, which provide hooks into subsections of the unit of +work process in order to emit Core-level INSERT and UPDATE constructs with +a small degree of ORM-based automation. + +The example below illustrates time-based tests for several different methods of inserting rows, going from the most automated to the least. With cPython 2.7, runtimes observed:: diff --git a/doc/build/orm/session.rst b/doc/build/orm/session.rst index 01ac7230e7..08ef9303ef 100644 --- a/doc/build/orm/session.rst +++ b/doc/build/orm/session.rst @@ -1944,6 +1944,8 @@ transactions set the flag ``twophase=True`` on the session:: # before committing both transactions session.commit() +.. _session_sql_expressions: + Embedding SQL Insert/Update Expressions into a Flush ===================================================== @@ -2459,7 +2461,7 @@ See the "sharding" example: :ref:`examples_sharding`. .. _bulk_operations: Bulk Operations ---------------- +=============== .. note:: Bulk Operations mode is a new series of operations made available on the :class:`.Session` object for the purpose of invoking INSERT and @@ -2480,7 +2482,7 @@ to this approach is strictly one of reduced Python overhead: * The flush() process, including the survey of all objects, their state, their cascade status, the status of all objects associated with them - via :meth:`.relationship`, and the topological sort of all operations to + via :func:`.relationship`, and the topological sort of all operations to be performed is completely bypassed. This reduces a great amount of Python overhead. @@ -2489,7 +2491,7 @@ to this approach is strictly one of reduced Python overhead: overhead in attaching them or managing their state in terms of the identity map or session. -* The :meth:`.Session.bulk_insert_mappings`, and :meth:`.Session.bulk_update_mappings` +* The :meth:`.Session.bulk_insert_mappings` and :meth:`.Session.bulk_update_mappings` methods accept lists of plain Python dictionaries, not objects; this further reduces a large amount of overhead associated with instantiating mapped objects and assigning state to them, which normally is also subject to @@ -2509,6 +2511,90 @@ The performance behavior of the bulk routines should be studied using the scripts which illustrate Python call-counts across a variety of scenarios, including bulk insert and update scenarios. +.. seealso:: + + :ref:`examples_performance` - includes detailed examples of bulk operations + contrasted against traditional Core and ORM methods, including performance + metrics. + +Usage +----- + +The methods each work in the context of the :class:`.Session` object's +transaction, like any other:: + + s = Session() + objects = [ + User(name="u1"), + User(name="u2"), + User(name="u3") + ] + s.bulk_save_objects(objects) + +For :meth:`.Session.bulk_insert_mappings`, and :meth:`.Session.bulk_update_mappings`, +dictionaries are passed:: + + s.bulk_insert_mappings(User, + [dict(name="u1"), dict(name="u2"), dict(name="u3")] + ) + +.. seealso:: + + :meth:`.Session.bulk_save_objects` + + :meth:`.Session.bulk_insert_mappings` + + :meth:`.Session.bulk_update_mappings` + + +Comparison to Core Insert / Update Constructs +--------------------------------------------- + +The bulk methods offer performance that under particular circumstances +can be close to that of using the core :class:`.Insert` and +:class:`.Update` constructs in an "executemany" context (for a description +of "executemany", see :ref:`execute_multiple` in the Core tutorial). +In order to achieve this, the +:paramref:`.Session.bulk_insert_mappings.return_defaults` +flag should be disabled so that rows can be batched together. The example +suite in :ref:`examples_performance` should be carefully studied in order +to gain familiarity with how fast bulk performance can be achieved. + +ORM Compatibility +----------------- + +The bulk insert / update methods lose a significant amount of functionality +versus traditional ORM use. The following is a listing of features that +are **not available** when using these methods: + +* persistence along :meth:`.relationship` linkages + +* sorting of rows within order of dependency; rows are inserted or updated + directly in the order in which they are passed to the methods + +* Session-management on the given objects, including attachment to the + session, identity map management. + +* Functionality related to primary key mutation, ON UPDATE cascade + +* SQL expression inserts / updates (e.g. :ref:`session_sql_expressions`) + +* ORM events such as :meth:`.MapperEvents.before_insert`, etc. The bulk + session methods have no event support. + +Features that **are available** include:: + +* INSERTs and UPDATEs of mapped objects + +* Version identifier support + +* Multi-table mappings, such as joined-inheritance - however, an object + to be inserted across multiple tables either needs to have primary key + identifiers fully populated ahead of time, else the + :paramref:`.Session.bulk_save_objects.return_defaults` flag must be used, + which will greatly reduce the performance benefits + + Sessions API diff --git a/examples/performance/__init__.py b/examples/performance/__init__.py index 6e2e1fc898..a4edfce363 100644 --- a/examples/performance/__init__.py +++ b/examples/performance/__init__.py @@ -48,6 +48,15 @@ Or with options:: --dburl mysql+mysqldb://scott:tiger@localhost/test \\ --profile --num 1000 +.. seealso:: + + :ref:`faq_how_to_profile` + +File Listing +------------- + +.. autosource:: + Running all tests with time --------------------------- diff --git a/examples/performance/__main__.py b/examples/performance/__main__.py index 957d6c699d..5e05143bf2 100644 --- a/examples/performance/__main__.py +++ b/examples/performance/__main__.py @@ -1,3 +1,5 @@ +"""Allows the examples/performance package to be run as a script.""" + from . import Profiler if __name__ == '__main__': diff --git a/lib/sqlalchemy/orm/session.py b/lib/sqlalchemy/orm/session.py index 72d393f54b..d40d281541 100644 --- a/lib/sqlalchemy/orm/session.py +++ b/lib/sqlalchemy/orm/session.py @@ -2061,17 +2061,22 @@ class Session(_SessionClassMethods): The objects as given are not added to the session and no additional state is established on them, unless the ``return_defaults`` flag - is also set. + is also set, in which case primary key attributes and server-side + default values will be populated. + + .. versionadded:: 1.0.0 .. warning:: The bulk save feature allows for a lower-latency INSERT/UPDATE - of rows at the expense of a lack of features. Features such - as object management, relationship handling, and SQL clause - support are bypassed in favor of raw INSERT/UPDATES of records. + of rows at the expense of most other unit-of-work features. + Features such as object management, relationship handling, + and SQL clause support are **silently omitted** in favor of raw + INSERT/UPDATES of records. - **Please read the list of caveats at :ref:`bulk_operations` - before using this method.** + **Please read the list of caveats at** :ref:`bulk_operations` + **before using this method, and fully test and confirm the + functionality of all code developed using these systems.** :param objects: a list of mapped object instances. The mapped objects are persisted as is, and are **not** associated with the @@ -2098,8 +2103,8 @@ class Session(_SessionClassMethods): is available. In particular this will allow joined-inheritance and other multi-table mappings to insert correctly without the need to provide primary key values ahead of time; however, - return_defaults mode greatly reduces the performance gains of the - method overall. + :paramref:`.Session.bulk_save_objects.return_defaults` **greatly + reduces the performance gains** of the method overall. :param update_changed_only: when True, UPDATE statements are rendered based on those attributes in each state that have logged changes. @@ -2138,15 +2143,19 @@ class Session(_SessionClassMethods): organizing the values within them across the tables to which the given mapper is mapped. + .. versionadded:: 1.0.0 + .. warning:: The bulk insert feature allows for a lower-latency INSERT - of rows at the expense of a lack of features. Features such - as relationship handling and SQL clause support are bypassed - in favor of a raw INSERT of records. + of rows at the expense of most other unit-of-work features. + Features such as object management, relationship handling, + and SQL clause support are **silently omitted** in favor of raw + INSERT of records. - **Please read the list of caveats at :ref:`bulk_operations` - before using this method.** + **Please read the list of caveats at** :ref:`bulk_operations` + **before using this method, and fully test and confirm the + functionality of all code developed using these systems.** :param mapper: a mapped class, or the actual :class:`.Mapper` object, representing the single kind of object represented within the mapping @@ -2164,8 +2173,10 @@ class Session(_SessionClassMethods): is available. In particular this will allow joined-inheritance and other multi-table mappings to insert correctly without the need to provide primary - key values ahead of time; however, return_defaults mode greatly - reduces the performance gains of the method overall. If the rows + key values ahead of time; however, + :paramref:`.Session.bulk_insert_mappings.return_defaults` + **greatly reduces the performance gains** of the method overall. + If the rows to be inserted only refer to a single table, then there is no reason this flag should be set as the returned default information is not used. @@ -2181,7 +2192,7 @@ class Session(_SessionClassMethods): """ self._bulk_save_mappings( - mapper, mappings, False, False, return_defaults) + mapper, mappings, False, False, return_defaults, False) def bulk_update_mappings(self, mapper, mappings): """Perform a bulk update of the given list of mapping dictionaries. @@ -2193,15 +2204,19 @@ class Session(_SessionClassMethods): state management features in use, reducing latency when updating large numbers of simple rows. + .. versionadded:: 1.0.0 + .. warning:: The bulk update feature allows for a lower-latency UPDATE - of rows at the expense of a lack of features. Features such - as relationship handling and SQL clause support are bypassed - in favor of a raw UPDATE of records. - - **Please read the list of caveats at :ref:`bulk_operations` - before using this method.** + of rows at the expense of most other unit-of-work features. + Features such as object management, relationship handling, + and SQL clause support are **silently omitted** in favor of raw + UPDATES of records. + + **Please read the list of caveats at** :ref:`bulk_operations` + **before using this method, and fully test and confirm the + functionality of all code developed using these systems.** :param mapper: a mapped class, or the actual :class:`.Mapper` object, representing the single kind of object represented within the mapping -- 2.47.3