From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Wed, 22 Dec 2021 13:34:15 +0000 (-0500)
Subject: use QueuePool for sqlite file databases
X-Git-Tag: rel_2_0_0b1~490^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=22ed657827b487df9012def07271aed01bd4ae12;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git

use QueuePool for sqlite file databases

The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file
based database is used. This is set along with setting the
``check_same_thread`` parameter to ``False``. It has been observed that the
previous approach of defaulting to :class:`_pool.NullPool`, which does not
hold onto database connections after they are released, did in fact have a
measurable negative performance impact. As always, the pool class remains
customizable via the :paramref:`_sa.create_engine.poolclass` parameter.

Fixes: #7490
Change-Id: I5f6c259def0ef43d401c6163dc99f651e519148d
---

diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst
index afab782770..ad0dce3f10 100644
--- a/doc/build/changelog/migration_20.rst
+++ b/doc/build/changelog/migration_20.rst
@@ -317,6 +317,25 @@ such as gevent.
 :ticket:`7433`
 
 
+.. _change_7490:
+
+The SQLite dialect uses QueuePool for file-based databases
+------------------------------------------------------------
+
+The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file
+based database is used. This is set along with setting the
+``check_same_thread`` parameter to ``False``. It has been observed that the
+previous approach of defaulting to :class:`_pool.NullPool`, which does not
+hold onto database connections after they are released, did in fact have a
+measurable negative performance impact. As always, the pool class remains
+customizable via the :paramref:`_sa.create_engine.poolclass` parameter.
+
+.. seealso::
+
+    :ref:`pysqlite_threading_pooling`
+
+
+:ticket:`7490`
 
 .. _migration_20_overview:
 
diff --git a/doc/build/changelog/unreleased_20/7490.rst b/doc/build/changelog/unreleased_20/7490.rst
new file mode 100644
index 0000000000..0e1487cce3
--- /dev/null
+++ b/doc/build/changelog/unreleased_20/7490.rst
@@ -0,0 +1,16 @@
+.. change::
+    :tags: bug, sqlite, performance
+    :tickets: 7490
+
+    The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file
+    based database is used. This is set along with setting the
+    ``check_same_thread`` parameter to ``False``. It has been observed that the
+    previous approach of defaulting to :class:`_pool.NullPool`, which does not
+    hold onto database connections after they are released, did in fact have a
+    measurable negative performance impact. As always, the pool class remains
+    customizable via the :paramref:`_sa.create_engine.poolclass` parameter.
+
+    .. seealso::
+
+        :ref:`change_7490`
+
diff --git a/doc/build/core/pooling.rst b/doc/build/core/pooling.rst
index f6eb7c405c..bb5e2826a7 100644
--- a/doc/build/core/pooling.rst
+++ b/doc/build/core/pooling.rst
@@ -38,13 +38,6 @@ directly to :func:`~sqlalchemy.create_engine` as keyword arguments:
   engine = create_engine('postgresql+psycopg2://me@localhost/mydb',
                          pool_size=20, max_overflow=0)
 
-In the case of SQLite, the :class:`.SingletonThreadPool` or
-:class:`.NullPool` are selected by the dialect to provide
-greater compatibility with SQLite's threading and locking
-model, as well as to provide a reasonable default behavior
-to SQLite "memory" databases, which maintain their entire
-dataset within the scope of a single connection.
-
 All SQLAlchemy pool implementations have in common
 that none of them "pre create" connections - all implementations wait
 until first use before creating a connection.   At that point, if
@@ -64,13 +57,9 @@ Switching Pool Implementations
 The usual way to use a different kind of pool with :func:`_sa.create_engine`
 is to use the ``poolclass`` argument.   This argument accepts a class
 imported from the ``sqlalchemy.pool`` module, and handles the details
-of building the pool for you.   Common options include specifying
-:class:`.QueuePool` with SQLite::
-
-    from sqlalchemy.pool import QueuePool
-    engine = create_engine('sqlite:///file.db', poolclass=QueuePool)
-
-Disabling pooling using :class:`.NullPool`::
+of building the pool for you.   A common use case here is when
+connection pooling is to be disabled, which can be achieved by using
+the :class:`.NullPool` implementation::
 
     from sqlalchemy.pool import NullPool
     engine = create_engine(
diff --git a/lib/sqlalchemy/dialects/sqlite/pysqlite.py b/lib/sqlalchemy/dialects/sqlite/pysqlite.py
index 47d5f7a85f..8476e68342 100644
--- a/lib/sqlalchemy/dialects/sqlite/pysqlite.py
+++ b/lib/sqlalchemy/dialects/sqlite/pysqlite.py
@@ -199,35 +199,53 @@ processing.
 Threading/Pooling Behavior
 ---------------------------
 
-Pysqlite's default behavior is to prohibit the usage of a single connection
-in more than one thread.   This is originally intended to work with older
-versions of SQLite that did not support multithreaded operation under
-various circumstances.  In particular, older SQLite versions
-did not allow a ``:memory:`` database to be used in multiple threads
-under any circumstances.
-
-Pysqlite does include a now-undocumented flag known as
-``check_same_thread`` which will disable this check, however note that
-pysqlite connections are still not safe to use in concurrently in multiple
-threads.  In particular, any statement execution calls would need to be
-externally mutexed, as Pysqlite does not provide for thread-safe propagation
-of error messages among other things.   So while even ``:memory:`` databases
-can be shared among threads in modern SQLite, Pysqlite doesn't provide enough
-thread-safety to make this usage worth it.
-
-SQLAlchemy sets up pooling to work with Pysqlite's default behavior:
+The ``sqlite3`` DBAPI by default prohibits the use of a particular connection
+in a thread which is not the one in which it was created.  As SQLite has
+matured, its behavior under multiple threads has improved, and even includes
+options for memory-only databases to be used in multiple threads.
+
+The thread prohibition is known as "check same thread" and may be controlled
+using the ``sqlite3`` parameter ``check_same_thread``, which will disable or
+enable this check. SQLAlchemy's default behavior here is to set
+``check_same_thread`` to ``False`` automatically whenever a file-based database
+is in use, to establish compatibility with the default pool class
+:class:`.QueuePool`.
+
+The SQLAlchemy ``pysqlite`` dialect establishes the connection pool differently
+based on the kind of SQLite database that's requested:
 
 * When a ``:memory:`` SQLite database is specified, the dialect by default
   will use :class:`.SingletonThreadPool`. This pool maintains a single
   connection per thread, so that all access to the engine within the current
   thread use the same ``:memory:`` database - other threads would access a
-  different ``:memory:`` database.
+  different ``:memory:`` database.  The ``check_same_thread`` parameter
+  defaults to ``True``.
 * When a file-based database is specified, the dialect will use
-  :class:`.NullPool` as the source of connections. This pool closes and
-  discards connections which are returned to the pool immediately. SQLite
-  file-based connections have extremely low overhead, so pooling is not
-  necessary. The scheme also prevents a connection from being used again in
-  a different thread and works best with SQLite's coarse-grained file locking.
+  :class:`.QueuePool` as the source of connections.   At the same time,
+  the ``check_same_thread`` flag is set to False by default unless overridden.
+
+  .. versionchanged:: 2.0
+
+    SQLite file database engines now use :class:`.QueuePool` by default.
+    Previously, :class:`.NullPool` was used.  The :class:`.NullPool` class
+    may be used by specifying it via the
+    :paramref:`_sa.create_engine.poolclass` parameter.
+
+Disabling Connection Pooling for File Databases
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Pooling may be disabled for a file based database by specifying the
+:class:`.NullPool` implementation for the
+:paramref:`_sa.create_engine.poolclass` parameter::
+
+    from sqlalchemy import NullPool
+    engine = create_engine("sqlite:///myfile.db", poolclass=NullPool)
+
+It's been observed that the :class:`.NullPool` implementation incurs an
+extremely small performance overhead for repeated checkouts, as it lacks the
+connection re-use provided by :class:`.QueuePool`.  However, it still
+may be beneficial to use this class if the application is experiencing
+issues with files being locked.
 
 Using a Memory Database in Multiple Threads
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -274,23 +292,12 @@ Note that :class:`.SingletonThreadPool` should be configured for the number
 of threads that are to be used; beyond that number, connections will be
 closed out in a non deterministic way.
 
-Unicode
--------
-
-The pysqlite driver only returns Python ``unicode`` objects in result sets,
-never plain strings, and accommodates ``unicode`` objects within bound
-parameter values in all cases.   Regardless of the SQLAlchemy string type in
-use, string-based result values will by Python ``unicode`` in Python 2.
-The :class:`.Unicode` type should still be used to indicate those columns that
-require unicode, however, so that non-``unicode`` values passed inadvertently
-will emit a warning.  Pysqlite will emit an error if a non-``unicode`` string
-is passed containing non-ASCII characters.
 
-Dealing with Mixed String / Binary Columns in Python 3
+Dealing with Mixed String / Binary Columns
 ------------------------------------------------------
 
 The SQLite database is weakly typed, and as such it is possible when using
-binary values, which in Python 3 are represented as ``b'some string'``, that a
+binary values, which in Python are represented as ``b'some string'``, that a
 particular SQLite database can have data values within different rows where
 some of them will be returned as a ``b''`` value by the Pysqlite driver, and
 others will be returned as Python strings, e.g. ``''`` values.   This situation
@@ -305,8 +312,6 @@ table will not be consistently readable because SQLAlchemy's
 To deal with a SQLite table that has mixed string / binary data in the
 same column, use a custom type that will check each row individually::
 
-    # note this is Python 3 only
-
     from sqlalchemy import String
     from sqlalchemy import TypeDecorator
 
@@ -477,7 +482,7 @@ class SQLiteDialect_pysqlite(SQLiteDialect):
     @classmethod
     def get_pool_class(cls, url):
         if cls._is_url_file_db(url):
-            return pool.NullPool
+            return pool.QueuePool
         else:
             return pool.SingletonThreadPool
 
@@ -586,6 +591,10 @@ class SQLiteDialect_pysqlite(SQLiteDialect):
             if filename != ":memory:":
                 filename = os.path.abspath(filename)
 
+        pysqlite_opts.setdefault(
+            "check_same_thread", not self._is_url_file_db(url)
+        )
+
         return ([filename], pysqlite_opts)
 
     def is_disconnect(self, e, connection, cursor):
diff --git a/test/dialect/test_sqlite.py b/test/dialect/test_sqlite.py
index 4fe4198652..d7021a3432 100644
--- a/test/dialect/test_sqlite.py
+++ b/test/dialect/test_sqlite.py
@@ -717,18 +717,22 @@ class DialectTest(
         assert e.pool.__class__ is pool.SingletonThreadPool
 
         e = create_engine("sqlite+pysqlite:///foo.db")
-        assert e.pool.__class__ is pool.NullPool
+        # changed as of 2.0 #7490
+        assert e.pool.__class__ is pool.QueuePool
 
     @combinations(
         (
             "sqlite:///foo.db",  # file path is absolute
-            ([os.path.abspath("foo.db")], {}),
+            ([os.path.abspath("foo.db")], {"check_same_thread": False}),
         ),
         (
             "sqlite:////abs/path/to/foo.db",
-            ([os.path.abspath("/abs/path/to/foo.db")], {}),
+            (
+                [os.path.abspath("/abs/path/to/foo.db")],
+                {"check_same_thread": False},
+            ),
         ),
-        ("sqlite://", ([":memory:"], {})),
+        ("sqlite://", ([":memory:"], {"check_same_thread": True})),
         (
             "sqlite:///?check_same_thread=true",
             ([":memory:"], {"check_same_thread": True}),
@@ -743,11 +747,17 @@ class DialectTest(
         ),
         (
             "sqlite:///file:path/to/database?" "mode=ro&uri=true",
-            (["file:path/to/database?mode=ro"], {"uri": True}),
+            (
+                ["file:path/to/database?mode=ro"],
+                {"uri": True, "check_same_thread": False},
+            ),
         ),
         (
             "sqlite:///file:path/to/database?uri=true",
-            (["file:path/to/database"], {"uri": True}),
+            (
+                ["file:path/to/database"],
+                {"uri": True, "check_same_thread": False},
+            ),
         ),
     )
     def test_connect_args(self, url, expected):