From: Mike Bayer Date: Wed, 22 Dec 2021 13:34:15 +0000 (-0500) Subject: use QueuePool for sqlite file databases X-Git-Tag: rel_2_0_0b1~490^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=22ed657827b487df9012def07271aed01bd4ae12;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git use QueuePool for sqlite file databases The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file based database is used. This is set along with setting the ``check_same_thread`` parameter to ``False``. It has been observed that the previous approach of defaulting to :class:`_pool.NullPool`, which does not hold onto database connections after they are released, did in fact have a measurable negative performance impact. As always, the pool class is always customizable via the :paramref:`_sa.create_engine.poolclass` parameter. Fixes: #7490 Change-Id: I5f6c259def0ef43d401c6163dc99f651e519148d --- diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst index afab782770..ad0dce3f10 100644 --- a/doc/build/changelog/migration_20.rst +++ b/doc/build/changelog/migration_20.rst @@ -317,6 +317,25 @@ such as gevent. :ticket:`7433` +.. _change_7490: + +The SQLite dialect uses QueuePool for file-based databases +------------------------------------------------------------ + +The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file +based database is used. This is set along with setting the +``check_same_thread`` parameter to ``False``. It has been observed that the +previous approach of defaulting to :class:`_pool.NullPool`, which does not +hold onto database connections after they are released, did in fact have a +measurable negative performance impact. As always, the pool class is always +customizable via the :paramref:`_sa.create_engine.poolclass` parameter. + +.. seealso:: + + :ref:`pysqlite_threading_pooling` + + +:ticket:`7490` .. _migration_20_overview: diff --git a/doc/build/changelog/unreleased_20/7490.rst b/doc/build/changelog/unreleased_20/7490.rst new file mode 100644 index 0000000000..0e1487cce3 --- /dev/null +++ b/doc/build/changelog/unreleased_20/7490.rst @@ -0,0 +1,16 @@ +.. change:: + :tags: bug, sqlite, performance + :tickets: 7490 + + The SQLite dialect now defaults to :class:`_pool.QueuePool` when a file + based database is used. This is set along with setting the + ``check_same_thread`` parameter to ``False``. It has been observed that the + previous approach of defaulting to :class:`_pool.NullPool`, which does not + hold onto database connections after they are released, did in fact have a + measurable negative performance impact. As always, the pool class is always + customizable via the :paramref:`_sa.create_engine.poolclass` parameter. + + .. seealso:: + + :ref:`change_7490` + diff --git a/doc/build/core/pooling.rst b/doc/build/core/pooling.rst index f6eb7c405c..bb5e2826a7 100644 --- a/doc/build/core/pooling.rst +++ b/doc/build/core/pooling.rst @@ -38,13 +38,6 @@ directly to :func:`~sqlalchemy.create_engine` as keyword arguments: engine = create_engine('postgresql+psycopg2://me@localhost/mydb', pool_size=20, max_overflow=0) -In the case of SQLite, the :class:`.SingletonThreadPool` or -:class:`.NullPool` are selected by the dialect to provide -greater compatibility with SQLite's threading and locking -model, as well as to provide a reasonable default behavior -to SQLite "memory" databases, which maintain their entire -dataset within the scope of a single connection. - All SQLAlchemy pool implementations have in common that none of them "pre create" connections - all implementations wait until first use before creating a connection. At that point, if @@ -64,13 +57,9 @@ Switching Pool Implementations The usual way to use a different kind of pool with :func:`_sa.create_engine` is to use the ``poolclass`` argument. This argument accepts a class imported from the ``sqlalchemy.pool`` module, and handles the details -of building the pool for you. Common options include specifying -:class:`.QueuePool` with SQLite:: - - from sqlalchemy.pool import QueuePool - engine = create_engine('sqlite:///file.db', poolclass=QueuePool) - -Disabling pooling using :class:`.NullPool`:: +of building the pool for you. A common use case here is when +connection pooling is to be disabled, which can be achieved by using +the :class:`.NullPool` implementation:: from sqlalchemy.pool import NullPool engine = create_engine( diff --git a/lib/sqlalchemy/dialects/sqlite/pysqlite.py b/lib/sqlalchemy/dialects/sqlite/pysqlite.py index 47d5f7a85f..8476e68342 100644 --- a/lib/sqlalchemy/dialects/sqlite/pysqlite.py +++ b/lib/sqlalchemy/dialects/sqlite/pysqlite.py @@ -199,35 +199,53 @@ processing. Threading/Pooling Behavior --------------------------- -Pysqlite's default behavior is to prohibit the usage of a single connection -in more than one thread. This is originally intended to work with older -versions of SQLite that did not support multithreaded operation under -various circumstances. In particular, older SQLite versions -did not allow a ``:memory:`` database to be used in multiple threads -under any circumstances. - -Pysqlite does include a now-undocumented flag known as -``check_same_thread`` which will disable this check, however note that -pysqlite connections are still not safe to use in concurrently in multiple -threads. In particular, any statement execution calls would need to be -externally mutexed, as Pysqlite does not provide for thread-safe propagation -of error messages among other things. So while even ``:memory:`` databases -can be shared among threads in modern SQLite, Pysqlite doesn't provide enough -thread-safety to make this usage worth it. - -SQLAlchemy sets up pooling to work with Pysqlite's default behavior: +The ``sqlite3`` DBAPI by default prohibits the use of a particular connection +in a thread which is not the one in which it was created. As SQLite has +matured, it's behavior under multiple threads has improved, and even includes +options for memory only databases to be used in multiple threads. + +The thread prohibition is known as "check same thread" and may be controlled +using the ``sqlite3`` parameter ``check_same_thread``, which will disable or +enable this check. SQLAlchemy's default behavior here is to set +``check_same_thread`` to ``False`` automatically whenever a file-based database +is in use, to establish compatibility with the default pool class +:class:`.QueuePool`. + +The SQLAlchemy ``pysqlite`` DBAPI establishes the connection pool differently +based on the kind of SQLite database that's requested: * When a ``:memory:`` SQLite database is specified, the dialect by default will use :class:`.SingletonThreadPool`. This pool maintains a single connection per thread, so that all access to the engine within the current thread use the same ``:memory:`` database - other threads would access a - different ``:memory:`` database. + different ``:memory:`` database. The ``check_same_thread`` parameter + defaults to ``True``. * When a file-based database is specified, the dialect will use - :class:`.NullPool` as the source of connections. This pool closes and - discards connections which are returned to the pool immediately. SQLite - file-based connections have extremely low overhead, so pooling is not - necessary. The scheme also prevents a connection from being used again in - a different thread and works best with SQLite's coarse-grained file locking. + :class:`.QueuePool` as the source of connections. at the same time, + the ``check_same_thread`` flag is set to False by default unless overridden. + + .. versionchanged:: 2.0 + + SQLite file database engines now use :class:`.QueuePool` by default. + Previously, :class:`.NullPool` were used. The :class:`.NullPool` class + may be used by specifying it via the + :paramref:`_sa.create_engine.poolclass` parameter. + +Disabling Connection Pooling for File Databases +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pooling may be disabled for a file based database by specifying the +:class:`.NullPool` implementation for the :func:`_sa.create_engine.poolclass` +parameter:: + + from sqlalchemy import NullPool + engine = create_engine("sqlite:///myfile.db", poolclass=NullPool) + +It's been observed that the :class:`.NullPool` implementation incurs an +extremely small performance overhead for repeated checkouts due to the lack of +connection re-use implemented by :class:`.QueuePool`. However, it still +may be beneficial to use this class if the application is experiencing +issues with files being locked. Using a Memory Database in Multiple Threads ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -274,23 +292,12 @@ Note that :class:`.SingletonThreadPool` should be configured for the number of threads that are to be used; beyond that number, connections will be closed out in a non deterministic way. -Unicode -------- - -The pysqlite driver only returns Python ``unicode`` objects in result sets, -never plain strings, and accommodates ``unicode`` objects within bound -parameter values in all cases. Regardless of the SQLAlchemy string type in -use, string-based result values will by Python ``unicode`` in Python 2. -The :class:`.Unicode` type should still be used to indicate those columns that -require unicode, however, so that non-``unicode`` values passed inadvertently -will emit a warning. Pysqlite will emit an error if a non-``unicode`` string -is passed containing non-ASCII characters. -Dealing with Mixed String / Binary Columns in Python 3 +Dealing with Mixed String / Binary Columns ------------------------------------------------------ The SQLite database is weakly typed, and as such it is possible when using -binary values, which in Python 3 are represented as ``b'some string'``, that a +binary values, which in Python are represented as ``b'some string'``, that a particular SQLite database can have data values within different rows where some of them will be returned as a ``b''`` value by the Pysqlite driver, and others will be returned as Python strings, e.g. ``''`` values. This situation @@ -305,8 +312,6 @@ table will not be consistently readable because SQLAlchemy's To deal with a SQLite table that has mixed string / binary data in the same column, use a custom type that will check each row individually:: - # note this is Python 3 only - from sqlalchemy import String from sqlalchemy import TypeDecorator @@ -477,7 +482,7 @@ class SQLiteDialect_pysqlite(SQLiteDialect): @classmethod def get_pool_class(cls, url): if cls._is_url_file_db(url): - return pool.NullPool + return pool.QueuePool else: return pool.SingletonThreadPool @@ -586,6 +591,10 @@ class SQLiteDialect_pysqlite(SQLiteDialect): if filename != ":memory:": filename = os.path.abspath(filename) + pysqlite_opts.setdefault( + "check_same_thread", not self._is_url_file_db(url) + ) + return ([filename], pysqlite_opts) def is_disconnect(self, e, connection, cursor): diff --git a/test/dialect/test_sqlite.py b/test/dialect/test_sqlite.py index 4fe4198652..d7021a3432 100644 --- a/test/dialect/test_sqlite.py +++ b/test/dialect/test_sqlite.py @@ -717,18 +717,22 @@ class DialectTest( assert e.pool.__class__ is pool.SingletonThreadPool e = create_engine("sqlite+pysqlite:///foo.db") - assert e.pool.__class__ is pool.NullPool + # changed as of 2.0 #7490 + assert e.pool.__class__ is pool.QueuePool @combinations( ( "sqlite:///foo.db", # file path is absolute - ([os.path.abspath("foo.db")], {}), + ([os.path.abspath("foo.db")], {"check_same_thread": False}), ), ( "sqlite:////abs/path/to/foo.db", - ([os.path.abspath("/abs/path/to/foo.db")], {}), + ( + [os.path.abspath("/abs/path/to/foo.db")], + {"check_same_thread": False}, + ), ), - ("sqlite://", ([":memory:"], {})), + ("sqlite://", ([":memory:"], {"check_same_thread": True})), ( "sqlite:///?check_same_thread=true", ([":memory:"], {"check_same_thread": True}), @@ -743,11 +747,17 @@ class DialectTest( ), ( "sqlite:///file:path/to/database?" "mode=ro&uri=true", - (["file:path/to/database?mode=ro"], {"uri": True}), + ( + ["file:path/to/database?mode=ro"], + {"uri": True, "check_same_thread": False}, + ), ), ( "sqlite:///file:path/to/database?uri=true", - (["file:path/to/database"], {"uri": True}), + ( + ["file:path/to/database"], + {"uri": True, "check_same_thread": False}, + ), ), ) def test_connect_args(self, url, expected):