From: Mike Bayer Date: Tue, 7 Jun 2022 19:00:20 +0000 (-0400) Subject: fix race conditions in lambda statements X-Git-Tag: rel_1_4_38~15^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=53ae884a0c5d107d1a4d9b4b64192c3b3c507ca8;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git fix race conditions in lambda statements Fixed multiple observed race conditions related to :func:`.lambda_stmt`, including an initial "dogpile" issue when a new Python code object is initially analyzed among multiple simultaneous threads which created both a performance issue as well as some internal corruption of state. Additionally repaired observed race condition which could occur when "cloning" an expression construct that is also in the process of being compiled or otherwise accessed in a different thread due to memoized attributes altering the ``__dict__`` while iterated, for Python versions prior to 3.10; in particular the lambda SQL construct is sensitive to this as it holds onto a single statement object persistently. The iteration has been refined to use ``dict.copy()`` with or without an additional iteration instead. Fixes: #8098 Change-Id: I4e0b627bfa187f1780dc68ec81b94db1c78f846a (cherry picked from commit 117878f7870377f143917a22160320a891eb0211) --- diff --git a/doc/build/changelog/unreleased_14/8098.rst b/doc/build/changelog/unreleased_14/8098.rst new file mode 100644 index 0000000000..0267817abd --- /dev/null +++ b/doc/build/changelog/unreleased_14/8098.rst @@ -0,0 +1,16 @@ +.. change:: + :tags: bug, sql + :tickets: 8098 + + Fixed multiple observed race conditions related to :func:`.lambda_stmt`, + including an initial "dogpile" issue when a new Python code object is + initially analyzed among multiple simultaneous threads which created both a + performance issue as well as some internal corruption of state. + Additionally repaired observed race condition which could occur when + "cloning" an expression construct that is also in the process of being + compiled or otherwise accessed in a different thread due to memoized + attributes altering the ``__dict__`` while iterated, for Python versions + prior to 3.10; in particular the lambda SQL construct is sensitive to this + as it holds onto a single statement object persistently. The iteration has + been refined to use ``dict.copy()`` with or without an additional iteration + instead. diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 52339e35a7..ec685d1fac 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -559,8 +559,9 @@ class Generative(HasMemoized): cls = self.__class__ s = cls.__new__(cls) if skip: + # ensure this iteration remains atomic s.__dict__ = { - k: v for k, v in self.__dict__.items() if k not in skip + k: v for k, v in self.__dict__.copy().items() if k not in skip } else: s.__dict__ = self.__dict__.copy() diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 42ec3e0e7d..a1891f19ca 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -241,7 +241,14 @@ class ClauseElement( """ skip = self._memoized_keys c = self.__class__.__new__(self.__class__) - c.__dict__ = {k: v for k, v in self.__dict__.items() if k not in skip} + + if skip: + # ensure this iteration remains atomic + c.__dict__ = { + k: v for k, v in self.__dict__.copy().items() if k not in skip + } + else: + c.__dict__ = self.__dict__.copy() # this is a marker that helps to "equate" clauses to each other # when a Select returns its list of FROM clauses. the cloning diff --git a/lib/sqlalchemy/sql/lambdas.py b/lib/sqlalchemy/sql/lambdas.py index 5f91559987..584efe4c68 100644 --- a/lib/sqlalchemy/sql/lambdas.py +++ b/lib/sqlalchemy/sql/lambdas.py @@ -9,6 +9,7 @@ import inspect import itertools import operator import sys +import threading import types import weakref @@ -218,11 +219,17 @@ class LambdaElement(elements.ClauseElement): if rec is None: if cache_key is not traversals.NO_CACHE: - rec = AnalyzedFunction( - tracker, self, apply_propagate_attrs, fn - ) - rec.closure_bindparams = bindparams - lambda_cache[tracker_key + cache_key] = rec + + with AnalyzedCode._generation_mutex: + key = tracker_key + cache_key + if key not in lambda_cache: + rec = AnalyzedFunction( + tracker, self, apply_propagate_attrs, fn + ) + rec.closure_bindparams = bindparams + lambda_cache[key] = rec + else: + rec = lambda_cache[key] else: rec = NonAnalyzedFunction(self._invoke_user_fn(fn)) @@ -607,6 +614,8 @@ class AnalyzedCode(object): ) _fns = weakref.WeakKeyDictionary() + _generation_mutex = threading.RLock() + @classmethod def get(cls, fn, lambda_element, lambda_kw, **kw): try: @@ -614,10 +623,16 @@ class AnalyzedCode(object): return cls._fns[fn.__code__] except KeyError: pass - cls._fns[fn.__code__] = analyzed = AnalyzedCode( - fn, lambda_element, lambda_kw, **kw - ) - return analyzed + + with cls._generation_mutex: + # check for other thread already created object + if fn.__code__ in cls._fns: + return cls._fns[fn.__code__] + + cls._fns[fn.__code__] = analyzed = AnalyzedCode( + fn, lambda_element, lambda_kw, **kw + ) + return analyzed def __init__(self, fn, lambda_element, opts): if inspect.ismethod(fn):