]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
reorganize column collection init to be local
authorMike Bayer <mike_mp@zzzcomputing.com>
Sat, 1 Feb 2025 19:39:57 +0000 (14:39 -0500)
committerMike Bayer <mike_mp@zzzcomputing.com>
Wed, 5 Feb 2025 19:03:49 +0000 (14:03 -0500)
Reorganized the internals by which the `.c` collection on a
:class:`.FromClause` gets generated so that it is resilient against the
collection being accessed in concurrent fashion.   An example is creating a
:class:`.Alias` or :class:`.Subquery` and accessing it as a module level
variable.  This impacts the Oracle dialect which uses such module-level
global alias objects but is of general use as well.

Fixes: #12302
Change-Id: I30cb07c286affce24e2d85e49f9df5b787438d86

doc/build/changelog/unreleased_20/12302.rst [new file with mode: 0644]
lib/sqlalchemy/sql/dml.py
lib/sqlalchemy/sql/elements.py
lib/sqlalchemy/sql/schema.py
lib/sqlalchemy/sql/selectable.py
test/sql/test_selectable.py

diff --git a/doc/build/changelog/unreleased_20/12302.rst b/doc/build/changelog/unreleased_20/12302.rst
new file mode 100644 (file)
index 0000000..38d4544
--- /dev/null
@@ -0,0 +1,10 @@
+.. change::
+    :tags: bug, sql
+    :tickets: 12302
+
+    Reorganized the internals by which the `.c` collection on a
+    :class:`.FromClause` gets generated so that it is resilient against the
+    collection being accessed in concurrent fashion.   An example is creating a
+    :class:`.Alias` or :class:`.Subquery` and accessing it as a module level
+    variable.  This impacts the Oracle dialect which uses such module-level
+    global alias objects but is of general use as well.
index d7496cd367288f6a1ed7275c7686026a178ebc6b..cd81bc623fa5d3cbbb908486f41065e395ea9fa5 100644 (file)
@@ -23,6 +23,7 @@ from typing import NoReturn
 from typing import Optional
 from typing import overload
 from typing import Sequence
+from typing import Set
 from typing import Tuple
 from typing import Type
 from typing import TYPE_CHECKING
@@ -41,6 +42,7 @@ from .base import _from_objects
 from .base import _generative
 from .base import _select_iterables
 from .base import ColumnCollection
+from .base import ColumnSet
 from .base import CompileState
 from .base import DialectKWArgs
 from .base import Executable
@@ -422,10 +424,16 @@ class UpdateBase(
     is_dml = True
 
     def _generate_fromclause_column_proxies(
-        self, fromclause: FromClause
+        self,
+        fromclause: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
     ) -> None:
-        fromclause._columns._populate_separate_keys(
-            col._make_proxy(fromclause)
+        columns._populate_separate_keys(
+            col._make_proxy(
+                fromclause, primary_key=primary_key, foreign_keys=foreign_keys
+            )
             for col in self._all_selected_columns
             if is_column_element(col)
         )
index 41630261edfde3749e730b2604b565eadd38da39..57a3187015e1f8c7c136f9aacd6597df593f472e 100644 (file)
@@ -90,6 +90,7 @@ if typing.TYPE_CHECKING:
     from ._typing import _InfoType
     from ._typing import _PropagateAttrsType
     from ._typing import _TypeEngineArgument
+    from .base import ColumnSet
     from .cache_key import _CacheKeyTraversalType
     from .cache_key import CacheKey
     from .compiler import Compiled
@@ -1643,6 +1644,8 @@ class ColumnElement(
         self,
         selectable: FromClause,
         *,
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         name: Optional[str] = None,
         key: Optional[str] = None,
         name_is_truncatable: bool = False,
@@ -4586,7 +4589,7 @@ class NamedColumn(KeyedColumnElement[_T]):
         return self.name
 
     @HasMemoized.memoized_attribute
-    def _tq_key_label(self):
+    def _tq_key_label(self) -> Optional[str]:
         """table qualified label based on column key.
 
         for table-bound columns this is <tablename>_<column key/proxy key>;
@@ -4644,6 +4647,8 @@ class NamedColumn(KeyedColumnElement[_T]):
         self,
         selectable: FromClause,
         *,
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         name: Optional[str] = None,
         key: Optional[str] = None,
         name_is_truncatable: bool = False,
@@ -4824,6 +4829,8 @@ class Label(roles.LabeledColumnExprRole[_T], NamedColumn[_T]):
         self,
         selectable: FromClause,
         *,
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         name: Optional[str] = None,
         compound_select_cols: Optional[Sequence[ColumnElement[Any]]] = None,
         **kw: Any,
@@ -4836,6 +4843,8 @@ class Label(roles.LabeledColumnExprRole[_T], NamedColumn[_T]):
             disallow_is_literal=True,
             name_is_truncatable=isinstance(name, _truncated_label),
             compound_select_cols=compound_select_cols,
+            primary_key=primary_key,
+            foreign_keys=foreign_keys,
         )
 
         # there was a note here to remove this assertion, which was here
@@ -5072,6 +5081,8 @@ class ColumnClause(
         self,
         selectable: FromClause,
         *,
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         name: Optional[str] = None,
         key: Optional[str] = None,
         name_is_truncatable: bool = False,
index f1f93a955497021b94b1f2b8e0a10193b512054c..b86e5b8b09ffb4a08853e6aad96889399603f4a1 100644 (file)
@@ -96,9 +96,11 @@ if typing.TYPE_CHECKING:
     from ._typing import _InfoType
     from ._typing import _TextCoercedExpressionArgument
     from ._typing import _TypeEngineArgument
+    from .base import ColumnSet
     from .base import ReadOnlyColumnCollection
     from .compiler import DDLCompiler
     from .elements import BindParameter
+    from .elements import KeyedColumnElement
     from .functions import Function
     from .type_api import TypeEngine
     from .visitors import anon_map
@@ -2619,6 +2621,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
     def _make_proxy(
         self,
         selectable: FromClause,
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         name: Optional[str] = None,
         key: Optional[str] = None,
         name_is_truncatable: bool = False,
@@ -2688,10 +2692,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
         c._propagate_attrs = selectable._propagate_attrs
         if selectable._is_clone_of is not None:
             c._is_clone_of = selectable._is_clone_of.columns.get(c.key)
+
         if self.primary_key:
-            selectable.primary_key.add(c)  # type: ignore
+            primary_key.add(c)
+
         if fk:
-            selectable.foreign_keys.update(fk)  # type: ignore
+            foreign_keys.update(fk)  # type: ignore
+
         return c.key, c
 
 
index e12a44179ef6b7e5dafd6e3c839453dfacc429b2..cfe491e624c94a147cf71e3c652910aa289dec69 100644 (file)
@@ -245,7 +245,11 @@ class ReturnsRows(roles.ReturnsRowsRole, DQLDMLClauseElement):
         raise NotImplementedError()
 
     def _generate_fromclause_column_proxies(
-        self, fromclause: FromClause
+        self,
+        fromclause: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
     ) -> None:
         """Populate columns into an :class:`.AliasedReturnsRows` object."""
 
@@ -838,10 +842,17 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable):
         return getattr(self, "name", self.__class__.__name__ + " object")
 
     def _generate_fromclause_column_proxies(
-        self, fromclause: FromClause
+        self,
+        fromclause: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
     ) -> None:
-        fromclause._columns._populate_separate_keys(
-            col._make_proxy(fromclause) for col in self.c
+        columns._populate_separate_keys(
+            col._make_proxy(
+                fromclause, primary_key=primary_key, foreign_keys=foreign_keys
+            )
+            for col in self.c
         )
 
     @util.ro_non_memoized_property
@@ -895,10 +906,30 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable):
 
         """
         if "_columns" not in self.__dict__:
-            self._init_collections()
-            self._populate_column_collection()
+            self._setup_collections()
         return self._columns.as_readonly()
 
+    def _setup_collections(self) -> None:
+        assert "_columns" not in self.__dict__
+        assert "primary_key" not in self.__dict__
+        assert "foreign_keys" not in self.__dict__
+
+        _columns: ColumnCollection[Any, Any] = ColumnCollection()
+        primary_key = ColumnSet()
+        foreign_keys: Set[KeyedColumnElement[Any]] = set()
+
+        self._populate_column_collection(
+            columns=_columns,
+            primary_key=primary_key,
+            foreign_keys=foreign_keys,
+        )
+
+        # assigning these three collections separately is not itself atomic,
+        # but greatly reduces the surface for problems
+        self._columns = _columns
+        self.primary_key = primary_key  # type: ignore
+        self.foreign_keys = foreign_keys  # type: ignore
+
     @util.ro_non_memoized_property
     def entity_namespace(self) -> _EntityNamespace:
         """Return a namespace used for name-based access in SQL expressions.
@@ -925,8 +956,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable):
         iterable collection of :class:`_schema.Column` objects.
 
         """
-        self._init_collections()
-        self._populate_column_collection()
+        self._setup_collections()
         return self.primary_key
 
     @util.ro_memoized_property
@@ -943,8 +973,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable):
             :attr:`_schema.Table.foreign_key_constraints`
 
         """
-        self._init_collections()
-        self._populate_column_collection()
+        self._setup_collections()
         return self.foreign_keys
 
     def _reset_column_collection(self) -> None:
@@ -968,20 +997,16 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable):
     def _select_iterable(self) -> _SelectIterable:
         return (c for c in self.c if not _never_select_column(c))
 
-    def _init_collections(self) -> None:
-        assert "_columns" not in self.__dict__
-        assert "primary_key" not in self.__dict__
-        assert "foreign_keys" not in self.__dict__
-
-        self._columns = ColumnCollection()
-        self.primary_key = ColumnSet()  # type: ignore
-        self.foreign_keys = set()  # type: ignore
-
     @property
     def _cols_populated(self) -> bool:
         return "_columns" in self.__dict__
 
-    def _populate_column_collection(self) -> None:
+    def _populate_column_collection(
+        self,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
+    ) -> None:
         """Called on subclasses to establish the .c collection.
 
         Each implementation has a different way of establishing
@@ -1308,22 +1333,27 @@ class Join(roles.DMLTableRole, FromClause):
         return FromGrouping(self)
 
     @util.preload_module("sqlalchemy.sql.util")
-    def _populate_column_collection(self) -> None:
+    def _populate_column_collection(
+        self,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
+    ) -> None:
         sqlutil = util.preloaded.sql_util
-        columns: List[KeyedColumnElement[Any]] = [c for c in self.left.c] + [
+        _columns: List[KeyedColumnElement[Any]] = [c for c in self.left.c] + [
             c for c in self.right.c
         ]
 
-        self.primary_key.extend(  # type: ignore
+        primary_key.extend(  # type: ignore
             sqlutil.reduce_columns(
-                (c for c in columns if c.primary_key), self.onclause
+                (c for c in _columns if c.primary_key), self.onclause
             )
         )
-        self._columns._populate_separate_keys(
-            (col._tq_key_label, col) for col in columns
+        columns._populate_separate_keys(
+            (col._tq_key_label, col) for col in _columns  # type: ignore
         )
-        self.foreign_keys.update(  # type: ignore
-            itertools.chain(*[col.foreign_keys for col in columns])
+        foreign_keys.update(
+            itertools.chain(*[col.foreign_keys for col in _columns])  # type: ignore  # noqa: E501
         )
 
     def _copy_internals(
@@ -1350,7 +1380,7 @@ class Join(roles.DMLTableRole, FromClause):
         def replace(
             obj: Union[BinaryExpression[Any], ColumnClause[Any]],
             **kw: Any,
-        ) -> Optional[KeyedColumnElement[ColumnElement[Any]]]:
+        ) -> Optional[KeyedColumnElement[Any]]:
             if isinstance(obj, ColumnClause) and obj.table in new_froms:
                 newelem = new_froms[obj.table].corresponding_column(obj)
                 return newelem
@@ -1706,8 +1736,15 @@ class AliasedReturnsRows(NoInit, NamedFromClause):
         super()._refresh_for_new_column(column)
         self.element._refresh_for_new_column(column)
 
-    def _populate_column_collection(self) -> None:
-        self.element._generate_fromclause_column_proxies(self)
+    def _populate_column_collection(
+        self,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
+    ) -> None:
+        self.element._generate_fromclause_column_proxies(
+            self, columns, primary_key=primary_key, foreign_keys=foreign_keys
+        )
 
     @util.ro_non_memoized_property
     def description(self) -> str:
@@ -2147,11 +2184,26 @@ class CTE(
             self._suffixes = _suffixes
         super()._init(selectable, name=name)
 
-    def _populate_column_collection(self) -> None:
+    def _populate_column_collection(
+        self,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
+    ) -> None:
         if self._cte_alias is not None:
-            self._cte_alias._generate_fromclause_column_proxies(self)
+            self._cte_alias._generate_fromclause_column_proxies(
+                self,
+                columns,
+                primary_key=primary_key,
+                foreign_keys=foreign_keys,
+            )
         else:
-            self.element._generate_fromclause_column_proxies(self)
+            self.element._generate_fromclause_column_proxies(
+                self,
+                columns,
+                primary_key=primary_key,
+                foreign_keys=foreign_keys,
+            )
 
     def alias(self, name: Optional[str] = None, flat: bool = False) -> CTE:
         """Return an :class:`_expression.Alias` of this
@@ -2949,9 +3001,6 @@ class FromGrouping(GroupedElement, FromClause):
     def __init__(self, element: FromClause):
         self.element = coercions.expect(roles.FromClauseRole, element)
 
-    def _init_collections(self) -> None:
-        pass
-
     @util.ro_non_memoized_property
     def columns(
         self,
@@ -3112,9 +3161,6 @@ class TableClause(roles.DMLTableRole, Immutable, NamedFromClause):
     def _refresh_for_new_column(self, column: ColumnElement[Any]) -> None:
         pass
 
-    def _init_collections(self) -> None:
-        pass
-
     @util.ro_memoized_property
     def description(self) -> str:
         return self.name
@@ -3376,16 +3422,23 @@ class Values(roles.InElementRole, Generative, LateralFromClause):
         """
         return ScalarValues(self._column_args, self._data, self.literal_binds)
 
-    def _populate_column_collection(self) -> None:
+    def _populate_column_collection(
+        self,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
+    ) -> None:
         for c in self._column_args:
             if c.table is not None and c.table is not self:
-                _, c = c._make_proxy(self)
+                _, c = c._make_proxy(
+                    self, primary_key=primary_key, foreign_keys=foreign_keys
+                )
             else:
                 # if the column was used in other contexts, ensure
                 # no memoizations of other FROM clauses.
                 # see test_values.py -> test_auto_proxy_select_direct_col
                 c._reset_memoizations()
-            self._columns.add(c)
+            columns.add(c)
             c.table = self
 
     @util.ro_non_memoized_property
@@ -3501,6 +3554,9 @@ class SelectBase(
     def _generate_fromclause_column_proxies(
         self,
         subquery: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         *,
         proxy_compound_columns: Optional[
             Iterable[Sequence[ColumnElement[Any]]]
@@ -3789,13 +3845,20 @@ class SelectStatementGrouping(GroupedElement, SelectBase, Generic[_SB]):
     def _generate_fromclause_column_proxies(
         self,
         subquery: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         *,
         proxy_compound_columns: Optional[
             Iterable[Sequence[ColumnElement[Any]]]
         ] = None,
     ) -> None:
         self.element._generate_fromclause_column_proxies(
-            subquery, proxy_compound_columns=proxy_compound_columns
+            subquery,
+            columns,
+            proxy_compound_columns=proxy_compound_columns,
+            primary_key=primary_key,
+            foreign_keys=foreign_keys,
         )
 
     @util.ro_non_memoized_property
@@ -4481,6 +4544,9 @@ class CompoundSelect(HasCompileState, GenerativeSelect, ExecutableReturnsRows):
     def _generate_fromclause_column_proxies(
         self,
         subquery: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         *,
         proxy_compound_columns: Optional[
             Iterable[Sequence[ColumnElement[Any]]]
@@ -4521,7 +4587,11 @@ class CompoundSelect(HasCompileState, GenerativeSelect, ExecutableReturnsRows):
         # i haven't tried to think what it means for compound nested in
         # compound
         select_0._generate_fromclause_column_proxies(
-            subquery, proxy_compound_columns=extra_col_iterator
+            subquery,
+            columns,
+            proxy_compound_columns=extra_col_iterator,
+            primary_key=primary_key,
+            foreign_keys=foreign_keys,
         )
 
     def _refresh_for_new_column(self, column: ColumnElement[Any]) -> None:
@@ -5747,7 +5817,7 @@ class Select(
         def replace(
             obj: Union[BinaryExpression[Any], ColumnClause[Any]],
             **kw: Any,
-        ) -> Optional[KeyedColumnElement[ColumnElement[Any]]]:
+        ) -> Optional[KeyedColumnElement[Any]]:
             if isinstance(obj, ColumnClause) and obj.table in new_froms:
                 newelem = new_froms[obj.table].corresponding_column(obj)
                 return newelem
@@ -6415,6 +6485,9 @@ class Select(
     def _generate_fromclause_column_proxies(
         self,
         subquery: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         *,
         proxy_compound_columns: Optional[
             Iterable[Sequence[ColumnElement[Any]]]
@@ -6432,6 +6505,8 @@ class Select(
                     name=required_label_name,
                     name_is_truncatable=True,
                     compound_select_cols=extra_cols,
+                    primary_key=primary_key,
+                    foreign_keys=foreign_keys,
                 )
                 for (
                     (
@@ -6457,6 +6532,8 @@ class Select(
                     key=proxy_key,
                     name=required_label_name,
                     name_is_truncatable=True,
+                    primary_key=primary_key,
+                    foreign_keys=foreign_keys,
                 )
                 for (
                     required_label_name,
@@ -6468,7 +6545,7 @@ class Select(
                 if is_column_element(c)
             ]
 
-        subquery._columns._populate_separate_keys(prox)
+        columns._populate_separate_keys(prox)
 
     def _needs_parens_for_grouping(self) -> bool:
         return self._has_row_limiting_clause or bool(
@@ -7027,6 +7104,9 @@ class TextualSelect(SelectBase, ExecutableReturnsRows, Generative):
     def _generate_fromclause_column_proxies(
         self,
         fromclause: FromClause,
+        columns: ColumnCollection[str, KeyedColumnElement[Any]],
+        primary_key: ColumnSet,
+        foreign_keys: Set[KeyedColumnElement[Any]],
         *,
         proxy_compound_columns: Optional[
             Iterable[Sequence[ColumnElement[Any]]]
@@ -7036,15 +7116,25 @@ class TextualSelect(SelectBase, ExecutableReturnsRows, Generative):
             assert isinstance(fromclause, Subquery)
 
         if proxy_compound_columns:
-            fromclause._columns._populate_separate_keys(
-                c._make_proxy(fromclause, compound_select_cols=extra_cols)
+            columns._populate_separate_keys(
+                c._make_proxy(
+                    fromclause,
+                    compound_select_cols=extra_cols,
+                    primary_key=primary_key,
+                    foreign_keys=foreign_keys,
+                )
                 for c, extra_cols in zip(
                     self.column_args, proxy_compound_columns
                 )
             )
         else:
-            fromclause._columns._populate_separate_keys(
-                c._make_proxy(fromclause) for c in self.column_args
+            columns._populate_separate_keys(
+                c._make_proxy(
+                    fromclause,
+                    primary_key=primary_key,
+                    foreign_keys=foreign_keys,
+                )
+                for c in self.column_args
             )
 
     def _scalar_type(self) -> Union[TypeEngine[Any], Any]:
index c2f07444b883bbb37f1a7a4ac7870e92d4197216..fc3039fada73c66cf98acc919beff32a77955b9c 100644 (file)
@@ -1,6 +1,9 @@
 """Test various algorithmic properties of selectables."""
 
 from itertools import zip_longest
+import random
+import threading
+import time
 
 from sqlalchemy import and_
 from sqlalchemy import bindparam
@@ -4024,3 +4027,39 @@ class AliasTest(fixtures.TestBase, AssertsCompiledSQL):
         a3 = a2._clone()
         a3._copy_internals()
         is_(a1.corresponding_column(a3.c.c), a1.c.c)
+
+
+class FromClauseConcurrencyTest(fixtures.TestBase):
+    """test for issue 12302"""
+
+    @testing.requires.timing_intensive
+    def test_c_collection(self):
+        dictionary_meta = MetaData()
+        all_indexes_table = Table(
+            "all_indexes",
+            dictionary_meta,
+            *[Column(f"col{i}", Integer) for i in range(50)],
+        )
+
+        fails = 0
+
+        def use_table():
+            nonlocal fails
+            try:
+                for i in range(3):
+                    time.sleep(random.random() * 0.0001)
+                    all_indexes.c.col35
+            except:
+                fails += 1
+                raise
+
+        for j in range(1000):
+            all_indexes = all_indexes_table.alias("a_indexes")
+
+            threads = [threading.Thread(target=use_table) for i in range(5)]
+            for t in threads:
+                t.start()
+            for t in threads:
+                t.join()
+
+            assert not fails, "one or more runs failed"