From: Mike Bayer Date: Sat, 1 Feb 2025 19:39:57 +0000 (-0500) Subject: reorganize column collection init to be local X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3cd9a5b42f850618141ec459cffe30d0ade0f191;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git reorganize column collection init to be local Reorganized the internals by which the `.c` collection on a :class:`.FromClause` gets generated so that it is resilient against the collection being accessed in concurrent fashion. An example is creating a :class:`.Alias` or :class:`.Subquery` and accessing it as a module level variable. This impacts the Oracle dialect which uses such module-level global alias objects but is of general use as well. Fixes: #12302 Change-Id: I30cb07c286affce24e2d85e49f9df5b787438d86 --- diff --git a/doc/build/changelog/unreleased_20/12302.rst b/doc/build/changelog/unreleased_20/12302.rst new file mode 100644 index 0000000000..38d4544898 --- /dev/null +++ b/doc/build/changelog/unreleased_20/12302.rst @@ -0,0 +1,10 @@ +.. change:: + :tags: bug, sql + :tickets: 12302 + + Reorganized the internals by which the `.c` collection on a + :class:`.FromClause` gets generated so that it is resilient against the + collection being accessed in concurrent fashion. An example is creating a + :class:`.Alias` or :class:`.Subquery` and accessing it as a module level + variable. This impacts the Oracle dialect which uses such module-level + global alias objects but is of general use as well. diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index d7496cd367..cd81bc623f 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -23,6 +23,7 @@ from typing import NoReturn from typing import Optional from typing import overload from typing import Sequence +from typing import Set from typing import Tuple from typing import Type from typing import TYPE_CHECKING @@ -41,6 +42,7 @@ from .base import _from_objects from .base import _generative from .base import _select_iterables from .base import ColumnCollection +from .base import ColumnSet from .base import CompileState from .base import DialectKWArgs from .base import Executable @@ -422,10 +424,16 @@ class UpdateBase( is_dml = True def _generate_fromclause_column_proxies( - self, fromclause: FromClause + self, + fromclause: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], ) -> None: - fromclause._columns._populate_separate_keys( - col._make_proxy(fromclause) + columns._populate_separate_keys( + col._make_proxy( + fromclause, primary_key=primary_key, foreign_keys=foreign_keys + ) for col in self._all_selected_columns if is_column_element(col) ) diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 41630261ed..57a3187015 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -90,6 +90,7 @@ if typing.TYPE_CHECKING: from ._typing import _InfoType from ._typing import _PropagateAttrsType from ._typing import _TypeEngineArgument + from .base import ColumnSet from .cache_key import _CacheKeyTraversalType from .cache_key import CacheKey from .compiler import Compiled @@ -1643,6 +1644,8 @@ class ColumnElement( self, selectable: FromClause, *, + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], name: Optional[str] = None, key: Optional[str] = None, name_is_truncatable: bool = False, @@ -4586,7 +4589,7 @@ class NamedColumn(KeyedColumnElement[_T]): return self.name @HasMemoized.memoized_attribute - def _tq_key_label(self): + def _tq_key_label(self) -> Optional[str]: """table qualified label based on column key. for table-bound columns this is _; @@ -4644,6 +4647,8 @@ class NamedColumn(KeyedColumnElement[_T]): self, selectable: FromClause, *, + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], name: Optional[str] = None, key: Optional[str] = None, name_is_truncatable: bool = False, @@ -4824,6 +4829,8 @@ class Label(roles.LabeledColumnExprRole[_T], NamedColumn[_T]): self, selectable: FromClause, *, + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], name: Optional[str] = None, compound_select_cols: Optional[Sequence[ColumnElement[Any]]] = None, **kw: Any, @@ -4836,6 +4843,8 @@ class Label(roles.LabeledColumnExprRole[_T], NamedColumn[_T]): disallow_is_literal=True, name_is_truncatable=isinstance(name, _truncated_label), compound_select_cols=compound_select_cols, + primary_key=primary_key, + foreign_keys=foreign_keys, ) # there was a note here to remove this assertion, which was here @@ -5072,6 +5081,8 @@ class ColumnClause( self, selectable: FromClause, *, + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], name: Optional[str] = None, key: Optional[str] = None, name_is_truncatable: bool = False, diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index f1f93a9554..b86e5b8b09 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -96,9 +96,11 @@ if typing.TYPE_CHECKING: from ._typing import _InfoType from ._typing import _TextCoercedExpressionArgument from ._typing import _TypeEngineArgument + from .base import ColumnSet from .base import ReadOnlyColumnCollection from .compiler import DDLCompiler from .elements import BindParameter + from .elements import KeyedColumnElement from .functions import Function from .type_api import TypeEngine from .visitors import anon_map @@ -2619,6 +2621,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): def _make_proxy( self, selectable: FromClause, + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], name: Optional[str] = None, key: Optional[str] = None, name_is_truncatable: bool = False, @@ -2688,10 +2692,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): c._propagate_attrs = selectable._propagate_attrs if selectable._is_clone_of is not None: c._is_clone_of = selectable._is_clone_of.columns.get(c.key) + if self.primary_key: - selectable.primary_key.add(c) # type: ignore + primary_key.add(c) + if fk: - selectable.foreign_keys.update(fk) # type: ignore + foreign_keys.update(fk) # type: ignore + return c.key, c diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index e12a44179e..cfe491e624 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -245,7 +245,11 @@ class ReturnsRows(roles.ReturnsRowsRole, DQLDMLClauseElement): raise NotImplementedError() def _generate_fromclause_column_proxies( - self, fromclause: FromClause + self, + fromclause: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], ) -> None: """Populate columns into an :class:`.AliasedReturnsRows` object.""" @@ -838,10 +842,17 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): return getattr(self, "name", self.__class__.__name__ + " object") def _generate_fromclause_column_proxies( - self, fromclause: FromClause + self, + fromclause: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], ) -> None: - fromclause._columns._populate_separate_keys( - col._make_proxy(fromclause) for col in self.c + columns._populate_separate_keys( + col._make_proxy( + fromclause, primary_key=primary_key, foreign_keys=foreign_keys + ) + for col in self.c ) @util.ro_non_memoized_property @@ -895,10 +906,30 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): """ if "_columns" not in self.__dict__: - self._init_collections() - self._populate_column_collection() + self._setup_collections() return self._columns.as_readonly() + def _setup_collections(self) -> None: + assert "_columns" not in self.__dict__ + assert "primary_key" not in self.__dict__ + assert "foreign_keys" not in self.__dict__ + + _columns: ColumnCollection[Any, Any] = ColumnCollection() + primary_key = ColumnSet() + foreign_keys: Set[KeyedColumnElement[Any]] = set() + + self._populate_column_collection( + columns=_columns, + primary_key=primary_key, + foreign_keys=foreign_keys, + ) + + # assigning these three collections separately is not itself atomic, + # but greatly reduces the surface for problems + self._columns = _columns + self.primary_key = primary_key # type: ignore + self.foreign_keys = foreign_keys # type: ignore + @util.ro_non_memoized_property def entity_namespace(self) -> _EntityNamespace: """Return a namespace used for name-based access in SQL expressions. @@ -925,8 +956,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): iterable collection of :class:`_schema.Column` objects. """ - self._init_collections() - self._populate_column_collection() + self._setup_collections() return self.primary_key @util.ro_memoized_property @@ -943,8 +973,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): :attr:`_schema.Table.foreign_key_constraints` """ - self._init_collections() - self._populate_column_collection() + self._setup_collections() return self.foreign_keys def _reset_column_collection(self) -> None: @@ -968,20 +997,16 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): def _select_iterable(self) -> _SelectIterable: return (c for c in self.c if not _never_select_column(c)) - def _init_collections(self) -> None: - assert "_columns" not in self.__dict__ - assert "primary_key" not in self.__dict__ - assert "foreign_keys" not in self.__dict__ - - self._columns = ColumnCollection() - self.primary_key = ColumnSet() # type: ignore - self.foreign_keys = set() # type: ignore - @property def _cols_populated(self) -> bool: return "_columns" in self.__dict__ - def _populate_column_collection(self) -> None: + def _populate_column_collection( + self, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], + ) -> None: """Called on subclasses to establish the .c collection. Each implementation has a different way of establishing @@ -1308,22 +1333,27 @@ class Join(roles.DMLTableRole, FromClause): return FromGrouping(self) @util.preload_module("sqlalchemy.sql.util") - def _populate_column_collection(self) -> None: + def _populate_column_collection( + self, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], + ) -> None: sqlutil = util.preloaded.sql_util - columns: List[KeyedColumnElement[Any]] = [c for c in self.left.c] + [ + _columns: List[KeyedColumnElement[Any]] = [c for c in self.left.c] + [ c for c in self.right.c ] - self.primary_key.extend( # type: ignore + primary_key.extend( # type: ignore sqlutil.reduce_columns( - (c for c in columns if c.primary_key), self.onclause + (c for c in _columns if c.primary_key), self.onclause ) ) - self._columns._populate_separate_keys( - (col._tq_key_label, col) for col in columns + columns._populate_separate_keys( + (col._tq_key_label, col) for col in _columns # type: ignore ) - self.foreign_keys.update( # type: ignore - itertools.chain(*[col.foreign_keys for col in columns]) + foreign_keys.update( + itertools.chain(*[col.foreign_keys for col in _columns]) # type: ignore # noqa: E501 ) def _copy_internals( @@ -1350,7 +1380,7 @@ class Join(roles.DMLTableRole, FromClause): def replace( obj: Union[BinaryExpression[Any], ColumnClause[Any]], **kw: Any, - ) -> Optional[KeyedColumnElement[ColumnElement[Any]]]: + ) -> Optional[KeyedColumnElement[Any]]: if isinstance(obj, ColumnClause) and obj.table in new_froms: newelem = new_froms[obj.table].corresponding_column(obj) return newelem @@ -1706,8 +1736,15 @@ class AliasedReturnsRows(NoInit, NamedFromClause): super()._refresh_for_new_column(column) self.element._refresh_for_new_column(column) - def _populate_column_collection(self) -> None: - self.element._generate_fromclause_column_proxies(self) + def _populate_column_collection( + self, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], + ) -> None: + self.element._generate_fromclause_column_proxies( + self, columns, primary_key=primary_key, foreign_keys=foreign_keys + ) @util.ro_non_memoized_property def description(self) -> str: @@ -2147,11 +2184,26 @@ class CTE( self._suffixes = _suffixes super()._init(selectable, name=name) - def _populate_column_collection(self) -> None: + def _populate_column_collection( + self, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], + ) -> None: if self._cte_alias is not None: - self._cte_alias._generate_fromclause_column_proxies(self) + self._cte_alias._generate_fromclause_column_proxies( + self, + columns, + primary_key=primary_key, + foreign_keys=foreign_keys, + ) else: - self.element._generate_fromclause_column_proxies(self) + self.element._generate_fromclause_column_proxies( + self, + columns, + primary_key=primary_key, + foreign_keys=foreign_keys, + ) def alias(self, name: Optional[str] = None, flat: bool = False) -> CTE: """Return an :class:`_expression.Alias` of this @@ -2949,9 +3001,6 @@ class FromGrouping(GroupedElement, FromClause): def __init__(self, element: FromClause): self.element = coercions.expect(roles.FromClauseRole, element) - def _init_collections(self) -> None: - pass - @util.ro_non_memoized_property def columns( self, @@ -3112,9 +3161,6 @@ class TableClause(roles.DMLTableRole, Immutable, NamedFromClause): def _refresh_for_new_column(self, column: ColumnElement[Any]) -> None: pass - def _init_collections(self) -> None: - pass - @util.ro_memoized_property def description(self) -> str: return self.name @@ -3376,16 +3422,23 @@ class Values(roles.InElementRole, Generative, LateralFromClause): """ return ScalarValues(self._column_args, self._data, self.literal_binds) - def _populate_column_collection(self) -> None: + def _populate_column_collection( + self, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], + ) -> None: for c in self._column_args: if c.table is not None and c.table is not self: - _, c = c._make_proxy(self) + _, c = c._make_proxy( + self, primary_key=primary_key, foreign_keys=foreign_keys + ) else: # if the column was used in other contexts, ensure # no memoizations of other FROM clauses. # see test_values.py -> test_auto_proxy_select_direct_col c._reset_memoizations() - self._columns.add(c) + columns.add(c) c.table = self @util.ro_non_memoized_property @@ -3501,6 +3554,9 @@ class SelectBase( def _generate_fromclause_column_proxies( self, subquery: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], *, proxy_compound_columns: Optional[ Iterable[Sequence[ColumnElement[Any]]] @@ -3789,13 +3845,20 @@ class SelectStatementGrouping(GroupedElement, SelectBase, Generic[_SB]): def _generate_fromclause_column_proxies( self, subquery: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], *, proxy_compound_columns: Optional[ Iterable[Sequence[ColumnElement[Any]]] ] = None, ) -> None: self.element._generate_fromclause_column_proxies( - subquery, proxy_compound_columns=proxy_compound_columns + subquery, + columns, + proxy_compound_columns=proxy_compound_columns, + primary_key=primary_key, + foreign_keys=foreign_keys, ) @util.ro_non_memoized_property @@ -4481,6 +4544,9 @@ class CompoundSelect(HasCompileState, GenerativeSelect, ExecutableReturnsRows): def _generate_fromclause_column_proxies( self, subquery: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], *, proxy_compound_columns: Optional[ Iterable[Sequence[ColumnElement[Any]]] @@ -4521,7 +4587,11 @@ class CompoundSelect(HasCompileState, GenerativeSelect, ExecutableReturnsRows): # i haven't tried to think what it means for compound nested in # compound select_0._generate_fromclause_column_proxies( - subquery, proxy_compound_columns=extra_col_iterator + subquery, + columns, + proxy_compound_columns=extra_col_iterator, + primary_key=primary_key, + foreign_keys=foreign_keys, ) def _refresh_for_new_column(self, column: ColumnElement[Any]) -> None: @@ -5747,7 +5817,7 @@ class Select( def replace( obj: Union[BinaryExpression[Any], ColumnClause[Any]], **kw: Any, - ) -> Optional[KeyedColumnElement[ColumnElement[Any]]]: + ) -> Optional[KeyedColumnElement[Any]]: if isinstance(obj, ColumnClause) and obj.table in new_froms: newelem = new_froms[obj.table].corresponding_column(obj) return newelem @@ -6415,6 +6485,9 @@ class Select( def _generate_fromclause_column_proxies( self, subquery: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], *, proxy_compound_columns: Optional[ Iterable[Sequence[ColumnElement[Any]]] @@ -6432,6 +6505,8 @@ class Select( name=required_label_name, name_is_truncatable=True, compound_select_cols=extra_cols, + primary_key=primary_key, + foreign_keys=foreign_keys, ) for ( ( @@ -6457,6 +6532,8 @@ class Select( key=proxy_key, name=required_label_name, name_is_truncatable=True, + primary_key=primary_key, + foreign_keys=foreign_keys, ) for ( required_label_name, @@ -6468,7 +6545,7 @@ class Select( if is_column_element(c) ] - subquery._columns._populate_separate_keys(prox) + columns._populate_separate_keys(prox) def _needs_parens_for_grouping(self) -> bool: return self._has_row_limiting_clause or bool( @@ -7027,6 +7104,9 @@ class TextualSelect(SelectBase, ExecutableReturnsRows, Generative): def _generate_fromclause_column_proxies( self, fromclause: FromClause, + columns: ColumnCollection[str, KeyedColumnElement[Any]], + primary_key: ColumnSet, + foreign_keys: Set[KeyedColumnElement[Any]], *, proxy_compound_columns: Optional[ Iterable[Sequence[ColumnElement[Any]]] @@ -7036,15 +7116,25 @@ class TextualSelect(SelectBase, ExecutableReturnsRows, Generative): assert isinstance(fromclause, Subquery) if proxy_compound_columns: - fromclause._columns._populate_separate_keys( - c._make_proxy(fromclause, compound_select_cols=extra_cols) + columns._populate_separate_keys( + c._make_proxy( + fromclause, + compound_select_cols=extra_cols, + primary_key=primary_key, + foreign_keys=foreign_keys, + ) for c, extra_cols in zip( self.column_args, proxy_compound_columns ) ) else: - fromclause._columns._populate_separate_keys( - c._make_proxy(fromclause) for c in self.column_args + columns._populate_separate_keys( + c._make_proxy( + fromclause, + primary_key=primary_key, + foreign_keys=foreign_keys, + ) + for c in self.column_args ) def _scalar_type(self) -> Union[TypeEngine[Any], Any]: diff --git a/test/sql/test_selectable.py b/test/sql/test_selectable.py index c2f07444b8..fc3039fada 100644 --- a/test/sql/test_selectable.py +++ b/test/sql/test_selectable.py @@ -1,6 +1,9 @@ """Test various algorithmic properties of selectables.""" from itertools import zip_longest +import random +import threading +import time from sqlalchemy import and_ from sqlalchemy import bindparam @@ -4024,3 +4027,39 @@ class AliasTest(fixtures.TestBase, AssertsCompiledSQL): a3 = a2._clone() a3._copy_internals() is_(a1.corresponding_column(a3.c.c), a1.c.c) + + +class FromClauseConcurrencyTest(fixtures.TestBase): + """test for issue 12302""" + + @testing.requires.timing_intensive + def test_c_collection(self): + dictionary_meta = MetaData() + all_indexes_table = Table( + "all_indexes", + dictionary_meta, + *[Column(f"col{i}", Integer) for i in range(50)], + ) + + fails = 0 + + def use_table(): + nonlocal fails + try: + for i in range(3): + time.sleep(random.random() * 0.0001) + all_indexes.c.col35 + except: + fails += 1 + raise + + for j in range(1000): + all_indexes = all_indexes_table.alias("a_indexes") + + threads = [threading.Thread(target=use_table) for i in range(5)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not fails, "one or more runs failed"