--- /dev/null
+.. change::
+ :tags: bug, sql
+ :tickets: 8790
+
+ Fixed critical memory issue identified in cache key generation, where for
+ very large and complex ORM statements that make use of lots of ORM aliases
+ with subqueries, cache key generation could produce excessively large keys
+ that were orders of magnitude bigger than the statement itself. Much thanks
+ to Rollo Konig Brock for their very patient, long term help in finally
+ identifying this issue.
@util.memoized_property
def _annotations_cache_key(self) -> Tuple[Any, ...]:
anon_map_ = anon_map()
+
+ return self._gen_annotations_cache_key(anon_map_)
+
+ def _gen_annotations_cache_key(
+ self, anon_map: anon_map
+ ) -> Tuple[Any, ...]:
return (
"_annotations",
tuple(
(
key,
- value._gen_cache_key(anon_map_, [])
+ value._gen_cache_key(anon_map, [])
if isinstance(value, HasCacheKey)
else value,
)
else None,
)
elif meth is InternalTraversal.dp_annotations_key:
- # obj is here is the _annotations dict. however, we
- # want to use the memoized cache key version of it. for
- # Columns, this should be long lived. For select()
- # statements, not so much, but they usually won't have
- # annotations.
- result += self._annotations_cache_key # type: ignore
+ # obj is here is the _annotations dict. Table uses
+ # a memoized version of it. however in other cases,
+ # we generate it given anon_map as we may be from a
+ # Join, Aliased, etc.
+ # see #8790
+
+ if self._gen_static_annotations_cache_key: # type: ignore # noqa: E501
+ result += self._annotations_cache_key # type: ignore # noqa: E501
+ else:
+ result += self._gen_annotations_cache_key(anon_map) # type: ignore # noqa: E501
+
elif (
meth is InternalTraversal.dp_clauseelement_list
or meth is InternalTraversal.dp_clauseelement_tuple
_is_column_element = False
_is_keyed_column_element = False
_is_table = False
+ _gen_static_annotations_cache_key = False
_is_textual = False
_is_from_clause = False
_is_returns_rows = False
_traverse_internals: _TraverseInternalsType = [
("clause", InternalTraversal.dp_clauseelement),
- ("typeclause", InternalTraversal.dp_clauseelement),
+ ("type", InternalTraversal.dp_type),
]
clause: ColumnElement[Any]
(
"type",
InternalTraversal.dp_type,
- ), # affects JSON CAST operators
+ ),
+ ]
+
+ _cache_key_traversal = [
+ ("left", InternalTraversal.dp_clauseelement),
+ ("right", InternalTraversal.dp_clauseelement),
+ ("operator", InternalTraversal.dp_operator),
+ ("modifiers", InternalTraversal.dp_plain_dict),
+ # "type" affects JSON CAST operators, so while redundant in most cases,
+ # is needed for that one
+ (
+ "type",
+ InternalTraversal.dp_type,
+ ),
]
_is_implicitly_boolean = True
("type", InternalTraversal.dp_type),
]
+ _cache_key_traversal = [
+ ("element", InternalTraversal.dp_clauseelement),
+ ]
+
element: Union[TextClause, ClauseList, ColumnElement[_T]]
def __init__(
("_element", InternalTraversal.dp_clauseelement),
]
+ _cache_key_traversal = [
+ ("name", InternalTraversal.dp_anon_name),
+ ("_element", InternalTraversal.dp_clauseelement),
+ ]
+
_element: ColumnElement[_T]
name: str
identity: Optional[Identity]
+ @util.memoized_property
+ def _gen_static_annotations_cache_key(self) -> bool: # type: ignore
+ """special attribute used by cache key gen, if true, we will
+ use a static cache key for the annotations dictionary, else we
+ will generate a new cache key for annotations each time.
+
+ Added for #8790
+
+ """
+ return self.table is not None and self.table._is_table
+
def _extra_kwargs(self, **kwargs: Any) -> None:
self._validate_dialect_kwargs(kwargs)
else self.__dict__[k],
)
for k in names
- if k in self.__dict__ and not k.startswith("_")
+ if k in self.__dict__
+ and not k.startswith("_")
+ and self.__dict__[k] is not None
)
@overload
from .assertions import expect_raises_message
from .assertions import expect_warnings
from .assertions import in_
+from .assertions import int_within_variance
from .assertions import is_
from .assertions import is_false
from .assertions import is_instance_of
engines.testing_reaper.assert_all_closed()
+def int_within_variance(expected, received, variance):
+ deviance = int(expected * variance)
+ assert (
+ abs(received - expected) < deviance
+ ), "Given int value %s is not within %d%% of expected value %s" % (
+ received,
+ variance * 100,
+ expected,
+ )
+
+
def eq_regex(a, b, msg=None):
    """Assert that the string ``a`` matches the regular expression ``b``.

    Matching is done with ``re.match()``, so the pattern is applied at
    the start of ``a``.  ``msg``, when given and non-empty, replaces the
    default failure message.

    """
    matched = re.match(b, a)
    assert matched, msg or "%r !~ %r" % (a, b)
from __future__ import annotations
+from collections import deque
import decimal
import gc
+from itertools import chain
import random
import sys
+from sys import getsizeof
import types
from . import config
event_cls._clear()
return decorate
+
+
+def total_size(o):
+ """Returns the approximate memory footprint an object and all of its
+ contents.
+
+ source: https://code.activestate.com/recipes/577504/
+
+
+ """
+
+ def dict_handler(d):
+ return chain.from_iterable(d.items())
+
+ all_handlers = {
+ tuple: iter,
+ list: iter,
+ deque: iter,
+ dict: dict_handler,
+ set: iter,
+ frozenset: iter,
+ }
+ seen = set() # track which object id's have already been seen
+ default_size = getsizeof(0) # estimate sizeof object without __sizeof__
+
+ def sizeof(o):
+ if id(o) in seen: # do not double count the same object
+ return 0
+ seen.add(id(o))
+ s = getsizeof(o, default_size)
+
+ for typ, handler in all_handlers.items():
+ if isinstance(o, typ):
+ s += sum(map(sizeof, handler(o)))
+ break
+ return s
+
+ return sizeof(o)
+
+
+def count_cache_key_tuples(tup):
+ """given a cache key tuple, counts how many instances of actual
+ tuples are found.
+
+ used to alert large jumps in cache key complexity.
+
+ """
+ stack = [tup]
+
+ sentinel = object()
+ num_elements = 0
+
+ while stack:
+ elem = stack.pop(0)
+ if elem is sentinel:
+ num_elements += 1
+ elif isinstance(elem, tuple):
+ if elem:
+ stack = list(elem) + [sentinel] + stack
+ return num_elements
from .compat import osx as osx
from .compat import py310 as py310
from .compat import py311 as py311
+from .compat import py312 as py312
from .compat import py38 as py38
from .compat import py39 as py39
from .compat import pypy as pypy
from typing import Type
+py312 = sys.version_info >= (3, 12)
py311 = sys.version_info >= (3, 11)
py310 = sys.version_info >= (3, 10)
py39 = sys.version_info >= (3, 9)
from sqlalchemy import literal_column
from sqlalchemy import null
from sqlalchemy import select
+from sqlalchemy import String
from sqlalchemy import Table
from sqlalchemy import testing
from sqlalchemy import text
from sqlalchemy import true
from sqlalchemy import update
+from sqlalchemy import util
+from sqlalchemy.ext.declarative import ConcreteBase
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Bundle
from sqlalchemy.orm import defaultload
from sqlalchemy.testing import AssertsCompiledSQL
from sqlalchemy.testing import eq_
from sqlalchemy.testing import fixtures
+from sqlalchemy.testing import int_within_variance
from sqlalchemy.testing import ne_
+from sqlalchemy.testing.fixtures import DeclarativeMappedTest
from sqlalchemy.testing.fixtures import fixture_session
+from sqlalchemy.testing.util import count_cache_key_tuples
+from sqlalchemy.testing.util import total_size
from test.orm import _fixtures
from .inheritance import _poly_fixtures
+from .test_events import _RemoveListeners
from .test_query import QueryTest
)
eq_(stmt._generate_cache_key(), stmt2._generate_cache_key())
+
+
+class EmbeddedSubqTest(_RemoveListeners, DeclarativeMappedTest):
+ """test #8790.
+
+ it's expected that cache key structures will change, this test is here
+ testing something fairly similar to the issue we had (though vastly
+ smaller scale) so we mostly want to look for surprise jumps here.
+
+ """
+
+ # declares three mapped classes, Employee, Manager and Engineer; each
+ # has its own table and sets "concrete": True in its mapper args
+ @classmethod
+ def setup_classes(cls):
+ Base = cls.DeclarativeBasic
+
+ class Employee(ConcreteBase, Base):
+ __tablename__ = "employee"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "employee",
+ "concrete": True,
+ }
+
+ class Manager(Employee):
+ __tablename__ = "manager"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+ manager_data = Column(String(40))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "manager",
+ "concrete": True,
+ }
+
+ class Engineer(Employee):
+ __tablename__ = "engineer"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+ engineer_info = Column(String(40))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "engineer",
+ "concrete": True,
+ }
+
+ # runs once per ``assert_on`` value: "tuples" checks how many tuples the
+ # generated cache key contains, "memory" checks its approximate size
+ @testing.combinations("tuples", "memory", argnames="assert_on")
+ def test_cache_key_gen(self, assert_on):
+ Employee = self.classes.Employee
+
+ e1 = aliased(Employee)
+ e2 = aliased(Employee)
+
+ # a UNION ALL of two SELECTs, each against its own Employee alias,
+ # wrapped as a subquery
+ subq = select(e1).union_all(select(e2)).subquery()
+
+ # Employee aliased against that subquery
+ anno = aliased(Employee, subq)
+
+ stmt = select(anno)
+
+ ck = stmt._generate_cache_key()
+
+ # the expected values below are compared with a variance of 0.05 (5%)
+ if assert_on == "tuples":
+ # before the fix for #8790 this was 700
+ int_within_variance(142, count_cache_key_tuples(ck), 0.05)
+
+ elif assert_on == "memory":
+ # before the fix for #8790 this was 55154
+
+ # expected sizes are recorded per Python version. py312 is tested
+ # first because the py311 / py310 flags are ">=" version checks
+ # and so are also true on 3.12; no expected size is recorded for
+ # 3.12+ or for versions below 3.10, so those are skipped
+ if util.py312:
+ testing.skip_test("python platform not available")
+ elif util.py311:
+ int_within_variance(39996, total_size(ck), 0.05)
+ elif util.py310:
+ int_within_variance(29796, total_size(ck), 0.05)
+ else:
+ testing.skip_test("python platform not available")