--- /dev/null
+.. change::
+ :tags: bug, sql
+ :tickets: 8790
+
+ Fixed critical memory issue identified in cache key generation, where for
+ very large and complex ORM statements that make use of lots of ORM aliases
+ with subqueries, cache key generation could produce excessively large keys
+ that were orders of magnitude bigger than the statement itself. Many thanks
+ to Rollo Konig Brock for their very patient, long-term help in finally
+ identifying this issue.
@util.memoized_property
def _annotations_cache_key(self):
anon_map_ = anon_map()
+
+ return self._gen_annotations_cache_key(anon_map_)
+
+ def _gen_annotations_cache_key(self, anon_map):
return (
"_annotations",
tuple(
(
key,
- value._gen_cache_key(anon_map_, [])
+ value._gen_cache_key(anon_map, [])
if isinstance(value, HasCacheKey)
else value,
)
is_clause_element = True
is_selectable = False
-
+ _gen_static_annotations_cache_key = False
+ _is_table = False
_is_textual = False
_is_from_clause = False
_is_returns_rows = False
_traverse_internals = [
("clause", InternalTraversal.dp_clauseelement),
- ("typeclause", InternalTraversal.dp_clauseelement),
+ ("type", InternalTraversal.dp_type),
]
def __init__(self, expression, type_):
(
"type",
InternalTraversal.dp_type,
- ), # affects JSON CAST operators
+ ),
+ ]
+
+ _cache_key_traversal = [
+ ("left", InternalTraversal.dp_clauseelement),
+ ("right", InternalTraversal.dp_clauseelement),
+ ("operator", InternalTraversal.dp_operator),
+ ("modifiers", InternalTraversal.dp_plain_dict),
+ # "type" affects JSON CAST operators, so while redundant in most cases,
+ # is needed for that one
+ (
+ "type",
+ InternalTraversal.dp_type,
+ ),
]
_is_implicitly_boolean = True
("type", InternalTraversal.dp_type),
]
+ _cache_key_traversal = [
+ ("element", InternalTraversal.dp_clauseelement),
+ ]
+
def __init__(self, element):
self.element = element
self.type = getattr(element, "type", type_api.NULLTYPE)
("_element", InternalTraversal.dp_clauseelement),
]
+ _cache_key_traversal = [
+ ("name", InternalTraversal.dp_anon_name),
+ ("_element", InternalTraversal.dp_clauseelement),
+ ]
+
def __init__(self, name, element, type_=None):
"""Return a :class:`Label` object for the
given :class:`_expression.ColumnElement`.
("schema", InternalTraversal.dp_string)
]
+ _is_table = True
+
def _gen_cache_key(self, anon_map, bindparams):
if self._annotations:
return (self,) + self._annotations_cache_key
"""
+ @util.memoized_property
+ def _gen_static_annotations_cache_key(self):
+ """special attribute used by cache key gen, if true, we will
+ use a static cache key for the annotations dictionary, else we
+ will generate a new cache key for annotations each time.
+
+ Added for #8790
+
+ """
+ return self.table is not None and self.table._is_table
+
def _extra_kwargs(self, **kwargs):
self._validate_dialect_kwargs(kwargs)
else None,
)
elif meth is InternalTraversal.dp_annotations_key:
- # obj is here is the _annotations dict. however, we
- # want to use the memoized cache key version of it. for
- # Columns, this should be long lived. For select()
- # statements, not so much, but they usually won't have
- # annotations.
- result += self._annotations_cache_key
+ # obj here is the _annotations dict. Table uses
+ # a memoized version of it. however in other cases,
+ # we generate it given anon_map as we may be from a
+ # Join, Aliased, etc.
+ # see #8790
+
+ if self._gen_static_annotations_cache_key: # type: ignore # noqa: E501
+ result += self._annotations_cache_key # type: ignore # noqa: E501
+ else:
+ result += self._gen_annotations_cache_key(anon_map) # type: ignore # noqa: E501
elif (
meth is InternalTraversal.dp_clauseelement_list
or meth is InternalTraversal.dp_clauseelement_tuple
else self.__dict__[k],
)
for k in names
- if k in self.__dict__ and not k.startswith("_")
+ if k in self.__dict__
+ and not k.startswith("_")
+ and self.__dict__[k] is not None
)
def adapt(self, cls, **kw):
from .assertions import expect_raises_message
from .assertions import expect_warnings
from .assertions import in_
+from .assertions import int_within_variance
from .assertions import is_
from .assertions import is_false
from .assertions import is_instance_of
from .config import db
from .config import fixture
from .config import requirements as requires
+from .config import skip_test
from .exclusions import _is_excluded
from .exclusions import _server_version
from .exclusions import against as _against
engines.testing_reaper.assert_all_closed()
+def int_within_variance(expected, received, variance):
+ deviance = int(expected * variance)
+ assert (
+ abs(received - expected) < deviance
+ ), "Given int value %s is not within %d%% of expected value %s" % (
+ received,
+ variance * 100,
+ expected,
+ )
+
+
def eq_regex(a, b, msg=None):
assert re.match(b, a), msg or "%r !~ %r" % (a, b)
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php
+from collections import deque
import decimal
import gc
+from itertools import chain
import random
import sys
+from sys import getsizeof
import types
from . import config
event_cls._clear()
return decorate
+
+
+def total_size(o):
+ """Returns the approximate memory footprint an object and all of its
+ contents.
+
+ source: https://code.activestate.com/recipes/577504/
+
+
+ """
+
+ def dict_handler(d):
+ return chain.from_iterable(d.items())
+
+ all_handlers = {
+ tuple: iter,
+ list: iter,
+ deque: iter,
+ dict: dict_handler,
+ set: iter,
+ frozenset: iter,
+ }
+ seen = set() # track which object id's have already been seen
+ default_size = getsizeof(0) # estimate sizeof object without __sizeof__
+
+ def sizeof(o):
+ if id(o) in seen: # do not double count the same object
+ return 0
+ seen.add(id(o))
+ s = getsizeof(o, default_size)
+
+ for typ, handler in all_handlers.items():
+ if isinstance(o, typ):
+ s += sum(map(sizeof, handler(o)))
+ break
+ return s
+
+ return sizeof(o)
+
+
+def count_cache_key_tuples(tup):
+ """given a cache key tuple, counts how many instances of actual
+ tuples are found.
+
+ used to alert large jumps in cache key complexity.
+
+ """
+ stack = [tup]
+
+ sentinel = object()
+ num_elements = 0
+
+ while stack:
+ elem = stack.pop(0)
+ if elem is sentinel:
+ num_elements += 1
+ elif isinstance(elem, tuple):
+ if elem:
+ stack = list(elem) + [sentinel] + stack
+ return num_elements
from .compat import pickle
from .compat import print_
from .compat import py2k
+from .compat import py310
from .compat import py311
+from .compat import py312
from .compat import py37
from .compat import py38
from .compat import py39
import platform
import sys
+py312 = sys.version_info >= (3, 12)
py311 = sys.version_info >= (3, 11)
+py310 = sys.version_info >= (3, 10)
py39 = sys.version_info >= (3, 9)
py38 = sys.version_info >= (3, 8)
py37 = sys.version_info >= (3, 7)
from sqlalchemy import literal_column
from sqlalchemy import null
from sqlalchemy import select
+from sqlalchemy import String
from sqlalchemy import Table
from sqlalchemy import testing
from sqlalchemy import text
from sqlalchemy import true
from sqlalchemy import update
+from sqlalchemy import util
+from sqlalchemy.ext.declarative import ConcreteBase
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Bundle
from sqlalchemy.orm import defaultload
from sqlalchemy.testing import AssertsCompiledSQL
from sqlalchemy.testing import eq_
from sqlalchemy.testing import fixtures
+from sqlalchemy.testing import int_within_variance
from sqlalchemy.testing import ne_
+from sqlalchemy.testing.fixtures import DeclarativeMappedTest
from sqlalchemy.testing.fixtures import fixture_session
+from sqlalchemy.testing.util import count_cache_key_tuples
+from sqlalchemy.testing.util import total_size
from test.orm import _fixtures
from .inheritance import _poly_fixtures
+from .test_events import _RemoveListeners
from .test_query import QueryTest
)
eq_(stmt._generate_cache_key(), stmt2._generate_cache_key())
+
+
+class EmbeddedSubqTest(_RemoveListeners, DeclarativeMappedTest):
+ """test #8790.
+
+ it's expected that cache key structures will change; this test is here
+ testing something fairly similar to the issue we had (though at a vastly
+ smaller scale), so we mostly want to look for surprise jumps here.
+
+ """
+
+ @classmethod
+ def setup_classes(cls):
+ Base = cls.DeclarativeBasic
+
+ class Employee(ConcreteBase, Base):
+ __tablename__ = "employee"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "employee",
+ "concrete": True,
+ }
+
+ class Manager(Employee):
+ __tablename__ = "manager"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+ manager_data = Column(String(40))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "manager",
+ "concrete": True,
+ }
+
+ class Engineer(Employee):
+ __tablename__ = "engineer"
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+ engineer_info = Column(String(40))
+
+ __mapper_args__ = {
+ "polymorphic_identity": "engineer",
+ "concrete": True,
+ }
+
+ @testing.combinations("tuples", "memory", argnames="assert_on")
+ def test_cache_key_gen(self, assert_on):
+ Employee = self.classes.Employee
+
+ e1 = aliased(Employee)
+ e2 = aliased(Employee)
+
+ subq = select(e1).union_all(select(e2)).subquery()
+
+ anno = aliased(Employee, subq)
+
+ stmt = select(anno)
+
+ ck = stmt._generate_cache_key()
+
+ if assert_on == "tuples":
+ # before the fix for #8790 this was 700
+ int_within_variance(142, count_cache_key_tuples(ck), 0.05)
+
+ elif assert_on == "memory":
+ # before the fix for #8790 this was 55154
+
+ if util.py312:
+ testing.skip_test("python platform not available")
+ elif util.py311:
+ int_within_variance(39996, total_size(ck), 0.05)
+ elif util.py310:
+ int_within_variance(29796, total_size(ck), 0.05)
+ else:
+ testing.skip_test("python platform not available")