From cef4e5ff38dc7d2200800837c110ab6beec10d8a Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Wed, 12 Apr 2017 11:37:19 -0400 Subject: [PATCH] Warn on _compiled_cache growth Added warnings to the LRU "compiled cache" used by the :class:`.Mapper` (and ultimately will be for other ORM-based LRU caches) such that when the cache starts hitting its size limits, the application will emit a warning that this is a performance-degrading situation that may require attention. The LRU caches can reach their size limits primarily if an application is making use of an unbounded number of :class:`.Engine` objects, which is an antipattern. Otherwise, this may suggest an issue that should be brought to the SQLAlchemy developer's attention. Additionally, adjusted the test_memusage algorithm again as the previous one could still allow a growing memory size to be missed. Change-Id: I020d1ceafb7a08f6addfa990a1e7acd09f933240 --- doc/build/changelog/changelog_12.rst | 13 +++++++ doc/build/faq/performance.rst | 51 ++++++++++++++++++++++++++ lib/sqlalchemy/orm/mapper.py | 15 +++++++- lib/sqlalchemy/util/_collections.py | 13 ++++++- test/aaa_profiling/test_memusage.py | 54 +++++++++++++--------------- test/orm/test_mapper.py | 25 +++++++++++++ 6 files changed, 140 insertions(+), 31 deletions(-) diff --git a/doc/build/changelog/changelog_12.rst b/doc/build/changelog/changelog_12.rst index 7c04210192..3b611d1476 100644 --- a/doc/build/changelog/changelog_12.rst +++ b/doc/build/changelog/changelog_12.rst @@ -13,6 +13,19 @@ .. changelog:: :version: 1.2.0b1 + .. change:: + :tags: bug, orm + + Added warnings to the LRU "compiled cache" used by the :class:`.Mapper` + (and ultimately will be for other ORM-based LRU caches) such that + when the cache starts hitting its size limits, the application will + emit a warning that this is a performance-degrading situation that + may require attention. 
The LRU caches can reach their size limits + primarily if an application is making use of an unbounded number + of :class:`.Engine` objects, which is an antipattern. Otherwise, + this may suggest an issue that should be brought to the SQLAlchemy + developer's attention. + .. change:: 3964 :tags: bug, postgresql :tickets: 3964 diff --git a/doc/build/faq/performance.rst b/doc/build/faq/performance.rst index 3b76c83264..61c0d6ea22 100644 --- a/doc/build/faq/performance.rst +++ b/doc/build/faq/performance.rst @@ -459,3 +459,54 @@ Script:: test_sqlalchemy_core(100000) test_sqlite3(100000) + +.. _faq_compiled_cache_threshold: + +How do I deal with "compiled statement cache reaching its size threshold"? +----------------------------------------------------------------------------- + +Some parts of the ORM make use of a least-recently-used (LRU) cache in order +to cache generated SQL statements for fast reuse. More generally, these +areas are making use of the "compiled cache" feature of :class:`.Connection` +which can be invoked using :meth:`.Connection.execution_options`. + +The following two points summarize what should be done if this warning +is occurring: + +* Ensure the application **does not create an arbitrary number of + Engine objects**, that is, it does not call :func:`.create_engine` on + a per-operation basis. An application should have only **one Engine per + database URL**. + +* If the application does not have an unbounded number of engines, + **report the warning to the SQLAlchemy developers**. Guidelines on + mailing list support are at: http://www.sqlalchemy.org/support.html#mailinglist + +The cache works by creating a cache key that can uniquely identify the +combination of a specific **dialect** and a specific **Core SQL expression**. +A cache key that already exists in the cache will reuse the already-compiled +SQL expression. A cache key that doesn't exist will create a *new* entry +in the dictionary. 
When this dictionary reaches the configured threshold, +the LRU cache will *trim the size* of the cache back down by a certain percentage. + +It is important to understand that from the above, **a compiled cache that +is reaching its size limit will perform badly.** This is because not only +are the SQL statements being freshly compiled into strings rather than using +the cached version, but the LRU cache is also spending lots of time trimming +its size back down. + +The primary reason the compiled caches can grow is due to the **antipattern of +using a new Engine for every operation**. Because the compiled cache +must key on the :class:`.Dialect` associated with an :class:`.Engine`, +calling :func:`.create_engine` many times in an application will establish +new cache entries for every engine. Because the cache is self-trimming, +the application won't grow in size unbounded; however, the application should +be repaired to not rely on an unbounded number of :class:`.Engine` +objects. + +Outside of this pattern, the default size limits set for these caches within +the ORM should not generally require adjustment, and the LRU boundaries +should never be reached. If this warning is occurring and the application +is not generating hundreds of engines, please report the issue to the +SQLAlchemy developers on the mailing list; see the guidelines +at http://www.sqlalchemy.org/support.html#mailinglist. 
\ No newline at end of file diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 27456c35b8..28a5c5b9bf 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -2718,7 +2718,20 @@ class Mapper(InspectionAttr): @_memoized_configured_property def _compiled_cache(self): - return util.LRUCache(self._compiled_cache_size) + return util.LRUCache(self._compiled_cache_size, + size_alert=self._alert_lru_cache_limit) + + def _alert_lru_cache_limit(self): + util.warn( + "Compiled statement cache for mapper %s is " + "reaching its size threshold of %d, based on _compiled_cache_size " + "of %d. Please refer to " + "http://docs.sqlalchemy.org/en/latest/faq/performance.html" + "#faq_compiled_cache_threshold" + " for best practices." % + (self, + self._compiled_cache.size_threshold, + self._compiled_cache_size)) @_memoized_configured_property def _sorted_tables(self): diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py index bdd40fc794..d94af5f62a 100644 --- a/lib/sqlalchemy/util/_collections.py +++ b/lib/sqlalchemy/util/_collections.py @@ -868,9 +868,12 @@ class LRUCache(dict): """ - def __init__(self, capacity=100, threshold=.5): + __slots__ = 'capacity', 'threshold', 'size_alert', '_counter', '_mutex' + + def __init__(self, capacity=100, threshold=.5, size_alert=None): self.capacity = capacity self.threshold = threshold + self.size_alert = size_alert self._counter = 0 self._mutex = threading.Lock() @@ -910,11 +913,19 @@ class LRUCache(dict): item[1] = value self._manage_size() + @property + def size_threshold(self): + return self.capacity + self.capacity * self.threshold + def _manage_size(self): if not self._mutex.acquire(False): return try: + size_alert = bool(self.size_alert) while len(self) > self.capacity + self.capacity * self.threshold: + if size_alert: + size_alert = False + self.size_alert() by_counter = sorted(dict.values(self), key=operator.itemgetter(2), reverse=True) diff --git 
a/test/aaa_profiling/test_memusage.py b/test/aaa_profiling/test_memusage.py index 53f118e15d..5e0baabefe 100644 --- a/test/aaa_profiling/test_memusage.py +++ b/test/aaa_profiling/test_memusage.py @@ -34,7 +34,7 @@ class ASub(A): pass -def profile_memory(maxtimes=50, +def profile_memory(maxtimes=250, assert_no_sessions=True, get_num_objects=None): def decorate(func): # run the test N times. if length of gc.get_objects() @@ -55,9 +55,10 @@ def profile_memory(maxtimes=50, def profile(*args): gc_collect() samples = [] - + max_ = 0 + max_grew_for = 0 success = False - for y in range(100 // 5): + for y in range(maxtimes // 5): for x in range(5): func(*args) gc_collect() @@ -71,35 +72,28 @@ def profile_memory(maxtimes=50, if assert_no_sessions: assert len(_sessions) == 0 - # check for "flatline" - size is constant for - # 5 iterations - for x in samples[-4:]: - if x != samples[-5]: - break + latest_max = max(samples[-5:]) + if latest_max > max_: + print( + "Max grew from %s to %s, max has " + "grown for %s samples" % ( + max_, latest_max, max_grew_for + ) + ) + max_ = latest_max + max_grew_for += 1 + continue else: - success = True - - if not success: - # object count is bigger than when it started - if samples[-1] > samples[0]: - for x in samples[1:-2]: - # see if a spike bigger than the endpoint exists - if x > samples[-1]: - success = True - break - else: + print("Max remained at %s, %s more attempts left" % + (max_, max_grew_for)) + max_grew_for -= 1 + if max_grew_for == 0: success = True - - # if we saw count go down or flatline, - # we're done - if success: - break - - # else keep trying until maxtimes - + break else: assert False, repr(samples) + assert success return profile return decorate @@ -204,6 +198,7 @@ class MemUsageTest(EnsureZeroed): del sessmaker go() + @testing.emits_warning("Compiled statement cache for.*") @testing.crashes('sqlite', ':memory: connection not suitable here') def test_orm_many_engines(self): metadata = MetaData(self.engine) @@ -224,10 
+219,10 @@ class MemUsageTest(EnsureZeroed): m1 = mapper(A, table1, properties={ "bs": relationship(B, cascade="all, delete", order_by=table2.c.col1)}, - _compiled_cache_size=10 + _compiled_cache_size=50 ) m2 = mapper(B, table2, - _compiled_cache_size=10 + _compiled_cache_size=50 ) m3 = mapper(A, table1, non_primary=True) @@ -301,6 +296,7 @@ class MemUsageTest(EnsureZeroed): assert not eng.dialect._type_memos + @testing.emits_warning("Compiled statement cache for.*") def test_many_updates(self): metadata = MetaData(self.engine) diff --git a/test/orm/test_mapper.py b/test/orm/test_mapper.py index 65f3ffd2ce..2392aa0026 100644 --- a/test/orm/test_mapper.py +++ b/test/orm/test_mapper.py @@ -262,6 +262,31 @@ class MapperTest(_fixtures.FixtureTest, AssertsCompiledSQL): assert_raises(TypeError, Foo, x=5) assert_raises(TypeError, Bar, x=5) + def test_lru_cache_warning(self): + users = self.tables.users + User = self.classes.User + m = mapper(User, users) + + for i in range(149): + m._compiled_cache["key_%s" % i] = "foo" + + def go(): + m._compiled_cache["key_150"] = "foo" + m._compiled_cache["key_151"] = "foo" + + assert_raises_message( + sa.exc.SAWarning, + r"Compiled statement cache for mapper Mapper.User.users is " + "reaching its size threshold of 150, based on " + "_compiled_cache_size of 100. ", + go + ) + m._compiled_cache.size_alert = None + for i in range(152, 200): + m._compiled_cache["key_%d" % i] = "foo" + assert len(m._compiled_cache) < 150 + + def test_sort_states_comparisons(self): """test that _sort_states() doesn't compare insert_order to state.key, for set of mixed -- 2.47.2