From cef4e5ff38dc7d2200800837c110ab6beec10d8a Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Wed, 12 Apr 2017 11:37:19 -0400 Subject: [PATCH] Warn on _compiled_cache growth Added warnings to the LRU "compiled cache" used by the :class:`.Mapper` (and ultimately will be for other ORM-based LRU caches) such that when the cache starts hitting its size limits, the application will emit a warning that this is a performance-degrading situation that may require attention. The LRU caches can reach their size limits primarily if an application is making use of an unbounded number of :class:`.Engine` objects, which is an antipattern. Otherwise, this may suggest an issue that should be brought to the SQLAlchemy developer's attention. Additionally, adjusted the test_memusage algorithm again as the previous one could still allow a growing memory size to be missed. Change-Id: I020d1ceafb7a08f6addfa990a1e7acd09f933240 --- doc/build/changelog/changelog_12.rst | 13 +++++++ doc/build/faq/performance.rst | 51 ++++++++++++++++++++++++++ lib/sqlalchemy/orm/mapper.py | 15 +++++++- lib/sqlalchemy/util/_collections.py | 13 ++++++- test/aaa_profiling/test_memusage.py | 54 +++++++++++++--------------- test/orm/test_mapper.py | 25 +++++++++++++ 6 files changed, 140 insertions(+), 31 deletions(-) diff --git a/doc/build/changelog/changelog_12.rst b/doc/build/changelog/changelog_12.rst index 7c04210192..3b611d1476 100644 --- a/doc/build/changelog/changelog_12.rst +++ b/doc/build/changelog/changelog_12.rst @@ -13,6 +13,19 @@ .. changelog:: :version: 1.2.0b1 + .. change:: + :tags: bug, orm + + Added warnings to the LRU "compiled cache" used by the :class:`.Mapper` + (and ultimately will be for other ORM-based LRU caches) such that + when the cache starts hitting its size limits, the application will + emit a warning that this is a performance-degrading situation that + may require attention. 
The LRU caches can reach their size limits + primarily if an application is making use of an unbounded number + of :class:`.Engine` objects, which is an antipattern. Otherwise, + this may suggest an issue that should be brought to the SQLAlchemy + developer's attention. + .. change:: 3964 :tags: bug, postgresql :tickets: 3964 diff --git a/doc/build/faq/performance.rst b/doc/build/faq/performance.rst index 3b76c83264..61c0d6ea22 100644 --- a/doc/build/faq/performance.rst +++ b/doc/build/faq/performance.rst @@ -459,3 +459,54 @@ Script:: test_sqlalchemy_core(100000) test_sqlite3(100000) + +.. _faq_compiled_cache_threshold: + +How do I deal with "compiled statement cache reaching its size threshold"? +----------------------------------------------------------------------------- + +Some parts of the ORM make use of a least-recently-used (LRU) cache in order +to cache generated SQL statements for fast reuse. More generally, these +areas are making use of the "compiled cache" feature of :class:`.Connection` +which can be invoked using :meth:`.Connection.execution_options`. + +The following two points summarize what should be done if this warning +is occurring: + +* Ensure the application **does not create an arbitrary number of + Engine objects**, that is, it does not call :func:`.create_engine` on + a per-operation basis. An application should have only **one Engine per + database URL**. + +* If the application does not have an unbounded number of engines, + **report the warning to the SQLAlchemy developers**. Guidelines on + mailing list support are at: http://www.sqlalchemy.org/support.html#mailinglist + +The cache works by creating a cache key that can uniquely identify the +combination of a specific **dialect** and a specific **Core SQL expression**. +A cache key that already exists in the cache will reuse the already-compiled +SQL expression. A cache key that doesn't exist will create a *new* entry +in the dictionary. 
When this dictionary reaches the configured threshold, +the LRU cache will *trim the size* of the cache back down by a certain percentage. + +It is important to understand that from the above, **a compiled cache that +is reaching its size limit will perform badly.** This is because not only +are the SQL statements being freshly compiled into strings rather than using +the cached version, but the LRU cache is also spending lots of time trimming +its size back down. + +The primary reason the compiled caches can grow is due to the **antipattern of +using a new Engine for every operation**. Because the compiled cache +must key on the :class:`.Dialect` associated with an :class:`.Engine`, +calling :func:`.create_engine` many times in an application will establish +new cache entries for every engine. Because the cache is self-trimming, +the application won't grow in size unbounded; however, the application should +be repaired to not rely on an unbounded number of :class:`.Engine` +objects. + +Outside of this pattern, the default size limits set for these caches within +the ORM should not generally require adjustment, and the LRU boundaries +should never be reached. If this warning is occurring and the application +is not generating hundreds of engines, please report the issue to the +SQLAlchemy developers on the mailing list; see the guidelines +at http://www.sqlalchemy.org/support.html#mailinglist. 
\ No newline at end of file diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 27456c35b8..28a5c5b9bf 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -2718,7 +2718,20 @@ class Mapper(InspectionAttr): @_memoized_configured_property def _compiled_cache(self): - return util.LRUCache(self._compiled_cache_size) + return util.LRUCache(self._compiled_cache_size, + size_alert=self._alert_lru_cache_limit) + + def _alert_lru_cache_limit(self): + util.warn( + "Compiled statement cache for mapper %s is " + "reaching its size threshold of %d, based on _compiled_cache_size " + "of %d. Please refer to " + "http://docs.sqlalchemy.org/en/latest/faq/performance.html" + "#faq_compiled_cache_threshold" + " for best practices." % + (self, + self._compiled_cache.size_threshold, + self._compiled_cache_size)) @_memoized_configured_property def _sorted_tables(self): diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py index bdd40fc794..d94af5f62a 100644 --- a/lib/sqlalchemy/util/_collections.py +++ b/lib/sqlalchemy/util/_collections.py @@ -868,9 +868,12 @@ class LRUCache(dict): """ - def __init__(self, capacity=100, threshold=.5): + __slots__ = 'capacity', 'threshold', 'size_alert', '_counter', '_mutex' + + def __init__(self, capacity=100, threshold=.5, size_alert=None): self.capacity = capacity self.threshold = threshold + self.size_alert = size_alert self._counter = 0 self._mutex = threading.Lock() @@ -910,11 +913,19 @@ class LRUCache(dict): item[1] = value self._manage_size() + @property + def size_threshold(self): + return self.capacity + self.capacity * self.threshold + def _manage_size(self): if not self._mutex.acquire(False): return try: + size_alert = bool(self.size_alert) while len(self) > self.capacity + self.capacity * self.threshold: + if size_alert: + size_alert = False + self.size_alert() by_counter = sorted(dict.values(self), key=operator.itemgetter(2), reverse=True) diff --git 
a/test/aaa_profiling/test_memusage.py b/test/aaa_profiling/test_memusage.py index 53f118e15d..5e0baabefe 100644 --- a/test/aaa_profiling/test_memusage.py +++ b/test/aaa_profiling/test_memusage.py @@ -34,7 +34,7 @@ class ASub(A): pass -def profile_memory(maxtimes=50, +def profile_memory(maxtimes=250, assert_no_sessions=True, get_num_objects=None): def decorate(func): # run the test N times. if length of gc.get_objects() @@ -55,9 +55,10 @@ def profile_memory(maxtimes=50, def profile(*args): gc_collect() samples = [] - + max_ = 0 + max_grew_for = 0 success = False - for y in range(100 // 5): + for y in range(maxtimes // 5): for x in range(5): func(*args) gc_collect() @@ -71,35 +72,28 @@ def profile_memory(maxtimes=50, if assert_no_sessions: assert len(_sessions) == 0 - # check for "flatline" - size is constant for - # 5 iterations - for x in samples[-4:]: - if x != samples[-5]: - break + latest_max = max(samples[-5:]) + if latest_max > max_: + print( + "Max grew from %s to %s, max has " + "grown for %s samples" % ( + max_, latest_max, max_grew_for + ) + ) + max_ = latest_max + max_grew_for += 1 + continue else: - success = True - - if not success: - # object count is bigger than when it started - if samples[-1] > samples[0]: - for x in samples[1:-2]: - # see if a spike bigger than the endpoint exists - if x > samples[-1]: - success = True - break - else: + print("Max remained at %s, %s more attempts left" % + (max_, max_grew_for)) + max_grew_for -= 1 + if max_grew_for == 0: success = True - - # if we saw count go down or flatline, - # we're done - if success: - break - - # else keep trying until maxtimes - + break else: assert False, repr(samples) + assert success return profile return decorate @@ -204,6 +198,7 @@ class MemUsageTest(EnsureZeroed): del sessmaker go() + @testing.emits_warning("Compiled statement cache for.*") @testing.crashes('sqlite', ':memory: connection not suitable here') def test_orm_many_engines(self): metadata = MetaData(self.engine) @@ -224,10 
+219,10 @@ class MemUsageTest(EnsureZeroed): m1 = mapper(A, table1, properties={ "bs": relationship(B, cascade="all, delete", order_by=table2.c.col1)}, - _compiled_cache_size=10 + _compiled_cache_size=50 ) m2 = mapper(B, table2, - _compiled_cache_size=10 + _compiled_cache_size=50 ) m3 = mapper(A, table1, non_primary=True) @@ -301,6 +296,7 @@ class MemUsageTest(EnsureZeroed): assert not eng.dialect._type_memos + @testing.emits_warning("Compiled statement cache for.*") def test_many_updates(self): metadata = MetaData(self.engine) diff --git a/test/orm/test_mapper.py b/test/orm/test_mapper.py index 65f3ffd2ce..2392aa0026 100644 --- a/test/orm/test_mapper.py +++ b/test/orm/test_mapper.py @@ -262,6 +262,31 @@ class MapperTest(_fixtures.FixtureTest, AssertsCompiledSQL): assert_raises(TypeError, Foo, x=5) assert_raises(TypeError, Bar, x=5) + def test_lru_cache_warning(self): + users = self.tables.users + User = self.classes.User + m = mapper(User, users) + + for i in range(149): + m._compiled_cache["key_%s" % i] = "foo" + + def go(): + m._compiled_cache["key_150"] = "foo" + m._compiled_cache["key_151"] = "foo" + + assert_raises_message( + sa.exc.SAWarning, + r"Compiled statement cache for mapper Mapper.User.users is " + "reaching its size threshold of 150, based on " + "_compiled_cache_size of 100. ", + go + ) + m._compiled_cache.size_alert = None + for i in range(152, 200): + m._compiled_cache["key_%d" % i] = "foo" + assert len(m._compiled_cache) < 150 + + def test_sort_states_comparisons(self): """test that _sort_states() doesn't compare insert_order to state.key, for set of mixed -- 2.47.2