]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
implement cython for cache_anon_map, prefix_anon_map
authorMike Bayer <mike_mp@zzzcomputing.com>
Sun, 19 Dec 2021 20:59:55 +0000 (15:59 -0500)
committerMike Bayer <mike_mp@zzzcomputing.com>
Tue, 21 Dec 2021 17:20:04 +0000 (12:20 -0500)
These are small bits where cache_anon_map in particular
is part of the cache key generation scheme which is a key
target for cython.

changing such a tiny element of the cache key gen is
doing basically nothing yet, as the cython impl is
mostly the exact same speed as the python one.  I guess for
cython to be effective we'd need to redo the whole cache key
generation and possibly not use the same kinds of structures,
which might not be very easy to do.

Additionally, some cython runtime import errors are being
observed on jenkins, add an upfront check to the test suite
to indicate if the expected build succeeded when REQUIRE_SQLALCHEMY_CEXT
is set.

Running case CacheAnonMap
Running python .... Done
Running cython .... Done
                    | python      | cython      | cy / py     |
test_get_anon_non_present| 0.301266758 | 0.231203834 | 0.767438915 |
test_get_anon_present| 0.300919362 | 0.227336695 | 0.755473803 |
test_has_key_non_present| 0.152725077 | 0.133191719 | 0.872101171 |
test_has_key_present| 0.152689778 | 0.133673095 | 0.875455428 |
Running case PrefixAnonMap
Running python .. Done
Running cython .. Done
                    | python      | cython      | cy / py     |
test_apply_non_present| 0.358715744 | 0.335245703 | 0.934572034 |
test_apply_present  | 0.354434996 | 0.338579782 | 0.955266229 |

Change-Id: I0d3f1dd285c044afc234479141d831b2ee0455be

lib/sqlalchemy/cyextension/util.pyx
lib/sqlalchemy/sql/_py_util.py [new file with mode: 0644]
lib/sqlalchemy/sql/base.py
lib/sqlalchemy/sql/elements.py
lib/sqlalchemy/sql/traversals.py
lib/sqlalchemy/testing/plugin/plugin_base.py
lib/sqlalchemy/util/langhelpers.py
test/aaa_profiling/test_misc.py
test/perf/compiled_extensions.py
test/profiles.txt
tox.ini

index ac15ff9de8876327679c4a6b06ce6a53272d3269..62ca960b33d39099e6adf6f3cfc587f5885b043b 100644 (file)
@@ -41,3 +41,45 @@ def _distill_raw_params(object params):
         return [params]
     else:
         raise exc.ArgumentError("mapping or sequence expected for parameters")
+
+cdef class prefix_anon_map(dict):
+    def __missing__(self, str key):
+        cdef str derived
+        cdef int anonymous_counter
+        cdef dict self_dict = self
+
+        derived = key.split(" ", 1)[1]
+
+        anonymous_counter = self_dict.get(derived, 1)
+        self_dict[derived] = anonymous_counter + 1
+        value = f"{derived}_{anonymous_counter}"
+        self_dict[key] = value
+        return value
+
+
+cdef class cache_anon_map(dict):
+    cdef int _index
+
+    def __init__(self):
+        self._index = 0
+
+    def get_anon(self, obj):
+        cdef long idself
+        cdef str id_
+        cdef dict self_dict = self
+
+        idself = id(obj)
+        if idself in self_dict:
+            return self_dict[idself], True
+        else:
+            id_ = self.__missing__(idself)
+            return id_, False
+
+    def __missing__(self, key):
+        cdef str val
+        cdef dict self_dict = self
+
+        self_dict[key] = val = str(self._index)
+        self._index += 1
+        return val
+
diff --git a/lib/sqlalchemy/sql/_py_util.py b/lib/sqlalchemy/sql/_py_util.py
new file mode 100644 (file)
index 0000000..ceb6376
--- /dev/null
@@ -0,0 +1,59 @@
+# sql/_py_util.py
+# Copyright (C) 2005-2021 the SQLAlchemy authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of SQLAlchemy and is released under
+# the MIT License: https://www.opensource.org/licenses/mit-license.php
+
+
+class prefix_anon_map(dict):
+    """A map that creates new keys for missing key access.
+
+    Considers keys of the form "<ident> <name>" to produce
+    new symbols "<name>_<index>", where "index" is an incrementing integer
+    corresponding to <name>.
+
+    Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which
+    is otherwise usually used for this type of operation.
+
+    """
+
+    def __missing__(self, key):
+        (ident, derived) = key.split(" ", 1)
+        anonymous_counter = self.get(derived, 1)
+        self[derived] = anonymous_counter + 1
+        value = f"{derived}_{anonymous_counter}"
+        self[key] = value
+        return value
+
+
+class cache_anon_map(dict):
+    """A map that creates new keys for missing key access.
+
+    Produces an incrementing sequence given a series of unique keys.
+
+    This is similar to the compiler prefix_anon_map class although simpler.
+
+    Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which
+    is otherwise usually used for this type of operation.
+
+    """
+
+    _index = 0
+
+    def get_anon(self, object_):
+
+        idself = id(object_)
+        if idself in self:
+            return self[idself], True
+        else:
+            # inline of __missing__
+            self[idself] = id_ = str(self._index)
+            self._index += 1
+
+            return id_, False
+
+    def __missing__(self, key):
+        self[key] = val = str(self._index)
+        self._index += 1
+        return val
index 4165751ca1f0439f7f648aa42053156eb112ee8a..b5a20830d550398b410d51302e963556886224ba 100644 (file)
@@ -30,6 +30,12 @@ from .. import util
 from ..util import HasMemoized
 from ..util import hybridmethod
 
+try:
+    from sqlalchemy.cyextension.util import prefix_anon_map  # noqa
+except ImportError:
+    from ._py_util import prefix_anon_map  # noqa
+
+
 coercions = None
 elements = None
 type_api = None
@@ -1012,27 +1018,6 @@ class Executable(roles.StatementRole, Generative):
         return self._execution_options
 
 
-class prefix_anon_map(dict):
-    """A map that creates new keys for missing key access.
-
-    Considers keys of the form "<ident> <name>" to produce
-    new symbols "<name>_<index>", where "index" is an incrementing integer
-    corresponding to <name>.
-
-    Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which
-    is otherwise usually used for this type of operation.
-
-    """
-
-    def __missing__(self, key):
-        (ident, derived) = key.split(" ", 1)
-        anonymous_counter = self.get(derived, 1)
-        self[derived] = anonymous_counter + 1
-        value = derived + "_" + str(anonymous_counter)
-        self[key] = value
-        return value
-
-
 class SchemaEventTarget:
     """Base class for elements that are the targets of :class:`.DDLEvents`
     events.
index 00270c9b5832f96b6b1e404b7dd7a62fa3fe47d5..08c99382023d3933620aef4877049856fb9aed76 100644 (file)
@@ -1659,14 +1659,9 @@ class BindParameter(roles.InElementRole, ColumnElement):
                 anon_map[NO_CACHE] = True
             return None
 
-        idself = id(self)
-        if idself in anon_map:
-            return (anon_map[idself], self.__class__)
-        else:
-            # inline of
-            # id_ = anon_map[idself]
-            anon_map[idself] = id_ = str(anon_map.index)
-            anon_map.index += 1
+        id_, found = anon_map.get_anon(self)
+        if found:
+            return (id_, self.__class__)
 
         if bindparams is not None:
             bindparams.append(self)
index d58b5c2bb376326899c9fa5333d6767719ba6634..22398e7c188ce04691c2df80b7f2757e868b7559 100644 (file)
@@ -12,6 +12,12 @@ from .. import util
 from ..inspection import inspect
 from ..util import HasMemoized
 
+try:
+    from sqlalchemy.cyextension.util import cache_anon_map as anon_map  # noqa
+except ImportError:
+    from ._py_util import cache_anon_map as anon_map  # noqa
+
+
 SKIP_TRAVERSE = util.symbol("skip_traverse")
 COMPARE_FAILED = False
 COMPARE_SUCCEEDED = True
@@ -177,16 +183,11 @@ class HasCacheKey:
 
         """
 
-        idself = id(self)
         cls = self.__class__
 
-        if idself in anon_map:
-            return (anon_map[idself], cls)
-        else:
-            # inline of
-            # id_ = anon_map[idself]
-            anon_map[idself] = id_ = str(anon_map.index)
-            anon_map.index += 1
+        id_, found = anon_map.get_anon(self)
+        if found:
+            return (id_, cls)
 
         try:
             dispatcher = cls.__dict__["_generated_cache_key_traversal"]
@@ -1030,27 +1031,6 @@ def _resolve_name_for_compare(element, name, anon_map, **kw):
     return name
 
 
-class anon_map(dict):
-    """A map that creates new keys for missing key access.
-
-    Produces an incrementing sequence given a series of unique keys.
-
-    This is similar to the compiler prefix_anon_map class although simpler.
-
-    Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which
-    is otherwise usually used for this type of operation.
-
-    """
-
-    def __init__(self):
-        self.index = 0
-
-    def __missing__(self, key):
-        self[key] = val = str(self.index)
-        self.index += 1
-        return val
-
-
 class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots):
     __slots__ = "stack", "cache", "anon_map"
 
index 7bc88a14b79c238712043f976220f25c168aa56f..2a6691fc8c243f9915600d00471e57aabd5fa0ad 100644 (file)
@@ -16,6 +16,7 @@ is pytest.
 import abc
 import configparser
 import logging
+import os
 import re
 import sys
 
@@ -369,6 +370,20 @@ def _monkeypatch_cdecimal(options, file_config):
         sys.modules["decimal"] = cdecimal
 
 
+@post
+def __ensure_cext(opt, file_config):
+    if os.environ.get("REQUIRE_SQLALCHEMY_CEXT", "0") == "1":
+        from sqlalchemy.util import has_compiled_ext
+
+        try:
+            has_compiled_ext(raise_=True)
+        except ImportError as err:
+            raise AssertionError(
+                "REQUIRE_SQLALCHEMY_CEXT is set but can't import the "
+                "cython extensions"
+            ) from err
+
+
 @post
 def _init_symbols(options, file_config):
     from sqlalchemy.testing import config
index b759490c57747531369cd0ff833eb880631f887f..dc08b049436113099ae1ea774d404b22330492cc 100644 (file)
@@ -1897,7 +1897,7 @@ def repr_tuple_names(names):
         return "%s, ..., %s" % (", ".join(res[0:3]), res[-1])
 
 
-def has_compiled_ext():
+def has_compiled_ext(raise_=False):
     try:
         from sqlalchemy.cyextension import collections  # noqa F401
         from sqlalchemy.cyextension import immutabledict  # noqa F401
@@ -1907,4 +1907,6 @@ def has_compiled_ext():
 
         return True
     except ImportError:
+        if raise_:
+            raise
         return False
index 4f811e69ad7009e41e233a08ba5b5459bbe5c1eb..c0508a3cfd94593dfb0fceeb04fd09e1b04678b6 100644 (file)
@@ -90,14 +90,14 @@ class CacheKeyTest(fixtures.TestBase):
         )
         registry.map_imperatively(Child, child)
 
+        registry.configure()
+
         yield Parent, Child
 
         registry.dispose()
 
     @testing.fixture(scope="function")
     def stmt_fixture_one(self, mapping_fixture):
-        # note that by using ORM elements we will have annotations in these
-        # items also which is part of the performance hit
         Parent, Child = mapping_fixture
 
         return [
@@ -120,13 +120,29 @@ class CacheKeyTest(fixtures.TestBase):
             else:
                 current_key = key
 
-    @profiling.function_call_count(variance=0.15, warmup=0)
-    def test_statement_key_is_not_cached(self, stmt_fixture_one):
-        current_key = None
-        for stmt in stmt_fixture_one:
-            key = stmt._generate_cache_key()
-            assert key is not None
-            if current_key:
-                eq_(key, current_key)
-            else:
-                current_key = key
+    def test_statement_key_is_not_cached(
+        self, stmt_fixture_one, mapping_fixture
+    ):
+        Parent, Child = mapping_fixture
+
+        # run a totally different statement so that everything cache
+        # related not specific to the statement is warmed up
+        some_other_statement = (
+            select(Parent.id, Child.id)
+            .join_from(Parent, Child, Parent.children)
+            .where(Parent.id == 5)
+        )
+        some_other_statement._generate_cache_key()
+
+        @profiling.function_call_count(variance=0.15, warmup=0)
+        def go():
+            current_key = None
+            for stmt in stmt_fixture_one:
+                key = stmt._generate_cache_key()
+                assert key is not None
+                if current_key:
+                    eq_(key, current_key)
+                else:
+                    current_key = key
+
+        go()
index b9bb9ebd279eab84893803c16a4268d06054152f..14b8ed693e68a8ac1546591893ac04618cea3de3 100644 (file)
@@ -994,6 +994,96 @@ class BaseRow(Case):
         self.row_long.y
 
 
+class CacheAnonMap(Case):
+    @staticmethod
+    def python():
+        from sqlalchemy.sql._py_util import cache_anon_map
+
+        return cache_anon_map
+
+    @staticmethod
+    def cython():
+        from sqlalchemy.cyextension.util import cache_anon_map
+
+        return cache_anon_map
+
+    IMPLEMENTATIONS = {"python": python.__func__, "cython": cython.__func__}
+
+    NUMBER = 1000000
+
+    def init_objects(self):
+        from sqlalchemy import column, bindparam
+
+        self.object_1 = column("x")
+        self.object_2 = bindparam("y")
+
+        self.impl_w_non_present = self.impl()
+        self.impl_w_present = iwp = self.impl()
+        iwp.get_anon(self.object_1)
+        iwp.get_anon(self.object_2)
+
+    @classmethod
+    def update_results(cls, results):
+        cls._divide_results(results, "cython", "python", "cy / py")
+
+    @test_case
+    def test_get_anon_non_present(self):
+        self.impl_w_non_present.get_anon(self.object_1)
+
+    @test_case
+    def test_get_anon_present(self):
+        self.impl_w_present.get_anon(self.object_1)
+
+    @test_case
+    def test_has_key_non_present(self):
+        id(self.object_1) in self.impl_w_non_present
+
+    @test_case
+    def test_has_key_present(self):
+        id(self.object_1) in self.impl_w_present
+
+
+class PrefixAnonMap(Case):
+    @staticmethod
+    def python():
+        from sqlalchemy.sql._py_util import prefix_anon_map
+
+        return prefix_anon_map
+
+    @staticmethod
+    def cython():
+        from sqlalchemy.cyextension.util import prefix_anon_map
+
+        return prefix_anon_map
+
+    IMPLEMENTATIONS = {"python": python.__func__, "cython": cython.__func__}
+
+    NUMBER = 1000000
+
+    def init_objects(self):
+        from sqlalchemy.sql.elements import _anonymous_label
+
+        self.name = _anonymous_label.safe_construct(58243, "some_column_name")
+
+        self.impl_w_non_present = self.impl()
+        self.impl_w_present = iwp = self.impl()
+        self.name.apply_map(iwp)
+
+    @classmethod
+    def update_results(cls, results):
+        cls._divide_results(results, "cython", "python", "cy / py")
+
+    @test_case
+    def test_apply_non_present(self):
+
+        self.name.apply_map(self.impl_w_non_present)
+
+    @test_case
+    def test_apply_present(self):
+
+        self.name.apply_map(self.impl_w_present)
+
+
 def tabulate(results, inverse):
     dim = 11
     header = "{:<20}|" + (" {:<%s} |" % dim) * len(results)
index 50907e984411c78462163a6174751530e703ce38..f306bcc3a08b4931e70f473dbf909f079d82d340 100644 (file)
@@ -1,15 +1,15 @@
 # /home/classic/dev/sqlalchemy/test/profiles.txt
 # This file is written out on a per-environment basis.
-# For each test in aaa_profiling, the corresponding function and 
+# For each test in aaa_profiling, the corresponding function and
 # environment is located within this file.  If it doesn't exist,
 # the test is skipped.
-# If a callcount does exist, it is compared to what we received. 
+# If a callcount does exist, it is compared to what we received.
 # assertions are raised if the counts do not match.
-# 
-# To add a new callcount test, apply the function_call_count 
-# decorator and re-run the tests using the --write-profiles 
+#
+# To add a new callcount test, apply the function_call_count
+# decorator and re-run the tests using the --write-profiles
 # option - this file will be rewritten including the new count.
-# 
+#
 
 # TEST: test.aaa_profiling.test_compiler.CompileTest.test_insert
 
@@ -89,7 +89,7 @@ test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_cached x86_64_li
 # TEST: test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached
 
 test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_cextensions 5003
-test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_nocextensions 5003
+test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_nocextensions 6387
 
 # TEST: test.aaa_profiling.test_misc.EnumTest.test_create_enum_from_pep_435_w_expensive_members
 
diff --git a/tox.ini b/tox.ini
index 5ac5ef1a286743f22b6d7031cb247404e3a39275..512b11e4161582a2aaf1f643667338c8bf9a9ab5 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -117,10 +117,12 @@ setenv=
 # wildcards OK).  Need at least these
 passenv=ORACLE_HOME NLS_LANG TOX_POSTGRESQL TOX_POSTGRESQL_PY2K TOX_MYSQL TOX_MYSQL_PY2K TOX_ORACLE TOX_MSSQL TOX_SQLITE TOX_SQLITE_FILE TOX_WORKERS EXTRA_SQLITE_DRIVERS EXTRA_PG_DRIVERS EXTRA_MYSQL_DRIVERS
 
-# for nocext, we rm *.so in lib in case we are doing usedevelop=True
 commands=
-  cext: /bin/true
+
+  # this line is only meaningful when usedevelop=True is enabled.  we use
+  # that flag for coverage mode.
   nocext: sh -c "rm -f lib/sqlalchemy/*.so"
+
   {env:BASECOMMAND} {env:WORKERS} {env:SQLITE:} {env:EXTRA_SQLITE_DRIVERS:} {env:POSTGRESQL:} {env:EXTRA_PG_DRIVERS:} {env:MYSQL:} {env:EXTRA_MYSQL_DRIVERS:} {env:ORACLE:} {env:MSSQL:} {env:BACKENDONLY:} {env:IDENTS:} {env:MEMUSAGE:} {env:COVERAGE:} {posargs}
   oracle,mssql,sqlite_file: python reap_dbs.py db_idents.txt