From: Mike Bayer Date: Sun, 19 Dec 2021 20:59:55 +0000 (-0500) Subject: implement cython for cache_anon_map, prefix_anon_map X-Git-Tag: rel_2_0_0b1~579^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e6cd36fc51d25922f20aa203229a636f5d6daabe;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git implement cython for cache_anon_map, prefix_anon_map These are small bits where cache_anon_map in particular is part of the cache key generation scheme which is a key target for cython. changing such a tiny element of the cache key gen is doing basically nothing yet, as the cython impl is mostly the exact same speed as the python one. I guess for cython to be effective we'd need to redo the whole cache key generation and possibly not use the same kinds of structures, which might not be very easy to do. Additionally, some cython runtime import errors are being observed on jenkins, add an upfront check to the test suite to indicate if the expected build succeeded when REQUIRE_SQLALCHEMY_CEXT is set. Running case CacheAnonMap Running python .... Done Running cython .... Done | python | cython | cy / py | test_get_anon_non_present| 0.301266758 | 0.231203834 | 0.767438915 | test_get_anon_present| 0.300919362 | 0.227336695 | 0.755473803 | test_has_key_non_present| 0.152725077 | 0.133191719 | 0.872101171 | test_has_key_present| 0.152689778 | 0.133673095 | 0.875455428 | Running case PrefixAnonMap Running python .. Done Running cython .. Done | python | cython | cy / py | test_apply_non_present| 0.358715744 | 0.335245703 | 0.934572034 | test_apply_present | 0.354434996 | 0.338579782 | 0.955266229 | Change-Id: I0d3f1dd285c044afc234479141d831b2ee0455be --- diff --git a/lib/sqlalchemy/cyextension/util.pyx b/lib/sqlalchemy/cyextension/util.pyx index ac15ff9de8..62ca960b33 100644 --- a/lib/sqlalchemy/cyextension/util.pyx +++ b/lib/sqlalchemy/cyextension/util.pyx @@ -41,3 +41,45 @@ def _distill_raw_params(object params): return [params] else: raise exc.ArgumentError("mapping or sequence expected for parameters") + +cdef class prefix_anon_map(dict): + def __missing__(self, str key): + cdef str derived + cdef int anonymous_counter + cdef dict self_dict = self + + derived = key.split(" ", 1)[1] + + anonymous_counter = self_dict.get(derived, 1) + self_dict[derived] = anonymous_counter + 1 + value = f"{derived}_{anonymous_counter}" + self_dict[key] = value + return value + + +cdef class cache_anon_map(dict): + cdef int _index + + def __init__(self): + self._index = 0 + + def get_anon(self, obj): + cdef long idself + cdef str id_ + cdef dict self_dict = self + + idself = id(obj) + if idself in self_dict: + return self_dict[idself], True + else: + id_ = self.__missing__(idself) + return id_, False + + def __missing__(self, key): + cdef str val + cdef dict self_dict = self + + self_dict[key] = val = str(self._index) + self._index += 1 + return val + diff --git a/lib/sqlalchemy/sql/_py_util.py b/lib/sqlalchemy/sql/_py_util.py new file mode 100644 index 0000000000..ceb637609d --- /dev/null +++ b/lib/sqlalchemy/sql/_py_util.py @@ -0,0 +1,59 @@ +# sql/_py_util.py +# Copyright (C) 2005-2021 the SQLAlchemy authors and contributors +# +# +# This module is part of SQLAlchemy and is released under +# the MIT License: https://www.opensource.org/licenses/mit-license.php + + +class prefix_anon_map(dict): + """A map that creates new keys for missing key access. + + Considers keys of the form " " to produce + new symbols "_", where "index" is an incrementing integer + corresponding to . + + Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which + is otherwise usually used for this type of operation. + + """ + + def __missing__(self, key): + (ident, derived) = key.split(" ", 1) + anonymous_counter = self.get(derived, 1) + self[derived] = anonymous_counter + 1 + value = f"{derived}_{anonymous_counter}" + self[key] = value + return value + + +class cache_anon_map(dict): + """A map that creates new keys for missing key access. + + Produces an incrementing sequence given a series of unique keys. + + This is similar to the compiler prefix_anon_map class although simpler. + + Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which + is otherwise usually used for this type of operation. + + """ + + _index = 0 + + def get_anon(self, object_): + + idself = id(object_) + if idself in self: + return self[idself], True + else: + # inline of __missing__ + self[idself] = id_ = str(self._index) + self._index += 1 + + return id_, False + + def __missing__(self, key): + self[key] = val = str(self._index) + self._index += 1 + return val diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 4165751ca1..b5a20830d5 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -30,6 +30,12 @@ from .. import util from ..util import HasMemoized from ..util import hybridmethod +try: + from sqlalchemy.cyextension.util import prefix_anon_map # noqa +except ImportError: + from ._py_util import prefix_anon_map # noqa + + coercions = None elements = None type_api = None @@ -1012,27 +1018,6 @@ class Executable(roles.StatementRole, Generative): return self._execution_options -class prefix_anon_map(dict): - """A map that creates new keys for missing key access. - - Considers keys of the form " " to produce - new symbols "_", where "index" is an incrementing integer - corresponding to . - - Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which - is otherwise usually used for this type of operation. - - """ - - def __missing__(self, key): - (ident, derived) = key.split(" ", 1) - anonymous_counter = self.get(derived, 1) - self[derived] = anonymous_counter + 1 - value = derived + "_" + str(anonymous_counter) - self[key] = value - return value - - class SchemaEventTarget: """Base class for elements that are the targets of :class:`.DDLEvents` events. diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 00270c9b58..08c9938202 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -1659,14 +1659,9 @@ class BindParameter(roles.InElementRole, ColumnElement): anon_map[NO_CACHE] = True return None - idself = id(self) - if idself in anon_map: - return (anon_map[idself], self.__class__) - else: - # inline of - # id_ = anon_map[idself] - anon_map[idself] = id_ = str(anon_map.index) - anon_map.index += 1 + id_, found = anon_map.get_anon(self) + if found: + return (id_, self.__class__) if bindparams is not None: bindparams.append(self) diff --git a/lib/sqlalchemy/sql/traversals.py b/lib/sqlalchemy/sql/traversals.py index d58b5c2bb3..22398e7c18 100644 --- a/lib/sqlalchemy/sql/traversals.py +++ b/lib/sqlalchemy/sql/traversals.py @@ -12,6 +12,12 @@ from .. import util from ..inspection import inspect from ..util import HasMemoized +try: + from sqlalchemy.cyextension.util import cache_anon_map as anon_map # noqa +except ImportError: + from ._py_util import cache_anon_map as anon_map # noqa + + SKIP_TRAVERSE = util.symbol("skip_traverse") COMPARE_FAILED = False COMPARE_SUCCEEDED = True @@ -177,16 +183,11 @@ class HasCacheKey: """ - idself = id(self) cls = self.__class__ - if idself in anon_map: - return (anon_map[idself], cls) - else: - # inline of - # id_ = anon_map[idself] - anon_map[idself] = id_ = str(anon_map.index) - anon_map.index += 1 + id_, found = anon_map.get_anon(self) + if found: + return (id_, cls) try: dispatcher = cls.__dict__["_generated_cache_key_traversal"] @@ -1030,27 +1031,6 @@ def _resolve_name_for_compare(element, name, anon_map, **kw): return name -class anon_map(dict): - """A map that creates new keys for missing key access. - - Produces an incrementing sequence given a series of unique keys. - - This is similar to the compiler prefix_anon_map class although simpler. - - Inlines the approach taken by :class:`sqlalchemy.util.PopulateDict` which - is otherwise usually used for this type of operation. - - """ - - def __init__(self): - self.index = 0 - - def __missing__(self, key): - self[key] = val = str(self.index) - self.index += 1 - return val - - class TraversalComparatorStrategy(InternalTraversal, util.MemoizedSlots): __slots__ = "stack", "cache", "anon_map" diff --git a/lib/sqlalchemy/testing/plugin/plugin_base.py b/lib/sqlalchemy/testing/plugin/plugin_base.py index 7bc88a14b7..2a6691fc8c 100644 --- a/lib/sqlalchemy/testing/plugin/plugin_base.py +++ b/lib/sqlalchemy/testing/plugin/plugin_base.py @@ -16,6 +16,7 @@ is pytest. import abc import configparser import logging +import os import re import sys @@ -369,6 +370,20 @@ def _monkeypatch_cdecimal(options, file_config): sys.modules["decimal"] = cdecimal +@post +def __ensure_cext(opt, file_config): + if os.environ.get("REQUIRE_SQLALCHEMY_CEXT", "0") == "1": + from sqlalchemy.util import has_compiled_ext + + try: + has_compiled_ext(raise_=True) + except ImportError as err: + raise AssertionError( + "REQUIRE_SQLALCHEMY_CEXT is set but can't import the " + "cython extensions" + ) from err + + @post def _init_symbols(options, file_config): from sqlalchemy.testing import config diff --git a/lib/sqlalchemy/util/langhelpers.py b/lib/sqlalchemy/util/langhelpers.py index b759490c57..dc08b04943 100644 --- a/lib/sqlalchemy/util/langhelpers.py +++ b/lib/sqlalchemy/util/langhelpers.py @@ -1897,7 +1897,7 @@ def repr_tuple_names(names): return "%s, ..., %s" % (", ".join(res[0:3]), res[-1]) -def has_compiled_ext(): +def has_compiled_ext(raise_=False): try: from sqlalchemy.cyextension import collections # noqa F401 from sqlalchemy.cyextension import immutabledict # noqa F401 @@ -1907,4 +1907,6 @@ def has_compiled_ext(): return True except ImportError: + if raise_: + raise return False diff --git a/test/aaa_profiling/test_misc.py b/test/aaa_profiling/test_misc.py index 4f811e69ad..c0508a3cfd 100644 --- a/test/aaa_profiling/test_misc.py +++ b/test/aaa_profiling/test_misc.py @@ -90,14 +90,14 @@ class CacheKeyTest(fixtures.TestBase): ) registry.map_imperatively(Child, child) + registry.configure() + yield Parent, Child registry.dispose() @testing.fixture(scope="function") def stmt_fixture_one(self, mapping_fixture): - # note that by using ORM elements we will have annotations in these - # items also which is part of the performance hit Parent, Child = mapping_fixture return [ @@ -120,13 +120,29 @@ class CacheKeyTest(fixtures.TestBase): else: current_key = key - @profiling.function_call_count(variance=0.15, warmup=0) - def test_statement_key_is_not_cached(self, stmt_fixture_one): - current_key = None - for stmt in stmt_fixture_one: - key = stmt._generate_cache_key() - assert key is not None - if current_key: - eq_(key, current_key) - else: - current_key = key + def test_statement_key_is_not_cached( + self, stmt_fixture_one, mapping_fixture + ): + Parent, Child = mapping_fixture + + # run a totally different statement so that everything cache + # related not specific to the statement is warmed up + some_other_statement = ( + select(Parent.id, Child.id) + .join_from(Parent, Child, Parent.children) + .where(Parent.id == 5) + ) + some_other_statement._generate_cache_key() + + @profiling.function_call_count(variance=0.15, warmup=0) + def go(): + current_key = None + for stmt in stmt_fixture_one: + key = stmt._generate_cache_key() + assert key is not None + if current_key: + eq_(key, current_key) + else: + current_key = key + + go() diff --git a/test/perf/compiled_extensions.py b/test/perf/compiled_extensions.py index b9bb9ebd27..14b8ed693e 100644 --- a/test/perf/compiled_extensions.py +++ b/test/perf/compiled_extensions.py @@ -994,6 +994,96 @@ class BaseRow(Case): self.row_long.y +class CacheAnonMap(Case): + @staticmethod + def python(): + from sqlalchemy.sql._py_util import cache_anon_map + + return cache_anon_map + + @staticmethod + def cython(): + from sqlalchemy.cyextension.util import cache_anon_map + + return cache_anon_map + + IMPLEMENTATIONS = {"python": python.__func__, "cython": cython.__func__} + + NUMBER = 1000000 + + def init_objects(self): + from sqlalchemy import column, bindparam + + self.object_1 = column("x") + self.object_2 = bindparam("y") + + self.impl_w_non_present = self.impl() + self.impl_w_present = iwp = self.impl() + iwp.get_anon(self.object_1) + iwp.get_anon(self.object_2) + + @classmethod + def update_results(cls, results): + cls._divide_results(results, "cython", "python", "cy / py") + + @test_case + def test_get_anon_non_present(self): + self.impl_w_non_present.get_anon(self.object_1) + + @test_case + def test_get_anon_present(self): + self.impl_w_present.get_anon(self.object_1) + + @test_case + def test_has_key_non_present(self): + id(self.object_1) in self.impl_w_non_present + + @test_case + def test_has_key_present(self): + id(self.object_1) in self.impl_w_present + + +class PrefixAnonMap(Case): + @staticmethod + def python(): + from sqlalchemy.sql._py_util import prefix_anon_map + + return prefix_anon_map + + @staticmethod + def cython(): + from sqlalchemy.cyextension.util import prefix_anon_map + + return prefix_anon_map + + IMPLEMENTATIONS = {"python": python.__func__, "cython": cython.__func__} + + NUMBER = 1000000 + + def init_objects(self): + from sqlalchemy.sql.elements import _anonymous_label + + self.name = _anonymous_label.safe_construct(58243, "some_column_name") + + self.impl_w_non_present = self.impl() + self.impl_w_present = iwp = self.impl() + self.name.apply_map(iwp) + + @classmethod + def update_results(cls, results): + cls._divide_results(results, "cython", "python", "cy / py") + + @test_case + def test_apply_non_present(self): + + self.name.apply_map(self.impl_w_non_present) + + @test_case + def test_apply_present(self): + + self.name.apply_map(self.impl_w_present) + + def tabulate(results, inverse): dim = 11 header = "{:<20}|" + (" {:<%s} |" % dim) * len(results) diff --git a/test/profiles.txt b/test/profiles.txt index 50907e9844..f306bcc3a0 100644 --- a/test/profiles.txt +++ b/test/profiles.txt @@ -1,15 +1,15 @@ # /home/classic/dev/sqlalchemy/test/profiles.txt # This file is written out on a per-environment basis. -# For each test in aaa_profiling, the corresponding function and +# For each test in aaa_profiling, the corresponding function and # environment is located within this file. If it doesn't exist, # the test is skipped. -# If a callcount does exist, it is compared to what we received. +# If a callcount does exist, it is compared to what we received. # assertions are raised if the counts do not match. -# -# To add a new callcount test, apply the function_call_count -# decorator and re-run the tests using the --write-profiles +# +# To add a new callcount test, apply the function_call_count +# decorator and re-run the tests using the --write-profiles # option - this file will be rewritten including the new count. -# +# # TEST: test.aaa_profiling.test_compiler.CompileTest.test_insert @@ -89,7 +89,7 @@ test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_cached x86_64_li # TEST: test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_cextensions 5003 -test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_nocextensions 5003 +test.aaa_profiling.test_misc.CacheKeyTest.test_statement_key_is_not_cached x86_64_linux_cpython_3.10_sqlite_pysqlite_dbapiunicode_nocextensions 6387 # TEST: test.aaa_profiling.test_misc.EnumTest.test_create_enum_from_pep_435_w_expensive_members diff --git a/tox.ini b/tox.ini index 5ac5ef1a28..512b11e416 100644 --- a/tox.ini +++ b/tox.ini @@ -117,10 +117,12 @@ setenv= # wildcards OK). Need at least these passenv=ORACLE_HOME NLS_LANG TOX_POSTGRESQL TOX_POSTGRESQL_PY2K TOX_MYSQL TOX_MYSQL_PY2K TOX_ORACLE TOX_MSSQL TOX_SQLITE TOX_SQLITE_FILE TOX_WORKERS EXTRA_SQLITE_DRIVERS EXTRA_PG_DRIVERS EXTRA_MYSQL_DRIVERS -# for nocext, we rm *.so in lib in case we are doing usedevelop=True commands= - cext: /bin/true + + # this line is only meaningful when usedevelop=True is enabled. we use + # that flag for coverage mode. nocext: sh -c "rm -f lib/sqlalchemy/*.so" + {env:BASECOMMAND} {env:WORKERS} {env:SQLITE:} {env:EXTRA_SQLITE_DRIVERS:} {env:POSTGRESQL:} {env:EXTRA_PG_DRIVERS:} {env:MYSQL:} {env:EXTRA_MYSQL_DRIVERS:} {env:ORACLE:} {env:MSSQL:} {env:BACKENDONLY:} {env:IDENTS:} {env:MEMUSAGE:} {env:COVERAGE:} {posargs} oracle,mssql,sqlite_file: python reap_dbs.py db_idents.txt