From 6fbf00179845b800ada8e9a50e77e01838b5497a Mon Sep 17 00:00:00 2001 From: Federico Caselli Date: Thu, 11 Apr 2024 21:39:25 +0200 Subject: [PATCH] Some improvements to the cache key generation speed Optimize anon map performance Improve bindparam cache key generation Improvement is about 10% (Table must be manually constructed by running `python test/perf/compiled_extensions CacheKey` on main and in this change while recompiling cython) | change | main | change / main | check_not_caching | 0.850238500 | 0.8729132 | 0.9740241069 | parent_table | 0.380153900 | 0.4201198 | 0.9048702299 | parent_orm | 0.524657600 | 0.575952 | 0.9109398005 | parent_orm_join | 0.852317200 | 0.978303 | 0.8712200617 | many_types | 0.298978000 | 0.3166505 | 0.944189256 | oracle_all_objects_query | 0.830102500 | 0.940856 | 0.8822843241 | oracle_table_options_query | 0.787572400 | 0.8663654 | 0.9090533856 | oracle_column_query | 1.020074200 | 1.1518336 | 0.8856089977 | oracle_comment_query | 1.054886600 | 1.1857846 | 0.8896106426 | oracle_index_query | 0.932308800 | 1.0719409 | 0.869738994 | oracle_constraint_query | 1.204755500 | 1.3571617 | 0.8877022539 | pg_has_table_query | 0.713250000 | 0.8550803 | 0.8341321862 | pg_columns_query | 1.269068400 | 1.4404266 | 0.8810364929 | pg_table_oids_query | 0.787948100 | 0.9132093 | 0.8628340732 | pg_index_query | 1.118733900 | 1.2628904 | 0.8858519314 | pg_constraint_query | 0.917447700 | 1.0175005 | 0.9016680581 | pg_foreing_key_query | 0.980781200 | 1.1213958 | 0.8746075204 | pg_comment_query | 0.883384900 | 1.0329165 | 0.8552336031 | pg_check_constraint_query | 1.043821200 | 1.229517 | 0.8489684974 | pg_enum_query | 0.947796800 | 1.043922 | 0.907919174 | pg_domain_query | 1.082338200 | 1.2296575 | 0.880194851 | > Mean values | - | - | 0.8886518305 | Change-Id: I1c1432978d954863a3967267d599fbb3a53d5ad5 --- lib/sqlalchemy/sql/_util_cy.py | 47 ++++++-- lib/sqlalchemy/sql/annotation.py | 11 +- lib/sqlalchemy/sql/compiler.py | 6 +- lib/sqlalchemy/sql/elements.py | 103 +++++++++++------- lib/sqlalchemy/util/_collections_cy.pxd | 8 ++ lib/sqlalchemy/util/_collections_cy.py | 23 +--- .../{collections.py => collections_.py} | 0 test/perf/compiled_extensions/command.py | 2 +- 8 files changed, 122 insertions(+), 78 deletions(-) create mode 100644 lib/sqlalchemy/util/_collections_cy.pxd rename test/perf/compiled_extensions/{collections.py => collections_.py} (100%) diff --git a/lib/sqlalchemy/sql/_util_cy.py b/lib/sqlalchemy/sql/_util_cy.py index 2d15b1c7e2..8e5c55e0c5 100644 --- a/lib/sqlalchemy/sql/_util_cy.py +++ b/lib/sqlalchemy/sql/_util_cy.py @@ -35,6 +35,11 @@ def _is_compiled() -> bool: # END GENERATED CYTHON IMPORT +if cython.compiled: + from cython.cimports.sqlalchemy.util._collections_cy import _get_id +else: + _get_id = id + @cython.cclass class prefix_anon_map(Dict[str, str]): @@ -67,7 +72,7 @@ class prefix_anon_map(Dict[str, str]): class anon_map( Dict[ Union[int, str, "Literal[CacheConst.NO_CACHE]"], - Union[Literal[True], str], + Union[int, Literal[True]], ] ): """A map that creates new keys for missing key access. @@ -90,19 +95,41 @@ class anon_map( else: _index: int = 0 # type: ignore[no-redef] - def get_anon(self, obj: object, /) -> Tuple[str, bool]: + @cython.cfunc # type:ignore[misc] + @cython.inline # type:ignore[misc] + def _add_missing( + self: anon_map, key: Union[int, str, "Literal[CacheConst.NO_CACHE]"], / + ) -> int: + val: int = self._index + self._index += 1 + self_dict: dict = self # type: ignore[type-arg] + self_dict[key] = val + return val + + def get_anon(self: anon_map, obj: object, /) -> Tuple[int, bool]: self_dict: dict = self # type: ignore[type-arg] - idself = id(obj) + idself: int = _get_id(obj) if idself in self_dict: return self_dict[idself], True else: - return self.__missing__(idself), False + return self._add_missing(idself), False - def __missing__(self, key: Union[int, str], /) -> str: - val: str - self_dict: dict = self # type: ignore[type-arg] + if cython.compiled: - self_dict[key] = val = str(self._index) - self._index += 1 - return val + def __getitem__( + self: anon_map, + key: Union[int, str, "Literal[CacheConst.NO_CACHE]"], + /, + ) -> Union[int, Literal[True]]: + self_dict: dict = self # type: ignore[type-arg] + + if key in self_dict: + return self_dict[key] # type:ignore[no-any-return] + else: + return self._add_missing(key) # type:ignore[no-any-return] + + def __missing__( + self: anon_map, key: Union[int, str, "Literal[CacheConst.NO_CACHE]"], / + ) -> int: + return self._add_missing(key) # type:ignore[no-any-return] diff --git a/lib/sqlalchemy/sql/annotation.py b/lib/sqlalchemy/sql/annotation.py index db382b874b..29b1b4cdfa 100644 --- a/lib/sqlalchemy/sql/annotation.py +++ b/lib/sqlalchemy/sql/annotation.py @@ -17,6 +17,7 @@ related in any way to the pep-593 concept of "Annotated". from __future__ import annotations +from operator import itemgetter import typing from typing import Any from typing import Callable @@ -103,14 +104,16 @@ class SupportsAnnotations(ExternallyTraversible): else value ), ) - for key, value in [ - (key, self._annotations[key]) - for key in sorted(self._annotations) - ] + for key, value in sorted( + self._annotations.items(), key=_get_item0 + ) ), ) +_get_item0 = itemgetter(0) + + class SupportsWrappingAnnotations(SupportsAnnotations): __slots__ = () diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 785d2e9350..2905fb9176 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -3683,7 +3683,7 @@ class SQLCompiler(Compiled): bind_expression_template=wrapped, **kwargs, ) - return "(%s)" % ret + return f"({ret})" return wrapped @@ -3702,7 +3702,7 @@ class SQLCompiler(Compiled): bindparam, within_columns_clause=True, **kwargs ) if bindparam.expanding: - ret = "(%s)" % ret + ret = f"({ret})" return ret name = self._truncate_bindparam(bindparam) @@ -3799,7 +3799,7 @@ class SQLCompiler(Compiled): ) if bindparam.expanding: - ret = "(%s)" % ret + ret = f"({ret})" return ret diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 4e46070060..243d048b3d 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -1734,9 +1734,8 @@ class ColumnElement( seed = seed + "_" if isinstance(seed, _anonymous_label): - return _anonymous_label.safe_construct( - hash_value, "", enclosing_label=seed - ) + # NOTE: the space after the hash is required + return _anonymous_label(f"{seed}%({hash_value} )s") return _anonymous_label.safe_construct(hash_value, seed or "anon") @@ -1939,12 +1938,12 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): ] key: str + _anon_map_key: Optional[str] = None type: TypeEngine[_T] value: Optional[_T] _is_crud = False _is_bind_parameter = True - _key_is_anon = False # bindparam implements its own _gen_cache_key() method however # we check subclasses for this flag, else no cache key is generated @@ -1975,22 +1974,24 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): key = quoted_name.construct(key, quote) if unique: - self.key = _anonymous_label.safe_construct( - id(self), - ( - key - if key is not None - and not isinstance(key, _anonymous_label) - else "param" - ), - sanitize_key=True, + self.key, self._anon_map_key = ( + _anonymous_label.safe_construct_with_key( + id(self), + ( + key + if key is not None + and not isinstance(key, _anonymous_label) + else "param" + ), + sanitize_key=True, + ) ) - self._key_is_anon = True elif key: self.key = key else: - self.key = _anonymous_label.safe_construct(id(self), "param") - self._key_is_anon = True + self.key, self._anon_map_key = ( + _anonymous_label.safe_construct_with_key(id(self), "param") + ) # identifying key that won't change across # clones, used to identify the bind's logical @@ -2079,7 +2080,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): else: return self.value - def render_literal_execute(self) -> BindParameter[_T]: + def render_literal_execute(self) -> Self: """Produce a copy of this bound parameter that will enable the :paramref:`_sql.BindParameter.literal_execute` flag. @@ -2100,7 +2101,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): :ref:`engine_thirdparty_caching` """ - c = ClauseElement._clone(self) + c: Self = ClauseElement._clone(self) c.literal_execute = True return c @@ -2113,12 +2114,12 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): return self def _with_binary_element_type(self, type_): - c = ClauseElement._clone(self) + c: Self = ClauseElement._clone(self) # type: ignore[assignment] c.type = type_ return c def _clone(self, maintain_key: bool = False, **kw: Any) -> Self: - c = ClauseElement._clone(self, **kw) + c: Self = ClauseElement._clone(self, **kw) # ensure all the BindParameter objects stay in cloned set. # in #7823, we changed "clone" so that a clone only keeps a reference # to the "original" element, since for column correspondence, that's @@ -2129,7 +2130,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): # forward. c._cloned_set.update(self._cloned_set) if not maintain_key and self.unique: - c.key = _anonymous_label.safe_construct( + c.key, c._anon_map_key = _anonymous_label.safe_construct_with_key( id(c), c._orig_key or "param", sanitize_key=True ) return c @@ -2153,15 +2154,21 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): id_, self.__class__, self.type._static_cache_key, - self.key % anon_map if self._key_is_anon else self.key, + ( + anon_map[self._anon_map_key] + if self._anon_map_key is not None + else self.key + ), self.literal_execute, ) def _convert_to_unique(self): if not self.unique: self.unique = True - self.key = _anonymous_label.safe_construct( - id(self), self._orig_key or "param", sanitize_key=True + self.key, self._anon_map_key = ( + _anonymous_label.safe_construct_with_key( + id(self), self._orig_key or "param", sanitize_key=True + ) ) def __getstate__(self): @@ -2177,9 +2184,10 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]): def __setstate__(self, state): if state.get("unique", False): - state["key"] = _anonymous_label.safe_construct( + anon_and_key = _anonymous_label.safe_construct_with_key( id(self), state.get("_orig_key", "param"), sanitize_key=True ) + state["key"], state["_anon_map_key"] = anon_and_key self.__dict__.update(state) def __repr__(self): @@ -4911,10 +4919,12 @@ class ColumnClause( return None elif t is not None and is_named_from_clause(t): if has_schema_attr(t) and t.schema: - label = t.schema.replace(".", "_") + "_" + t.name + "_" + name + label = ( + t.schema.replace(".", "_") + "_" + t.name + ("_" + name) + ) else: assert not TYPE_CHECKING or isinstance(t, NamedFromClause) - label = t.name + "_" + name + label = t.name + ("_" + name) # propagate name quoting rules for labels. if is_quoted_name(name) and name.quote is not None: @@ -4941,7 +4951,7 @@ class ColumnClause( _label = label counter = 1 while _label in t.c: - _label = label + "_" + str(counter) + _label = label + f"_{counter}" counter += 1 label = _label @@ -5342,6 +5352,7 @@ class conv(_truncated_label): # _truncated_identifier() sequence in a custom # compiler _generated_label = _truncated_label +_anonymous_label_escape = re.compile(r"[%\(\) \$]+") class _anonymous_label(_truncated_label): @@ -5350,29 +5361,37 @@ class _anonymous_label(_truncated_label): __slots__ = () + @classmethod + def safe_construct_with_key( + cls, seed: int, body: str, sanitize_key: bool = False + ) -> typing_Tuple[_anonymous_label, str]: + # need to escape chars that interfere with format + # strings in any case, issue #8724 + body = _anonymous_label_escape.sub("_", body) + + if sanitize_key: + # sanitize_key is then an extra step used by BindParameter + body = body.strip("_") + + key = f"{seed} {body.replace('%', '%%')}" + label = _anonymous_label(f"%({key})s") + return label, key + @classmethod def safe_construct( - cls, - seed: int, - body: str, - enclosing_label: Optional[str] = None, - sanitize_key: bool = False, + cls, seed: int, body: str, sanitize_key: bool = False ) -> _anonymous_label: # need to escape chars that interfere with format # strings in any case, issue #8724 - body = re.sub(r"[%\(\) \$]+", "_", body) + body = _anonymous_label_escape.sub("_", body) if sanitize_key: # sanitize_key is then an extra step used by BindParameter body = body.strip("_") - label = "%%(%d %s)s" % (seed, body.replace("%", "%%")) - if enclosing_label: - label = "%s%s" % (enclosing_label, label) + return _anonymous_label(f"%({seed} {body.replace('%', '%%')})s") - return _anonymous_label(label) - - def __add__(self, other): + def __add__(self, other: str) -> _anonymous_label: if "%" in other and not isinstance(other, _anonymous_label): other = str(other).replace("%", "%%") else: @@ -5385,7 +5404,7 @@ class _anonymous_label(_truncated_label): ) ) - def __radd__(self, other): + def __radd__(self, other: str) -> _anonymous_label: if "%" in other and not isinstance(other, _anonymous_label): other = str(other).replace("%", "%%") else: @@ -5398,7 +5417,7 @@ class _anonymous_label(_truncated_label): ) ) - def apply_map(self, map_): + def apply_map(self, map_: Mapping[str, Any]) -> str: if self.quote is not None: # preserve quoting only if necessary return quoted_name(self % map_, self.quote) diff --git a/lib/sqlalchemy/util/_collections_cy.pxd b/lib/sqlalchemy/util/_collections_cy.pxd new file mode 100644 index 0000000000..cea6dc21f6 --- /dev/null +++ b/lib/sqlalchemy/util/_collections_cy.pxd @@ -0,0 +1,8 @@ +# util/_collections_cy.pxd +# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors +# +# +# This module is part of SQLAlchemy and is released under +# the MIT License: https://www.opensource.org/licenses/mit-license.php + +cdef unsigned long long _get_id(item: object) \ No newline at end of file diff --git a/lib/sqlalchemy/util/_collections_cy.py b/lib/sqlalchemy/util/_collections_cy.py index 0931ac450c..b853f42a4a 100644 --- a/lib/sqlalchemy/util/_collections_cy.py +++ b/lib/sqlalchemy/util/_collections_cy.py @@ -19,7 +19,6 @@ from typing import NoReturn from typing import Optional from typing import Set from typing import Tuple -from typing import TYPE_CHECKING from typing import TypeVar from typing import Union @@ -42,14 +41,6 @@ def _is_compiled() -> bool: # END GENERATED CYTHON IMPORT - -if cython.compiled: - from cython.cimports.cpython.long import PyLong_FromUnsignedLongLong -elif TYPE_CHECKING: - - def PyLong_FromUnsignedLongLong(v: Any) -> int: ... - - _T = TypeVar("_T") _S = TypeVar("_S") @@ -267,16 +258,12 @@ class OrderedSet(Set[_T]): if cython.compiled: - @cython.final - @cython.inline @cython.cfunc - @cython.annotation_typing(False) - def _get_id(item: Any) -> int: - return PyLong_FromUnsignedLongLong( - cython.cast( - cython.ulonglong, - cython.cast(cython.pointer(cython.void), item), - ) + @cython.inline + def _get_id(item: object, /) -> cython.ulonglong: + return cython.cast( + cython.ulonglong, + cython.cast(cython.pointer(cython.void), item), ) else: diff --git a/test/perf/compiled_extensions/collections.py b/test/perf/compiled_extensions/collections_.py similarity index 100% rename from test/perf/compiled_extensions/collections.py rename to test/perf/compiled_extensions/collections_.py diff --git a/test/perf/compiled_extensions/command.py b/test/perf/compiled_extensions/command.py index 21fc1cacf8..97cf725460 100644 --- a/test/perf/compiled_extensions/command.py +++ b/test/perf/compiled_extensions/command.py @@ -4,7 +4,7 @@ from .base import Case if True: from . import cache_key # noqa: F401 - from . import collections # noqa: F401 + from . import collections_ # noqa: F401 from . import misc # noqa: F401 from . import row # noqa: F401 -- 2.47.2