]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
Some improvements to the cache key generation speed
authorFederico Caselli <cfederico87@gmail.com>
Thu, 11 Apr 2024 19:39:25 +0000 (21:39 +0200)
committerFederico Caselli <cfederico87@gmail.com>
Wed, 8 May 2024 19:35:37 +0000 (21:35 +0200)
Optimize anon map performance
Improve bindparam cache key generation

Improvement is about 10%
(Table must be manually constructed by running
`python test/perf/compiled_extensions CacheKey` on main and in
this change while recompiling cython)

                           | change      | main      | change / main |
check_not_caching          | 0.850238500 | 0.8729132 | 0.9740241069  |
parent_table               | 0.380153900 | 0.4201198 | 0.9048702299  |
parent_orm                 | 0.524657600 | 0.575952  | 0.9109398005  |
parent_orm_join            | 0.852317200 | 0.978303  | 0.8712200617  |
many_types                 | 0.298978000 | 0.3166505 | 0.944189256   |
oracle_all_objects_query   | 0.830102500 | 0.940856  | 0.8822843241  |
oracle_table_options_query | 0.787572400 | 0.8663654 | 0.9090533856  |
oracle_column_query        | 1.020074200 | 1.1518336 | 0.8856089977  |
oracle_comment_query       | 1.054886600 | 1.1857846 | 0.8896106426  |
oracle_index_query         | 0.932308800 | 1.0719409 | 0.869738994   |
oracle_constraint_query    | 1.204755500 | 1.3571617 | 0.8877022539  |
pg_has_table_query         | 0.713250000 | 0.8550803 | 0.8341321862  |
pg_columns_query           | 1.269068400 | 1.4404266 | 0.8810364929  |
pg_table_oids_query        | 0.787948100 | 0.9132093 | 0.8628340732  |
pg_index_query             | 1.118733900 | 1.2628904 | 0.8858519314  |
pg_constraint_query        | 0.917447700 | 1.0175005 | 0.9016680581  |
pg_foreing_key_query       | 0.980781200 | 1.1213958 | 0.8746075204  |
pg_comment_query           | 0.883384900 | 1.0329165 | 0.8552336031  |
pg_check_constraint_query  | 1.043821200 | 1.229517  | 0.8489684974  |
pg_enum_query              | 0.947796800 | 1.043922  | 0.907919174   |
pg_domain_query            | 1.082338200 | 1.2296575 | 0.880194851   |
> Mean values              | -           | -         | 0.8886518305  |

Change-Id: I1c1432978d954863a3967267d599fbb3a53d5ad5

lib/sqlalchemy/sql/_util_cy.py
lib/sqlalchemy/sql/annotation.py
lib/sqlalchemy/sql/compiler.py
lib/sqlalchemy/sql/elements.py
lib/sqlalchemy/util/_collections_cy.pxd [new file with mode: 0644]
lib/sqlalchemy/util/_collections_cy.py
test/perf/compiled_extensions/collections_.py [moved from test/perf/compiled_extensions/collections.py with 100% similarity]
test/perf/compiled_extensions/command.py

index 2d15b1c7e28a1dfdd0158ecb33b3f90e1c6e1833..8e5c55e0c500de3297619606ee75ba025940f96f 100644 (file)
@@ -35,6 +35,11 @@ def _is_compiled() -> bool:
 
 # END GENERATED CYTHON IMPORT
 
+if cython.compiled:
+    from cython.cimports.sqlalchemy.util._collections_cy import _get_id
+else:
+    _get_id = id
+
 
 @cython.cclass
 class prefix_anon_map(Dict[str, str]):
@@ -67,7 +72,7 @@ class prefix_anon_map(Dict[str, str]):
 class anon_map(
     Dict[
         Union[int, str, "Literal[CacheConst.NO_CACHE]"],
-        Union[Literal[True], str],
+        Union[int, Literal[True]],
     ]
 ):
     """A map that creates new keys for missing key access.
@@ -90,19 +95,41 @@ class anon_map(
     else:
         _index: int = 0  # type: ignore[no-redef]
 
-    def get_anon(self, obj: object, /) -> Tuple[str, bool]:
+    @cython.cfunc  # type:ignore[misc]
+    @cython.inline  # type:ignore[misc]
+    def _add_missing(
+        self: anon_map, key: Union[int, str, "Literal[CacheConst.NO_CACHE]"], /
+    ) -> int:
+        val: int = self._index
+        self._index += 1
+        self_dict: dict = self  # type: ignore[type-arg]
+        self_dict[key] = val
+        return val
+
+    def get_anon(self: anon_map, obj: object, /) -> Tuple[int, bool]:
         self_dict: dict = self  # type: ignore[type-arg]
 
-        idself = id(obj)
+        idself: int = _get_id(obj)
         if idself in self_dict:
             return self_dict[idself], True
         else:
-            return self.__missing__(idself), False
+            return self._add_missing(idself), False
 
-    def __missing__(self, key: Union[int, str], /) -> str:
-        val: str
-        self_dict: dict = self  # type: ignore[type-arg]
+    if cython.compiled:
 
-        self_dict[key] = val = str(self._index)
-        self._index += 1
-        return val
+        def __getitem__(
+            self: anon_map,
+            key: Union[int, str, "Literal[CacheConst.NO_CACHE]"],
+            /,
+        ) -> Union[int, Literal[True]]:
+            self_dict: dict = self  # type: ignore[type-arg]
+
+            if key in self_dict:
+                return self_dict[key]  # type:ignore[no-any-return]
+            else:
+                return self._add_missing(key)  # type:ignore[no-any-return]
+
+    def __missing__(
+        self: anon_map, key: Union[int, str, "Literal[CacheConst.NO_CACHE]"], /
+    ) -> int:
+        return self._add_missing(key)  # type:ignore[no-any-return]
index db382b874b60b2342695ebbc2f09fe735f050b57..29b1b4cdfa2c54364c0f921652eb9738e620bedb 100644 (file)
@@ -17,6 +17,7 @@ related in any way to the pep-593 concept of "Annotated".
 
 from __future__ import annotations
 
+from operator import itemgetter
 import typing
 from typing import Any
 from typing import Callable
@@ -103,14 +104,16 @@ class SupportsAnnotations(ExternallyTraversible):
                         else value
                     ),
                 )
-                for key, value in [
-                    (key, self._annotations[key])
-                    for key in sorted(self._annotations)
-                ]
+                for key, value in sorted(
+                    self._annotations.items(), key=_get_item0
+                )
             ),
         )
 
 
+_get_item0 = itemgetter(0)
+
+
 class SupportsWrappingAnnotations(SupportsAnnotations):
     __slots__ = ()
 
index 785d2e935020bd9b6a5802bd3f49434efaa8cbb7..2905fb91769e41e2b913c3b3117b4a9bc285d42d 100644 (file)
@@ -3683,7 +3683,7 @@ class SQLCompiler(Compiled):
                             bind_expression_template=wrapped,
                             **kwargs,
                         )
-                        return "(%s)" % ret
+                        return f"({ret})"
 
                 return wrapped
 
@@ -3702,7 +3702,7 @@ class SQLCompiler(Compiled):
                 bindparam, within_columns_clause=True, **kwargs
             )
             if bindparam.expanding:
-                ret = "(%s)" % ret
+                ret = f"({ret})"
             return ret
 
         name = self._truncate_bindparam(bindparam)
@@ -3799,7 +3799,7 @@ class SQLCompiler(Compiled):
         )
 
         if bindparam.expanding:
-            ret = "(%s)" % ret
+            ret = f"({ret})"
 
         return ret
 
index 4e46070060a43d0b32c9bfd3eb795832da451f91..243d048b3d2b47c74a2e6f1bc4878e4ceae3b924 100644 (file)
@@ -1734,9 +1734,8 @@ class ColumnElement(
             seed = seed + "_"
 
         if isinstance(seed, _anonymous_label):
-            return _anonymous_label.safe_construct(
-                hash_value, "", enclosing_label=seed
-            )
+            # NOTE: the space after the hash is required
+            return _anonymous_label(f"{seed}%({hash_value} )s")
 
         return _anonymous_label.safe_construct(hash_value, seed or "anon")
 
@@ -1939,12 +1938,12 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
     ]
 
     key: str
+    _anon_map_key: Optional[str] = None
     type: TypeEngine[_T]
     value: Optional[_T]
 
     _is_crud = False
     _is_bind_parameter = True
-    _key_is_anon = False
 
     # bindparam implements its own _gen_cache_key() method however
     # we check subclasses for this flag, else no cache key is generated
@@ -1975,22 +1974,24 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
             key = quoted_name.construct(key, quote)
 
         if unique:
-            self.key = _anonymous_label.safe_construct(
-                id(self),
-                (
-                    key
-                    if key is not None
-                    and not isinstance(key, _anonymous_label)
-                    else "param"
-                ),
-                sanitize_key=True,
+            self.key, self._anon_map_key = (
+                _anonymous_label.safe_construct_with_key(
+                    id(self),
+                    (
+                        key
+                        if key is not None
+                        and not isinstance(key, _anonymous_label)
+                        else "param"
+                    ),
+                    sanitize_key=True,
+                )
             )
-            self._key_is_anon = True
         elif key:
             self.key = key
         else:
-            self.key = _anonymous_label.safe_construct(id(self), "param")
-            self._key_is_anon = True
+            self.key, self._anon_map_key = (
+                _anonymous_label.safe_construct_with_key(id(self), "param")
+            )
 
         # identifying key that won't change across
         # clones, used to identify the bind's logical
@@ -2079,7 +2080,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
         else:
             return self.value
 
-    def render_literal_execute(self) -> BindParameter[_T]:
+    def render_literal_execute(self) -> Self:
         """Produce a copy of this bound parameter that will enable the
         :paramref:`_sql.BindParameter.literal_execute` flag.
 
@@ -2100,7 +2101,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
             :ref:`engine_thirdparty_caching`
 
         """
-        c = ClauseElement._clone(self)
+        c: Self = ClauseElement._clone(self)
         c.literal_execute = True
         return c
 
@@ -2113,12 +2114,12 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
             return self
 
     def _with_binary_element_type(self, type_):
-        c = ClauseElement._clone(self)
+        c: Self = ClauseElement._clone(self)  # type: ignore[assignment]
         c.type = type_
         return c
 
     def _clone(self, maintain_key: bool = False, **kw: Any) -> Self:
-        c = ClauseElement._clone(self, **kw)
+        c: Self = ClauseElement._clone(self, **kw)
         # ensure all the BindParameter objects stay in cloned set.
         # in #7823, we changed "clone" so that a clone only keeps a reference
         # to the "original" element, since for column correspondence, that's
@@ -2129,7 +2130,7 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
         # forward.
         c._cloned_set.update(self._cloned_set)
         if not maintain_key and self.unique:
-            c.key = _anonymous_label.safe_construct(
+            c.key, c._anon_map_key = _anonymous_label.safe_construct_with_key(
                 id(c), c._orig_key or "param", sanitize_key=True
             )
         return c
@@ -2153,15 +2154,21 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
             id_,
             self.__class__,
             self.type._static_cache_key,
-            self.key % anon_map if self._key_is_anon else self.key,
+            (
+                anon_map[self._anon_map_key]
+                if self._anon_map_key is not None
+                else self.key
+            ),
             self.literal_execute,
         )
 
     def _convert_to_unique(self):
         if not self.unique:
             self.unique = True
-            self.key = _anonymous_label.safe_construct(
-                id(self), self._orig_key or "param", sanitize_key=True
+            self.key, self._anon_map_key = (
+                _anonymous_label.safe_construct_with_key(
+                    id(self), self._orig_key or "param", sanitize_key=True
+                )
             )
 
     def __getstate__(self):
@@ -2177,9 +2184,10 @@ class BindParameter(roles.InElementRole, KeyedColumnElement[_T]):
 
     def __setstate__(self, state):
         if state.get("unique", False):
-            state["key"] = _anonymous_label.safe_construct(
+            anon_and_key = _anonymous_label.safe_construct_with_key(
                 id(self), state.get("_orig_key", "param"), sanitize_key=True
             )
+            state["key"], state["_anon_map_key"] = anon_and_key
         self.__dict__.update(state)
 
     def __repr__(self):
@@ -4911,10 +4919,12 @@ class ColumnClause(
             return None
         elif t is not None and is_named_from_clause(t):
             if has_schema_attr(t) and t.schema:
-                label = t.schema.replace(".", "_") + "_" + t.name + "_" + name
+                label = (
+                    t.schema.replace(".", "_") + "_" + t.name + ("_" + name)
+                )
             else:
                 assert not TYPE_CHECKING or isinstance(t, NamedFromClause)
-                label = t.name + "_" + name
+                label = t.name + ("_" + name)
 
             # propagate name quoting rules for labels.
             if is_quoted_name(name) and name.quote is not None:
@@ -4941,7 +4951,7 @@ class ColumnClause(
                     _label = label
                     counter = 1
                     while _label in t.c:
-                        _label = label + "_" + str(counter)
+                        _label = label + f"_{counter}"
                         counter += 1
                     label = _label
 
@@ -5342,6 +5352,7 @@ class conv(_truncated_label):
 # _truncated_identifier() sequence in a custom
 # compiler
 _generated_label = _truncated_label
+_anonymous_label_escape = re.compile(r"[%\(\) \$]+")
 
 
 class _anonymous_label(_truncated_label):
@@ -5350,29 +5361,37 @@ class _anonymous_label(_truncated_label):
 
     __slots__ = ()
 
+    @classmethod
+    def safe_construct_with_key(
+        cls, seed: int, body: str, sanitize_key: bool = False
+    ) -> typing_Tuple[_anonymous_label, str]:
+        # need to escape chars that interfere with format
+        # strings in any case, issue #8724
+        body = _anonymous_label_escape.sub("_", body)
+
+        if sanitize_key:
+            # sanitize_key is then an extra step used by BindParameter
+            body = body.strip("_")
+
+        key = f"{seed} {body.replace('%', '%%')}"
+        label = _anonymous_label(f"%({key})s")
+        return label, key
+
     @classmethod
     def safe_construct(
-        cls,
-        seed: int,
-        body: str,
-        enclosing_label: Optional[str] = None,
-        sanitize_key: bool = False,
+        cls, seed: int, body: str, sanitize_key: bool = False
     ) -> _anonymous_label:
         # need to escape chars that interfere with format
         # strings in any case, issue #8724
-        body = re.sub(r"[%\(\) \$]+", "_", body)
+        body = _anonymous_label_escape.sub("_", body)
 
         if sanitize_key:
             # sanitize_key is then an extra step used by BindParameter
             body = body.strip("_")
 
-        label = "%%(%d %s)s" % (seed, body.replace("%", "%%"))
-        if enclosing_label:
-            label = "%s%s" % (enclosing_label, label)
+        return _anonymous_label(f"%({seed} {body.replace('%', '%%')})s")
 
-        return _anonymous_label(label)
-
-    def __add__(self, other):
+    def __add__(self, other: str) -> _anonymous_label:
         if "%" in other and not isinstance(other, _anonymous_label):
             other = str(other).replace("%", "%%")
         else:
@@ -5385,7 +5404,7 @@ class _anonymous_label(_truncated_label):
             )
         )
 
-    def __radd__(self, other):
+    def __radd__(self, other: str) -> _anonymous_label:
         if "%" in other and not isinstance(other, _anonymous_label):
             other = str(other).replace("%", "%%")
         else:
@@ -5398,7 +5417,7 @@ class _anonymous_label(_truncated_label):
             )
         )
 
-    def apply_map(self, map_):
+    def apply_map(self, map_: Mapping[str, Any]) -> str:
         if self.quote is not None:
             # preserve quoting only if necessary
             return quoted_name(self % map_, self.quote)
diff --git a/lib/sqlalchemy/util/_collections_cy.pxd b/lib/sqlalchemy/util/_collections_cy.pxd
new file mode 100644 (file)
index 0000000..cea6dc2
--- /dev/null
@@ -0,0 +1,8 @@
+# util/_collections_cy.pxd
+# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of SQLAlchemy and is released under
+# the MIT License: https://www.opensource.org/licenses/mit-license.php
+
+cdef unsigned long long _get_id(item: object)
\ No newline at end of file
index 0931ac450cf465f54e502d50336c4e5aa2844922..b853f42a4a8e03dc32827b9a86b6a9edc2342d4f 100644 (file)
@@ -19,7 +19,6 @@ from typing import NoReturn
 from typing import Optional
 from typing import Set
 from typing import Tuple
-from typing import TYPE_CHECKING
 from typing import TypeVar
 from typing import Union
 
@@ -42,14 +41,6 @@ def _is_compiled() -> bool:
 
 
 # END GENERATED CYTHON IMPORT
-
-if cython.compiled:
-    from cython.cimports.cpython.long import PyLong_FromUnsignedLongLong
-elif TYPE_CHECKING:
-
-    def PyLong_FromUnsignedLongLong(v: Any) -> int: ...
-
-
 _T = TypeVar("_T")
 _S = TypeVar("_S")
 
@@ -267,16 +258,12 @@ class OrderedSet(Set[_T]):
 
 if cython.compiled:
 
-    @cython.final
-    @cython.inline
     @cython.cfunc
-    @cython.annotation_typing(False)
-    def _get_id(item: Any) -> int:
-        return PyLong_FromUnsignedLongLong(
-            cython.cast(
-                cython.ulonglong,
-                cython.cast(cython.pointer(cython.void), item),
-            )
+    @cython.inline
+    def _get_id(item: object, /) -> cython.ulonglong:
+        return cython.cast(
+            cython.ulonglong,
+            cython.cast(cython.pointer(cython.void), item),
         )
 
 else:
index 21fc1cacf8a6e88c5a79fbfa9e69e8819d6a402d..97cf725460a24a866d7daa59a2e1e32600913b9c 100644 (file)
@@ -4,7 +4,7 @@ from .base import Case
 
 if True:
     from . import cache_key  # noqa: F401
-    from . import collections  # noqa: F401
+    from . import collections_  # noqa: F401
     from . import misc  # noqa: F401
     from . import row  # noqa: F401