From: Daniele Varrazzo Date: Mon, 11 Sep 2023 00:36:26 +0000 (+0100) Subject: refactor: convert back query2pg from static method to regular function X-Git-Tag: pool-3.2.0~54 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2d3835ae2ba7b270d3ac09da7906d8298274d3a2;p=thirdparty%2Fpsycopg.git refactor: convert back query2pg from static method to regular function On Python < 3.10, a static method doesn't seem callable, at least during class building, so applying lru_cache on it fails. --- diff --git a/psycopg/psycopg/_queries.py b/psycopg/psycopg/_queries.py index 6da19fa76..376012aec 100644 --- a/psycopg/psycopg/_queries.py +++ b/psycopg/psycopg/_queries.py @@ -81,9 +81,9 @@ class PostgresQuery: len(bquery) <= MAX_CACHED_STATEMENT_LENGTH and len(vars) <= MAX_CACHED_STATEMENT_PARAMS ): - f: PostgresQuery._Query2Pg = PostgresQuery.query2pg + f: _Query2Pg = _query2pg else: - f = PostgresQuery.query2pg_nocache + f = _query2pg_nocache (self.query, self._want_formats, self._order, self._parts) = f( bquery, self._encoding @@ -165,63 +165,69 @@ class PostgresQuery: f" {', '.join(sorted(i for i in order or () if i not in vars))}" ) - # The type of the query2pg() and query2pg_nocache() methods - _Query2Pg: TypeAlias = Callable[ - [bytes, str], - Tuple[bytes, Optional[List[PyFormat]], Optional[List[str]], List[QueryPart]], - ] - @staticmethod - def query2pg_nocache( - query: bytes, encoding: str - ) -> Tuple[bytes, Optional[List[PyFormat]], Optional[List[str]], List[QueryPart]]: - """ - Convert Python query and params into something Postgres understands. - - - Convert Python placeholders (``%s``, ``%(name)s``) into Postgres - format (``$1``, ``$2``) - - placeholders can be %s, %t, or %b (auto, text or binary) - - return ``query`` (bytes), ``formats`` (list of formats) ``order`` - (sequence of names used in the query, in the position they appear) - ``parts`` (splits of queries and placeholders). - """ - parts = _split_query(query, encoding) - order: Optional[List[str]] = None - chunks: List[bytes] = [] - formats = [] - - if isinstance(parts[0].item, int): - for part in parts[:-1]: - assert isinstance(part.item, int) - chunks.append(part.pre) - chunks.append(b"$%d" % (part.item + 1)) +# The type of the _query2pg() and _query2pg_nocache() methods +_Query2Pg: TypeAlias = Callable[ + [bytes, str], Tuple[bytes, List[PyFormat], Optional[List[str]], List[QueryPart]] +] + + +def _query2pg_nocache( + query: bytes, encoding: str +) -> Tuple[bytes, List[PyFormat], Optional[List[str]], List[QueryPart]]: + """ + Convert Python query and params into something Postgres understands. + + - Convert Python placeholders (``%s``, ``%(name)s``) into Postgres + format (``$1``, ``$2``) + - placeholders can be %s, %t, or %b (auto, text or binary) + - return ``query`` (bytes), ``formats`` (list of formats) ``order`` + (sequence of names used in the query, in the position they appear) + ``parts`` (splits of queries and placeholders). + """ + parts = _split_query(query, encoding) + order: Optional[List[str]] = None + chunks: List[bytes] = [] + formats = [] + + if isinstance(parts[0].item, int): + for part in parts[:-1]: + assert isinstance(part.item, int) + chunks.append(part.pre) + chunks.append(b"$%d" % (part.item + 1)) + formats.append(part.format) + + elif isinstance(parts[0].item, str): + seen: Dict[str, Tuple[bytes, PyFormat]] = {} + order = [] + for part in parts[:-1]: + assert isinstance(part.item, str) + chunks.append(part.pre) + if part.item not in seen: + ph = b"$%d" % (len(seen) + 1) + seen[part.item] = (ph, part.format) + order.append(part.item) + chunks.append(ph) formats.append(part.format) + else: + if seen[part.item][1] != part.format: + raise e.ProgrammingError( + f"placeholder '{part.item}' cannot have different formats" + ) + chunks.append(seen[part.item][0]) - elif isinstance(parts[0].item, str): - seen: Dict[str, Tuple[bytes, PyFormat]] = {} - order = [] - for part in parts[:-1]: - assert isinstance(part.item, str) - chunks.append(part.pre) - if part.item not in seen: - ph = b"$%d" % (len(seen) + 1) - seen[part.item] = (ph, part.format) - order.append(part.item) - chunks.append(ph) - formats.append(part.format) - else: - if seen[part.item][1] != part.format: - raise e.ProgrammingError( - f"placeholder '{part.item}' cannot have different formats" - ) - chunks.append(seen[part.item][0]) + # last part + chunks.append(parts[-1].pre) - # last part - chunks.append(parts[-1].pre) + return b"".join(chunks), formats, order, parts - return b"".join(chunks), formats, order, parts - query2pg = lru_cache()(query2pg_nocache) +# Note: the cache size is 128 items, but someone has reported throwing ~12k +# queries (of type `INSERT ... VALUES (...), (...)` with a varying amount of +# records), and the resulting cache size is >100Mb. So, we will avoid to cache +# large queries or queries with a large number of params. See +# https://github.com/sqlalchemy/sqlalchemy/discussions/10270 +_query2pg = lru_cache()(_query2pg_nocache) class PostgresClientQuery(PostgresQuery): @@ -246,20 +252,15 @@ class PostgresClientQuery(PostgresQuery): bquery = query if vars is not None: - # Avoid caching queries extremely long or with a huge number of - # parameters. They are usually generated by ORMs and have poor - # cacheablility (e.g. INSERT ... VALUES (...), (...) with varying - # numbers of tuples. - # see https://github.com/psycopg/psycopg/discussions/628 if ( len(bquery) <= MAX_CACHED_STATEMENT_LENGTH and len(vars) <= MAX_CACHED_STATEMENT_PARAMS ): - f: PostgresQuery._Query2Pg = PostgresClientQuery.query2pg + f: _Query2PgClient = _query2pg_client else: - f = PostgresClientQuery.query2pg_nocache + f = _query2pg_client_nocache - (self.template, _, self._order, self._parts) = f(bquery, self._encoding) + (self.template, self._order, self._parts) = f(bquery, self._encoding) else: self.query = bquery self._order = None @@ -281,44 +282,50 @@ class PostgresClientQuery(PostgresQuery): else: self.params = None - @staticmethod - def query2pg_nocache( - query: bytes, encoding: str - ) -> Tuple[bytes, Optional[List[PyFormat]], Optional[List[str]], List[QueryPart]]: - """ - Convert Python query and params into a template to perform client-side binding - """ - parts = _split_query(query, encoding, collapse_double_percent=False) - order: Optional[List[str]] = None - chunks: List[bytes] = [] - - if isinstance(parts[0].item, int): - for part in parts[:-1]: - assert isinstance(part.item, int) - chunks.append(part.pre) - chunks.append(b"%s") - - elif isinstance(parts[0].item, str): - seen: Dict[str, Tuple[bytes, PyFormat]] = {} - order = [] - for part in parts[:-1]: - assert isinstance(part.item, str) - chunks.append(part.pre) - if part.item not in seen: - ph = b"%s" - seen[part.item] = (ph, part.format) - order.append(part.item) - chunks.append(ph) - else: - chunks.append(seen[part.item][0]) - order.append(part.item) - # last part - chunks.append(parts[-1].pre) +_Query2PgClient: TypeAlias = Callable[ + [bytes, str], Tuple[bytes, Optional[List[str]], List[QueryPart]] +] + + +def _query2pg_client_nocache( + query: bytes, encoding: str +) -> Tuple[bytes, Optional[List[str]], List[QueryPart]]: + """ + Convert Python query and params into a template to perform client-side binding + """ + parts = _split_query(query, encoding, collapse_double_percent=False) + order: Optional[List[str]] = None + chunks: List[bytes] = [] + + if isinstance(parts[0].item, int): + for part in parts[:-1]: + assert isinstance(part.item, int) + chunks.append(part.pre) + chunks.append(b"%s") + + elif isinstance(parts[0].item, str): + seen: Dict[str, Tuple[bytes, PyFormat]] = {} + order = [] + for part in parts[:-1]: + assert isinstance(part.item, str) + chunks.append(part.pre) + if part.item not in seen: + ph = b"%s" + seen[part.item] = (ph, part.format) + order.append(part.item) + chunks.append(ph) + else: + chunks.append(seen[part.item][0]) + order.append(part.item) + + # last part + chunks.append(parts[-1].pre) + + return b"".join(chunks), order, parts - return b"".join(chunks), None, order, parts - query2pg = lru_cache()(query2pg_nocache) +_query2pg_client = lru_cache()(_query2pg_client_nocache) _re_placeholder = re.compile( diff --git a/psycopg/psycopg/raw_cursor.py b/psycopg/psycopg/raw_cursor.py index 067f66b09..7fc4090bf 100644 --- a/psycopg/psycopg/raw_cursor.py +++ b/psycopg/psycopg/raw_cursor.py @@ -4,8 +4,7 @@ psycopg raw queries cursors # Copyright (C) 2023 The Psycopg Team -from typing import Optional, List, Tuple, TYPE_CHECKING -from functools import lru_cache +from typing import Optional, TYPE_CHECKING from .abc import ConnectionType, Query, Params from .sql import Composable @@ -13,7 +12,7 @@ from .rows import Row from ._enums import PyFormat from .cursor import BaseCursor, Cursor from .cursor_async import AsyncCursor -from ._queries import PostgresQuery, QueryPart +from ._queries import PostgresQuery if TYPE_CHECKING: from typing import Any # noqa: F401 @@ -48,14 +47,6 @@ class PostgresRawQuery(PostgresQuery): self.types = () self.formats = None - @staticmethod - def query2pg_nocache( - query: bytes, encoding: str - ) -> Tuple[bytes, Optional[List[PyFormat]], Optional[List[str]], List[QueryPart]]: - raise NotImplementedError() - - query2pg = lru_cache()(query2pg_nocache) - class RawCursorMixin(BaseCursor[ConnectionType, Row]): _query_cls = PostgresRawQuery diff --git a/tests/test_cursor.py b/tests/test_cursor.py index a54f2f7b4..2e607ec54 100644 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -171,11 +171,21 @@ def test_execute_sql(conn): def test_query_parse_cache_size(conn): cur = conn.cursor() - query_cls = cur._query_cls + cls = type(cur) + + # Warning: testing internal structures. Test might need refactoring with the code. + cache: Any + if cls is psycopg.Cursor: + cache = psycopg._queries._query2pg + elif cls is psycopg.ClientCursor: + cache = psycopg._queries._query2pg_client + elif cls is psycopg.RawCursor: + pytest.skip("RawCursor has no query parse cache") + else: + assert False, cls - # Warning: testing internal structures. Test have to be refactored. - query_cls.query2pg.cache_clear() - ci = query_cls.query2pg.cache_info() + cache.cache_clear() + ci = cache.cache_info() h0, m0 = ci.hits, ci.misses tests = [ (f"select 1 -- {'x' * 3500}", (), h0, m0 + 1), @@ -188,15 +198,11 @@ def test_query_parse_cache_size(conn): (f"select 1 -- {'%s' * 60}", ("x",) * 60, h0 + 2, m0 + 2), ] for i, (query, params, hits, misses) in enumerate(tests): - pq = query_cls(psycopg.adapt.Transformer()) + pq = cur._query_cls(psycopg.adapt.Transformer()) pq.convert(query, params) - ci = query_cls.query2pg.cache_info() - if not isinstance(cur, psycopg.RawCursor): - assert ci.hits == hits, f"at {i}" - assert ci.misses == misses, f"at {i}" - else: - assert ci.hits == 0, f"at {i}" - assert ci.misses == 0, f"at {i}" + ci = cache.cache_info() + assert ci.hits == hits, f"at {i}" + assert ci.misses == misses, f"at {i}" def test_execute_many_results(conn):