From: Daniele Varrazzo Date: Sun, 20 Mar 2022 14:54:44 +0000 (+0100) Subject: refactor: add _as_python_identifier() function X-Git-Tag: 3.1~109^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d0dd0c74b6a3286594594871682614ddc0f3cbe0;p=thirdparty%2Fpsycopg.git refactor: add _as_python_identifier() function The function takes a PostgreSQL "invalid" name (such as `a-b` or '0' or a unicode string) and makes a valid Python name. Currently used to make valid namedtuples from `namedtuple_row()` but can be used to make other valid Python names. --- diff --git a/psycopg/psycopg/_encodings.py b/psycopg/psycopg/_encodings.py index a44f3bda0..f672fd521 100644 --- a/psycopg/psycopg/_encodings.py +++ b/psycopg/psycopg/_encodings.py @@ -4,6 +4,8 @@ Mappings between PostgreSQL and Python encodings. # Copyright (C) 2020 The Psycopg Team +import re +import string import codecs from typing import Any, Dict, Optional, TYPE_CHECKING @@ -131,3 +133,23 @@ def pg2pyenc(name: bytes) -> str: except KeyError: sname = name.decode("utf8", "replace") raise NotSupportedError(f"codec not available in Python: {sname!r}") + + +def _as_python_identifier(s: str, prefix: str = "f") -> str: + """ + Reduce a string to a valid Python identifier. + + Replace all non-valid chars with '_' and prefix the value with *prefix* if + the first letter is an '_'. + """ + s = _re_clean.sub("_", s) + # Python identifier cannot start with numbers, namedtuple fields + # cannot start with underscore. So... 
+ if s[0] == "_" or "0" <= s[0] <= "9": + s = prefix + s + return s + + +_re_clean = re.compile( + f"[^{string.ascii_lowercase}{string.ascii_uppercase}{string.digits}_]" +) diff --git a/psycopg/psycopg/rows.py b/psycopg/psycopg/rows.py index 99194cd97..2f2d64728 100644 --- a/psycopg/psycopg/rows.py +++ b/psycopg/psycopg/rows.py @@ -4,7 +4,6 @@ psycopg row factories # Copyright (C) 2021 The Psycopg Team -import re import functools from typing import Any, Callable, Dict, NamedTuple, NoReturn, Sequence, Tuple from typing import TYPE_CHECKING, Type, TypeVar @@ -12,6 +11,7 @@ from collections import namedtuple from . import errors as e from ._compat import Protocol, TypeAlias +from ._encodings import _as_python_identifier if TYPE_CHECKING: from .cursor import BaseCursor, Cursor @@ -138,20 +138,11 @@ def namedtuple_row( return nt._make -# ascii except alnum and underscore -_re_clean = re.compile("[" + re.escape(" !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~") + "]") - - @functools.lru_cache(512) def _make_nt(*key: str) -> Type[NamedTuple]: fields = [] for s in key: - s = _re_clean.sub("_", s) - # Python identifier cannot start with numbers, namedtuple fields - # cannot start with underscore. So... - if s[0] == "_" or "0" <= s[0] <= "9": - s = "f" + s - fields.append(s) + fields.append(_as_python_identifier(s)) return namedtuple("Row", fields) # type: ignore[return-value]