From: Daniele Varrazzo Date: Mon, 6 Oct 2025 18:17:23 +0000 (+0200) Subject: refactor: cleaner inheritance in record/composite loaders X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=49575b8383daa5ce28908d2db5a584ce60aa7a87;p=thirdparty%2Fpsycopg.git refactor: cleaner inheritance in record/composite loaders - Avoid a common base class to reuse the code, use module-level functions. - Make the inheritance lines of text and binary loaders similar. - Add documentation and notes. This refactoring is in preparation of the implementation of these loaders in C, to clarify and simplify things. See #1175. --- diff --git a/psycopg/psycopg/types/composite.py b/psycopg/psycopg/types/composite.py index 1890fa6dc..572deefb7 100644 --- a/psycopg/psycopg/types/composite.py +++ b/psycopg/psycopg/types/composite.py @@ -11,11 +11,12 @@ import struct from typing import TYPE_CHECKING, Any, NamedTuple, cast from functools import cache from collections import namedtuple -from collections.abc import Callable, Iterator, Sequence +from collections.abc import Callable, Sequence from .. import abc, postgres, pq, sql from .._oids import TEXT_OID -from ..adapt import Buffer, Dumper, Loader, PyFormat, RecursiveDumper, Transformer +from ..adapt import Buffer, Dumper, Loader, PyFormat, RecursiveDumper, RecursiveLoader +from ..adapt import Transformer from .._struct import pack_len, unpack_len from .._typeinfo import TypeInfo from .._encodings import _as_python_identifier @@ -155,55 +156,36 @@ class TupleBinaryDumper(Dumper): return out -class BaseCompositeLoader(Loader): - def __init__(self, oid: int, context: abc.AdaptContext | None = None): - super().__init__(oid, context) - self._tx = Transformer(context) - - def _parse_record(self, data: abc.Buffer) -> Iterator[bytes | None]: - """ - Split a non-empty representation of a composite type into components. - - Terminators shouldn't be used in `!data` (so that both record and range - representations can be parsed). - """ - for m in self._re_tokenize.finditer(data): - if m.group(1): - yield None - elif m.group(2) is not None: - yield self._re_undouble.sub(rb"\1", m.group(2)) - else: - yield m.group(3) - - # If the final group ended in `,` there is a final NULL in the record - # that the regexp couldn't parse. - if m and m.group().endswith(b","): - yield None - - _re_tokenize = re.compile( - rb"""(?x) - (,) # an empty token, representing NULL - | " ((?: [^"] | "")*) " ,? # or a quoted string - | ([^",)]+) ,? # or an unquoted string - """ - ) - - _re_undouble = re.compile(rb'(["\\])\1') +class RecordLoader(RecursiveLoader): + """ + Load a `record` field from PostgreSQL. + In text mode we don't have type information of the composite's fields, so + convert every item as text. Note that in binary loading we have per-field + oids instead. + """ -class RecordLoader(BaseCompositeLoader): def load(self, data: abc.Buffer) -> tuple[Any, ...]: if data == b"()": return () cast = self._tx.get_loader(TEXT_OID, self.format).load - return tuple( - cast(token) if token is not None else None - for token in self._parse_record(data[1:-1]) - ) + record = _parse_text_record(data[1:-1]) + for i in range(len(record)): + if (f := record[i]) is not None: + record[i] = cast(f) + + return tuple(record) class RecordBinaryLoader(Loader): + """ + Load a `record` field from PostgreSQL. + + Unlike in text mode, the composite data contains oids of the fields, + so we can actually parse the records in its original types. + """ + format = pq.Format.BINARY def __init__(self, oid: int, context: abc.AdaptContext | None = None): @@ -216,20 +198,7 @@ class RecordBinaryLoader(Loader): self._txs: dict[tuple[int, ...], abc.Transformer] = {} def load(self, data: abc.Buffer) -> tuple[Any, ...]: - nfields = unpack_len(data, 0)[0] - offset = 4 - oids = [] - record: list[Buffer | None] = [] - for _ in range(nfields): - oid, length = _unpack_oidlen(data, offset) - offset += 8 - oids.append(oid) - if length >= 0: - record.append(data[offset : offset + length]) - offset += length - else: - record.append(None) - + record, oids = _parse_binary_record(data) key = tuple(oids) try: tx = self._txs[key] @@ -240,34 +209,62 @@ class RecordBinaryLoader(Loader): return tx.load_sequence(record) -class CompositeLoader(RecordLoader): +class _CompositeLoader(Loader): + """ + Base class to create text loaders of specific composite types. + + The class is complete but lack information about the fields types and + object factory. These will be added by register_composite(), which will + create a subclass of this class. + """ + factory: Callable[..., Any] fields_types: list[int] - _types_set = False - def load(self, data: abc.Buffer) -> Any: - if not self._types_set: - self._config_types(data) - self._types_set = True + def __init__(self, oid: int, context: abc.AdaptContext | None = None): + super().__init__(oid, context) + # Note: we cannot use the RecursiveLoader base class here because we + # always want a different Transformer instance, otherwise the types + # loaded will conflict with the types loaded by the record. + self._tx = Transformer(context) + self._tx.set_loader_types(self.fields_types, self.format) + def load(self, data: abc.Buffer) -> Any: + # Use `type(self).factory` instead of `self.factory` because, if + # `factory` is a function, `self.factory` will become bound and the + # first argument passed will become `self`. if data == b"()": return type(self).factory() return type(self).factory( - *self._tx.load_sequence(tuple(self._parse_record(data[1:-1]))) + *self._tx.load_sequence(_parse_text_record(data[1:-1])) ) - def _config_types(self, data: abc.Buffer) -> None: - self._tx.set_loader_types(self.fields_types, self.format) +class _CompositeBinaryLoader(Loader): + """ + Base class to create text loaders of specific composite types. + + The class is complete but lack information about the fields types and + object factory. These will be added by register_composite(), which will + create a subclass of this class. + """ -class CompositeBinaryLoader(RecordBinaryLoader): format = pq.Format.BINARY factory: Callable[..., Any] + fields_types: list[int] + + def __init__(self, oid: int, context: abc.AdaptContext | None = None): + super().__init__(oid, context) + self._tx = Transformer(context) + self._tx.set_loader_types(self.fields_types, self.format) def load(self, data: abc.Buffer) -> Any: - r = super().load(data) - return type(self).factory(*r) + record, _ = _parse_binary_record(data) # assume oids == self.fields_types + # Use `type(self).factory` instead of `self.factory` because, if + # `factory` is a function, `self.factory` will become bound and the + # first argument passed will become `self`. + return type(self).factory(*self._tx.load_sequence(record)) def register_composite( @@ -305,12 +302,12 @@ def register_composite( adapters = context.adapters if context else postgres.adapters # generate and register a customized text loader - loader: type[BaseCompositeLoader] + loader: type[Loader] loader = _make_loader(info.name, tuple(info.field_types), factory) adapters.register_loader(info.oid, loader) # generate and register a customized binary loader - loader = _make_binary_loader(info.name, factory) + loader = _make_binary_loader(info.name, tuple(info.field_types), factory) adapters.register_loader(info.oid, loader) # If the factory is a type, create and register dumpers for it @@ -339,6 +336,65 @@ def _nt_from_info(info: CompositeInfo) -> type[NamedTuple]: return _make_nt(name, fields) +def _parse_text_record(data: abc.Buffer) -> list[bytes | None]: + """ + Split a non-empty representation of a composite type into components. + + Terminators shouldn't be used in `!data` (so that both record and range + representations can be parsed). + """ + record: list[bytes | None] = [] + for m in _re_tokenize.finditer(data): + if m.group(1): + record.append(None) + elif m.group(2) is not None: + record.append(_re_undouble.sub(rb"\1", m.group(2))) + else: + record.append(m.group(3)) + + # If the final group ended in `,` there is a final NULL in the record + # that the regexp couldn't parse. + if m and m.group().endswith(b","): + record.append(None) + + return record + + +_re_tokenize = re.compile( + rb"""(?x) + (,) # an empty token, representing NULL + | " ((?: [^"] | "")*) " ,? # or a quoted string + | ([^",)]+) ,? # or an unquoted string + """ +) +_re_undouble = re.compile(rb'(["\\])\1') + + +def _parse_binary_record(data: abc.Buffer) -> tuple[list[Buffer | None], list[int]]: + """ + Parse the binary representation of a composite type. + + Return the sequence of fields and oids found in the type. The fields + are returned as buffer: they will need a Transformer to be converted + to Python types. + """ + nfields = unpack_len(data, 0)[0] + offset = 4 + oids = [] + record: list[Buffer | None] = [] + for _ in range(nfields): + oid, length = _unpack_oidlen(data, offset) + offset += 8 + oids.append(oid) + if length >= 0: + record.append(data[offset : offset + length]) + offset += length + else: + record.append(None) + + return record, oids + + # Cache all dynamically-generated types to avoid leaks in case the types # cannot be GC'd. @@ -351,20 +407,24 @@ def _make_nt(name: str, fields: tuple[str, ...]) -> type[NamedTuple]: @cache def _make_loader( name: str, types: tuple[int, ...], factory: Callable[..., Any] -) -> type[BaseCompositeLoader]: +) -> type[_CompositeLoader]: + doc = f"Text loader for the '{name}' composite." return type( f"{name.title()}Loader", - (CompositeLoader,), - {"factory": factory, "fields_types": list(types)}, + (_CompositeLoader,), + {"__doc__": doc, "factory": factory, "fields_types": list(types)}, ) @cache def _make_binary_loader( - name: str, factory: Callable[..., Any] -) -> type[BaseCompositeLoader]: + name: str, types: tuple[int, ...], factory: Callable[..., Any] +) -> type[_CompositeBinaryLoader]: + doc = f"Binary loader for the '{name}' composite." return type( - f"{name.title()}BinaryLoader", (CompositeBinaryLoader,), {"factory": factory} + f"{name.title()}BinaryLoader", + (_CompositeBinaryLoader,), + {"__doc__": doc, "factory": factory, "fields_types": list(types)}, )