From: Daniele Varrazzo Date: Sat, 2 Jan 2021 13:52:27 +0000 (+0100) Subject: Define better registering, importing, subclassing adapters. X-Git-Tag: 3.0.dev0~205 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=991cec5ac6790ae09ae2ff91d4147d71df48ddf5;p=thirdparty%2Fpsycopg.git Define better registering, importing, subclassing adapters. - The C dumpers cannot be subclassed. Subclassing from Python makes difficult to use the c fast path, unless doing relatively expensive checks. - C optimised classes are chosen on registration. The types module only exposes the Python objects, so they can be documented and subclassed. Override adapter in the respective type modules This way they are properly exposed to the rest of the Python code. Note that it will need some trickery to be able to subclass them in Python: overridden adapter will need the C fast path bypassed if subclassing is detected. Export C int subclasses dumpers to Python Added guards to allow subclassing C adapters Not sure this is the way to go though. Subclassing might be a rare enough case to just make the classes final. The check is implemented at instance level: it would be more efficient to implement it at class level, however I don't know how to store class attribute or implement metaclasses in Cython. Don't make C types subclassable C types are no more exposed in types, so they won't get subclassed by final users. Checking for subclass in adaptation is expensive, so I'd rather not do it. Check if a class has a matching optimised class (with the same name) at registration time. --- diff --git a/psycopg3/psycopg3/__init__.py b/psycopg3/psycopg3/__init__.py index be10c37db..00466c09d 100644 --- a/psycopg3/psycopg3/__init__.py +++ b/psycopg3/psycopg3/__init__.py @@ -33,13 +33,6 @@ paramstyle = "pyformat" BinaryDumper.register(Binary, global_adapters) # dbapi20 -# Override adapters with fast version if available -if pq.__impl__ == "c": - from psycopg3_c import _psycopg3 - - _psycopg3.register_builtin_c_adapters() - - # Note: defining the exported methods helps both Sphynx in documenting that # this is the canonical place to obtain them and should be used by MyPy too, # so that function signatures are consistent with the documentation. diff --git a/psycopg3/psycopg3/adapt.py b/psycopg3/psycopg3/adapt.py index 5e6372f86..19824ba10 100644 --- a/psycopg3/psycopg3/adapt.py +++ b/psycopg3/psycopg3/adapt.py @@ -5,8 +5,8 @@ Entry point into the adaptation system. # Copyright (C) 2020 The Psycopg Team from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Type, Union -from typing import TYPE_CHECKING +from typing import Any, Dict, List, Optional, Type, TypeVar, Union +from typing import cast, TYPE_CHECKING from . import pq from . import proto from .pq import Format as Format @@ -16,6 +16,8 @@ from .proto import AdaptContext if TYPE_CHECKING: from .connection import BaseConnection +RV = TypeVar("RV") + class Dumper(ABC): """ @@ -23,7 +25,6 @@ class Dumper(ABC): """ format: Format - connection: Optional["BaseConnection"] = None # A class-wide oid, which will be used by default by instances unless # the subclass overrides it in init. @@ -31,7 +32,9 @@ class Dumper(ABC): def __init__(self, cls: type, context: Optional[AdaptContext] = None): self.cls = cls - self.connection = context.connection if context else None + self.connection: Optional["BaseConnection"] = ( + context.connection if context else None + ) self.oid = self._oid """The oid to pass to the server, if known.""" @@ -79,11 +82,12 @@ class Loader(ABC): """ format: Format - connection: Optional["BaseConnection"] def __init__(self, oid: int, context: Optional[AdaptContext] = None): self.oid = oid - self.connection = context.connection if context else None + self.connection: Optional["BaseConnection"] = ( + context.connection if context else None + ) @abstractmethod def load(self, data: bytes) -> Any: @@ -116,6 +120,9 @@ class AdaptersMap(AdaptContext): _dumpers: List[Dict[Union[type, str], Type["Dumper"]]] _loaders: List[Dict[int, Type["Loader"]]] + # Record if a dumper or loader has an optimised version. + _optimised: Dict[type, type] = {} + def __init__(self, extend: Optional["AdaptersMap"] = None): if extend: self._dumpers = extend._dumpers[:] @@ -148,6 +155,7 @@ class AdaptersMap(AdaptContext): f"dumpers should be registered on classes, got {cls} instead" ) + dumper = self._get_optimised(dumper) fmt = dumper.format if not self._own_dumpers[fmt]: self._dumpers[fmt] = self._dumpers[fmt].copy() @@ -164,6 +172,7 @@ class AdaptersMap(AdaptContext): f"loaders should be registered on oid, got {oid} instead" ) + loader = self._get_optimised(loader) fmt = loader.format if not self._own_loaders[fmt]: self._loaders[fmt] = self._loaders[fmt].copy() @@ -201,6 +210,32 @@ class AdaptersMap(AdaptContext): """ return self._loaders[format].get(oid) + @classmethod + def _get_optimised(self, cls: Type[RV]) -> Type[RV]: + """Return the optimised version of a Dumper or Loader class. + + Return the input class itself if there is no optimised version. + """ + try: + return self._optimised[cls] + except KeyError: + pass + + # Check if the class comes from psycopg3.types and there is a class + # with the same name in psycopg3_c._psycopg3. + if pq.__impl__ == "c": + from psycopg3 import types + from psycopg3_c import _psycopg3 + + if cls.__module__.startswith(types.__name__): + new = cast(Type[RV], getattr(_psycopg3, cls.__name__, None)) + if new: + self._optimised[cls] = new + return new + + self._optimised[cls] = cls + return cls + global_adapters = AdaptersMap() diff --git a/psycopg3_c/psycopg3_c/_psycopg3.pyi b/psycopg3_c/psycopg3_c/_psycopg3.pyi index 0a229001c..128f2e9ff 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3.pyi +++ b/psycopg3_c/psycopg3_c/_psycopg3.pyi @@ -36,7 +36,6 @@ class Transformer(proto.AdaptContext): ) -> Tuple[Any, ...]: ... def get_loader(self, oid: int, format: Format) -> Loader: ... -def register_builtin_c_adapters() -> None: ... def connect(conninfo: str) -> proto.PQGenConn[PGconn]: ... def execute(pgconn: PGconn) -> proto.PQGen[List[PGresult]]: ... def format_row_binary(row: Sequence[Any], tx: proto.Transformer) -> bytes: ... diff --git a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx index 20391a355..afa44ad31 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx +++ b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx @@ -15,6 +15,7 @@ equivalent C implementations. from typing import Any +cimport cython from cpython.bytes cimport PyBytes_AsStringAndSize from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_AS_STRING @@ -29,6 +30,7 @@ import logging logger = logging.getLogger("psycopg3.adapt") +@cython.freelist(8) cdef class CDumper: cdef object cls cdef public libpq.Oid oid @@ -140,9 +142,10 @@ cdef class CDumper: return PyByteArray_AS_STRING(ba) + offset +@cython.freelist(8) cdef class CLoader: cdef public libpq.Oid oid - cdef public connection + cdef readonly connection def __init__(self, int oid, context: Optional[AdaptContext] = None): self.oid = oid @@ -174,17 +177,3 @@ cdef class CLoader: from psycopg3.adapt import global_adapters as adapters adapters.register_loader(oid, cls) - - -def register_builtin_c_adapters(): - """ - Register all the builtin optimized adpaters. - - This function is supposed to be called only once, after the Python adapters - are registered. - - """ - logger.debug("registering optimised c adapters") - register_numeric_c_adapters() - register_singletons_c_adapters() - register_text_c_adapters() diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx index eeb756237..0bca8ac03 100644 --- a/psycopg3_c/psycopg3_c/types/numeric.pyx +++ b/psycopg3_c/psycopg3_c/types/numeric.pyx @@ -4,6 +4,8 @@ Cython adapters for numeric types. # Copyright (C) 2020 The Psycopg Team +cimport cython + from libc.stdint cimport * from libc.string cimport memcpy, strlen from cpython.mem cimport PyMem_Free @@ -25,6 +27,7 @@ cdef extern from "Python.h": int Py_DTSF_ADD_DOT_0 +# @cython.final # TODO? causes compile warnings cdef class IntDumper(CDumper): format = Format.TEXT @@ -54,6 +57,7 @@ cdef class IntDumper(CDumper): return rv +@cython.final cdef class Int4BinaryDumper(CDumper): format = Format.BINARY @@ -71,6 +75,7 @@ cdef class Int4BinaryDumper(CDumper): return sizeof(int32_t) +@cython.final cdef class Int8BinaryDumper(CDumper): format = Format.BINARY @@ -88,6 +93,7 @@ cdef class Int8BinaryDumper(CDumper): return sizeof(int64_t) +@cython.final cdef class IntLoader(CLoader): format = Format.TEXT @@ -96,6 +102,7 @@ cdef class IntLoader(CLoader): return PyLong_FromString(data, NULL, 10) +@cython.final cdef class Int2BinaryLoader(CLoader): format = Format.BINARY @@ -104,6 +111,7 @@ cdef class Int2BinaryLoader(CLoader): return PyLong_FromLong(be16toh((data)[0])) +@cython.final cdef class Int4BinaryLoader(CLoader): format = Format.BINARY @@ -112,6 +120,7 @@ cdef class Int4BinaryLoader(CLoader): return PyLong_FromLong(be32toh((data)[0])) +@cython.final cdef class Int8BinaryLoader(CLoader): format = Format.BINARY @@ -120,6 +129,7 @@ cdef class Int8BinaryLoader(CLoader): return PyLong_FromLongLong(be64toh((data)[0])) +@cython.final cdef class OidBinaryLoader(CLoader): format = Format.BINARY @@ -128,6 +138,7 @@ cdef class OidBinaryLoader(CLoader): return PyLong_FromUnsignedLong(be32toh((data)[0])) +@cython.final cdef class FloatDumper(CDumper): format = Format.TEXT @@ -160,6 +171,7 @@ cdef dict _special_float = { } +@cython.final cdef class FloatBinaryDumper(CDumper): format = Format.BINARY @@ -176,6 +188,7 @@ cdef class FloatBinaryDumper(CDumper): return sizeof(swp) +@cython.final cdef class FloatLoader(CLoader): format = Format.TEXT @@ -186,6 +199,7 @@ cdef class FloatLoader(CLoader): return PyFloat_FromDouble(d) +@cython.final cdef class Float4BinaryLoader(CLoader): format = Format.BINARY @@ -198,6 +212,7 @@ cdef class Float4BinaryLoader(CLoader): return PyFloat_FromDouble((swp)[0]) +@cython.final cdef class Float8BinaryLoader(CLoader): format = Format.BINARY @@ -206,27 +221,3 @@ cdef class Float8BinaryLoader(CLoader): cdef uint64_t asint = be64toh((data)[0]) cdef char *swp = &asint return PyFloat_FromDouble((swp)[0]) - - -cdef void register_numeric_c_adapters(): - logger.debug("registering optimised numeric c adapters") - - IntDumper.register(int) - Int8BinaryDumper.register(int) - - IntLoader.register(oids.INT2_OID) - IntLoader.register(oids.INT4_OID) - IntLoader.register(oids.INT8_OID) - IntLoader.register(oids.OID_OID) - Int2BinaryLoader.register(oids.INT2_OID) - Int4BinaryLoader.register(oids.INT4_OID) - Int8BinaryLoader.register(oids.INT8_OID) - OidBinaryLoader.register(oids.OID_OID) - - FloatDumper.register(float) - FloatBinaryDumper.register(float) - - FloatLoader.register(oids.FLOAT4_OID) - FloatLoader.register(oids.FLOAT8_OID) - Float4BinaryLoader.register(oids.FLOAT4_OID) - Float8BinaryLoader.register(oids.FLOAT8_OID) diff --git a/psycopg3_c/psycopg3_c/types/singletons.pyx b/psycopg3_c/psycopg3_c/types/singletons.pyx index c25737de9..75aa03da6 100644 --- a/psycopg3_c/psycopg3_c/types/singletons.pyx +++ b/psycopg3_c/psycopg3_c/types/singletons.pyx @@ -4,9 +4,12 @@ Cython adapters for boolean. # Copyright (C) 2020 The Psycopg Team +cimport cython + from psycopg3.pq import Format +@cython.final cdef class BoolDumper(CDumper): format = Format.TEXT @@ -41,10 +44,14 @@ cdef class BoolDumper(CDumper): return b"true" if obj else b"false" -cdef class BoolBinaryDumper(BoolDumper): +@cython.final +cdef class BoolBinaryDumper(CDumper): format = Format.BINARY + def __cinit__(self): + self.oid = oids.BOOL_OID + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: CDumper.ensure_size(rv, offset, 1) @@ -64,6 +71,7 @@ cdef class BoolBinaryDumper(BoolDumper): return 1 +@cython.final cdef class BoolLoader(CLoader): format = Format.TEXT @@ -73,19 +81,10 @@ cdef class BoolLoader(CLoader): return True if data[0] == b't' else False +@cython.final cdef class BoolBinaryLoader(CLoader): format = Format.BINARY cdef object cload(self, const char *data, size_t length): return True if data[0] else False - - -cdef void register_singletons_c_adapters(): - logger.debug("registering optimised singletons c adapters") - - BoolDumper.register(bool) - BoolBinaryDumper.register(bool) - - BoolLoader.register(oids.BOOL_OID) - BoolBinaryLoader.register(oids.BOOL_OID) diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx index 6802e21c6..bb7d83f9d 100644 --- a/psycopg3_c/psycopg3_c/types/text.pyx +++ b/psycopg3_c/psycopg3_c/types/text.pyx @@ -4,6 +4,8 @@ Cython adapters for textual types. # Copyright (C) 2020 The Psycopg Team +cimport cython + from libc.string cimport memcpy, memchr from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize from cpython.unicode cimport ( @@ -41,11 +43,6 @@ cdef class _StringDumper(CDumper): ): self.is_utf8 = 1 - -cdef class StringBinaryDumper(_StringDumper): - - format = Format.BINARY - cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: # the server will raise DataError subclass if the string contains 0x00 cdef Py_ssize_t size; @@ -71,7 +68,14 @@ cdef class StringBinaryDumper(_StringDumper): return size -cdef class StringDumper(StringBinaryDumper): +@cython.final +cdef class StringBinaryDumper(_StringDumper): + + format = Format.BINARY + + +@cython.final +cdef class StringDumper(_StringDumper): format = Format.TEXT @@ -88,7 +92,7 @@ cdef class StringDumper(StringBinaryDumper): return size -cdef class TextLoader(CLoader): +cdef class _TextLoader(CLoader): format = Format.TEXT @@ -119,11 +123,19 @@ cdef class TextLoader(CLoader): else: return data[:length] +@cython.final +cdef class TextLoader(_TextLoader): + + format = Format.TEXT + + +@cython.final +cdef class TextBinaryLoader(_TextLoader): -cdef class TextBinaryLoader(TextLoader): format = Format.BINARY +@cython.final cdef class BytesDumper(CDumper): format = Format.TEXT @@ -158,6 +170,7 @@ cdef class BytesDumper(CDumper): return len_out +@cython.final cdef class BytesBinaryDumper(CDumper): format = Format.BINARY @@ -175,6 +188,7 @@ cdef class BytesBinaryDumper(CDumper): return size +@cython.final cdef class ByteaLoader(CLoader): format = Format.TEXT @@ -193,37 +207,10 @@ cdef class ByteaLoader(CLoader): return rv +@cython.final cdef class ByteaBinaryLoader(CLoader): format = Format.BINARY cdef object cload(self, const char *data, size_t length): return data[:length] - - -cdef void register_text_c_adapters(): - logger.debug("registering optimised text c adapters") - - StringDumper.register(str) - StringBinaryDumper.register(str) - - TextLoader.register(oids.INVALID_OID) - TextLoader.register(oids.BPCHAR_OID) - TextLoader.register(oids.NAME_OID) - TextLoader.register(oids.TEXT_OID) - TextLoader.register(oids.VARCHAR_OID) - TextBinaryLoader.register(oids.BPCHAR_OID) - TextBinaryLoader.register(oids.NAME_OID) - TextBinaryLoader.register(oids.TEXT_OID) - TextBinaryLoader.register(oids.VARCHAR_OID) - - BytesDumper.register(bytes) - BytesDumper.register(bytearray) - BytesDumper.register(memoryview) - BytesBinaryDumper.register(bytes) - BytesBinaryDumper.register(bytearray) - BytesBinaryDumper.register(memoryview) - - ByteaLoader.register(oids.BYTEA_OID) - ByteaBinaryLoader.register(oids.BYTEA_OID) - ByteaBinaryLoader.register(oids.INVALID_OID) diff --git a/tests/test_adapt.py b/tests/test_adapt.py index 17288caef..e6e365221 100644 --- a/tests/test_adapt.py +++ b/tests/test_adapt.py @@ -72,6 +72,30 @@ def test_dump_subclass(conn, fmt_out): assert cur.fetchone() == ("hello", "world") +def test_subclass_dumper(conn): + # This might be a C fast object: make sure that the Python code is called + from psycopg3.types import StringDumper + + class MyStringDumper(StringDumper): + def dump(self, obj): + return (obj * 2).encode("utf-8") + + MyStringDumper.register(str, conn) + assert conn.execute("select %s", ["hello"]).fetchone()[0] == "hellohello" + + +def test_subclass_loader(conn): + # This might be a C fast object: make sure that the Python code is called + from psycopg3.types import TextLoader + + class MyTextLoader(TextLoader): + def load(self, data): + return (data * 2).decode("utf-8") + + MyTextLoader.register("text", conn) + assert conn.execute("select 'hello'::text").fetchone()[0] == "hellohello" + + @pytest.mark.parametrize( "data, format, type, result", [ diff --git a/tests/test_copy.py b/tests/test_copy.py index af1547eae..fccf994f3 100644 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -16,7 +16,7 @@ sample_records = [(Int4(10), Int4(20), "hello"), (Int4(40), None, "world")] sample_values = "values (10::int, 20::int, 'hello'::text), (40, NULL, 'world')" -sample_tabledef = "col1 int primary key, col2 int, data text" +sample_tabledef = "col1 serial primary key, col2 int, data text" sample_text = b"""\ 10\t20\thello @@ -147,6 +147,31 @@ def test_copy_in_empty(conn, format): assert cur.rowcount == 0 +@pytest.mark.parametrize("format", [Format.TEXT, Format.BINARY]) +def test_subclass_adapter(conn, format): + if format == Format.TEXT: + from psycopg3.types import StringDumper as BaseDumper + else: + from psycopg3.types import StringBinaryDumper as BaseDumper + + class MyStringDumper(BaseDumper): + def dump(self, obj): + return super().dump(obj) * 2 + + MyStringDumper.register(str, conn) + + cur = conn.cursor() + ensure_table(cur, sample_tabledef) + + with cur.copy( + f"copy copy_in (data) from stdin (format {format.name})" + ) as copy: + copy.write_row(("hello",)) + + rec = cur.execute("select data from copy_in").fetchone() + assert rec[0] == "hellohello" + + @pytest.mark.parametrize("format", [Format.TEXT, Format.BINARY]) def test_copy_in_error_empty(conn, format): cur = conn.cursor()