From: Daniele Varrazzo Date: Sun, 27 Nov 2022 14:20:10 +0000 (+0100) Subject: perf(c/array): add C implementation of array loader X-Git-Tag: 3.1.5~12^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=563b276c8b80c23385a17329262679705a42dda3;p=thirdparty%2Fpsycopg.git perf(c/array): add C implementation of array loader With this change we get to load an array without the transformer calling any Python code (if the element loader is a CLoader as well). We don't need to subclass the binary loader anymore because all the required info is now taken from the loaded data. As a consequence, we don't need a BaseArrayLoader anymore. --- diff --git a/psycopg/psycopg/types/array.py b/psycopg/psycopg/types/array.py index da315b7f4..e35c5e76f 100644 --- a/psycopg/psycopg/types/array.py +++ b/psycopg/psycopg/types/array.py @@ -293,34 +293,32 @@ class ListBinaryDumper(BaseListDumper): return b"".join(data) -class BaseArrayLoader(RecursiveLoader): - base_oid: int - - -class ArrayLoader(BaseArrayLoader): +class ArrayLoader(RecursiveLoader): delimiter = b"," + base_oid: int def load(self, data: Buffer) -> List[Any]: loader = self._tx.get_loader(self.base_oid, self.format) - return load_text(data, loader, self.delimiter) + return _load_text(data, loader, self.delimiter) -class ArrayBinaryLoader(BaseArrayLoader): +class ArrayBinaryLoader(RecursiveLoader): format = pq.Format.BINARY def load(self, data: Buffer) -> List[Any]: - return load_binary(data, self._tx) + return _load_binary(data, self._tx) def register_array(info: TypeInfo, context: Optional[AdaptContext] = None) -> None: if not info.array_oid: raise ValueError(f"the type info {info} doesn't describe an array") + base: Type[Any] adapters = context.adapters if context else postgres.adapters - base: Type[Any] = ArrayLoader + base = getattr(_psycopg, "ArrayLoader", ArrayLoader) name = f"{info.name.title()}{base.__name__}" attribs = { "base_oid": info.oid, @@ -329,10 +327,7 @@ def register_array(info: TypeInfo, context: Optional[AdaptContext] = None) -> No loader = type(name, (base,), attribs) adapters.register_loader(info.array_oid, loader) - base = ArrayBinaryLoader - name = f"{info.name.title()}{base.__name__}" - attribs = {"base_oid": info.oid} - loader = type(name, (base,), attribs) + loader = getattr(_psycopg, "ArrayBinaryLoader", ArrayBinaryLoader) adapters.register_loader(info.array_oid, loader) base = ListDumper @@ -467,13 +462,3 @@ def _load_binary(data: Buffer, tx: Transformer) -> List[Any]: out = [out[i : i + dim] for i in range(0, len(out), dim)] return out - - -# Override functions with fast versions if available -if _psycopg: - load_text = _psycopg.array_load_text - load_binary = _psycopg.array_load_binary - -else: - load_text = _load_text - load_binary = _load_binary diff --git a/psycopg_c/psycopg_c/_psycopg/adapt.pyx b/psycopg_c/psycopg_c/_psycopg/adapt.pyx index 518363c85..a6d8e6a48 100644 --- a/psycopg_c/psycopg_c/_psycopg/adapt.pyx +++ b/psycopg_c/psycopg_c/_psycopg/adapt.pyx @@ -161,3 +161,11 @@ cdef class CLoader: cdef Py_ssize_t length _buffer_as_string_and_size(data, &ptr, &length) return self.cload(ptr, length) + + +cdef class _CRecursiveLoader(CLoader): + + cdef Transformer _tx + + def __cinit__(self, oid: int, context: Optional[AdaptContext] = None): + self._tx = Transformer.from_context(context) diff --git a/psycopg_c/psycopg_c/_psycopg/transform.pyx b/psycopg_c/psycopg_c/_psycopg/transform.pyx index 48f338dd8..3525bc9be 100644 --- a/psycopg_c/psycopg_c/_psycopg/transform.pyx +++ b/psycopg_c/psycopg_c/_psycopg/transform.pyx @@ -117,10 +117,7 @@ cdef class Transformer: If the context is a Transformer instance, just return it. """ - if isinstance(context, Transformer): - return context - else: - return cls(context) + return _tx_from_context(context) @property def encoding(self) -> str: @@ -634,3 +631,10 @@ cdef object _as_row_dumper(object dumper): row_dumper.cdumper = dumper return row_dumper + + +cdef Transformer _tx_from_context(object context): + if isinstance(context, Transformer): + return context + else: + return Transformer(context) diff --git a/psycopg_c/psycopg_c/types/array.pyx b/psycopg_c/psycopg_c/types/array.pyx index a28532c99..af78b7857 100644 --- a/psycopg_c/psycopg_c/types/array.pyx +++ b/psycopg_c/psycopg_c/types/array.pyx @@ -28,27 +28,36 @@ cdef extern from *: const int MAXDIM -def array_load_text( - data: Buffer, loader: Loader, delimiter: bytes = b"," -) -> List[Any]: - cdef char cdelim = delimiter[0] +cdef class ArrayLoader(_CRecursiveLoader): - cdef char *buf = NULL - cdef Py_ssize_t length = 0 - _buffer_as_string_and_size(data, &buf, &length) + format = PQ_TEXT + base_oid = 0 + delimiter = b"," - cdef CLoader cloader = None - cdef object pyload = None + cdef object cload(self, const char *data, size_t length): + cdef PyObject *row_loader = self._tx._c_get_loader( + self.base_oid, PQ_TEXT) - if isinstance(loader, CLoader): - cloader = loader - else: - pyload = loader.load + cdef char cdelim = self.delimiter[0] + return _array_load_text(data, length, row_loader, cdelim) + + +@cython.final +cdef class ArrayBinaryLoader(_CRecursiveLoader): + + format = PQ_BINARY + + cdef object cload(self, const char *data, size_t length): + return _array_load_binary(data, length, self._tx) + +cdef object _array_load_text( + const char *buf, size_t length, PyObject *row_loader, char cdelim +): if length == 0: raise e.DataError("malformed array: empty data") - cdef char *end = buf + length + cdef const char *end = buf + length # Keep and grow a buffer instead of malloc'ing at each element cdef char *scratch = NULL @@ -66,6 +75,13 @@ def array_load_text( rv = a cdef PyObject *tmp + cdef CLoader cloader = None + cdef object pyload = None + if (row_loader).cloader is not None: + cloader = (row_loader).cloader + else: + pyload = (row_loader).loadfunc + try: while buf < end: if buf[0] == b'{': @@ -100,10 +116,10 @@ def array_load_text( cdef object _parse_token( - char **bufptr, char *bufend, char cdelim, + const char **bufptr, const char *bufend, char cdelim, char **scratch, size_t *sclen, CLoader cloader, object load ): - cdef char *start = bufptr[0] + cdef const char *start = bufptr[0] cdef int has_quotes = start[0] == b'"' cdef int quoted = has_quotes cdef int num_escapes = 0 @@ -111,7 +127,7 @@ cdef object _parse_token( if has_quotes: start += 1 - cdef char *end = start + cdef const char *end = start while end < bufend: if (end[0] == cdelim or end[0] == b'}') and not quoted: @@ -139,7 +155,7 @@ cdef object _parse_token( and start[2] == b'L' and start[3] == b'L': return None - cdef char *src + cdef const char *src cdef char *tgt cdef size_t unesclen @@ -175,11 +191,7 @@ cdef object _parse_token( @cython.cdivision(True) -def array_load_binary(data: Buffer, Transformer tx) -> List[Any]: - cdef char *buf = NULL - cdef Py_ssize_t length = 0 - _buffer_as_string_and_size(data, &buf, &length) - +cdef object _array_load_binary(const char *buf, size_t length, Transformer tx): # head is ndims, hasnull, elem oid cdef uint32_t *buf32 = buf cdef int ndims = endian.be32toh(buf32[0]) @@ -193,7 +205,8 @@ def array_load_binary(data: Buffer, Transformer tx) -> List[Any]: ) cdef object oid = endian.be32toh(buf32[2]) - cdef PyObject *row_loader = tx._c_get_loader(oid, PQ_BINARY) + cdef PyObject *row_loader = tx._c_get_loader( + oid, PQ_BINARY) cdef Py_ssize_t[MAXDIM] dims cdef int i @@ -207,9 +220,9 @@ def array_load_binary(data: Buffer, Transformer tx) -> List[Any]: cdef object _array_load_binary_rec( - Py_ssize_t ndims, Py_ssize_t *dims, char **bufptr, PyObject *row_loader + Py_ssize_t ndims, Py_ssize_t *dims, const char **bufptr, PyObject *row_loader ): - cdef char *buf + cdef const char *buf cdef int i cdef int32_t size cdef object val