From: Daniele Varrazzo <daniele.varrazzo@gmail.com>
Date: Thu, 31 Dec 2020 18:59:22 +0000 (+0100)
Subject: Added c interface to dumpers
X-Git-Tag: 3.0.dev0~212
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=51f854150c2a7aeed048866c074be293af76c42f;p=thirdparty%2Fpsycopg.git

Added c interface to dumpers
---

diff --git a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx
index 68c7e5480..3773e49e2 100644
--- a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx
+++ b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx
@@ -17,7 +17,7 @@ from typing import Any
 
 from cpython.bytes cimport PyBytes_AsStringAndSize
 from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize
-from cpython.bytearray cimport PyByteArray_AS_STRING
+from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_AS_STRING
 
 from psycopg3_c.pq cimport _buffer_as_string_and_size
 
@@ -52,9 +52,30 @@ cdef class CDumper:
         ):
             self.oid = oids.TEXT_OID
 
-    def dump(self, obj: Any) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        """Store the Postgres representation of *obj* into *rv* at *offset*
+
+        Return the number of bytes written to rv or -1 on Python exception.
+
+        Subclasses must implement this method. The `dump()` implementation
+        transforms the result of this method to a bytearray so that it can be
+        returned to Python.
+
+        The function interface allows C code to use this method automatically
+        to create larger buffers, e.g. for copy, composite objects, etc.
+
+        Implementation note: as you will always need to make sure that rv
+        has enough space to include what you want to dump, `ensure_size()`
+        will probably come in handy.
+        """
         raise NotImplementedError()
 
+    def dump(self, obj: Any) -> bytearray:
+        """Return the Postgres representation of *obj* as a bytearray."""
+        cdef rv = PyByteArray_FromStringAndSize("", 0)
+        self.cdump(obj, rv, 0)
+        return rv
+
     def quote(self, obj: Any) -> bytearray:
         cdef char *ptr
         cdef char *ptr_out
@@ -90,7 +111,7 @@ cdef class CDumper:
 
     @classmethod
     def register(
-        cls,
+        this_cls,
         cls: Union[type, str],
         context: Optional[AdaptContext] = None,
         int format = Format.TEXT,
@@ -100,7 +121,22 @@ cdef class CDumper:
         else:
             from psycopg3.adapt import global_adapters as adapters
 
-        adapters.register_dumper(cls, cls)
+        adapters.register_dumper(cls, this_cls)
+
+    @staticmethod
+    cdef char *ensure_size(bytearray ba, Py_ssize_t offset, Py_ssize_t size) except NULL:
+        """
+        Grow *ba*, if necessary, to contain at least *size* bytes after *offset*
+
+        Return the pointer in the bytearray at *offset*, i.e. the place where
+        you want to write *size* bytes.
+        """
+        cdef Py_ssize_t curr_size = PyByteArray_GET_SIZE(ba)
+        cdef Py_ssize_t new_size = offset + size
+        if curr_size < new_size:
+            PyByteArray_Resize(ba, new_size)
+
+        return PyByteArray_AS_STRING(ba) + offset
 
 
 cdef class CLoader:
diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx
index b33953bdf..0bcd3c4fc 100644
--- a/psycopg3_c/psycopg3_c/types/numeric.pyx
+++ b/psycopg3_c/psycopg3_c/types/numeric.pyx
@@ -5,6 +5,7 @@ Cython adapters for numeric types.
 # Copyright (C) 2020 The Psycopg Team
 
 from libc.stdint cimport *
+from libc.string cimport memcpy
 from cpython.long cimport PyLong_FromString, PyLong_FromLong, PyLong_AsLongLong
 from cpython.long cimport PyLong_FromLongLong, PyLong_FromUnsignedLong
 from cpython.float cimport PyFloat_FromDouble
@@ -26,37 +27,39 @@ cdef class IntDumper(CDumper):
     def __cinit__(self):
         self.oid = oids.INT8_OID
 
-    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
-        super().__init__(cls, context)
-
-    def dump(self, obj) -> bytes:
-        cdef char buf[22]
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef int size = 22  # max int as string
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
         cdef long long val = PyLong_AsLongLong(obj)
-        cdef int written = PyOS_snprintf(buf, sizeof(buf), "%lld", val)
-        return buf[:written]
+        cdef int written = PyOS_snprintf(buf, size, "%lld", val)
+        PyByteArray_Resize(rv, offset + written)
+        return written
 
-    def quote(self, obj) -> bytes:
-        cdef char buf[23]
-        cdef long long val = PyLong_AsLongLong(obj)
-        cdef int written
-        if val >= 0:
-            written = PyOS_snprintf(buf, sizeof(buf), "%lld", val)
+    def quote(self, obj) -> bytearray:
+        rv = PyByteArray_FromStringAndSize("", 0)
+        PyByteArray_Resize(rv, 23)
+
+        if obj >= 0:
+            self.cdump(obj, rv, 0)
         else:
-            written = PyOS_snprintf(buf, sizeof(buf), " %lld", val)
+            rv[0] = b' '
+            self.cdump(obj, rv, 1)
 
-        return buf[:written]
+        return rv
 
 
 cdef class IntBinaryDumper(IntDumper):
 
     format = Format.BINARY
 
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef char *buf = CDumper.ensure_size(rv, offset, sizeof(int64_t))
         cdef long long val = PyLong_AsLongLong(obj)
+        # swap bytes if needed
         cdef uint64_t *ptvar = <uint64_t *>(&val)
         cdef int64_t beval = htobe64(ptvar[0])
-        cdef char *buf = <char *>&beval
-        return buf[:sizeof(beval)]
+        memcpy(buf, <void *>&beval, sizeof(int64_t))
+        return sizeof(int64_t)
 
 
 cdef class IntLoader(CLoader):
diff --git a/psycopg3_c/psycopg3_c/types/singletons.pyx b/psycopg3_c/psycopg3_c/types/singletons.pyx
index 58fcaace7..c25737de9 100644
--- a/psycopg3_c/psycopg3_c/types/singletons.pyx
+++ b/psycopg3_c/psycopg3_c/types/singletons.pyx
@@ -14,14 +14,23 @@ cdef class BoolDumper(CDumper):
     def __cinit__(self):
         self.oid = oids.BOOL_OID
 
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        CDumper.ensure_size(rv, offset, 1)
+
         # Fast paths, just a pointer comparison
+        cdef char val
         if obj is True:
-            return b"t"
+            val = b"t"
         elif obj is False:
-            return b"f"
+            val = b"f"
+        elif obj:
+            val = b"t"
         else:
-            return b"t" if obj else b"f"
+            val = b"f"
+
+        cdef char *buf = PyByteArray_AS_STRING(rv)
+        buf[offset] = val
+        return 1
 
     def quote(self, obj: bool) -> bytes:
         if obj is True:
@@ -36,13 +45,23 @@ cdef class BoolBinaryDumper(BoolDumper):
 
     format = Format.BINARY
 
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        CDumper.ensure_size(rv, offset, 1)
+
+        # Fast paths, just a pointer comparison
+        cdef char val
         if obj is True:
-            return b"\x01"
+            val = b"\x01"
         elif obj is False:
-            return b"\x00"
+            val = b"\x00"
+        elif obj:
+            val = b"\x01"
         else:
-            return b"\x01" if obj else b"\x00"
+            val = b"\x00"
+
+        cdef char *buf = PyByteArray_AS_STRING(rv)
+        buf[offset] = val
+        return 1
 
 
 cdef class BoolLoader(CLoader):
diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx
index bebe144bf..6802e21c6 100644
--- a/psycopg3_c/psycopg3_c/types/text.pyx
+++ b/psycopg3_c/psycopg3_c/types/text.pyx
@@ -4,11 +4,20 @@ Cython adapters for textual types.
 
 # Copyright (C) 2020 The Psycopg Team
 
+from libc.string cimport memcpy, memchr
 from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize
-from cpython.unicode cimport PyUnicode_Decode, PyUnicode_DecodeUTF8
-from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsEncodedString
+from cpython.unicode cimport (
+    PyUnicode_AsEncodedString,
+    PyUnicode_AsUTF8String,
+    PyUnicode_CheckExact,
+    PyUnicode_Decode,
+    PyUnicode_DecodeUTF8,
+)
 
-from psycopg3_c.pq cimport Escaping
+from psycopg3_c.pq cimport libpq, Escaping, _buffer_as_string_and_size
+
+cdef extern from "Python.h":
+    const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size)
 
 
 cdef class _StringDumper(CDumper):
@@ -16,7 +25,7 @@ cdef class _StringDumper(CDumper):
     cdef char *encoding
     cdef bytes _bytes_encoding  # needed to keep `encoding` alive
 
-    def __init__(self, cls: type, context: Optional[AdaptContext]):
+    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
         super().__init__(cls, context)
 
         self.is_utf8 = 0
@@ -37,38 +46,46 @@ cdef class StringBinaryDumper(_StringDumper):
 
     format = Format.BINARY
 
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
         # the server will raise DataError subclass if the string contains 0x00
+        cdef Py_ssize_t size;
+        cdef const char *src
+
         if self.is_utf8:
-            return PyUnicode_AsUTF8String(obj)
+            # Probably the fastest path, but doesn't work with subclasses
+            if PyUnicode_CheckExact(obj):
+                src = PyUnicode_AsUTF8AndSize(obj, &size)
+                if src == NULL:
+                    # re-encode using a function raising an exception.
+                    # TODO: is there a better way?
+                    PyUnicode_AsUTF8String(obj)
+            else:
+                b = PyUnicode_AsUTF8String(obj)
+                PyBytes_AsStringAndSize(b, <char **>&src, &size)
         else:
-            return PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+            b = PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+            PyBytes_AsStringAndSize(b, <char **>&src, &size)
 
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
+        memcpy(buf, src, size)
+        return size
 
-cdef class StringDumper(_StringDumper):
 
-    format = Format.TEXT
+cdef class StringDumper(StringBinaryDumper):
 
-    def dump(self, obj) -> bytes:
-        cdef bytes rv
-        cdef char *buf
+    format = Format.TEXT
 
-        if self.is_utf8:
-            rv = PyUnicode_AsUTF8String(obj)
-        else:
-            rv = PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef Py_ssize_t size = StringBinaryDumper.cdump(self, obj, rv, offset)
 
-        try:
-            # the function raises ValueError if the bytes contains 0x00
-            PyBytes_AsStringAndSize(rv, &buf, NULL)
-        except ValueError:
+        # Like the binary dump, but check for 0, or the string will be truncated
+        cdef const char *buf = PyByteArray_AS_STRING(rv)
+        if NULL != memchr(buf + offset, 0x00, size):
             from psycopg3 import DataError
-
             raise DataError(
                 "PostgreSQL text fields cannot contain NUL (0x00) bytes"
             )
-
-        return rv
+        return size
 
 
 cdef class TextLoader(CLoader):
@@ -111,25 +128,51 @@ cdef class BytesDumper(CDumper):
 
     format = Format.TEXT
 
-    cdef Escaping esc
-
     def __cinit__(self):
         self.oid = oids.BYTEA_OID
 
-    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
-        super().__init__(cls, context)
-        self.esc = Escaping(self._pgconn)
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
 
-    def dump(self, obj) -> memoryview:
-        return self.esc.escape_bytea(obj)
+        cdef size_t len_out
+        cdef unsigned char *out
+        cdef char *ptr
+        cdef Py_ssize_t length
 
+        _buffer_as_string_and_size(obj, &ptr, &length)
 
-cdef class BytesBinaryDumper(BytesDumper):
+        if self._pgconn is not None and self._pgconn.pgconn_ptr != NULL:
+            out = libpq.PQescapeByteaConn(
+                self._pgconn.pgconn_ptr, <unsigned char *>ptr, length, &len_out)
+        else:
+            out = libpq.PQescapeBytea(<unsigned char *>ptr, length, &len_out)
+
+        if out is NULL:
+            raise MemoryError(
+                f"couldn't allocate for escape_bytea of {length} bytes"
+            )
+
+        len_out -= 1  # out includes final 0
+        cdef char *buf = CDumper.ensure_size(rv, offset, len_out)
+        memcpy(buf, out, len_out)
+        libpq.PQfreemem(out)
+        return len_out
+
+
+cdef class BytesBinaryDumper(CDumper):
 
     format = Format.BINARY
 
-    def dump(self, obj):
-        return obj
+    def __cinit__(self):
+        self.oid = oids.BYTEA_OID
+
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef char *src
+        cdef Py_ssize_t size;
+        _buffer_as_string_and_size(obj, &src, &size)
+
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
+        memcpy(buf, src, size)
+        return  size
 
 
 cdef class ByteaLoader(CLoader):