Added C interface to dumpers

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Thu, 31 Dec 2020 18:59:22 +0000 (19:59 +0100)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Fri, 8 Jan 2021 01:26:53 +0000 (02:26 +0100)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Thu, 31 Dec 2020 18:59:22 +0000 (19:59 +0100)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Fri, 8 Jan 2021 01:26:53 +0000 (02:26 +0100)
diff --git a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx

index 68c7e5480def5c6554a03f345f324c8b58d4ed92..3773e49e20e183ac4d402507021ec6fdee6e52cd 100644 (file)
--- a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx
+++ b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx
@@ -17,7 +17,7 @@ from typing import Any
  
  from cpython.bytes cimport PyBytes_AsStringAndSize
  from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize
-from cpython.bytearray cimport PyByteArray_AS_STRING
+from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_AS_STRING
  
  from psycopg3_c.pq cimport _buffer_as_string_and_size
  
@@ -52,9 +52,30 @@ cdef class CDumper:
          ):
              self.oid = oids.TEXT_OID
  
-    def dump(self, obj: Any) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        """Store the Postgres representation of *obj* into *rv* at *offset*
+
+        Return the number of bytes written to rv or -1 on Python exception.
+
+        Subclasses must implement this method. The `dump()` implementation
+        transforms the result of this method to a bytearray so that it can be
+        returned to Python.
+
+        The function interface allows C code to use this method automatically
+        to create larger buffers, e.g. for copy, composite objects, etc.
+
+        Implementation note: as you will always need to make sure that rv
+        has enough space to include what you want to dump, `ensure_size()`
+        will probably come in handy.
+        """
          raise NotImplementedError()
  
+    def dump(self, obj: Any) -> bytearray:
+        """Return the Postgres representation of *obj* as a Python bytearray"""
+        cdef rv = PyByteArray_FromStringAndSize("", 0)
+        self.cdump(obj, rv, 0)
+        return rv
+
      def quote(self, obj: Any) -> bytearray:
          cdef char *ptr
          cdef char *ptr_out
@@ -90,7 +111,7 @@ cdef class CDumper:
  
      @classmethod
      def register(
-        cls,
+        this_cls,
          cls: Union[type, str],
          context: Optional[AdaptContext] = None,
          int format = Format.TEXT,
@@ -100,7 +121,22 @@ cdef class CDumper:
          else:
              from psycopg3.adapt import global_adapters as adapters
  
-        adapters.register_dumper(cls, cls)
+        adapters.register_dumper(cls, this_cls)
+
+    @staticmethod
+    cdef char *ensure_size(bytearray ba, Py_ssize_t offset, Py_ssize_t size) except NULL:
+        """
+        Grow *ba*, if necessary, to contain at least *size* bytes after *offset*
+
+        Return the pointer in the bytearray at *offset*, i.e. the place where
+        you want to write *size* bytes.
+        """
+        cdef Py_ssize_t curr_size = PyByteArray_GET_SIZE(ba)
+        cdef Py_ssize_t new_size = offset + size
+        if curr_size < new_size:
+            PyByteArray_Resize(ba, new_size)
+
+        return PyByteArray_AS_STRING(ba) + offset
  
  
  cdef class CLoader:
diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx

index b33953bdfa487744c77c817b5eee937063ae36ef..0bcd3c4fcb12a65380923ac90be0c22b77b6125f 100644 (file)
--- a/psycopg3_c/psycopg3_c/types/numeric.pyx
+++ b/psycopg3_c/psycopg3_c/types/numeric.pyx
@@ -5,6 +5,7 @@ Cython adapters for numeric types.
  # Copyright (C) 2020 The Psycopg Team
  
  from libc.stdint cimport *
+from libc.string cimport memcpy
  from cpython.long cimport PyLong_FromString, PyLong_FromLong, PyLong_AsLongLong
  from cpython.long cimport PyLong_FromLongLong, PyLong_FromUnsignedLong
  from cpython.float cimport PyFloat_FromDouble
@@ -26,37 +27,39 @@ cdef class IntDumper(CDumper):
      def __cinit__(self):
          self.oid = oids.INT8_OID
  
-    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
-        super().__init__(cls, context)
-
-    def dump(self, obj) -> bytes:
-        cdef char buf[22]
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef int size = 22  # max int as string
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
          cdef long long val = PyLong_AsLongLong(obj)
-        cdef int written = PyOS_snprintf(buf, sizeof(buf), "%lld", val)
-        return buf[:written]
+        cdef int written = PyOS_snprintf(buf, size, "%lld", val)
+        PyByteArray_Resize(rv, offset + written)
+        return written
  
-    def quote(self, obj) -> bytes:
-        cdef char buf[23]
-        cdef long long val = PyLong_AsLongLong(obj)
-        cdef int written
-        if val >= 0:
-            written = PyOS_snprintf(buf, sizeof(buf), "%lld", val)
+    def quote(self, obj) -> bytearray:
+        rv = PyByteArray_FromStringAndSize("", 0)
+        PyByteArray_Resize(rv, 23)
+
+        if obj >= 0:
+            self.cdump(obj, rv, 0)
          else:
-            written = PyOS_snprintf(buf, sizeof(buf), " %lld", val)
+            rv[0] = b' '
+            self.cdump(obj, rv, 1)
  
-        return buf[:written]
+        return rv
  
  
  cdef class IntBinaryDumper(IntDumper):
  
      format = Format.BINARY
  
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef char *buf = CDumper.ensure_size(rv, offset, sizeof(int64_t))
          cdef long long val = PyLong_AsLongLong(obj)
+        # swap bytes if needed
          cdef uint64_t *ptvar = <uint64_t *>(&val)
          cdef int64_t beval = htobe64(ptvar[0])
-        cdef char *buf = <char *>&beval
-        return buf[:sizeof(beval)]
+        memcpy(buf, <void *>&beval, sizeof(int64_t))
+        return sizeof(int64_t)
  
  
  cdef class IntLoader(CLoader):
diff --git a/psycopg3_c/psycopg3_c/types/singletons.pyx b/psycopg3_c/psycopg3_c/types/singletons.pyx

index 58fcaace7091b018eb5161ee33c788a80efc0350..c25737de9c5c82f16804f7251911228c50c052a5 100644 (file)
--- a/psycopg3_c/psycopg3_c/types/singletons.pyx
+++ b/psycopg3_c/psycopg3_c/types/singletons.pyx
@@ -14,14 +14,23 @@ cdef class BoolDumper(CDumper):
      def __cinit__(self):
          self.oid = oids.BOOL_OID
  
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        CDumper.ensure_size(rv, offset, 1)
+
          # Fast paths, just a pointer comparison
+        cdef char val
          if obj is True:
-            return b"t"
+            val = b"t"
          elif obj is False:
-            return b"f"
+            val = b"f"
+        elif obj:
+            val = b"t"
          else:
-            return b"t" if obj else b"f"
+            val = b"f"
+
+        cdef char *buf = PyByteArray_AS_STRING(rv)
+        buf[offset] = val
+        return 1
  
      def quote(self, obj: bool) -> bytes:
          if obj is True:
@@ -36,13 +45,23 @@ cdef class BoolBinaryDumper(BoolDumper):
  
      format = Format.BINARY
  
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        CDumper.ensure_size(rv, offset, 1)
+
+        # Fast paths, just a pointer comparison
+        cdef char val
          if obj is True:
-            return b"\x01"
+            val = b"\x01"
          elif obj is False:
-            return b"\x00"
+            val = b"\x00"
+        elif obj:
+            val = b"\x01"
          else:
-            return b"\x01" if obj else b"\x00"
+            val = b"\x00"
+
+        cdef char *buf = PyByteArray_AS_STRING(rv)
+        buf[offset] = val
+        return 1
  
  
  cdef class BoolLoader(CLoader):
diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx

index bebe144bfc64fc05c31178dbb9939ac46fef4a96..6802e21c6dca8acdcd36b6a957b4a7c98ecb6089 100644 (file)
--- a/psycopg3_c/psycopg3_c/types/text.pyx
+++ b/psycopg3_c/psycopg3_c/types/text.pyx
@@ -4,11 +4,20 @@ Cython adapters for textual types.
  
  # Copyright (C) 2020 The Psycopg Team
  
+from libc.string cimport memcpy, memchr
  from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize
-from cpython.unicode cimport PyUnicode_Decode, PyUnicode_DecodeUTF8
-from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsEncodedString
+from cpython.unicode cimport (
+    PyUnicode_AsEncodedString,
+    PyUnicode_AsUTF8String,
+    PyUnicode_CheckExact,
+    PyUnicode_Decode,
+    PyUnicode_DecodeUTF8,
+)
  
-from psycopg3_c.pq cimport Escaping
+from psycopg3_c.pq cimport libpq, Escaping, _buffer_as_string_and_size
+
+cdef extern from "Python.h":
+    const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size)
  
  
  cdef class _StringDumper(CDumper):
@@ -16,7 +25,7 @@ cdef class _StringDumper(CDumper):
      cdef char *encoding
      cdef bytes _bytes_encoding  # needed to keep `encoding` alive
  
-    def __init__(self, cls: type, context: Optional[AdaptContext]):
+    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
          super().__init__(cls, context)
  
          self.is_utf8 = 0
@@ -37,38 +46,46 @@ cdef class StringBinaryDumper(_StringDumper):
  
      format = Format.BINARY
  
-    def dump(self, obj) -> bytes:
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
          # the server will raise DataError subclass if the string contains 0x00
+        cdef Py_ssize_t size;
+        cdef const char *src
+
          if self.is_utf8:
-            return PyUnicode_AsUTF8String(obj)
+            # Probably the fastest path, but doesn't work with subclasses
+            if PyUnicode_CheckExact(obj):
+                src = PyUnicode_AsUTF8AndSize(obj, &size)
+                if src == NULL:
+                    # re-encode using a function raising an exception.
+                    # TODO: is there a better way?
+                    PyUnicode_AsUTF8String(obj)
+            else:
+                b = PyUnicode_AsUTF8String(obj)
+                PyBytes_AsStringAndSize(b, <char **>&src, &size)
          else:
-            return PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+            b = PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+            PyBytes_AsStringAndSize(b, <char **>&src, &size)
  
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
+        memcpy(buf, src, size)
+        return size
  
-cdef class StringDumper(_StringDumper):
  
-    format = Format.TEXT
+cdef class StringDumper(StringBinaryDumper):
  
-    def dump(self, obj) -> bytes:
-        cdef bytes rv
-        cdef char *buf
+    format = Format.TEXT
  
-        if self.is_utf8:
-            rv = PyUnicode_AsUTF8String(obj)
-        else:
-            rv = PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef Py_ssize_t size = StringBinaryDumper.cdump(self, obj, rv, offset)
  
-        try:
-            # the function raises ValueError if the bytes contains 0x00
-            PyBytes_AsStringAndSize(rv, &buf, NULL)
-        except ValueError:
+        # Like the binary dump, but check for 0, or the string will be truncated
+        cdef const char *buf = PyByteArray_AS_STRING(rv)
+        if NULL != memchr(buf + offset, 0x00, size):
              from psycopg3 import DataError
-
              raise DataError(
                  "PostgreSQL text fields cannot contain NUL (0x00) bytes"
              )
-
-        return rv
+        return size
  
  
  cdef class TextLoader(CLoader):
@@ -111,25 +128,51 @@ cdef class BytesDumper(CDumper):
  
      format = Format.TEXT
  
-    cdef Escaping esc
-
      def __cinit__(self):
          self.oid = oids.BYTEA_OID
  
-    def __init__(self, cls: type, context: Optional[AdaptContext] = None):
-        super().__init__(cls, context)
-        self.esc = Escaping(self._pgconn)
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
  
-    def dump(self, obj) -> memoryview:
-        return self.esc.escape_bytea(obj)
+        cdef size_t len_out
+        cdef unsigned char *out
+        cdef char *ptr
+        cdef Py_ssize_t length
  
+        _buffer_as_string_and_size(obj, &ptr, &length)
  
-cdef class BytesBinaryDumper(BytesDumper):
+        if self._pgconn is not None and self._pgconn.pgconn_ptr != NULL:
+            out = libpq.PQescapeByteaConn(
+                self._pgconn.pgconn_ptr, <unsigned char *>ptr, length, &len_out)
+        else:
+            out = libpq.PQescapeBytea(<unsigned char *>ptr, length, &len_out)
+
+        if out is NULL:
+            raise MemoryError(
+                f"couldn't allocate for escape_bytea of {length} bytes"
+            )
+
+        len_out -= 1  # out includes final 0
+        cdef char *buf = CDumper.ensure_size(rv, offset, len_out)
+        memcpy(buf, out, len_out)
+        libpq.PQfreemem(out)
+        return len_out
+
+
+cdef class BytesBinaryDumper(CDumper):
  
      format = Format.BINARY
  
-    def dump(self, obj):
-        return obj
+    def __cinit__(self):
+        self.oid = oids.BYTEA_OID
+
+    cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1:
+        cdef char *src
+        cdef Py_ssize_t size;
+        _buffer_as_string_and_size(obj, &src, &size)
+
+        cdef char *buf = CDumper.ensure_size(rv, offset, size)
+        memcpy(buf, src, size)
+        return  size
  
  
  cdef class ByteaLoader(CLoader):
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Thu, 31 Dec 2020 18:59:22 +0000 (19:59 +0100)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Fri, 8 Jan 2021 01:26:53 +0000 (02:26 +0100)
psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx		patch \| blob \| blame \| history
psycopg3_c/psycopg3_c/types/numeric.pyx		patch \| blob \| blame \| history
psycopg3_c/psycopg3_c/types/singletons.pyx		patch \| blob \| blame \| history
psycopg3_c/psycopg3_c/types/text.pyx		patch \| blob \| blame \| history