From: Daniele Varrazzo <daniele.varrazzo@gmail.com>
Date: Sat, 21 Nov 2020 23:09:16 +0000 (+0000)
Subject: Added C string dumpers
X-Git-Tag: 3.0.dev0~332^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e5b99b6fae9dbce0dc511bbbf5c425ce10917837;p=thirdparty%2Fpsycopg.git

Added C string dumpers
---

diff --git a/psycopg3/psycopg3/types/text.py b/psycopg3/psycopg3/types/text.py
index 0be59bef0..df0993b61 100644
--- a/psycopg3/psycopg3/types/text.py
+++ b/psycopg3/psycopg3/types/text.py
@@ -30,6 +30,7 @@ class _StringDumper(Dumper):
 @Dumper.binary(str)
 class StringBinaryDumper(_StringDumper):
     def dump(self, obj: str) -> bytes:
+        # the server will raise DataError subclass if the string contains 0x00
         return obj.encode(self.encoding)
 
 
diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx
index a13b4c272..fe97209e4 100644
--- a/psycopg3_c/psycopg3_c/types/numeric.pyx
+++ b/psycopg3_c/psycopg3_c/types/numeric.pyx
@@ -23,13 +23,13 @@ cdef extern from "Python.h":
 cdef class IntDumper(CDumper):
     oid = oids.INT8_OID
 
-    def dump(self, obj: Any) -> bytes:
+    def dump(self, obj) -> bytes:
         cdef char buf[22]
         cdef long long val = PyLong_AsLongLong(obj)
         cdef int written = PyOS_snprintf(buf, sizeof(buf), "%lld", val)
         return buf[:written]
 
-    def quote(self, obj: Any) -> bytes:
+    def quote(self, obj) -> bytes:
         cdef char buf[23]
         cdef long long val = PyLong_AsLongLong(obj)
         cdef int written
@@ -42,7 +42,7 @@ cdef class IntDumper(CDumper):
 
 
 cdef class IntBinaryDumper(IntDumper):
-    def dump(self, obj: Any) -> bytes:
+    def dump(self, obj) -> bytes:
         cdef long long val = PyLong_AsLongLong(obj)
         cdef uint64_t *ptvar = <uint64_t *>(&val)
         cdef int64_t beval = htobe64(ptvar[0])
diff --git a/psycopg3_c/psycopg3_c/types/singletons.pyx b/psycopg3_c/psycopg3_c/types/singletons.pyx
index 2377d4452..32aa1206b 100644
--- a/psycopg3_c/psycopg3_c/types/singletons.pyx
+++ b/psycopg3_c/psycopg3_c/types/singletons.pyx
@@ -10,7 +10,7 @@ from psycopg3_c cimport oids
 cdef class BoolDumper(CDumper):
     oid = oids.BOOL_OID
 
-    def dump(self, obj: bool) -> bytes:
+    def dump(self, obj) -> bytes:
         # Fast paths, just a pointer comparison
         if obj is True:
             return b"t"
@@ -29,7 +29,7 @@ cdef class BoolDumper(CDumper):
 
 
 cdef class BoolBinaryDumper(BoolDumper):
-    def dump(self, obj: bool) -> bytes:
+    def dump(self, obj) -> bytes:
         if obj is True:
             return b"\x01"
         elif obj is False:
diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx
index ebc6bd13d..b633ec08d 100644
--- a/psycopg3_c/psycopg3_c/types/text.pyx
+++ b/psycopg3_c/psycopg3_c/types/text.pyx
@@ -4,10 +4,67 @@ Cython adapters for textual types.
 
 # Copyright (C) 2020 The Psycopg Team
 
+from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize
 from cpython.unicode cimport PyUnicode_Decode, PyUnicode_DecodeUTF8
+from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsEncodedString
+
 from psycopg3_c cimport libpq, oids
 
 
+cdef class _StringDumper(CDumper):
+    cdef int is_utf8
+    cdef char *encoding
+    cdef bytes _bytes_encoding  # needed to keep `encoding` alive
+
+    def __init__(self, src: type, context: AdaptContext):
+        super().__init__(src, context)
+
+        self.is_utf8 = 0
+        self.encoding = "utf-8"
+
+        conn = self._connection
+        if conn:
+            self._bytes_encoding = conn.client_encoding.encode("utf-8")
+            self.encoding = PyBytes_AsString(self._bytes_encoding)
+            if (
+                self._bytes_encoding == b"utf-8"
+                or self._bytes_encoding == b"ascii"
+            ):
+                self.is_utf8 = 1
+
+
+cdef class StringBinaryDumper(_StringDumper):
+    def dump(self, obj) -> bytes:
+        # the server will raise DataError subclass if the string contains 0x00
+        if self.is_utf8:
+            return PyUnicode_AsUTF8String(obj)
+        else:
+            return PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+
+
+cdef class StringDumper(_StringDumper):
+    def dump(self, obj) -> bytes:
+        cdef bytes rv
+        cdef char *buf
+
+        if self.is_utf8:
+            rv = PyUnicode_AsUTF8String(obj)
+        else:
+            rv = PyUnicode_AsEncodedString(obj, self.encoding, NULL)
+
+        try:
+            # the function raises ValueError if the bytes contains 0x00
+            PyBytes_AsStringAndSize(rv, &buf, NULL)
+        except ValueError:
+            from psycopg3 import DataError
+
+            raise DataError(
+                "PostgreSQL text fields cannot contain NUL (0x00) bytes"
+            )
+
+        return rv
+
+
 cdef class TextLoader(CLoader):
     cdef int is_utf8
     cdef char *encoding
@@ -19,10 +76,10 @@ cdef class TextLoader(CLoader):
         self.is_utf8 = 0
         self.encoding = "utf-8"
 
-        conn = self.connection
+        conn = self._connection
         if conn:
             self._bytes_encoding = conn.client_encoding.encode("utf-8")
-            self.encoding = self._bytes_encoding
+            self.encoding = PyBytes_AsString(self._bytes_encoding)
             if self._bytes_encoding == b"utf-8":
                 self.is_utf8 = 1
             elif self._bytes_encoding == b"ascii":
@@ -60,6 +117,9 @@ cdef class ByteaBinaryLoader(CLoader):
 cdef void register_text_c_adapters():
     logger.debug("registering optimised text c adapters")
 
+    StringDumper.register(str)
+    StringBinaryDumper.register_binary(str)
+
     TextLoader.register(oids.INVALID_OID)
     TextLoader.register(oids.TEXT_OID)
     TextLoader.register_binary(oids.TEXT_OID)