From: Daniele Varrazzo Date: Thu, 31 Dec 2020 18:59:22 +0000 (+0100) Subject: Added c interface to dumpers X-Git-Tag: 3.0.dev0~212 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=51f854150c2a7aeed048866c074be293af76c42f;p=thirdparty%2Fpsycopg.git Added c interface to dumpers --- diff --git a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx index 68c7e5480..3773e49e2 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx +++ b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx @@ -17,7 +17,7 @@ from typing import Any from cpython.bytes cimport PyBytes_AsStringAndSize from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_Resize -from cpython.bytearray cimport PyByteArray_AS_STRING +from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_AS_STRING from psycopg3_c.pq cimport _buffer_as_string_and_size @@ -52,9 +52,30 @@ cdef class CDumper: ): self.oid = oids.TEXT_OID - def dump(self, obj: Any) -> bytes: + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + """Store the Postgres representation *obj* into *rv* at *offset* + + Return the number of bytes written to rv or -1 on Python exception. + + Subclasses must implement this method. The `dump()` implementation + transforms the result of this method to a bytearray so that it can be + returned to Python. + + The function interface allows C code to use this method automatically + to create larger buffers, e.g. for copy, composite objects, etc. + + Implementation note: as you will alway need to make sure that rv + has enough space to include what you want to dump, `ensure_size()` + might probably come handy. + """ raise NotImplementedError() + def dump(self, obj: Any) -> bytearray: + """Return the Postgres representation of *obj* as Python array of bytes""" + cdef rv = PyByteArray_FromStringAndSize("", 0) + self.cdump(obj, rv, 0) + return rv + def quote(self, obj: Any) -> bytearray: cdef char *ptr cdef char *ptr_out @@ -90,7 +111,7 @@ cdef class CDumper: @classmethod def register( - cls, + this_cls, cls: Union[type, str], context: Optional[AdaptContext] = None, int format = Format.TEXT, @@ -100,7 +121,22 @@ cdef class CDumper: else: from psycopg3.adapt import global_adapters as adapters - adapters.register_dumper(cls, cls) + adapters.register_dumper(cls, this_cls) + + @staticmethod + cdef char *ensure_size(bytearray ba, Py_ssize_t offset, Py_ssize_t size) except NULL: + """ + Grow *ba*, if necessary, to contains at least *size* bytes after *offset* + + Return the pointer in the bytearray at *offset*, i.e. the place where + you want to write *size* bytes. + """ + cdef Py_ssize_t curr_size = PyByteArray_GET_SIZE(ba) + cdef Py_ssize_t new_size = offset + size + if curr_size < new_size: + PyByteArray_Resize(ba, new_size) + + return PyByteArray_AS_STRING(ba) + offset cdef class CLoader: diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx index b33953bdf..0bcd3c4fc 100644 --- a/psycopg3_c/psycopg3_c/types/numeric.pyx +++ b/psycopg3_c/psycopg3_c/types/numeric.pyx @@ -5,6 +5,7 @@ Cython adapters for numeric types. # Copyright (C) 2020 The Psycopg Team from libc.stdint cimport * +from libc.string cimport memcpy from cpython.long cimport PyLong_FromString, PyLong_FromLong, PyLong_AsLongLong from cpython.long cimport PyLong_FromLongLong, PyLong_FromUnsignedLong from cpython.float cimport PyFloat_FromDouble @@ -26,37 +27,39 @@ cdef class IntDumper(CDumper): def __cinit__(self): self.oid = oids.INT8_OID - def __init__(self, cls: type, context: Optional[AdaptContext] = None): - super().__init__(cls, context) - - def dump(self, obj) -> bytes: - cdef char buf[22] + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef int size = 22 # max int as string + cdef char *buf = CDumper.ensure_size(rv, offset, size) cdef long long val = PyLong_AsLongLong(obj) - cdef int written = PyOS_snprintf(buf, sizeof(buf), "%lld", val) - return buf[:written] + cdef int written = PyOS_snprintf(buf, size, "%lld", val) + PyByteArray_Resize(rv, offset + written) + return written - def quote(self, obj) -> bytes: - cdef char buf[23] - cdef long long val = PyLong_AsLongLong(obj) - cdef int written - if val >= 0: - written = PyOS_snprintf(buf, sizeof(buf), "%lld", val) + def quote(self, obj) -> bytearray: + rv = PyByteArray_FromStringAndSize("", 0) + PyByteArray_Resize(rv, 23) + + if obj >= 0: + self.cdump(obj, rv, 0) else: - written = PyOS_snprintf(buf, sizeof(buf), " %lld", val) + rv[0] = b' ' + self.cdump(obj, rv, 1) - return buf[:written] + return rv cdef class IntBinaryDumper(IntDumper): format = Format.BINARY - def dump(self, obj) -> bytes: + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *buf = CDumper.ensure_size(rv, offset, sizeof(int64_t)) cdef long long val = PyLong_AsLongLong(obj) + # swap bytes if needed cdef uint64_t *ptvar = (&val) cdef int64_t beval = htobe64(ptvar[0]) - cdef char *buf = &beval - return buf[:sizeof(beval)] + memcpy(buf, &beval, sizeof(int64_t)) + return sizeof(int64_t) cdef class IntLoader(CLoader): diff --git a/psycopg3_c/psycopg3_c/types/singletons.pyx b/psycopg3_c/psycopg3_c/types/singletons.pyx index 58fcaace7..c25737de9 100644 --- a/psycopg3_c/psycopg3_c/types/singletons.pyx +++ b/psycopg3_c/psycopg3_c/types/singletons.pyx @@ -14,14 +14,23 @@ cdef class BoolDumper(CDumper): def __cinit__(self): self.oid = oids.BOOL_OID - def dump(self, obj) -> bytes: + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + CDumper.ensure_size(rv, offset, 1) + # Fast paths, just a pointer comparison + cdef char val if obj is True: - return b"t" + val = b"t" elif obj is False: - return b"f" + val = b"f" + elif obj: + val = b"t" else: - return b"t" if obj else b"f" + val = b"f" + + cdef char *buf = PyByteArray_AS_STRING(rv) + buf[offset] = val + return 1 def quote(self, obj: bool) -> bytes: if obj is True: @@ -36,13 +45,23 @@ cdef class BoolBinaryDumper(BoolDumper): format = Format.BINARY - def dump(self, obj) -> bytes: + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + CDumper.ensure_size(rv, offset, 1) + + # Fast paths, just a pointer comparison + cdef char val if obj is True: - return b"\x01" + val = b"\x01" elif obj is False: - return b"\x00" + val = b"\x00" + elif obj: + val = b"\x01" else: - return b"\x01" if obj else b"\x00" + val = b"\x00" + + cdef char *buf = PyByteArray_AS_STRING(rv) + buf[offset] = val + return 1 cdef class BoolLoader(CLoader): diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx index bebe144bf..6802e21c6 100644 --- a/psycopg3_c/psycopg3_c/types/text.pyx +++ b/psycopg3_c/psycopg3_c/types/text.pyx @@ -4,11 +4,20 @@ Cython adapters for textual types. # Copyright (C) 2020 The Psycopg Team +from libc.string cimport memcpy, memchr from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize -from cpython.unicode cimport PyUnicode_Decode, PyUnicode_DecodeUTF8 -from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsEncodedString +from cpython.unicode cimport ( + PyUnicode_AsEncodedString, + PyUnicode_AsUTF8String, + PyUnicode_CheckExact, + PyUnicode_Decode, + PyUnicode_DecodeUTF8, +) -from psycopg3_c.pq cimport Escaping +from psycopg3_c.pq cimport libpq, Escaping, _buffer_as_string_and_size + +cdef extern from "Python.h": + const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) cdef class _StringDumper(CDumper): @@ -16,7 +25,7 @@ cdef class _StringDumper(CDumper): cdef char *encoding cdef bytes _bytes_encoding # needed to keep `encoding` alive - def __init__(self, cls: type, context: Optional[AdaptContext]): + def __init__(self, cls: type, context: Optional[AdaptContext] = None): super().__init__(cls, context) self.is_utf8 = 0 @@ -37,38 +46,46 @@ cdef class StringBinaryDumper(_StringDumper): format = Format.BINARY - def dump(self, obj) -> bytes: + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: # the server will raise DataError subclass if the string contains 0x00 + cdef Py_ssize_t size; + cdef const char *src + if self.is_utf8: - return PyUnicode_AsUTF8String(obj) + # Probably the fastest path, but doesn't work with subclasses + if PyUnicode_CheckExact(obj): + src = PyUnicode_AsUTF8AndSize(obj, &size) + if src == NULL: + # re-encode using a function raising an exception. + # TODO: is there a better way? + PyUnicode_AsUTF8String(obj) + else: + b = PyUnicode_AsUTF8String(obj) + PyBytes_AsStringAndSize(b, &src, &size) else: - return PyUnicode_AsEncodedString(obj, self.encoding, NULL) + b = PyUnicode_AsEncodedString(obj, self.encoding, NULL) + PyBytes_AsStringAndSize(b, &src, &size) + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size -cdef class StringDumper(_StringDumper): - format = Format.TEXT +cdef class StringDumper(StringBinaryDumper): - def dump(self, obj) -> bytes: - cdef bytes rv - cdef char *buf + format = Format.TEXT - if self.is_utf8: - rv = PyUnicode_AsUTF8String(obj) - else: - rv = PyUnicode_AsEncodedString(obj, self.encoding, NULL) + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef Py_ssize_t size = StringBinaryDumper.cdump(self, obj, rv, offset) - try: - # the function raises ValueError if the bytes contains 0x00 - PyBytes_AsStringAndSize(rv, &buf, NULL) - except ValueError: + # Like the binary dump, but check for 0, or the string will be truncated + cdef const char *buf = PyByteArray_AS_STRING(rv) + if NULL != memchr(buf + offset, 0x00, size): from psycopg3 import DataError - raise DataError( "PostgreSQL text fields cannot contain NUL (0x00) bytes" ) - - return rv + return size cdef class TextLoader(CLoader): @@ -111,25 +128,51 @@ cdef class BytesDumper(CDumper): format = Format.TEXT - cdef Escaping esc - def __cinit__(self): self.oid = oids.BYTEA_OID - def __init__(self, cls: type, context: Optional[AdaptContext] = None): - super().__init__(cls, context) - self.esc = Escaping(self._pgconn) + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: - def dump(self, obj) -> memoryview: - return self.esc.escape_bytea(obj) + cdef size_t len_out + cdef unsigned char *out + cdef char *ptr + cdef Py_ssize_t length + _buffer_as_string_and_size(obj, &ptr, &length) -cdef class BytesBinaryDumper(BytesDumper): + if self._pgconn is not None and self._pgconn.pgconn_ptr != NULL: + out = libpq.PQescapeByteaConn( + self._pgconn.pgconn_ptr, ptr, length, &len_out) + else: + out = libpq.PQescapeBytea(ptr, length, &len_out) + + if out is NULL: + raise MemoryError( + f"couldn't allocate for escape_bytea of {length} bytes" + ) + + len_out -= 1 # out includes final 0 + cdef char *buf = CDumper.ensure_size(rv, offset, len_out) + memcpy(buf, out, len_out) + libpq.PQfreemem(out) + return len_out + + +cdef class BytesBinaryDumper(CDumper): format = Format.BINARY - def dump(self, obj): - return obj + def __cinit__(self): + self.oid = oids.BYTEA_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + cdef char *src + cdef Py_ssize_t size; + _buffer_as_string_and_size(obj, &src, &size) + + cdef char *buf = CDumper.ensure_size(rv, offset, size) + memcpy(buf, src, size) + return size cdef class ByteaLoader(CLoader):