From: Daniele Varrazzo Date: Mon, 10 May 2021 10:46:26 +0000 (+0200) Subject: Separate the decimal binary dumper to a different extension module X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=30401523f08d7e2c1c3f6aa9a53567bbb6804d48;p=thirdparty%2Fpsycopg.git Separate the decimal binary dumper to a different extension module This module has a dependency on the mpdec library so I expect a lot of fun to have it compiled. --- diff --git a/psycopg3_c/psycopg3_c/.gitignore b/psycopg3_c/psycopg3_c/.gitignore index 20bb0e25b..dd0f9a9de 100644 --- a/psycopg3_c/psycopg3_c/.gitignore +++ b/psycopg3_c/psycopg3_c/.gitignore @@ -1,4 +1,5 @@ /*.so _psycopg3.c +pg3dec.c pq.c *.html diff --git a/psycopg3_c/psycopg3_c/_psycopg3.pxd b/psycopg3_c/psycopg3_c/_psycopg3.pxd new file mode 100644 index 000000000..b81ccefc8 --- /dev/null +++ b/psycopg3_c/psycopg3_c/_psycopg3.pxd @@ -0,0 +1,28 @@ +""" +C implementation of the adaptation system. + +External interface to allow to write adapters in external modules. 
+""" + +from psycopg3_c cimport pq +from psycopg3_c.pq cimport libpq + + +cdef class CDumper: + cdef readonly object cls + cdef public libpq.Oid oid + cdef pq.PGconn _pgconn + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1 + cdef object get_key(self, object obj, object format) + cdef object upgrade(self, object obj, object format) + + @staticmethod + cdef char *ensure_size(bytearray ba, Py_ssize_t offset, Py_ssize_t size) except NULL + + +cdef class CLoader: + cdef public libpq.Oid oid + cdef pq.PGconn _pgconn + + cdef object cload(self, const char *data, size_t length) diff --git a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx index da60d7c48..e567286fa 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx +++ b/psycopg3_c/psycopg3_c/_psycopg3/adapt.pyx @@ -31,10 +31,6 @@ logger = logging.getLogger("psycopg3.adapt") @cython.freelist(8) cdef class CDumper: - cdef readonly object cls - cdef public libpq.Oid oid - cdef pq.PGconn _pgconn - def __init__(self, cls, context: Optional[AdaptContext] = None): self.cls = cls conn = context.connection if context is not None else None @@ -136,9 +132,6 @@ cdef class CDumper: @cython.freelist(8) cdef class CLoader: - cdef public libpq.Oid oid - cdef pq.PGconn _pgconn - def __init__(self, int oid, context: Optional[AdaptContext] = None): self.oid = oid conn = context.connection if context is not None else None diff --git a/psycopg3_c/psycopg3_c/pg3dec.pyx b/psycopg3_c/psycopg3_c/pg3dec.pyx new file mode 100644 index 000000000..cbf52e68e --- /dev/null +++ b/psycopg3_c/psycopg3_c/pg3dec.pyx @@ -0,0 +1,15 @@ +""" +psycopg3_c.pg3dec optimization module. + +This module contains fast binary conversions between Python Decimal (backed by +the mpdecimal_ library) and PostgreSQL numeric data type. + +.. 
_mpdecimal: https://www.bytereef.org/mpdecimal/ + +""" + +# Copyright (C) 2021 The Psycopg Team + +from psycopg3_c._psycopg3 cimport CDumper, oids, endian + +include "pg3dec/pg3dec.pyx" diff --git a/psycopg3_c/psycopg3_c/pg3dec/pg3dec.pyx b/psycopg3_c/psycopg3_c/pg3dec/pg3dec.pyx new file mode 100644 index 000000000..03d87c03e --- /dev/null +++ b/psycopg3_c/psycopg3_c/pg3dec/pg3dec.pyx @@ -0,0 +1,130 @@ +cimport cython + +from libc.stdint cimport uint16_t, int16_t + +import psycopg3.pq +from psycopg3 import errors as e + +cdef extern from "Python.h": + # Missing in cpython/unicode.pxd + const char *PyUnicode_AsUTF8(object unicode) except NULL + + +DEF DEC_DIGITS = 4 # decimal digits per Postgres "digit" +DEF NUMERIC_POS = 0x0000 +DEF NUMERIC_NEG = 0x4000 +DEF NUMERIC_NAN = 0xC000 +DEF NUMERIC_PINF = 0xD000 +DEF NUMERIC_NINF = 0xF000 + +cdef extern from *: + """ +/* Weights of py digits into a pg digit according to their positions. */ +static const int pydigit_weights[] = {1000, 100, 10, 1}; +""" + const int[4] pydigit_weights + + +@cython.final +@cython.cdivision(True) +cdef class DecimalBinaryDumper(CDumper): + + format = psycopg3.pq.Format.BINARY + + def __cinit__(self): + self.oid = oids.NUMERIC_OID + + cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: + + # TODO: this implementation is about 30% slower than the text dump. + # This might be probably optimised by accessing the C structure of + # the Decimal object, if available, which would save the creation of + # several intermediate Python objects (the DecimalTuple, the digits + # tuple, and then accessing them). 
+ + cdef object t = obj.as_tuple() + cdef int sign = t[0] + cdef tuple digits = t[1] + cdef uint16_t *buf + cdef Py_ssize_t length + + cdef object pyexp = t[2] + cdef const char *bexp + if not isinstance(pyexp, int): + # Handle inf, nan + length = 4 * sizeof(uint16_t) + buf = CDumper.ensure_size(rv, offset, length) + buf[0] = 0 + buf[1] = 0 + buf[3] = 0 + bexp = PyUnicode_AsUTF8(pyexp) + if bexp[0] == b'n' or bexp[0] == b'N': + buf[2] = endian.htobe16(NUMERIC_NAN) + elif bexp[0] == b'F': + if sign: + buf[2] = endian.htobe16(NUMERIC_NINF) + else: + buf[2] = endian.htobe16(NUMERIC_PINF) + else: + raise e.DataError(f"unexpected decimal exponent: {pyexp}") + return length + + cdef int exp = pyexp + cdef uint16_t ndigits = len(digits) + + # Find the last nonzero digit + cdef int nzdigits = ndigits + while nzdigits > 0 and digits[nzdigits - 1] == 0: + nzdigits -= 1 + + cdef uint16_t dscale + if exp <= 0: + dscale = -exp + else: + dscale = 0 + # align the py digits to the pg digits if there's some py exponent + ndigits += exp % DEC_DIGITS + + if nzdigits == 0: + length = 4 * sizeof(uint16_t) + buf = CDumper.ensure_size(rv, offset, length) + buf[0] = 0 # ndigits + buf[1] = 0 # weight + buf[2] = endian.htobe16(NUMERIC_POS) # sign + buf[3] = endian.htobe16(dscale) + return length + + # Equivalent of 0-padding left to align the py digits to the pg digits + # but without changing the digits tuple. 
+ cdef int wi = 0 + cdef int mod = (ndigits - dscale) % DEC_DIGITS + if mod < 0: + # the difference between C and Py % operator + mod += 4 + if mod: + wi = DEC_DIGITS - mod + ndigits += wi + + cdef int tmp = nzdigits + wi + cdef int pgdigits = tmp // DEC_DIGITS + (tmp % DEC_DIGITS and 1) + length = (pgdigits + 4) * sizeof(uint16_t) + buf = CDumper.ensure_size(rv, offset, length) + buf[0] = endian.htobe16(pgdigits) + buf[1] = endian.htobe16(((ndigits + exp) // DEC_DIGITS - 1)) + buf[2] = endian.htobe16(NUMERIC_NEG) if sign else endian.htobe16(NUMERIC_POS) + buf[3] = endian.htobe16(dscale) + + cdef uint16_t pgdigit = 0 + cdef int bi = 4 + for i in range(nzdigits): + pgdigit += pydigit_weights[wi] * (digits[i]) + wi += 1 + if wi >= DEC_DIGITS: + buf[bi] = endian.htobe16(pgdigit) + pgdigit = wi = 0 + bi += 1 + + if pgdigit: + buf[bi] = endian.htobe16(pgdigit) + + return length diff --git a/psycopg3_c/psycopg3_c/types/numeric.pyx b/psycopg3_c/psycopg3_c/types/numeric.pyx index a2646047f..b6937aba1 100644 --- a/psycopg3_c/psycopg3_c/types/numeric.pyx +++ b/psycopg3_c/psycopg3_c/types/numeric.pyx @@ -9,7 +9,6 @@ cimport cython from libc.stdint cimport * from libc.string cimport memcpy, strlen from cpython.mem cimport PyMem_Free -from cpython.ref cimport Py_DECREF from cpython.dict cimport PyDict_GetItem, PyDict_SetItem from cpython.long cimport ( PyLong_FromString, PyLong_FromLong, PyLong_FromLongLong, @@ -25,6 +24,15 @@ from psycopg3 import errors as e from psycopg3.wrappers.numeric import Int2, Int4, Int8, IntNumeric +# Objects implemented by a different C extension library but exposed by +# this extension to Python so that the optimised adapter import machinery +# can find them. If found it will shadow the class defined here. 
+try: + from psycopg3_c.pg3dec import DecimalBinaryDumper +except ImportError: + DecimalBinaryDumper = _DecimalBinaryDumper + + cdef extern from "Python.h": # work around https://github.com/cython/cython/issues/3909 double PyOS_string_to_double( @@ -545,7 +553,16 @@ static const int pydigit_weights[] = {1000, 100, 10, 1}; @cython.final @cython.cdivision(True) -cdef class DecimalBinaryDumper(CDumper): +cdef class _DecimalBinaryDumper(CDumper): + """ + Dump Python Decimal objects to the PostgreSQL binary numeric format. + + This implementation only relies on the Python interface of the Decimal + class, so it should be well behaved but not the best performing. + A more optimised implementation is available in the psycopg3_c.pg3dec extension + module; however, it depends on the mpdec external library so it might not + be available everywhere. + """ format = PQ_BINARY @@ -554,12 +571,6 @@ cdef class DecimalBinaryDumper(CDumper): cdef Py_ssize_t cdump(self, obj, bytearray rv, Py_ssize_t offset) except -1: - # TODO: this implementation is about 30% slower than the text dump. - # This might be probably optimised by accessing the C structure of - # the Decimal object, if available, which would save the creation of - # several intermediate Python objects (the DecimalTuple, the digits - # tuple, and then accessing them). 
- cdef object t = obj.as_tuple() cdef int sign = t[0] cdef tuple digits = t[1] diff --git a/psycopg3_c/setup.py b/psycopg3_c/setup.py index 23f5769d6..bd070c798 100644 --- a/psycopg3_c/setup.py +++ b/psycopg3_c/setup.py @@ -26,6 +26,31 @@ with open("psycopg3_c/version.py") as f: raise Exception(f"cannot find version in {f.name}") version = m.group(1) +# Some details missing, to be finished by psycopg3_build_ext.finalize_options +ext_modules = [ + Extension( + "psycopg3_c._psycopg3", + [ + "psycopg3_c/_psycopg3.c", + "psycopg3_c/types/numutils.c", + ], + libraries=["pq"], + include_dirs=[], + ), + Extension( + "psycopg3_c.pq", + ["psycopg3_c/pq.c"], + libraries=["pq"], + include_dirs=[], + ), + Extension( + "psycopg3_c.pg3dec", + ["psycopg3_c/pg3dec.c"], + libraries=["mpdec"], + include_dirs=[], + ), +] + class psycopg3_build_ext(build_ext): def finalize_options(self) -> None: @@ -35,7 +60,7 @@ class psycopg3_build_ext(build_ext): def _setup_ext_build(self) -> None: cythonize = None - # In the sdist there are not .pyx, only c, so we don't need Cython + # In the sdist there are no .pyx, only c, so we don't need Cython # Otherwise Cython is a requirement and is used to compile pyx to c if os.path.exists("psycopg3_c/_psycopg3.pyx"): from Cython.Build import cythonize @@ -69,30 +94,12 @@ class psycopg3_build_ext(build_ext): annotate=False, # enable to get an html view of the C module ) else: - self.distribution.ext_modules = [pgext, pqext] + self.distribution.ext_modules = ext_modules -# Some details missing, to be finished by psycopg3_build_ext.finalize_options -pgext = Extension( - "psycopg3_c._psycopg3", - [ - "psycopg3_c/_psycopg3.c", - "psycopg3_c/types/numutils.c", - ], - libraries=["pq"], - include_dirs=[], -) - -pqext = Extension( - "psycopg3_c.pq", - ["psycopg3_c/pq.c"], - libraries=["pq"], - include_dirs=[], -) - setup( version=version, - ext_modules=[pgext, pqext], + ext_modules=ext_modules, cmdclass={"build_ext": psycopg3_build_ext}, # For some reason 
package_data doesn't work in setup.cfg package_data={