From: Daniele Varrazzo Date: Mon, 27 Oct 2025 16:27:10 +0000 (+0000) Subject: perf: use PySequence_Fast function instead of slower alternatives X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f70857e88b28f4892abce2bef8d441888088db8a;p=thirdparty%2Fpsycopg.git perf: use PySequence_Fast function instead of slower alternatives --- diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx index c14371a33..ff1d213ca 100644 --- a/psycopg_c/psycopg_c/_psycopg/copy.pyx +++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx @@ -7,7 +7,8 @@ C optimised functions for the copy system. from libc.stdint cimport int32_t, uint16_t, uint32_t from libc.string cimport memcpy -from cpython.tuple cimport PyTuple_GET_SIZE +from cpython.sequence cimport PySequence_Fast, PySequence_Fast_GET_ITEM +from cpython.sequence cimport PySequence_Fast_GET_SIZE from cpython.bytearray cimport PyByteArray_AS_STRING, PyByteArray_FromStringAndSize from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_Resize from cpython.memoryview cimport PyMemoryView_FromObject @@ -23,13 +24,8 @@ cdef int32_t _binary_null = -1 cdef object _format_row_binary(object row, Transformer tx, bytearray out): """Convert a row of adapted data to the data to send for binary copy""" - cdef Py_ssize_t rowlen - if type(row) is list: - rowlen = PyList_GET_SIZE(row) - elif type(row) is tuple: - rowlen = PyTuple_GET_SIZE(row) - else: - rowlen = len(row) + cdef row_fast = PySequence_Fast(row, "'row' is not a valid sequence") + cdef Py_ssize_t rowlen = PySequence_Fast_GET_SIZE(row_fast) cdef uint16_t berowlen = endian.htobe16(rowlen) # offset in 'out' where to write @@ -55,22 +51,23 @@ cdef object _format_row_binary(object row, Transformer tx, bytearray out): if PyList_GET_SIZE(dumpers) != rowlen: raise e.DataError(f"expected {len(dumpers)} values in row, got {rowlen}") + cdef PyObject *item for i in range(rowlen): - item = row[i] - if item is None: + item = PySequence_Fast_GET_ITEM(row_fast, i) + if item is None: _append_binary_none(out, &pos) continue row_dumper = PyList_GET_ITEM(dumpers, i) if not row_dumper: - row_dumper = tx.get_row_dumper(item, fmt) + row_dumper = tx.get_row_dumper(item, fmt) Py_INCREF(row_dumper) PyList_SET_ITEM(dumpers, i, row_dumper) if (row_dumper).cdumper is not None: # A cdumper can resize if necessary and copy in place size = (row_dumper).cdumper.cdump( - item, out, pos + sizeof(besize)) + item, out, pos + sizeof(besize)) # Also add the size of the item, before the item besize = endian.htobe32(size) target = PyByteArray_AS_STRING(out) # might have been moved by cdump @@ -78,7 +75,7 @@ cdef object _format_row_binary(object row, Transformer tx, bytearray out): else: # A Python dumper, gotta call it and extract its juices b = PyObject_CallFunctionObjArgs( - (row_dumper).dumpfunc, item, NULL) + (row_dumper).dumpfunc, item, NULL) if b is None: _append_binary_none(out, &pos) continue @@ -115,11 +112,17 @@ cdef int _append_binary_none(bytearray out, Py_ssize_t *pos) except -1: return 0 -cdef object _format_row_text( - object row, Py_ssize_t rowlen, Transformer tx, bytearray out -): +cdef object _format_row_text(object row, Transformer tx, bytearray out): # offset in 'out' where to write cdef Py_ssize_t pos = PyByteArray_GET_SIZE(out) + cdef row_fast = PySequence_Fast(row, "'row' is not a valid sequence") + + # exit early, if the row is empty + cdef Py_ssize_t rowlen = PySequence_Fast_GET_SIZE(row_fast) + if rowlen == 0: + PyByteArray_Resize(out, pos + 1) + out[pos] = b"\n" + return cdef Py_ssize_t size, tmpsize cdef char *buf @@ -134,12 +137,13 @@ cdef object _format_row_text( if dumpers and PyList_GET_SIZE(dumpers) != rowlen: raise e.DataError(f"expected {len(dumpers)} values in row, got {rowlen}") + cdef PyObject *item for i in range(rowlen): # Include the tab before the data, so it gets included in the resizes with_tab = i > 0 - item = row[i] - if item is None: + item = PySequence_Fast_GET_ITEM(row_fast, i) + if item == None: _append_text_none(out, &pos, with_tab) continue @@ -148,17 +152,17 @@ cdef object _format_row_text( row_dumper = PyList_GET_ITEM(dumpers, i) else: # no pinned dumpers, thus free value dumping - row_dumper = tx.get_row_dumper(item, fmt) + row_dumper = tx.get_row_dumper(item, fmt) if (row_dumper).cdumper is not None: # A cdumper can resize if necessary and copy in place size = (row_dumper).cdumper.cdump( - item, out, pos + with_tab) + item, out, pos + with_tab) target = PyByteArray_AS_STRING(out) + pos else: # A Python dumper, gotta call it and extract its juices b = PyObject_CallFunctionObjArgs( - (row_dumper).dumpfunc, item, NULL) + (row_dumper).dumpfunc, item, NULL) if b is None: _append_text_none(out, &pos, with_tab) continue @@ -206,22 +210,8 @@ cdef object _format_row_text( def format_row_text(row: Sequence[Any], tx: Transformer, out: bytearray) -> None: cdef Py_ssize_t size = PyByteArray_GET_SIZE(out) - # exit early, if the row is empty - cdef Py_ssize_t rowlen - if type(row) is list: - rowlen = PyList_GET_SIZE(row) - elif type(row) is tuple: - rowlen = PyTuple_GET_SIZE(row) - else: - rowlen = len(row) - - if rowlen == 0: - PyByteArray_Resize(out, size + 1) - out[size] = b"\n" - return - try: - _format_row_text(row, rowlen, tx, out) + _format_row_text(row, tx, out) except Exception as e: # Restore the input bytearray to the size it was before entering here # to avoid potentially passing junk to copy. diff --git a/psycopg_c/psycopg_c/_psycopg/transform.pyx b/psycopg_c/psycopg_c/_psycopg/transform.pyx index b1f530d4f..010470d5e 100644 --- a/psycopg_c/psycopg_c/_psycopg/transform.pyx +++ b/psycopg_c/psycopg_c/_psycopg/transform.pyx @@ -350,34 +350,37 @@ cdef class Transformer: return ptr cpdef dump_sequence(self, object params, object formats): - # Verify that they are not none and that PyList_GET_ITEM won't blow up - cdef Py_ssize_t nparams = len(params) - cdef list out = PyList_New(nparams) - cdef int i cdef PyObject *dumper_ptr # borrowed pointer to row dumper cdef object dumped cdef Py_ssize_t size + cdef params_fast = PySequence_Fast( + params, "'params' is not a valid sequence") + cdef formats_fast = PySequence_Fast( + formats, "'formats' is not a valid sequence") + + cdef Py_ssize_t nparams = PySequence_Fast_GET_SIZE(params_fast) + cdef list out = PyList_New(nparams) + cdef PyObject *param + if self._none_oid < 0: self._none_oid = self.adapters.get_dumper(NoneType, "s").oid dumpers = self._row_dumpers - if dumpers: for i in range(nparams): - param = params[i] - if param is not None: + param = PySequence_Fast_GET_ITEM(params_fast, i) + if param != None: dumper_ptr = PyList_GET_ITEM(dumpers, i) if (dumper_ptr).cdumper is not None: dumped = PyByteArray_FromStringAndSize("", 0) size = (dumper_ptr).cdumper.cdump( - param, dumped, 0) + param, dumped, 0) PyByteArray_Resize(dumped, size) else: dumped = PyObject_CallFunctionObjArgs( - (dumper_ptr).dumpfunc, - param, NULL) + (dumper_ptr).dumpfunc, param, NULL) else: dumped = None @@ -388,21 +391,21 @@ cdef class Transformer: cdef tuple types = PyTuple_New(nparams) cdef list pqformats = PyList_New(nparams) + cdef PyObject *format for i in range(nparams): - param = params[i] - if param is not None: - dumper_ptr = self.get_row_dumper( - param, formats[i]) + param = PySequence_Fast_GET_ITEM(params_fast, i) + if param != None: + format = PySequence_Fast_GET_ITEM(formats_fast, i) + dumper_ptr = self.get_row_dumper(param, format) if (dumper_ptr).cdumper is not None: dumped = PyByteArray_FromStringAndSize("", 0) size = (dumper_ptr).cdumper.cdump( - param, dumped, 0) + param, dumped, 0) PyByteArray_Resize(dumped, size) else: dumped = PyObject_CallFunctionObjArgs( - (dumper_ptr).dumpfunc, - param, NULL) + (dumper_ptr).dumpfunc, param, NULL) oid = (dumper_ptr).oid fmt = (dumper_ptr).format else: @@ -526,13 +529,15 @@ cdef class Transformer: return record cpdef object load_sequence(self, record: Sequence[Buffer | None]): - cdef Py_ssize_t nfields = len(record) - out = PyTuple_New(nfields) + cdef record_fast = PySequence_Fast(record, "'record' is not a valid sequence") + cdef Py_ssize_t nfields = PySequence_Fast_GET_SIZE(record_fast) cdef PyObject *loader # borrowed RowLoader cdef int col cdef char *ptr cdef Py_ssize_t size + cdef PyObject *item + out = PyTuple_New(nfields) row_loaders = self._row_loaders # avoid an incref/decref per item if PyList_GET_SIZE(row_loaders) != nfields: raise e.ProgrammingError( @@ -540,19 +545,17 @@ cdef class Transformer: f" {len(self._row_loaders)} loaders registered") for col in range(nfields): - item = record[col] - if item is None: - Py_INCREF(None) - PyTuple_SET_ITEM(out, col, None) - continue - - loader = PyList_GET_ITEM(row_loaders, col) - if (loader).cloader is not None: - _buffer_as_string_and_size(item, &ptr, &size) - pyval = (loader).cloader.cload(ptr, size) + item = PySequence_Fast_GET_ITEM(record_fast, col) + if item == None: + pyval = None else: - pyval = PyObject_CallFunctionObjArgs( - (loader).loadfunc, item, NULL) + loader = PyList_GET_ITEM(row_loaders, col) + if (loader).cloader is not None: + _buffer_as_string_and_size(item, &ptr, &size) + pyval = (loader).cloader.cload(ptr, size) + else: + pyval = PyObject_CallFunctionObjArgs( + (loader).loadfunc, item, NULL) Py_INCREF(pyval) PyTuple_SET_ITEM(out, col, pyval) diff --git a/psycopg_c/psycopg_c/pq/pgconn.pyx b/psycopg_c/psycopg_c/pq/pgconn.pyx index 1e1725f03..759054937 100644 --- a/psycopg_c/psycopg_c/pq/pgconn.pyx +++ b/psycopg_c/psycopg_c/pq/pgconn.pyx @@ -20,6 +20,9 @@ cdef extern from * nogil: from libc.stdio cimport fdopen from cpython.mem cimport PyMem_Free, PyMem_Malloc from cpython.bytes cimport PyBytes_AsString +from cpython.object cimport PyObject +from cpython.sequence cimport PySequence_Fast, PySequence_Fast_GET_ITEM +from cpython.sequence cimport PySequence_Fast_GET_SIZE from cpython.memoryview cimport PyMemoryView_FromObject import sys @@ -321,12 +324,17 @@ cdef class PGconn: _ensure_pgconn(self) cdef int i - cdef Py_ssize_t nparams = len(param_types) if param_types else 0 + cdef types_fast + cdef Py_ssize_t nparams = 0 + if param_types is not None: + types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence") + nparams = PySequence_Fast_GET_SIZE(types_fast) + cdef libpq.Oid *atypes = NULL if nparams: atypes = PyMem_Malloc(nparams * sizeof(libpq.Oid)) for i in range(nparams): - atypes[i] = param_types[i] + atypes[i] = PySequence_Fast_GET_ITEM(types_fast, i) cdef int rv with nogil: @@ -376,12 +384,17 @@ cdef class PGconn: _ensure_pgconn(self) cdef int i - cdef Py_ssize_t nparams = len(param_types) if param_types else 0 + cdef types_fast + cdef Py_ssize_t nparams = 0 + if param_types is not None: + types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence") + nparams = PySequence_Fast_GET_SIZE(types_fast) + cdef libpq.Oid *atypes = NULL if nparams: atypes = PyMem_Malloc(nparams * sizeof(libpq.Oid)) for i in range(nparams): - atypes[i] = param_types[i] + atypes[i] = PySequence_Fast_GET_ITEM(types_fast, i) cdef libpq.PGresult *rv with nogil: @@ -751,31 +764,35 @@ cdef void notice_receiver(void *arg, const libpq.PGresult *res_ptr) noexcept wit cdef (Py_ssize_t, libpq.Oid *, char * const*, int *, int *) _query_params_args( - list param_values: Sequence[bytes | None] | None, + param_values: Sequence[bytes | None] | None, param_types: Sequence[int] | None, - list param_formats: Sequence[int] | None, + param_formats: Sequence[int] | None, ) except *: cdef int i - # the PostgresQuery converts the param_types to tuple, so this operation - # is most often no-op - cdef tuple tparam_types - if param_types is not None and not isinstance(param_types, tuple): - tparam_types = tuple(param_types) - else: - tparam_types = param_types + cdef values_fast + cdef types_fast + cdef formats_fast - cdef Py_ssize_t nparams = len(param_values) if param_values else 0 - if tparam_types is not None and len(tparam_types) != nparams: - raise ValueError( - "got %d param_values but %d param_types" - % (nparams, len(tparam_types)) - ) - if param_formats is not None and len(param_formats) != nparams: - raise ValueError( - "got %d param_values but %d param_formats" - % (nparams, len(param_formats)) - ) + cdef Py_ssize_t nparams = 0 + if param_values is not None: + values_fast = PySequence_Fast(param_values, "'param_values' is not a sequence") + nparams = PySequence_Fast_GET_SIZE(values_fast) + + if param_types is not None: + types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence") + if PySequence_Fast_GET_SIZE(types_fast) != nparams: + raise ValueError( + f"got {nparams} param_values but {len(param_types)} param_types" + ) + + if param_formats is not None: + formats_fast = PySequence_Fast( + param_formats, "'param_formats' is not a sequence") + if PySequence_Fast_GET_SIZE(formats_fast) != nparams: + raise ValueError( + f"got {nparams} param_values but {len(param_formats)} param_formats" + ) cdef char **aparams = NULL cdef int *alenghts = NULL @@ -786,28 +803,28 @@ cdef (Py_ssize_t, libpq.Oid *, char * const*, int *, int *) _query_params_args( aparams = PyMem_Malloc(nparams * sizeof(char *)) alenghts = PyMem_Malloc(nparams * sizeof(int)) for i in range(nparams): - obj = param_values[i] - if obj is None: - aparams[i] = NULL - alenghts[i] = 0 - else: + obj = PySequence_Fast_GET_ITEM(values_fast, i) + if obj != None: # TODO: it is a leak if this fails (but it should only fail # on internal error, e.g. if obj is not a buffer) - _buffer_as_string_and_size(obj, &ptr, &length) + _buffer_as_string_and_size(obj, &ptr, &length) aparams[i] = ptr alenghts[i] = length + else: + aparams[i] = NULL + alenghts[i] = 0 cdef libpq.Oid *atypes = NULL - if tparam_types: + if param_types is not None: atypes = PyMem_Malloc(nparams * sizeof(libpq.Oid)) for i in range(nparams): - atypes[i] = tparam_types[i] + atypes[i] = PySequence_Fast_GET_ITEM(types_fast, i) cdef int *aformats = NULL if param_formats is not None: aformats = PyMem_Malloc(nparams * sizeof(int *)) for i in range(nparams): - aformats[i] = param_formats[i] + aformats[i] = PySequence_Fast_GET_ITEM(formats_fast, i) return (nparams, atypes, aparams, alenghts, aformats)