perf: use PySequence_Fast functions instead of slower alternatives

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Mon, 27 Oct 2025 16:27:10 +0000 (16:27 +0000)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Thu, 30 Oct 2025 12:28:54 +0000 (12:28 +0000)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Mon, 27 Oct 2025 16:27:10 +0000 (16:27 +0000)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Thu, 30 Oct 2025 12:28:54 +0000 (12:28 +0000)
diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx

index c14371a333cd3f9f26931adcd1e9e9a6b5fae86d..ff1d213ca1162d7ce2191261ab702aa7e828bda9 100644 (file)
--- a/psycopg_c/psycopg_c/_psycopg/copy.pyx
+++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx
@@ -7,7 +7,8 @@ C optimised functions for the copy system.
  
  from libc.stdint cimport int32_t, uint16_t, uint32_t
  from libc.string cimport memcpy
-from cpython.tuple cimport PyTuple_GET_SIZE
+from cpython.sequence cimport PySequence_Fast, PySequence_Fast_GET_ITEM
+from cpython.sequence cimport PySequence_Fast_GET_SIZE
  from cpython.bytearray cimport PyByteArray_AS_STRING, PyByteArray_FromStringAndSize
  from cpython.bytearray cimport PyByteArray_GET_SIZE, PyByteArray_Resize
  from cpython.memoryview cimport PyMemoryView_FromObject
@@ -23,13 +24,8 @@ cdef int32_t _binary_null = -1
  
  cdef object _format_row_binary(object row, Transformer tx, bytearray out):
      """Convert a row of adapted data to the data to send for binary copy"""
-    cdef Py_ssize_t rowlen
-    if type(row) is list:
-        rowlen = PyList_GET_SIZE(row)
-    elif type(row) is tuple:
-        rowlen = PyTuple_GET_SIZE(row)
-    else:
-        rowlen = len(row)
+    cdef row_fast = PySequence_Fast(row, "'row' is not a valid sequence")
+    cdef Py_ssize_t rowlen = PySequence_Fast_GET_SIZE(row_fast)
      cdef uint16_t berowlen = endian.htobe16(<int16_t>rowlen)
  
      # offset in 'out' where to write
@@ -55,22 +51,23 @@ cdef object _format_row_binary(object row, Transformer tx, bytearray out):
      if PyList_GET_SIZE(dumpers) != rowlen:
          raise e.DataError(f"expected {len(dumpers)} values in row, got {rowlen}")
  
+    cdef PyObject *item
      for i in range(rowlen):
-        item = row[i]
-        if item is None:
+        item = PySequence_Fast_GET_ITEM(row_fast, i)
+        if item is <PyObject *>None:
              _append_binary_none(out, &pos)
              continue
  
          row_dumper = PyList_GET_ITEM(dumpers, i)
          if not row_dumper:
-            row_dumper = tx.get_row_dumper(<PyObject *>item, fmt)
+            row_dumper = tx.get_row_dumper(item, fmt)
              Py_INCREF(<object>row_dumper)
              PyList_SET_ITEM(dumpers, i, <object>row_dumper)
  
          if (<RowDumper>row_dumper).cdumper is not None:
              # A cdumper can resize if necessary and copy in place
              size = (<RowDumper>row_dumper).cdumper.cdump(
-                item, out, pos + sizeof(besize))
+                <object>item, out, pos + sizeof(besize))
              # Also add the size of the item, before the item
              besize = endian.htobe32(<int32_t>size)
              target = PyByteArray_AS_STRING(out)  # might have been moved by cdump
@@ -78,7 +75,7 @@ cdef object _format_row_binary(object row, Transformer tx, bytearray out):
          else:
              # A Python dumper, gotta call it and extract its juices
              b = PyObject_CallFunctionObjArgs(
-                (<RowDumper>row_dumper).dumpfunc, <PyObject *>item, NULL)
+                (<RowDumper>row_dumper).dumpfunc, item, NULL)
              if b is None:
                  _append_binary_none(out, &pos)
                  continue
@@ -115,11 +112,17 @@ cdef int _append_binary_none(bytearray out, Py_ssize_t *pos) except -1:
      return 0
  
  
-cdef object _format_row_text(
-    object row, Py_ssize_t rowlen, Transformer tx, bytearray out
-):
+cdef object _format_row_text(object row, Transformer tx, bytearray out):
      # offset in 'out' where to write
      cdef Py_ssize_t pos = PyByteArray_GET_SIZE(out)
+    cdef row_fast = PySequence_Fast(row, "'row' is not a valid sequence")
+
+    # exit early, if the row is empty
+    cdef Py_ssize_t rowlen = PySequence_Fast_GET_SIZE(row_fast)
+    if rowlen == 0:
+        PyByteArray_Resize(out, pos + 1)
+        out[pos] = b"\n"
+        return
  
      cdef Py_ssize_t size, tmpsize
      cdef char *buf
@@ -134,12 +137,13 @@ cdef object _format_row_text(
      if dumpers and PyList_GET_SIZE(dumpers) != rowlen:
          raise e.DataError(f"expected {len(dumpers)} values in row, got {rowlen}")
  
+    cdef PyObject *item
      for i in range(rowlen):
          # Include the tab before the data, so it gets included in the resizes
          with_tab = i > 0
  
-        item = row[i]
-        if item is None:
+        item = PySequence_Fast_GET_ITEM(row_fast, i)
+        if item == <PyObject *>None:
              _append_text_none(out, &pos, with_tab)
              continue
  
@@ -148,17 +152,17 @@ cdef object _format_row_text(
              row_dumper = PyList_GET_ITEM(dumpers, i)
          else:
              # no pinned dumpers, thus free value dumping
-            row_dumper = tx.get_row_dumper(<PyObject *>item, fmt)
+            row_dumper = tx.get_row_dumper(item, fmt)
  
          if (<RowDumper>row_dumper).cdumper is not None:
              # A cdumper can resize if necessary and copy in place
              size = (<RowDumper>row_dumper).cdumper.cdump(
-                item, out, pos + with_tab)
+                <object>item, out, pos + with_tab)
              target = <unsigned char *>PyByteArray_AS_STRING(out) + pos
          else:
              # A Python dumper, gotta call it and extract its juices
              b = PyObject_CallFunctionObjArgs(
-                (<RowDumper>row_dumper).dumpfunc, <PyObject *>item, NULL)
+                (<RowDumper>row_dumper).dumpfunc, item, NULL)
              if b is None:
                  _append_text_none(out, &pos, with_tab)
                  continue
@@ -206,22 +210,8 @@ cdef object _format_row_text(
  def format_row_text(row: Sequence[Any], tx: Transformer, out: bytearray) -> None:
      cdef Py_ssize_t size = PyByteArray_GET_SIZE(out)
  
-    # exit early, if the row is empty
-    cdef Py_ssize_t rowlen
-    if type(row) is list:
-        rowlen = PyList_GET_SIZE(row)
-    elif type(row) is tuple:
-        rowlen = PyTuple_GET_SIZE(row)
-    else:
-        rowlen = len(row)
-
-    if rowlen == 0:
-        PyByteArray_Resize(out, size + 1)
-        out[size] = b"\n"
-        return
-
      try:
-        _format_row_text(row, rowlen, tx, out)
+        _format_row_text(row, tx, out)
      except Exception as e:
          # Restore the input bytearray to the size it was before entering here
          # to avoid potentially passing junk to copy.
diff --git a/psycopg_c/psycopg_c/_psycopg/transform.pyx b/psycopg_c/psycopg_c/_psycopg/transform.pyx

index b1f530d4f765ce590e888ff5afe7f8e593b462f7..010470d5ee346b5af48464d247dced2df6afbbed 100644 (file)
--- a/psycopg_c/psycopg_c/_psycopg/transform.pyx
+++ b/psycopg_c/psycopg_c/_psycopg/transform.pyx
@@ -350,34 +350,37 @@ cdef class Transformer:
          return ptr
  
      cpdef dump_sequence(self, object params, object formats):
-        # Verify that they are not none and that PyList_GET_ITEM won't blow up
-        cdef Py_ssize_t nparams = len(params)
-        cdef list out = PyList_New(nparams)
-
          cdef int i
          cdef PyObject *dumper_ptr  # borrowed pointer to row dumper
          cdef object dumped
          cdef Py_ssize_t size
  
+        cdef params_fast = PySequence_Fast(
+            params, "'params' is not a valid sequence")
+        cdef formats_fast = PySequence_Fast(
+            formats, "'formats' is not a valid sequence")
+
+        cdef Py_ssize_t nparams = PySequence_Fast_GET_SIZE(params_fast)
+        cdef list out = PyList_New(nparams)
+        cdef PyObject *param
+
          if self._none_oid < 0:
              self._none_oid = self.adapters.get_dumper(NoneType, "s").oid
  
          dumpers = self._row_dumpers
-
          if dumpers:
              for i in range(nparams):
-                param = params[i]
-                if param is not None:
+                param = PySequence_Fast_GET_ITEM(params_fast, i)
+                if param != <PyObject *>None:
                      dumper_ptr = PyList_GET_ITEM(dumpers, i)
                      if (<RowDumper>dumper_ptr).cdumper is not None:
                          dumped = PyByteArray_FromStringAndSize("", 0)
                          size = (<RowDumper>dumper_ptr).cdumper.cdump(
-                            param, <bytearray>dumped, 0)
+                            <object>param, <bytearray>dumped, 0)
                          PyByteArray_Resize(dumped, size)
                      else:
                          dumped = PyObject_CallFunctionObjArgs(
-                            (<RowDumper>dumper_ptr).dumpfunc,
-                            <PyObject *>param, NULL)
+                            (<RowDumper>dumper_ptr).dumpfunc, param, NULL)
                  else:
                      dumped = None
  
@@ -388,21 +391,21 @@ cdef class Transformer:
  
          cdef tuple types = PyTuple_New(nparams)
          cdef list pqformats = PyList_New(nparams)
+        cdef PyObject *format
  
          for i in range(nparams):
-            param = params[i]
-            if param is not None:
-                dumper_ptr = self.get_row_dumper(
-                    <PyObject *>param, <PyObject *>formats[i])
+            param = PySequence_Fast_GET_ITEM(params_fast, i)
+            if param != <PyObject *>None:
+                format = PySequence_Fast_GET_ITEM(formats_fast, i)
+                dumper_ptr = self.get_row_dumper(param, format)
                  if (<RowDumper>dumper_ptr).cdumper is not None:
                      dumped = PyByteArray_FromStringAndSize("", 0)
                      size = (<RowDumper>dumper_ptr).cdumper.cdump(
-                        param, <bytearray>dumped, 0)
+                        <object>param, <bytearray>dumped, 0)
                      PyByteArray_Resize(dumped, size)
                  else:
                      dumped = PyObject_CallFunctionObjArgs(
-                        (<RowDumper>dumper_ptr).dumpfunc,
-                        <PyObject *>param, NULL)
+                        (<RowDumper>dumper_ptr).dumpfunc, param, NULL)
                  oid = (<RowDumper>dumper_ptr).oid
                  fmt = (<RowDumper>dumper_ptr).format
              else:
@@ -526,13 +529,15 @@ cdef class Transformer:
          return record
  
      cpdef object load_sequence(self, record: Sequence[Buffer | None]):
-        cdef Py_ssize_t nfields = len(record)
-        out = PyTuple_New(nfields)
+        cdef record_fast = PySequence_Fast(record, "'record' is not a valid sequence")
+        cdef Py_ssize_t nfields = PySequence_Fast_GET_SIZE(record_fast)
          cdef PyObject *loader  # borrowed RowLoader
          cdef int col
          cdef char *ptr
          cdef Py_ssize_t size
+        cdef PyObject *item
  
+        out = PyTuple_New(nfields)
          row_loaders = self._row_loaders  # avoid an incref/decref per item
          if PyList_GET_SIZE(row_loaders) != nfields:
              raise e.ProgrammingError(
@@ -540,19 +545,17 @@ cdef class Transformer:
                  f" {len(self._row_loaders)} loaders registered")
  
          for col in range(nfields):
-            item = record[col]
-            if item is None:
-                Py_INCREF(None)
-                PyTuple_SET_ITEM(out, col, None)
-                continue
-
-            loader = PyList_GET_ITEM(row_loaders, col)
-            if (<RowLoader>loader).cloader is not None:
-                _buffer_as_string_and_size(item, &ptr, &size)
-                pyval = (<RowLoader>loader).cloader.cload(ptr, size)
+            item = PySequence_Fast_GET_ITEM(record_fast, col)
+            if item == <PyObject *>None:
+                pyval = None
              else:
-                pyval = PyObject_CallFunctionObjArgs(
-                    (<RowLoader>loader).loadfunc, <PyObject *>item, NULL)
+                loader = PyList_GET_ITEM(row_loaders, col)
+                if (<RowLoader>loader).cloader is not None:
+                    _buffer_as_string_and_size(<object>item, &ptr, &size)
+                    pyval = (<RowLoader>loader).cloader.cload(ptr, size)
+                else:
+                    pyval = PyObject_CallFunctionObjArgs(
+                        (<RowLoader>loader).loadfunc, item, NULL)
  
              Py_INCREF(pyval)
              PyTuple_SET_ITEM(out, col, pyval)
diff --git a/psycopg_c/psycopg_c/pq/pgconn.pyx b/psycopg_c/psycopg_c/pq/pgconn.pyx

index 1e1725f03e06c73560245cd5fd6134703c216d27..7590549376e0d2cb41e46894f1af2dce9ea3a8bf 100644 (file)
--- a/psycopg_c/psycopg_c/pq/pgconn.pyx
+++ b/psycopg_c/psycopg_c/pq/pgconn.pyx
@@ -20,6 +20,9 @@ cdef extern from * nogil:
  from libc.stdio cimport fdopen
  from cpython.mem cimport PyMem_Free, PyMem_Malloc
  from cpython.bytes cimport PyBytes_AsString
+from cpython.object cimport PyObject
+from cpython.sequence cimport PySequence_Fast, PySequence_Fast_GET_ITEM
+from cpython.sequence cimport PySequence_Fast_GET_SIZE
  from cpython.memoryview cimport PyMemoryView_FromObject
  
  import sys
@@ -321,12 +324,17 @@ cdef class PGconn:
          _ensure_pgconn(self)
  
          cdef int i
-        cdef Py_ssize_t nparams = len(param_types) if param_types else 0
+        cdef types_fast
+        cdef Py_ssize_t nparams = 0
+        if param_types is not None:
+            types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence")
+            nparams = PySequence_Fast_GET_SIZE(types_fast)
+
          cdef libpq.Oid *atypes = NULL
          if nparams:
              atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid))
              for i in range(nparams):
-                atypes[i] = param_types[i]
+                atypes[i] = <object>PySequence_Fast_GET_ITEM(types_fast, i)
  
          cdef int rv
          with nogil:
@@ -376,12 +384,17 @@ cdef class PGconn:
          _ensure_pgconn(self)
  
          cdef int i
-        cdef Py_ssize_t nparams = len(param_types) if param_types else 0
+        cdef types_fast
+        cdef Py_ssize_t nparams = 0
+        if param_types is not None:
+            types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence")
+            nparams = PySequence_Fast_GET_SIZE(types_fast)
+
          cdef libpq.Oid *atypes = NULL
          if nparams:
              atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid))
              for i in range(nparams):
-                atypes[i] = param_types[i]
+                atypes[i] = <object>PySequence_Fast_GET_ITEM(types_fast, i)
  
          cdef libpq.PGresult *rv
          with nogil:
@@ -751,31 +764,35 @@ cdef void notice_receiver(void *arg, const libpq.PGresult *res_ptr) noexcept wit
  
  
  cdef (Py_ssize_t, libpq.Oid *, char * const*, int *, int *) _query_params_args(
-    list param_values: Sequence[bytes | None] | None,
+    param_values: Sequence[bytes | None] | None,
      param_types: Sequence[int] | None,
-    list param_formats: Sequence[int] | None,
+    param_formats: Sequence[int] | None,
  ) except *:
      cdef int i
  
-    # the PostgresQuery converts the param_types to tuple, so this operation
-    # is most often no-op
-    cdef tuple tparam_types
-    if param_types is not None and not isinstance(param_types, tuple):
-        tparam_types = tuple(param_types)
-    else:
-        tparam_types = param_types
+    cdef values_fast
+    cdef types_fast
+    cdef formats_fast
  
-    cdef Py_ssize_t nparams = len(param_values) if param_values else 0
-    if tparam_types is not None and len(tparam_types) != nparams:
-        raise ValueError(
-            "got %d param_values but %d param_types"
-            % (nparams, len(tparam_types))
-        )
-    if param_formats is not None and len(param_formats) != nparams:
-        raise ValueError(
-            "got %d param_values but %d param_formats"
-            % (nparams, len(param_formats))
-        )
+    cdef Py_ssize_t nparams = 0
+    if param_values is not None:
+        values_fast = PySequence_Fast(param_values, "'param_values' is not a sequence")
+        nparams = PySequence_Fast_GET_SIZE(values_fast)
+
+    if param_types is not None:
+        types_fast = PySequence_Fast(param_types, "'param_types' is not a sequence")
+        if PySequence_Fast_GET_SIZE(types_fast) != nparams:
+            raise ValueError(
+                f"got {nparams} param_values but {len(param_types)} param_types"
+            )
+
+    if param_formats is not None:
+        formats_fast = PySequence_Fast(
+            param_formats, "'param_formats' is not a sequence")
+        if PySequence_Fast_GET_SIZE(formats_fast) != nparams:
+            raise ValueError(
+                f"got {nparams} param_values but {len(param_formats)} param_formats"
+            )
  
      cdef char **aparams = NULL
      cdef int *alenghts = NULL
@@ -786,28 +803,28 @@ cdef (Py_ssize_t, libpq.Oid *, char * const*, int *, int *) _query_params_args(
          aparams = <char **>PyMem_Malloc(nparams * sizeof(char *))
          alenghts = <int *>PyMem_Malloc(nparams * sizeof(int))
          for i in range(nparams):
-            obj = param_values[i]
-            if obj is None:
-                aparams[i] = NULL
-                alenghts[i] = 0
-            else:
+            obj = PySequence_Fast_GET_ITEM(values_fast, i)
+            if obj != <PyObject *>None:
                  # TODO: it is a leak if this fails (but it should only fail
                  # on internal error, e.g. if obj is not a buffer)
-                _buffer_as_string_and_size(obj, &ptr, &length)
+                _buffer_as_string_and_size(<object>obj, &ptr, &length)
                  aparams[i] = ptr
                  alenghts[i] = <int>length
+            else:
+                aparams[i] = NULL
+                alenghts[i] = 0
  
      cdef libpq.Oid *atypes = NULL
-    if tparam_types:
+    if param_types is not None:
          atypes = <libpq.Oid *>PyMem_Malloc(nparams * sizeof(libpq.Oid))
          for i in range(nparams):
-            atypes[i] = tparam_types[i]
+            atypes[i] = <object>PySequence_Fast_GET_ITEM(types_fast, i)
  
      cdef int *aformats = NULL
      if param_formats is not None:
          aformats = <int *>PyMem_Malloc(nparams * sizeof(int *))
          for i in range(nparams):
-            aformats[i] = param_formats[i]
+            aformats[i] = <object>PySequence_Fast_GET_ITEM(formats_fast, i)
  
      return (nparams, atypes, aparams, alenghts, aformats)
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Mon, 27 Oct 2025 16:27:10 +0000 (16:27 +0000)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Thu, 30 Oct 2025 12:28:54 +0000 (12:28 +0000)
psycopg_c/psycopg_c/_psycopg/copy.pyx		patch \| blob \| blame \| history
psycopg_c/psycopg_c/_psycopg/transform.pyx		patch \| blob \| blame \| history
psycopg_c/psycopg_c/pq/pgconn.pyx		patch \| blob \| blame \| history