perf(c/array): reuse buffer to unescape array tokens

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Sun, 27 Nov 2022 11:12:50 +0000 (12:12 +0100)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Sat, 10 Dec 2022 13:01:55 +0000 (13:01 +0000)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Sun, 27 Nov 2022 11:12:50 +0000 (12:12 +0100)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Sat, 10 Dec 2022 13:01:55 +0000 (13:01 +0000)
diff --git a/psycopg_c/psycopg_c/types/array.pyx b/psycopg_c/psycopg_c/types/array.pyx

index 9019d0736f65ee46baf0f0e88d2b83753dc97058..a28532c9957492fd46dce227a6c9f91acbdd1469 100644 (file)
--- a/psycopg_c/psycopg_c/types/array.pyx
+++ b/psycopg_c/psycopg_c/types/array.pyx
@@ -8,7 +8,7 @@ import cython
  
  from libc.stdint cimport int32_t, uint32_t
  from libc.string cimport strchr
-from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.mem cimport PyMem_Realloc, PyMem_Free
  from cpython.ref cimport Py_INCREF
  from cpython.list cimport PyList_New,PyList_Append, PyList_GetSlice
  from cpython.list cimport PyList_GET_ITEM, PyList_SET_ITEM, PyList_GET_SIZE
@@ -50,6 +50,10 @@ def array_load_text(
  
      cdef char *end = buf + length
  
+    # Reuse one growable buffer instead of malloc'ing for each escaped element
+    cdef char *scratch = NULL
+    cdef size_t sclen = 0
+
      # Remove the dimensions information prefix (``[...]=``)
      if buf[0] == b"[":
          buf = strchr(buf + 1, b'=')
@@ -62,37 +66,42 @@ def array_load_text(
      rv = a
      cdef PyObject *tmp
  
-    while buf < end:
-        if buf[0] == b'{':
-            if stack:
-                tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
-                PyList_Append(<object>tmp, a)
-            PyList_Append(stack, a)
-            a = []
-            buf += 1
-
-        elif buf[0] == b'}':
-            if not stack:
-                raise e.DataError("malformed array: unexpected '}'")
-            rv = stack.pop()
-            buf += 1
+    try:
+        while buf < end:
+            if buf[0] == b'{':
+                if stack:
+                    tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
+                    PyList_Append(<object>tmp, a)
+                PyList_Append(stack, a)
+                a = []
+                buf += 1
+
+            elif buf[0] == b'}':
+                if not stack:
+                    raise e.DataError("malformed array: unexpected '}'")
+                rv = stack.pop()
+                buf += 1
+
+            elif buf[0] == cdelim:
+                buf += 1
  
-        elif buf[0] == cdelim:
-            buf += 1
+            else:
+                v = _parse_token(
+                    &buf, end, cdelim, &scratch, &sclen, cloader, pyload)
+                if not stack:
+                    raise e.DataError("malformed array: missing initial '{'")
+                tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
+                PyList_Append(<object>tmp, v)
  
-        else:
-            v = parse_token(&buf, end, cdelim, cloader, pyload)
-            if not stack:
-                raise e.DataError("malformed array: missing initial '{'")
-            tmp = PyList_GET_ITEM(stack, PyList_GET_SIZE(stack) - 1)
-            PyList_Append(<object>tmp, v)
+    finally:
+        PyMem_Free(scratch)
  
-    assert rv is not None
      return rv
  
  
-cdef object parse_token(
-    char **bufptr, char *bufend, char cdelim, CLoader cloader, object load
+cdef object _parse_token(
+    char **bufptr, char *bufend, char cdelim,
+    char **scratch, size_t *sclen, CLoader cloader, object load
  ):
      cdef char *start = bufptr[0]
      cdef int has_quotes = start[0] == b'"'
@@ -130,9 +139,9 @@ cdef object parse_token(
              and start[2] == b'L' and start[3] == b'L':
          return None
  
-    cdef char *unesc
      cdef char *src
      cdef char *tgt
+    cdef size_t unesclen
  
      if not num_escapes:
          if cloader is not None:
@@ -142,9 +151,13 @@ cdef object parse_token(
              return load(b)
  
      else:
-        unesc = <char *>PyMem_Malloc(length - num_escapes + 1)
+        unesclen = length - num_escapes + 1
+        if unesclen > sclen[0]:
+            scratch[0] = <char *>PyMem_Realloc(scratch[0], unesclen)
+            sclen[0] = unesclen
+
          src = start
-        tgt = unesc
+        tgt = scratch[0]
          while src < end:
              if src[0] == b'\\':
                  src += 1
@@ -154,14 +167,11 @@ cdef object parse_token(
  
          tgt[0] = b'\x00'
  
-        try:
-            if cloader is not None:
-                return cloader.cload(unesc, length - num_escapes)
-            else:
-                b = unesc[:length - num_escapes]
-                return load(b)
-        finally:
-            PyMem_Free(unesc)
+        if cloader is not None:
+            return cloader.cload(scratch[0], length - num_escapes)
+        else:
+            b = scratch[0][:length - num_escapes]
+            return load(b)
  
  
  @cython.cdivision(True)
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Sun, 27 Nov 2022 11:12:50 +0000 (12:12 +0100)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Sat, 10 Dec 2022 13:01:55 +0000 (13:01 +0000)