Added text copy implementation in C

author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Thu, 7 Jan 2021 04:04:39 +0000 (05:04 +0100)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Fri, 8 Jan 2021 01:32:29 +0000 (02:32 +0100)
diff --git a/psycopg3/psycopg3/copy.py b/psycopg3/psycopg3/copy.py

index 938bd19bd87448cd511816092a6ffb49b7d4f6df..33481fd9a0b1dcb43537d7e6ffea62ec671c0b05 100644 (file)
--- a/psycopg3/psycopg3/copy.py
+++ b/psycopg3/psycopg3/copy.py
@@ -254,7 +254,7 @@ class AsyncCopy(BaseCopy["AsyncConnection"]):
              yield data
  
  
-def format_row_text(
+def _format_row_text(
      row: Sequence[Any], tx: Transformer, out: Optional[bytearray] = None
  ) -> bytearray:
      """Convert a row of objects to the data to send for copy."""
@@ -336,7 +336,9 @@ format_row_binary: FormatFunc
  if pq.__impl__ == "c":
      from psycopg3_c import _psycopg3
  
+    format_row_text = _psycopg3.format_row_text
      format_row_binary = _psycopg3.format_row_binary
  
  else:
+    format_row_text = _format_row_text
      format_row_binary = _format_row_binary
diff --git a/psycopg3_c/psycopg3_c/_psycopg3.pyi b/psycopg3_c/psycopg3_c/_psycopg3.pyi

index 1352ae4801f6e1d80873c1c7d4675ee1df207dc7..167d2560f1b9d49e0cde7643e0793150ebfcceb0 100644 (file)
--- a/psycopg3_c/psycopg3_c/_psycopg3.pyi
+++ b/psycopg3_c/psycopg3_c/_psycopg3.pyi
@@ -36,8 +36,14 @@ class Transformer(proto.AdaptContext):
      ) -> Tuple[Any, ...]: ...
      def get_loader(self, oid: int, format: Format) -> Loader: ...
  
+# Generators
  def connect(conninfo: str) -> proto.PQGenConn[PGconn]: ...
  def execute(pgconn: PGconn) -> proto.PQGen[List[PGresult]]: ...
+
+# Copy support
+def format_row_text(
+    row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None
+) -> bytearray: ...
  def format_row_binary(
      row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None
  ) -> bytearray: ...
diff --git a/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx b/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx

index a29fa96e384728c02306e34f98f568daeeffcbe4..059569f91ff8428a556533f20778d913f5d1736e 100644 (file)
--- a/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
+++ b/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
@@ -73,3 +73,114 @@ def format_row_binary(
      # Resize to the final size
      PyByteArray_Resize(out, pos)
      return out
+
+
+def format_row_text(
+    row: Sequence[Any], tx: Transformer, out: bytearray = None
+) -> bytearray:
+    cdef Py_ssize_t pos  # offset in 'out' where to write
+    if out is None:
+        out = PyByteArray_FromStringAndSize("", 0)
+        pos = 0
+    else:
+        pos = PyByteArray_GET_SIZE(out)
+
+    cdef Py_ssize_t rowlen = len(row)
+
+    if rowlen == 0:
+        PyByteArray_Resize(out, pos + 1)
+        out[pos] = b"\n"
+        return out
+
+    cdef Py_ssize_t size, tmpsize
+    cdef char *buf
+    cdef int i, j
+    cdef unsigned char *target
+    cdef int nesc = 0
+    cdef int with_tab
+
+    for i in range(rowlen):
+        # Include the tab before the data, so it gets included in the resizes
+        with_tab = i > 0
+
+        item = row[i]
+        if item is None:
+            if with_tab:
+                target = <unsigned char *>CDumper.ensure_size(out, pos, 3)
+                memcpy(target, b"\t\\N", 3)
+                pos += 3
+            else:
+                target = <unsigned char *>CDumper.ensure_size(out, pos, 2)
+                memcpy(target, b"\\N", 2)
+                pos += 2
+            continue
+
+        dumper = tx.get_dumper(item, FORMAT_TEXT)
+        if isinstance(dumper, CDumper):
+            # A cdumper can resize if necessary and copy in place
+            size = (<CDumper>dumper).cdump(item, out, pos + with_tab)
+            target = <unsigned char *>PyByteArray_AS_STRING(out) + pos
+        else:
+            # A Python dumper, gotta call it and extract its juices
+            b = dumper.dump(item)
+            _buffer_as_string_and_size(b, &buf, &size)
+            target = <unsigned char *>CDumper.ensure_size(out, pos, size + with_tab)
+            memcpy(target + with_tab, buf, size)
+
+        # Prepend a tab to the data just written
+        if with_tab:
+            target[0] = b"\t"
+            target += 1
+            pos += 1
+
+        # Now from pos to pos + size there is a textual representation: it may
+        # contain chars to escape. Scan to find how many such chars there are.
+        for j in range(size):
+            nesc += copy_escape_char[target[j]]
+
+        # If there is any char to escape, walk backwards pushing the chars
+        # forward and interspersing backslashes.
+        if nesc > 0:
+            tmpsize = size + nesc
+            target = <unsigned char *>CDumper.ensure_size(out, pos, tmpsize)
+            for j in range(size - 1, -1, -1):
+                target[j + nesc] = target[j]
+                if copy_escape_char[target[j]] != 0:
+                    nesc -= 1
+                    target[j + nesc] = b"\\"
+                    if nesc <= 0:
+                        break
+            pos += tmpsize
+        else:
+            pos += size
+
+    # Resize to the final size, add the newline
+    PyByteArray_Resize(out, pos + 1)
+    out[pos] = b"\n"
+    return out
+
+
+cdef extern from *:
+    """
+/* The characters to escape in textual copy */
+/* '\b', '\t', '\n', '\v', '\f', '\r', '\\' */
+static const char copy_escape_char[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+    """
+    const char[256] copy_escape_char
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Thu, 7 Jan 2021 04:04:39 +0000 (05:04 +0100)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Fri, 8 Jan 2021 01:32:29 +0000 (02:32 +0100)
psycopg3/psycopg3/copy.py		patch | blob | blame | history
psycopg3_c/psycopg3_c/_psycopg3.pyi		patch | blob | blame | history
psycopg3_c/psycopg3_c/_psycopg3/copy.pyx		patch | blob | blame | history