From: Daniele Varrazzo <daniele.varrazzo@gmail.com>
Date: Thu, 7 Jan 2021 04:04:39 +0000 (+0100)
Subject: Added text copy implementation in C
X-Git-Tag: 3.0.dev0~202
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8e7fff159099c112fcaa140a2e3f55f84241f33c;p=thirdparty%2Fpsycopg.git

Added text copy implementation in C
---

diff --git a/psycopg3/psycopg3/copy.py b/psycopg3/psycopg3/copy.py
index 938bd19bd..33481fd9a 100644
--- a/psycopg3/psycopg3/copy.py
+++ b/psycopg3/psycopg3/copy.py
@@ -254,7 +254,7 @@ class AsyncCopy(BaseCopy["AsyncConnection"]):
             yield data
 
 
-def format_row_text(
+def _format_row_text(
     row: Sequence[Any], tx: Transformer, out: Optional[bytearray] = None
 ) -> bytearray:
     """Convert a row of objects to the data to send for copy."""
@@ -336,7 +336,9 @@ format_row_binary: FormatFunc
 if pq.__impl__ == "c":
     from psycopg3_c import _psycopg3
 
+    format_row_text = _psycopg3.format_row_text  # from the C extension
     format_row_binary = _psycopg3.format_row_binary
 
 else:
+    format_row_text = _format_row_text  # pure Python implementation
     format_row_binary = _format_row_binary
diff --git a/psycopg3_c/psycopg3_c/_psycopg3.pyi b/psycopg3_c/psycopg3_c/_psycopg3.pyi
index 1352ae480..167d2560f 100644
--- a/psycopg3_c/psycopg3_c/_psycopg3.pyi
+++ b/psycopg3_c/psycopg3_c/_psycopg3.pyi
@@ -36,8 +36,14 @@ class Transformer(proto.AdaptContext):
     ) -> Tuple[Any, ...]: ...
     def get_loader(self, oid: int, format: Format) -> Loader: ...
 
+# Generators
 def connect(conninfo: str) -> proto.PQGenConn[PGconn]: ...
 def execute(pgconn: PGconn) -> proto.PQGen[List[PGresult]]: ...
+
+# Copy support
+def format_row_text(
+    row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None
+) -> bytearray: ...  # text COPY line for `row`; appended to `out` if given
 def format_row_binary(
     row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None
 ) -> bytearray: ...
diff --git a/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx b/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
index a29fa96e3..059569f91 100644
--- a/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
+++ b/psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
@@ -73,3 +73,136 @@ def format_row_binary(
     # Resize to the final size
     PyByteArray_Resize(out, pos)
     return out
+
+
+def format_row_text(
+    row: Sequence[Any], tx: Transformer, out: bytearray = None
+) -> bytearray:
+    """Convert a row of objects to a line of data for text-format COPY.
+
+    Each item of *row* is dumped in text format with a dumper obtained from
+    *tx*; `None` items are written as the NULL marker (backslash + "N").
+    Fields are separated by a tab and the line is terminated by a newline.
+    Backspace, tab, newline, vertical tab, form feed, carriage return and
+    backslash in the dumped data are replaced by a backslash followed by
+    "b", "t", "n", "v", "f", "r" and a backslash respectively.
+
+    If *out* is `None` write into a new bytearray, otherwise append the line
+    to *out*. Return the bytearray written.
+    """
+    cdef Py_ssize_t pos  # offset in 'out' where to write
+    if out is None:
+        out = PyByteArray_FromStringAndSize("", 0)
+        pos = 0
+    else:
+        pos = PyByteArray_GET_SIZE(out)
+
+    cdef Py_ssize_t rowlen = len(row)
+
+    if rowlen == 0:
+        # An empty row is only the line terminator
+        PyByteArray_Resize(out, pos + 1)
+        out[pos] = b"\n"
+        return out
+
+    cdef Py_ssize_t size, tmpsize
+    cdef char *buf
+    # Same type as the lengths they iterate on, to avoid truncation
+    cdef Py_ssize_t i, j
+    cdef unsigned char *target
+    cdef Py_ssize_t nesc = 0  # chars still to escape in the current field
+    cdef int with_tab
+
+    for i in range(rowlen):
+        # Include the tab before the data, so it gets included in the resizes
+        with_tab = i > 0
+
+        item = row[i]
+        if item is None:
+            if with_tab:
+                target = <unsigned char *>CDumper.ensure_size(out, pos, 3)
+                memcpy(target, b"\t\\N", 3)
+                pos += 3
+            else:
+                target = <unsigned char *>CDumper.ensure_size(out, pos, 2)
+                memcpy(target, b"\\N", 2)
+                pos += 2
+            continue
+
+        dumper = tx.get_dumper(item, FORMAT_TEXT)
+        if isinstance(dumper, CDumper):
+            # A cdumper can resize if necessary and copy in place
+            size = (<CDumper>dumper).cdump(item, out, pos + with_tab)
+            target = <unsigned char *>PyByteArray_AS_STRING(out) + pos
+        else:
+            # A Python dumper, gotta call it and extract its juices
+            b = dumper.dump(item)
+            _buffer_as_string_and_size(b, &buf, &size)
+            target = <unsigned char *>CDumper.ensure_size(out, pos, size + with_tab)
+            memcpy(target + with_tab, buf, size)
+
+        # Prepend a tab to the data just written
+        if with_tab:
+            target[0] = b"\t"
+            target += 1
+            pos += 1
+
+        # Now from pos to pos + size there is a textual representation: it may
+        # contain chars to escape. Scan to find how many such chars there are.
+        for j in range(size):
+            if copy_escape_char[target[j]] != 0:
+                nesc += 1
+
+        # If there is any char to escape, walk backwards pushing the chars
+        # forward and interspersing backslashes. Each escaped char is replaced
+        # by its letter (e.g. a newline becomes backslash + "n"), which is the
+        # representation the PostgreSQL COPY documentation recommends.
+        if nesc > 0:
+            tmpsize = size + nesc
+            # Make room for the backslashes and take the write pointer again
+            target = <unsigned char *>CDumper.ensure_size(out, pos, tmpsize)
+            for j in range(size - 1, -1, -1):
+                if copy_escape_char[target[j]] != 0:
+                    target[j + nesc] = copy_escape_char[target[j]]
+                    nesc -= 1
+                    target[j + nesc] = b"\\"
+                    # What precedes the leftmost escape is already in place
+                    if nesc <= 0:
+                        break
+                else:
+                    target[j + nesc] = target[j]
+            pos += tmpsize
+        else:
+            pos += size
+
+    # Resize to the final size, add the newline
+    PyByteArray_Resize(out, pos + 1)
+    out[pos] = b"\n"
+    return out
+
+
+cdef extern from *:
+    """
+/* The characters to escape in textual copy */
+/* '\b', '\t', '\n', '\v', '\f', '\r', '\\' */
+/* For each byte: the char to write after the backslash, 0 if not escaped */
+static const char copy_escape_char[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 98, 116, 110, 118, 102, 114, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+    """
+    const char[256] copy_escape_char