From: Daniele Varrazzo Date: Thu, 7 Jan 2021 04:04:39 +0000 (+0100) Subject: Added text copy implementation in C X-Git-Tag: 3.0.dev0~202 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8e7fff159099c112fcaa140a2e3f55f84241f33c;p=thirdparty%2Fpsycopg.git Added text copy implementation in C --- diff --git a/psycopg3/psycopg3/copy.py b/psycopg3/psycopg3/copy.py index 938bd19bd..33481fd9a 100644 --- a/psycopg3/psycopg3/copy.py +++ b/psycopg3/psycopg3/copy.py @@ -254,7 +254,7 @@ class AsyncCopy(BaseCopy["AsyncConnection"]): yield data -def format_row_text( +def _format_row_text( row: Sequence[Any], tx: Transformer, out: Optional[bytearray] = None ) -> bytearray: """Convert a row of objects to the data to send for copy.""" @@ -336,7 +336,9 @@ format_row_binary: FormatFunc if pq.__impl__ == "c": from psycopg3_c import _psycopg3 + format_row_text = _psycopg3.format_row_text format_row_binary = _psycopg3.format_row_binary else: + format_row_text = _format_row_text format_row_binary = _format_row_binary diff --git a/psycopg3_c/psycopg3_c/_psycopg3.pyi b/psycopg3_c/psycopg3_c/_psycopg3.pyi index 1352ae480..167d2560f 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3.pyi +++ b/psycopg3_c/psycopg3_c/_psycopg3.pyi @@ -36,8 +36,14 @@ class Transformer(proto.AdaptContext): ) -> Tuple[Any, ...]: ... def get_loader(self, oid: int, format: Format) -> Loader: ... +# Generators def connect(conninfo: str) -> proto.PQGenConn[PGconn]: ... def execute(pgconn: PGconn) -> proto.PQGen[List[PGresult]]: ... + +# Copy support +def format_row_text( + row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None +) -> bytearray: ... def format_row_binary( row: Sequence[Any], tx: proto.Transformer, out: Optional[bytearray] = None ) -> bytearray: ... 
# Text-format COPY support, from psycopg3_c/psycopg3_c/_psycopg3/copy.pyx
# (placed right after format_row_binary()).
#
# NOTE(review): the casts written as <...> below were missing from the
# flattened text this code was recovered from (angle-bracket spans appear to
# have been stripped by the extraction); confirm them against the upstream
# file.


def format_row_text(
    row: Sequence[Any], tx: Transformer, out: bytearray = None
) -> bytearray:
    """Convert a row of objects to the data to send for copy.

    Every item of *row* is dumped in text format using the dumper returned
    by *tx*. Fields are separated by a tab, None is written as a backslash
    followed by ``N``, and the row is terminated by a newline. Every byte
    flagged in the ``copy_escape_char`` table is prefixed with a backslash.

    :param row: the objects to serialise, one per field.
    :param tx: transformer used to look up the dumper of each item.
    :param out: buffer to append the row to; if None a new bytearray is
        created.
    :return: *out* (or the newly created buffer) with the row appended.
    """
    cdef Py_ssize_t pos  # offset in 'out' where to write
    cdef Py_ssize_t rowlen = len(row)
    cdef Py_ssize_t size, tmpsize
    # Indexes and counters have the same type as the sizes they are compared
    # with: a C int would be truncated on very large rows or fields.
    cdef Py_ssize_t i, j
    cdef Py_ssize_t nesc
    cdef char *buf
    cdef unsigned char *target
    cdef int with_tab

    if out is None:
        out = PyByteArray_FromStringAndSize("", 0)
        pos = 0
    else:
        pos = PyByteArray_GET_SIZE(out)

    # An empty row skips the loop and is emitted as a bare newline.
    for i in range(rowlen):
        # Include the tab before the data, so it gets included in the resizes
        with_tab = i > 0

        item = row[i]
        if item is None:
            if with_tab:
                target = <unsigned char *>CDumper.ensure_size(out, pos, 3)
                memcpy(target, b"\t\\N", 3)
                pos += 3
            else:
                target = <unsigned char *>CDumper.ensure_size(out, pos, 2)
                memcpy(target, b"\\N", 2)
                pos += 2
            continue

        dumper = tx.get_dumper(item, FORMAT_TEXT)
        if isinstance(dumper, CDumper):
            # A cdumper can resize if necessary and copy in place
            size = (<CDumper>dumper).cdump(item, out, pos + with_tab)
            # Look the buffer up again after the dump: 'out' may have been
            # resized by the call above.
            target = <unsigned char *>PyByteArray_AS_STRING(out) + pos
        else:
            # A Python dumper, gotta call it and extract its juices
            b = dumper.dump(item)
            _buffer_as_string_and_size(b, &buf, &size)
            target = <unsigned char *>CDumper.ensure_size(
                out, pos, size + with_tab)
            memcpy(target + with_tab, buf, size)

        # Prepend a tab to the data just written
        if with_tab:
            target[0] = b"\t"
            target += 1
            pos += 1

        # Now from pos to pos + size there is a textual representation: it may
        # contain chars to escape. Scan to find how many such chars there are.
        nesc = 0
        for j in range(size):
            nesc += copy_escape_char[target[j]]

        # If there is any char to escape, walk backwards pushing the chars
        # forward and interspersing backslashes. Going backwards lets the
        # expansion happen in place without overwriting unread bytes.
        # NOTE(review): this emits a backslash followed by the raw byte, not
        # the symbolic sequences (backslash-n, backslash-r, ...); check the
        # PostgreSQL COPY text-format documentation on which form is preferred.
        if nesc > 0:
            tmpsize = size + nesc
            target = <unsigned char *>CDumper.ensure_size(out, pos, tmpsize)
            for j in range(size - 1, -1, -1):
                target[j + nesc] = target[j]
                if copy_escape_char[target[j]] != 0:
                    nesc -= 1
                    target[j + nesc] = b"\\"
                    # Everything before the first escaped char is already
                    # in its final position.
                    if nesc <= 0:
                        break
            pos += tmpsize
        else:
            pos += size

    # Resize to the final size, add the newline
    PyByteArray_Resize(out, pos + 1)
    out[pos] = b"\n"
    return out


# Lookup table indexed by byte value (256 entries): non-zero for the bytes
# that format_row_text() prefixes with a backslash. The entries set are
# 8-13 (backspace, tab, newline, vertical tab, form feed, carriage return)
# and 92 (backslash).
cdef extern from *:
    """
/* The characters to escape in textual copy */
/* '\b', '\t', '\n', '\v', '\f', '\r', '\\' */
static const char copy_escape_char[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
    """
    const char[256] copy_escape_char