From 1018c32edadd7d8460db5c6c812d0ef925ccd023 Mon Sep 17 00:00:00 2001
From: =?utf8?q?J=C3=B6rg=20Breitbart?= <jerch@rockborn.de>
Date: Thu, 23 Oct 2025 17:02:25 +0200
Subject: [PATCH] fix(copy): restore selecting loader for each item

In the copy refactoring released in 3.2.11 we changed the behaviour to
choosing a dumper at the first non-null value per column and keeping it.

Turns out more people than expected rely on the more flexible behaviour
of passing strings or types and letting postgres do the conversion.

Restore the previous behaviour; however it's possible to use
`copy.set_types()` to set dumpers per column and gain something in
performance.

Fix #1192
---
 psycopg_c/psycopg_c/_psycopg/copy.pyx | 15 +++----
 tests/test_copy.py                    | 64 +++++++++++++++++++++++++++
 tests/test_copy_async.py              | 64 +++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 9 deletions(-)
diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx
index b9b2119cd..bfabe4901 100644
--- a/psycopg_c/psycopg_c/_psycopg/copy.pyx
+++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx
@@ -128,12 +128,8 @@ cdef object _format_row_text(object row, Py_ssize_t rowlen, Transformer tx, byte
     cdef PyObject *fmt = <PyObject *>PG_TEXT
     cdef PyObject *row_dumper
 
-    # try to get preloaded dumpers from set_types
-    if not tx._row_dumpers:
-        tx._row_dumpers = PyList_New(rowlen)
-
     dumpers = tx._row_dumpers
-    if PyList_GET_SIZE(dumpers) != rowlen:
+    if dumpers and PyList_GET_SIZE(dumpers) != rowlen:
         raise e.DataError(f"expected {len(dumpers)} values in row, got {rowlen}")
 
     for i in range(rowlen):
@@ -145,11 +141,12 @@ cdef object _format_row_text(object row, Py_ssize_t rowlen, Transformer tx, byte
             _append_text_none(out, &pos, with_tab)
             continue
 
-        row_dumper = PyList_GET_ITEM(dumpers, i)
-        if not row_dumper:
+        if dumpers:
+            # pinned dumpers from set_types are authoritative
+            row_dumper = PyList_GET_ITEM(dumpers, i)
+        else:
+            # no pinned dumpers, thus free value dumping
             row_dumper = tx.get_row_dumper(<PyObject *>item, fmt)
-            Py_INCREF(<object>row_dumper)
-            PyList_SET_ITEM(dumpers, i, <object>row_dumper)
 
         if (<RowDumper>row_dumper).cdumper is not None:
             # A cdumper can resize if necessary and copy in place
diff --git a/tests/test_copy.py b/tests/test_copy.py
index e78636819..4627fa23a 100644
--- a/tests/test_copy.py
+++ b/tests/test_copy.py
@@ -487,6 +487,70 @@ def test_copy_in_records_binary(conn, format):
     assert data == [(1, None, "hello"), (2, None, "world")]
 
 
+def test_copy_in_text_no_pinning(conn):
+    cur = conn.cursor()
+    cols = [
+        "col1 serial primary key",
+        "col2 int",
+        "col3 int",
+        "col4 double precision",
+        "col5 double precision",
+    ]
+    ensure_table(cur, ",".join(cols))
+
+    with cur.copy(
+        "copy copy_in (col2,col3,col4,col5) from stdin (format text)"
+    ) as copy:
+        # no pinned dumpers: type check & cast done on postgres side
+        # allows to mix castable reprs more freely
+        # slower than pinned, late errors from postgres jeopardizing copy cursor
+        copy.write_row([1, "2", 3, "4.1"])
+        copy.write_row(["1", 2, 3.0, 4])
+
+    cur.execute("select col2,col3,col4,col5 from copy_in order by 1")
+    data = cur.fetchall()
+    assert data == [(1, 2, 3, 4.1), (1, 2, 3, 4)]
+
+
+def test_copy_in_text_pinned(conn):
+    # FIXME: this test works currently only in c,
+    # as c/python dumpers differ in what they accept as valid input
+    # here: python int & float text dumpers always allow str as input
+    from psycopg._cmodule import _psycopg
+
+    if not _psycopg:
+        return
+    cur = conn.cursor()
+    cols = [
+        "col1 serial primary key",
+        "col2 int",
+        "col3 int",
+        "col4 double precision",
+        "col5 double precision",
+    ]
+    ensure_table(cur, ",".join(cols))
+
+    with cur.copy(
+        "copy copy_in (col2,col3,col4,col5) from stdin (format text)"
+    ) as copy:
+        # pinned dumpers from set_types: type check & cast done on psycopg side
+        # much faster, allows catching errors early without postgres involvement
+        copy.set_types(["int4", "int4", "double precision", "double precision"])
+        copy.write_row([1, 2, 3, 4.1])
+        with pytest.raises(
+            (e.DataError, TypeError)
+        ):  # FIXME: should errors from dumpers be harmonized?
+            copy.write_row([1.0, 2, 3, 4.1])
+        with pytest.raises((e.DataError, TypeError)):
+            copy.write_row([1, "2", 3, 4.1])
+        with pytest.raises((e.DataError, TypeError)):
+            copy.write_row([1, 2, 3, "4.1"])
+
+    cur.execute("select col2,col3,col4,col5 from copy_in order by 1")
+    data = cur.fetchall()
+    assert data == [(1, 2, 3, 4.1)]
+
+
 def test_copy_in_allchars(conn):
     cur = conn.cursor()
     ensure_table(cur, sample_tabledef)
diff --git a/tests/test_copy_async.py b/tests/test_copy_async.py
index 1c9998b31..cbf38843f 100644
--- a/tests/test_copy_async.py
+++ b/tests/test_copy_async.py
@@ -501,6 +501,70 @@ async def test_copy_in_records_binary(aconn, format):
     assert data == [(1, None, "hello"), (2, None, "world")]
 
 
+async def test_copy_in_text_no_pinning(aconn):
+    cur = aconn.cursor()
+    cols = [
+        "col1 serial primary key",
+        "col2 int",
+        "col3 int",
+        "col4 double precision",
+        "col5 double precision",
+    ]
+    await ensure_table_async(cur, ",".join(cols))
+
+    async with cur.copy(
+        "copy copy_in (col2,col3,col4,col5) from stdin (format text)"
+    ) as copy:
+        # no pinned dumpers: type check & cast done on postgres side
+        # allows to mix castable reprs more freely
+        # slower than pinned, late errors from postgres jeopardizing copy cursor
+        await copy.write_row([1, "2", 3, "4.1"])
+        await copy.write_row(["1", 2, 3.0, 4])
+
+    await cur.execute("select col2,col3,col4,col5 from copy_in order by 1")
+    data = await cur.fetchall()
+    assert data == [(1, 2, 3, 4.1), (1, 2, 3, 4)]
+
+
+async def test_copy_in_text_pinned(aconn):
+    # FIXME: this test works currently only in c,
+    # as c/python dumpers differ in what they accept as valid input
+    # here: python int & float text dumpers always allow str as input
+    from psycopg._cmodule import _psycopg
+
+    if not _psycopg:
+        return
+    cur = aconn.cursor()
+    cols = [
+        "col1 serial primary key",
+        "col2 int",
+        "col3 int",
+        "col4 double precision",
+        "col5 double precision",
+    ]
+    await ensure_table_async(cur, ",".join(cols))
+
+    async with cur.copy(
+        "copy copy_in (col2,col3,col4,col5) from stdin (format text)"
+    ) as copy:
+        # pinned dumpers from set_types: type check & cast done on psycopg side
+        # much faster, allows catching errors early without postgres involvement
+        copy.set_types(["int4", "int4", "double precision", "double precision"])
+        await copy.write_row([1, 2, 3, 4.1])
+        with pytest.raises(
+            (e.DataError, TypeError)
+        ):  # FIXME: should errors from dumpers be harmonized?
+            await copy.write_row([1.0, 2, 3, 4.1])
+        with pytest.raises((e.DataError, TypeError)):
+            await copy.write_row([1, "2", 3, 4.1])
+        with pytest.raises((e.DataError, TypeError)):
+            await copy.write_row([1, 2, 3, "4.1"])
+
+    await cur.execute("select col2,col3,col4,col5 from copy_in order by 1")
+    data = await cur.fetchall()
+    assert data == [(1, 2, 3, 4.1)]
+
+
 async def test_copy_in_allchars(aconn):
     cur = aconn.cursor()
     await ensure_table_async(cur, sample_tabledef)
-- 
2.47.3