From: Daniele Varrazzo Date: Mon, 1 Sep 2025 15:22:34 +0000 (+0200) Subject: fix(c): fix excessive buffer allocation in binary copy X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2c891a1a3cbd4fd9cfacfa17447fdf0bbea2601a;p=thirdparty%2Fpsycopg.git fix(c): fix excessive buffer allocation in binary copy Fix #1147 --- diff --git a/docs/news.rst b/docs/news.rst index 973f62307..705a3f1d0 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -39,6 +39,7 @@ Psycopg 3.2.10 (unreleased) - Fix memory leak when lambda/local functions are used as argument for `~.psycopg.types.json.set_json_dumps()`, `~.psycopg.types.json.set_json_loads()` (:ticket:`#1108`). +- Fix bad data on error in binary copy (:ticket:`#1147`). - Fix `psycopg_binary.__version__`. diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx index cf29252bf..b130f69d4 100644 --- a/psycopg_c/psycopg_c/_psycopg/copy.pyx +++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx @@ -34,10 +34,7 @@ def format_row_binary( else: pos = PyByteArray_GET_SIZE(out) - # let's start from a nice chunk - # (larger than most fixed size; for variable ones, oh well, we'll resize it) - cdef char *target = CDumper.ensure_size( - out, pos, sizeof(berowlen) + 20 * rowlen) + cdef char *target = CDumper.ensure_size(out, pos, sizeof(berowlen)) # Write the number of fields as network-order 16 bits memcpy(target, &berowlen, sizeof(berowlen)) diff --git a/tests/test_copy.py b/tests/test_copy.py index 3c64196d4..9b5604a9c 100644 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -643,6 +643,18 @@ def test_description(conn): assert cur.description[2].name == "column_3" +def test_binary_partial_row(conn): + cur = conn.cursor() + ensure_table(cur, "id serial primary key, num int4, arr int4[][]") + with pytest.raises( + psycopg.DataError, match="nested lists have inconsistent depths" + ): + with cur.copy("copy copy_in (num, arr) from stdin (format binary)") as copy: + copy.set_types(["int4", "int4[]"]) + copy.write_row([15, None]) + copy.write_row([16, [[None], None]]) + + @pytest.mark.parametrize( "format, buffer", [(pq.Format.TEXT, "sample_text"), (pq.Format.BINARY, "sample_binary")], diff --git a/tests/test_copy_async.py b/tests/test_copy_async.py index 7fa8223d6..ae1fce2ad 100644 --- a/tests/test_copy_async.py +++ b/tests/test_copy_async.py @@ -657,6 +657,20 @@ async def test_description(aconn): assert cur.description[2].name == "column_3" +async def test_binary_partial_row(aconn): + cur = aconn.cursor() + await ensure_table_async(cur, "id serial primary key, num int4, arr int4[][]") + with pytest.raises( + psycopg.DataError, match="nested lists have inconsistent depths" + ): + async with cur.copy( + "copy copy_in (num, arr) from stdin (format binary)" + ) as copy: + copy.set_types(["int4", "int4[]"]) + await copy.write_row([15, None]) + await copy.write_row([16, [[None], None]]) + + @pytest.mark.parametrize( "format, buffer", [(pq.Format.TEXT, "sample_text"), (pq.Format.BINARY, "sample_binary")],