From: Daniele Varrazzo
Date: Mon, 1 Sep 2025 15:22:34 +0000 (+0200)
Subject: fix(c): fix excessive buffer allocation in binary copy
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a5e0748e4cdb07937b3a173996a86b684357da17;p=thirdparty%2Fpsycopg.git

fix(c): fix excessive buffer allocation in binary copy

Fix #1147
---

diff --git a/docs/news.rst b/docs/news.rst
index 0a804f3a7..60c3cb36a 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -46,6 +46,7 @@ Psycopg 3.3.0 (unreleased)
 Psycopg 3.2.11 (unreleased)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+- Fix bad data on error in binary copy (:ticket:`#1147`).
 - Don't raise warning, and don't leak resources, if a builtin function is used
   as JSON dumper/loader function (:ticket:`#1165`).
 
diff --git a/psycopg_c/psycopg_c/_psycopg/copy.pyx b/psycopg_c/psycopg_c/_psycopg/copy.pyx
index cf29252bf..b130f69d4 100644
--- a/psycopg_c/psycopg_c/_psycopg/copy.pyx
+++ b/psycopg_c/psycopg_c/_psycopg/copy.pyx
@@ -34,10 +34,7 @@ def format_row_binary(
     else:
         pos = PyByteArray_GET_SIZE(out)
 
-    # let's start from a nice chunk
-    # (larger than most fixed size; for variable ones, oh well, we'll resize it)
-    cdef char *target = CDumper.ensure_size(
-        out, pos, sizeof(berowlen) + 20 * rowlen)
+    cdef char *target = CDumper.ensure_size(out, pos, sizeof(berowlen))
 
     # Write the number of fields as network-order 16 bits
     memcpy(target, &berowlen, sizeof(berowlen))
diff --git a/tests/test_copy.py b/tests/test_copy.py
index 3c64196d4..9b5604a9c 100644
--- a/tests/test_copy.py
+++ b/tests/test_copy.py
@@ -643,6 +643,18 @@ def test_description(conn):
     assert cur.description[2].name == "column_3"
 
 
+def test_binary_partial_row(conn):
+    cur = conn.cursor()
+    ensure_table(cur, "id serial primary key, num int4, arr int4[][]")
+    with pytest.raises(
+        psycopg.DataError, match="nested lists have inconsistent depths"
+    ):
+        with cur.copy("copy copy_in (num, arr) from stdin (format binary)") as copy:
+            copy.set_types(["int4", "int4[]"])
+            copy.write_row([15, None])
+            copy.write_row([16, [[None], None]])
+
+
 @pytest.mark.parametrize(
     "format, buffer",
     [(pq.Format.TEXT, "sample_text"), (pq.Format.BINARY, "sample_binary")],
diff --git a/tests/test_copy_async.py b/tests/test_copy_async.py
index 7fa8223d6..ae1fce2ad 100644
--- a/tests/test_copy_async.py
+++ b/tests/test_copy_async.py
@@ -657,6 +657,20 @@ async def test_description(aconn):
     assert cur.description[2].name == "column_3"
 
 
+async def test_binary_partial_row(aconn):
+    cur = aconn.cursor()
+    await ensure_table_async(cur, "id serial primary key, num int4, arr int4[][]")
+    with pytest.raises(
+        psycopg.DataError, match="nested lists have inconsistent depths"
+    ):
+        async with cur.copy(
+            "copy copy_in (num, arr) from stdin (format binary)"
+        ) as copy:
+            copy.set_types(["int4", "int4[]"])
+            await copy.write_row([15, None])
+            await copy.write_row([16, [[None], None]])
+
+
 @pytest.mark.parametrize(
     "format, buffer",
     [(pq.Format.TEXT, "sample_text"), (pq.Format.BINARY, "sample_binary")],