From: Daniele Varrazzo Date: Tue, 12 Jan 2021 21:44:28 +0000 (+0100) Subject: Added dumpers row X-Git-Tag: 3.0.dev0~174 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=861e74d8a53882b1fc5215c7aaf39ce2d478ec2c;p=thirdparty%2Fpsycopg.git Added dumpers row Speed up repeated dumps on the same query (e.g. in executemany, in copy, for composite types). Things are a bit iffy around empty list dumping. As a tradeoff, prefer that empty list don't behave too differently from non-empty list, but accept that text and binary work differently (because it doesn't seem possible to pass an array as unknown in binary, whereas in text '{}' works ok). --- diff --git a/docs/adapt-types.rst b/docs/adapt-types.rst index c002fd184..adcd23f5d 100644 --- a/docs/adapt-types.rst +++ b/docs/adapt-types.rst @@ -264,3 +264,6 @@ TODO adaptation .. admonition:: TODO Document the other types + + Document that empty array don't roundtrip in text mode and require + a cast in binary to be used in any context. diff --git a/psycopg3/psycopg3/_transform.py b/psycopg3/psycopg3/_transform.py index 7bd36cadd..a593b2c40 100644 --- a/psycopg3/psycopg3/_transform.py +++ b/psycopg3/psycopg3/_transform.py @@ -61,6 +61,8 @@ class Transformer(AdaptContext): # mapping oid, fmt -> Loader instance self._loaders_cache: Tuple[LoaderCache, LoaderCache] = ({}, {}) + self._row_dumpers: List[Optional["Dumper"]] = [] + # sequence of load functions from value to python # the length of the result columns self._row_loaders: List[LoadFunc] = [] @@ -110,10 +112,17 @@ class Transformer(AdaptContext): ) -> Tuple[List[Any], Tuple[int, ...]]: ps: List[Optional[bytes]] = [None] * len(params) ts = [self._unknown_oid] * len(params) + + dumpers = self._row_dumpers + if not dumpers: + dumpers = self._row_dumpers = [None] * len(params) + for i in range(len(params)): param = params[i] if param is not None: - dumper = self.get_dumper(param, formats[i]) + dumper = dumpers[i] + if not dumper: + dumper = dumpers[i] = self.get_dumper(param, formats[i]) ps[i] = dumper.dump(param) ts[i] = dumper.oid @@ -127,8 +136,6 @@ class Transformer(AdaptContext): else: # TODO: Can be probably generalised to handle other recursive types subobj = self._find_list_element(obj) - if subobj is None: - subobj = "" key = (cls, type(subobj)) try: @@ -143,11 +150,22 @@ class Transformer(AdaptContext): f" to format {Format(format).name}" ) - d = self._dumpers_cache[format][key] = dcls(cls, self) + d = dcls(cls, self) if cls is list: - sub_dumper = self.get_dumper(subobj, format) - cast("BaseListDumper", d).set_sub_dumper(sub_dumper) - + if subobj is not None: + sub_dumper = self.get_dumper(subobj, format) + cast("BaseListDumper", d).set_sub_dumper(sub_dumper) + elif format == Format.TEXT: + # Special case dumping an empty list (or containing no None + # element). In text mode we cast them as unknown, so that + # postgres can cast them automatically to something useful. + # In binary we cannot do it, it doesn't seem there is a + # representation for unknown array, so let's dump it as text[]. + # This means that placeholders receiving a binary array should + # be almost always cast to the target type. 
+ d.oid = self._unknown_oid + + self._dumpers_cache[format][key] = d return d def load_rows(self, row0: int, row1: int) -> List[Tuple[Any, ...]]: diff --git a/psycopg3/psycopg3/types/array.py b/psycopg3/psycopg3/types/array.py index 9c22bad54..1691be7bb 100644 --- a/psycopg3/psycopg3/types/array.py +++ b/psycopg3/psycopg3/types/array.py @@ -15,9 +15,6 @@ from ..proto import AdaptContext class BaseListDumper(Dumper): - - _oid = TEXT_ARRAY_OID - def __init__(self, cls: type, context: Optional[AdaptContext] = None): super().__init__(cls, context) tx = Transformer(context) @@ -104,7 +101,7 @@ class ListBinaryDumper(BaseListDumper): def dump(self, obj: List[Any]) -> bytes: if not obj: - return _struct_head.pack(0, 0, TEXT_OID) + return _struct_head.pack(0, 0, self.sub_oid) data: List[bytes] = [b"", b""] # placeholders to avoid a resize dims: List[int] = [] diff --git a/psycopg3_c/psycopg3_c/_psycopg3/transform.pyx b/psycopg3_c/psycopg3_c/_psycopg3/transform.pyx index 62b9b44d9..ce62c7ddf 100644 --- a/psycopg3_c/psycopg3_c/_psycopg3/transform.pyx +++ b/psycopg3_c/psycopg3_c/_psycopg3/transform.pyx @@ -8,6 +8,7 @@ too many temporary Python objects and performing less memory copying. # Copyright (C) 2020 The Psycopg Team +cimport cython from cpython.ref cimport Py_INCREF from cpython.set cimport PySet_Add, PySet_Contains from cpython.dict cimport PyDict_GetItem, PyDict_SetItem @@ -38,11 +39,19 @@ ctypedef struct pg_result_int: # ...more members, which we ignore +@cython.freelist(16) cdef class RowLoader: - cdef object pyloader + cdef object loadfunc cdef CLoader cloader +@cython.freelist(16) +cdef class RowDumper: + cdef object dumpfunc + cdef object oid + cdef CDumper cdumper + + cdef class Transformer: """ An object that can adapt efficiently between Python and PostgreSQL. @@ -60,6 +69,7 @@ cdef class Transformer: cdef dict _binary_loaders cdef pq.PGresult _pgresult cdef int _nfields, _ntuples + cdef list _row_dumpers cdef list _row_loaders cdef int _unknown_oid @@ -86,6 +96,7 @@ cdef class Transformer: self._binary_loaders = {} self.pgresult = None + self._row_dumpers = None self._row_loaders = [] @property @@ -156,7 +167,7 @@ cdef class Transformer: cdef RowLoader _get_row_loader(self, PyObject *oid, PyObject *fmt): cdef RowLoader row_loader = RowLoader() loader = self._c_get_loader(oid, fmt) - row_loader.pyloader = loader.load + row_loader.loadfunc = loader.load if isinstance(loader, CLoader): row_loader.cloader = loader @@ -175,8 +186,6 @@ cdef class Transformer: key = cls else: subobj = self._find_list_element(obj, set()) - if subobj is None: - subobj = "" key = (cls, type(subobj)) cache = self._binary_dumpers if format else self._text_dumpers @@ -189,14 +198,23 @@ cdef class Transformer: if dcls is None: raise e.ProgrammingError( f"cannot adapt type {cls.__name__}" - f" to format {Format(format).name}" - ) + f" to format {Format(format).name}") d = PyObject_CallFunctionObjArgs( dcls, cls, self, NULL) if cls is list: - sub_dumper = self.get_dumper(subobj, format) - d.set_sub_dumper(sub_dumper) + if subobj is not None: + sub_dumper = self.get_dumper(subobj, format) + d.set_sub_dumper(sub_dumper) + elif format == FORMAT_TEXT: + # Special case dumping an empty list (or containing no None + # element). In text mode we cast them as unknown, so that + # postgres can cast them automatically to something useful. + # In binary we cannot do it, it doesn't seem there is a + # representation for unknown array, so let's dump it as text[]. 
+ # This means that placeholders receiving a binary array should + # be almost always cast to the target type. + d.oid = self._unknown_oid PyDict_SetItem(cache, key, d) return d @@ -208,21 +226,35 @@ cdef class Transformer: cdef tuple ts = PyTuple_New(nparams) cdef object dumped, oid cdef Py_ssize_t size + cdef PyObject *dumper_ptr # borrowed pointer to row dumper + + if self._row_dumpers is None: + self._row_dumpers = PyList_New(nparams) + + dumpers = self._row_dumpers cdef int i for i in range(nparams): param = params[i] if param is not None: - format = formats[i] - dumper = self.get_dumper(param, format) - if isinstance(dumper, CDumper): + dumper_ptr = PyList_GET_ITEM(dumpers, i) + if dumper_ptr == NULL: + format = formats[i] + tmp_dumper = self._get_row_dumper(param, format) + Py_INCREF(tmp_dumper) + PyList_SET_ITEM(dumpers, i, tmp_dumper) + dumper_ptr = tmp_dumper + + oid = (dumper_ptr).oid + if (dumper_ptr).cdumper is not None: dumped = PyByteArray_FromStringAndSize("", 0) - size = (dumper).cdump(param, dumped, 0) + size = (dumper_ptr).cdumper.cdump( + param, dumped, 0) PyByteArray_Resize(dumped, size) - oid = (dumper).oid else: - dumped = dumper.dump(param) - oid = dumper.oid + dumped = PyObject_CallFunctionObjArgs( + (dumper_ptr).dumpfunc, + param, NULL) else: dumped = None oid = self._unknown_oid @@ -234,6 +266,20 @@ cdef class Transformer: return ps, ts + cdef RowDumper _get_row_dumper(self, object param, object fmt): + cdef RowDumper row_dumper = RowDumper() + + dumper = self.get_dumper(param, fmt) + row_dumper.dumpfunc = dumper.dump + row_dumper.oid = dumper.oid + + if isinstance(dumper, CDumper): + row_dumper.cdumper = dumper + else: + row_dumper.cdumper = None + + return row_dumper + def load_rows(self, int row0, int row1) -> List[Tuple[Any, ...]]: if self._pgresult is None: raise e.InterfaceError("result not set") @@ -287,7 +333,7 @@ cdef class Transformer: # TODO: no copy b = attval.value[:attval.len] pyval = PyObject_CallFunctionObjArgs( - (loader).pyloader, b, NULL) + (loader).loadfunc, b, NULL) Py_INCREF(pyval) PyTuple_SET_ITEM(brecord, col, pyval) @@ -326,7 +372,7 @@ cdef class Transformer: # TODO: no copy b = attval.value[:attval.len] pyval = PyObject_CallFunctionObjArgs( - (loader).pyloader, b, NULL) + (loader).loadfunc, b, NULL) Py_INCREF(pyval) PyTuple_SET_ITEM(record, col, pyval) @@ -360,7 +406,7 @@ cdef class Transformer: pyval = (loader).cloader.cload(ptr, size) else: pyval = PyObject_CallFunctionObjArgs( - (loader).pyloader, item, NULL) + (loader).loadfunc, item, NULL) Py_INCREF(pyval) PyTuple_SET_ITEM(out, col, pyval) diff --git a/tests/test_cursor.py b/tests/test_cursor.py index e5be0972f..c267f7d1d 100644 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -203,6 +203,13 @@ def test_executemany_badquery(conn, query): cur.executemany(query, [(10, "hello"), (20, "world")]) +def test_executemany_null_first(conn): + cur = conn.cursor() + cur.executemany("select %s, %s", [[1, None], [3, 4]]) + with pytest.raises(TypeError): + cur.executemany("select %s, %s", [[1, ""], [3, 4]]) + + def test_rowcount(conn): cur = conn.cursor() @@ -272,10 +279,12 @@ def test_query_params_executemany(conn): assert cur.query == b"select $1, $2" assert cur.params == [b"3", b"4"] - with pytest.raises(psycopg3.DataError): + with pytest.raises((psycopg3.DataError, TypeError)): cur.executemany("select %s::int", [[1], ["x"], [2]]) assert cur.query == b"select $1::int" - assert cur.params == [b"x"] + # TODO: cannot really check this: after introduced row_dumpers, this + # fails dumping, 
not query passing. + # assert cur.params == [b"x"] class TestColumn: diff --git a/tests/test_cursor_async.py b/tests/test_cursor_async.py index e3d8c8773..a8945f24d 100644 --- a/tests/test_cursor_async.py +++ b/tests/test_cursor_async.py @@ -202,9 +202,19 @@ async def test_executemany_badquery(aconn, query): await cur.executemany(query, [(10, "hello"), (20, "world")]) +async def test_executemany_null_first(aconn): + cur = await aconn.cursor() + await cur.executemany("select %s, %s", [[1, None], [3, 4]]) + with pytest.raises(TypeError): + await cur.executemany("select %s, %s", [[1, ""], [3, 4]]) + + async def test_rowcount(aconn): cur = await aconn.cursor() + await cur.execute("select 1 from generate_series(1, 0)") + assert cur.rowcount == 0 + await cur.execute("select 1 from generate_series(1, 42)") assert cur.rowcount == 42 @@ -231,6 +241,22 @@ async def test_iter(aconn): assert res == [(1,), (2,), (3,)] +async def test_iter_stop(aconn): + cur = await aconn.cursor() + await cur.execute("select generate_series(1, 3)") + async for rec in cur: + assert rec == (1,) + break + + async for rec in cur: + assert rec == (2,) + break + + assert (await cur.fetchone()) == (3,) + async for rec in cur: + assert False + + async def test_query_params_execute(aconn): cur = await aconn.cursor() assert cur.query is None @@ -258,26 +284,12 @@ async def test_query_params_executemany(aconn): assert cur.query == b"select $1, $2" assert cur.params == [b"3", b"4"] - with pytest.raises(psycopg3.DataError): + with pytest.raises((psycopg3.DataError, TypeError)): await cur.executemany("select %s::int", [[1], ["x"], [2]]) assert cur.query == b"select $1::int" - assert cur.params == [b"x"] - - -async def test_iter_stop(aconn): - cur = await aconn.cursor() - await cur.execute("select generate_series(1, 3)") - async for rec in cur: - assert rec == (1,) - break - - async for rec in cur: - assert rec == (2,) - break - - assert (await cur.fetchone()) == (3,) - async for rec in cur: - assert False + # TODO: cannot really check this: after introduced row_dumpers, this + # fails dumping, not query passing. 
+ # assert cur.params == [b"x"] async def test_str(aconn): diff --git a/tests/types/test_array.py b/tests/types/test_array.py index 91aa8149f..69b2eb76c 100644 --- a/tests/types/test_array.py +++ b/tests/types/test_array.py @@ -148,4 +148,60 @@ def test_empty_list_mix(conn, fmt_in): # pro tip: don't get confused with the types f1, f2 = conn.execute(f"select {ph}, {ph}", (objs, [])).fetchone() assert f1 == objs + if f2 == "{}": + pytest.xfail("text empty arrays don't roundtrip well") assert f2 == [] + + +def test_empty_list_text(conn): + cur = conn.cursor() + cur.execute("create table test (id serial primary key, data date[])") + with conn.transaction(): + try: + cur.execute("insert into test (data) values (%s)", ([],)) + except psycopg3.errors.DatatypeMismatch: + if conn.pgconn.server_version < 100000: + pytest.xfail("on PG 9.6 empty arrays are passed as text") + else: + raise + cur.execute("select data from test") + assert cur.fetchone() == ([],) + + # test untyped list in a filter + cur.execute("select data from test where id = any(%s)", ([1],)) + assert cur.fetchone() + cur.execute("select data from test where id = any(%s)", ([],)) + assert not cur.fetchone() + + +def test_empty_list_binary(conn): + cur = conn.cursor() + cur.execute("create table test (id serial primary key, data date[])") + with pytest.raises(psycopg3.errors.DatatypeMismatch): + with conn.transaction(): + cur.execute("insert into test (data) values (%b)", ([],)) + cur.execute("insert into test (data) values (%b::date[])", ([],)) + + cur.execute("select data from test") + assert cur.fetchone() == ([],) + + # test untyped list in a filter + cur.execute("select data from test where id = any(%b)", ([1],)) + assert cur.fetchone() + with pytest.raises(psycopg3.errors.UndefinedFunction): + with conn.transaction(): + cur.execute("select data from test where id = any(%b)", ([],)) + cur.execute("select data from test where id = any(%b::int[])", ([],)) + assert not cur.fetchone() + + +@pytest.mark.parametrize("fmt_in", [Format.TEXT, Format.BINARY]) +def test_empty_list_after_choice(conn, fmt_in): + ph = "%s" if fmt_in == Format.TEXT else "%b" + cur = conn.cursor() + cur.execute("create table test (id serial primary key, data float[])") + cur.executemany( + f"insert into test (data) values ({ph})", [([1.0],), ([],)] + ) + cur.execute("select data from test order by id") + assert cur.fetchall() == [([1.0],), ([],)] diff --git a/tests/types/test_singletons.py b/tests/types/test_singletons.py index c27c1c7f0..87f073f20 100644 --- a/tests/types/test_singletons.py +++ b/tests/types/test_singletons.py @@ -7,7 +7,7 @@ from psycopg3.adapt import Transformer, Format @pytest.mark.parametrize("fmt_in", [Format.TEXT, Format.BINARY]) @pytest.mark.parametrize("fmt_out", [Format.TEXT, Format.BINARY]) -@pytest.mark.parametrize("b", [True, False, None]) +@pytest.mark.parametrize("b", [True, False]) def test_roundtrip_bool(conn, b, fmt_in, fmt_out): cur = conn.cursor(format=fmt_out) ph = "%s" if fmt_in == Format.TEXT else "%b"
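
The core of the change is the per-position dumper cache used when dumping parameter rows (the Transformer's dump_sequence path): one slot per placeholder, filled lazily on the first non-None value and reused for every following row. Below is a minimal standalone sketch of that pattern in plain Python; RowSerializer and _make_dumper are illustrative names, not psycopg3 API.

    # Sketch only: a lazy per-column dumper cache, outside psycopg3.
    from typing import Any, Callable, List, Optional

    Dumper = Callable[[Any], bytes]

    def _make_dumper(value: Any) -> Dumper:
        # Hypothetical stand-in for Transformer.get_dumper(): choose a dump
        # function from the Python type of the first value seen.
        if isinstance(value, bool):
            return lambda v: b"t" if v else b"f"
        if isinstance(value, int):
            return lambda v: b"%d" % v
        if isinstance(value, str):
            return lambda v: v.encode("utf-8")
        raise TypeError(f"cannot adapt type {type(value).__name__}")

    class RowSerializer:
        """Serialize many parameter rows, creating each dumper only once."""

        def __init__(self) -> None:
            self._row_dumpers: List[Optional[Dumper]] = []

        def dump_sequence(self, params: List[Any]) -> List[Optional[bytes]]:
            if not self._row_dumpers:
                self._row_dumpers = [None] * len(params)
            out: List[Optional[bytes]] = [None] * len(params)
            for i, param in enumerate(params):
                if param is None:
                    continue  # NULLs never need a dumper
                dumper = self._row_dumpers[i]
                if dumper is None:
                    # The first non-None value in this position picks the dumper.
                    dumper = self._row_dumpers[i] = _make_dumper(param)
                out[i] = dumper(param)
            return out

    ser = RowSerializer()
    print(ser.dump_sequence([1, None]))  # [b'1', None]: no dumper chosen for slot 2 yet
    print(ser.dump_sequence([3, 4]))     # [b'3', b'4']: slot 2 dumper created from 4

The tradeoff shows up in the new test_executemany_null_first tests: executemany("select %s, %s", [[1, None], [3, 4]]) works, because the None leaves the second slot empty until the 4 arrives, while [[1, ""], [3, 4]] raises TypeError, because the string dumper cached for the second placeholder is later handed an int. The same effect is why test_query_params_executemany now accepts either DataError or TypeError and no longer checks cur.params after the failure.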
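
For lists, get_dumper keys its cache on (cls, type(subobj)), where subobj is the first non-None element found by _find_list_element; the old fallback of substituting "" is removed, so an empty or all-None list now keys on (list, NoneType) and becomes the case that gets the special oid treatment. A rough sketch of that keying, with find_list_element and dumper_cache_key as illustrative stand-ins (the real _find_list_element also guards against recursive structures):

    from typing import Any, Optional

    def find_list_element(obj: list) -> Optional[Any]:
        # Depth-first search for the first non-None scalar in a nested list.
        for item in obj:
            if isinstance(item, list):
                found = find_list_element(item)
                if found is not None:
                    return found
            elif item is not None:
                return item
        return None

    def dumper_cache_key(obj: Any):
        if not isinstance(obj, list):
            return type(obj)
        return (list, type(find_list_element(obj)))

    print(dumper_cache_key([[None, 1], [2]]))  # (<class 'list'>, <class 'int'>)
    print(dumper_cache_key([]))                # (<class 'list'>, <class 'NoneType'>)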
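
When no element is available, the text-format dumper gets the unknown oid so the server can infer a type from the context, while in binary there is no representation for an unknown array: an empty list is dumped as text[], and the element oid written into the array header now comes from the sub-dumper rather than the hard-coded TEXT_OID. That header is the standard PostgreSQL binary-array prefix of three 32-bit integers (dimensions, null flag, element oid); the struct format string below is an assumption for illustration, not copied from array.py.

    # Sketch of the header an empty binary array dump produces.
    import struct

    _struct_head = struct.Struct("!III")  # ndims, has-null flag, element oid

    def dump_empty_array(sub_oid: int) -> bytes:
        # Zero dimensions, no nulls, element type taken from the sub-dumper
        # (25, the text oid, was the previous hard-coded value).
        return _struct_head.pack(0, 0, sub_oid)

    print(dump_empty_array(25).hex())  # 000000000000000000000019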
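
In practice the new tests pin down the user-visible rule: an untyped empty list passed as a text parameter is inferred by the server from context (on PostgreSQL before 10 the text insert is expected to fail with DatatypeMismatch, per the xfail in test_empty_list_text), while as a binary parameter it arrives as text[] and almost always needs an explicit cast; and with no context at all, the text form comes back as the string "{}" rather than a list, which is the roundtrip caveat the docs TODO refers to. Assuming a cursor cur on the test table from those tests (id serial primary key, data date[]):

    cur.execute("insert into test (data) values (%s)", ([],))          # text: server infers date[]
    cur.execute("insert into test (data) values (%b::date[])", ([],))  # binary: cast required
    cur.execute("select data from test where id = any(%b::int[])", ([],))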