From ca2ef566faef175a40980bdea668a373aa0520ce Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Tue, 12 Jan 2021 02:00:59 +0100 Subject: [PATCH] Fixed raising unicode encode errors in C --- psycopg3_c/psycopg3_c/types/text.pyx | 6 +----- tests/types/test_text.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/psycopg3_c/psycopg3_c/types/text.pyx b/psycopg3_c/psycopg3_c/types/text.pyx index 8f85fa536..c790f8f62 100644 --- a/psycopg3_c/psycopg3_c/types/text.pyx +++ b/psycopg3_c/psycopg3_c/types/text.pyx @@ -22,7 +22,7 @@ from psycopg3 import errors as e from psycopg3.encodings import pg2py cdef extern from "Python.h": - const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) + const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) except NULL cdef class _StringDumper(CDumper): @@ -57,10 +57,6 @@ cdef class _StringDumper(CDumper): # Probably the fastest path, but doesn't work with subclasses if PyUnicode_CheckExact(obj): src = PyUnicode_AsUTF8AndSize(obj, &size) - if src == NULL: - # re-encode using a function raising an exception. - # TODO: is there a better way? - PyUnicode_AsUTF8String(obj) else: b = PyUnicode_AsUTF8String(obj) PyBytes_AsStringAndSize(b, &src, &size) diff --git a/tests/types/test_text.py b/tests/types/test_text.py index 095d0d24c..3b6e69ba0 100644 --- a/tests/types/test_text.py +++ b/tests/types/test_text.py @@ -96,6 +96,16 @@ def test_dump_badenc(conn, fmt_in): cur.execute(f"select {ph}::bytea", (eur,)) +@pytest.mark.parametrize("fmt_in", [Format.TEXT, Format.BINARY]) +def test_dump_utf8_badenc(conn, fmt_in): + cur = conn.cursor() + ph = "%s" if fmt_in == Format.TEXT else "%b" + + conn.client_encoding = "utf-8" + with pytest.raises(UnicodeEncodeError): + cur.execute(f"select {ph}", ("\uddf8",)) + + @pytest.mark.parametrize("fmt_out", [Format.TEXT, Format.BINARY]) @pytest.mark.parametrize("encoding", ["utf8", "latin9"]) @pytest.mark.parametrize("typename", ["text", "varchar", "name", "bpchar"]) -- 2.47.2