Add StrDumper to dump strings with text oid

author Daniele Varrazzo <daniele.varrazzo@gmail.com>

Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)

committer Daniele Varrazzo <daniele.varrazzo@gmail.com>

Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)
author Daniele Varrazzo <daniele.varrazzo@gmail.com>
Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)
committer Daniele Varrazzo <daniele.varrazzo@gmail.com>
Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)
diff --git a/psycopg/psycopg/types/string.py b/psycopg/psycopg/types/string.py

index 47eb4af073348b796745df2956d3d563a890871f..b99cf4c198229348e39b526856177d579cec568d 100644 (file)
--- a/psycopg/psycopg/types/string.py
+++ b/psycopg/psycopg/types/string.py
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
      from ..pq.abc import Escaping as EscapingProto
  
  
-class _StrDumper(Dumper):
+class _BaseStrDumper(Dumper):
  
      _encoding = "utf-8"
  
@@ -30,7 +30,7 @@ class _StrDumper(Dumper):
                  self._encoding = enc
  
  
-class StrBinaryDumper(_StrDumper):
+class StrBinaryDumper(_BaseStrDumper):
  
      format = Format.BINARY
      _oid = postgres.types["text"].oid
@@ -40,7 +40,7 @@ class StrBinaryDumper(_StrDumper):
          return obj.encode(self._encoding)
  
  
-class StrDumper(_StrDumper):
+class _StrDumper(_BaseStrDumper):
  
      format = Format.TEXT
  
@@ -53,6 +53,33 @@ class StrDumper(_StrDumper):
              return obj.encode(self._encoding)
  
  
+class StrDumper(_StrDumper):
+    """
+    Dumper for strings in text format to the text oid.
+
+    Note that this dumper is not used by default because the type is too strict
+    and PostgreSQL would require an explicit cast to everything that is not a
+    text field. However it is useful where the unknown oid is ambiguous and the
+    text oid is required, for instance with variadic functions.
+    """
+
+    _oid = postgres.types["text"].oid
+
+
+class StrDumperUnknown(_StrDumper):
+    """
+    Dumper for strings in text format to the unknown oid.
+
+    This dumper is the default dumper for strings and allows using Python
+    strings to represent almost every data type. In a few places, however, the
+    unknown oid is not accepted (for instance in variadic functions such as
+    'concat()'). In that case either a cast on the placeholder ('%s::text') or
+    the StrDumper should be used.
+    """
+
+    pass
+
+
  class TextLoader(Loader):
  
      format = Format.TEXT
@@ -163,7 +190,7 @@ def register_default_adapters(context: AdaptContext) -> None:
      # Normally, binary is the default dumper, except for text (which plays
      # the role of unknown, so it can be cast automatically to other types).
      adapters.register_dumper(str, StrBinaryDumper)
-    adapters.register_dumper(str, StrDumper)
+    adapters.register_dumper(str, StrDumperUnknown)
      adapters.register_loader(postgres.INVALID_OID, TextLoader)
      adapters.register_loader("bpchar", TextLoader)
      adapters.register_loader("name", TextLoader)
diff --git a/psycopg_c/psycopg_c/types/string.pyx b/psycopg_c/psycopg_c/types/string.pyx

index 27e062cd7d5881f96d84a6314a11d24b3241c580..950cb109debb47aa3cbfaaa0cfd5487960d941be 100644 (file)
--- a/psycopg_c/psycopg_c/types/string.pyx
+++ b/psycopg_c/psycopg_c/types/string.pyx
@@ -25,7 +25,7 @@ cdef extern from "Python.h":
      const char *PyUnicode_AsUTF8AndSize(unicode obj, Py_ssize_t *size) except NULL
  
  
-cdef class _StrDumper(CDumper):
+cdef class _BaseStrDumper(CDumper):
      cdef int is_utf8
      cdef char *encoding
      cdef bytes _bytes_encoding  # needed to keep `encoding` alive
@@ -70,7 +70,7 @@ cdef class _StrDumper(CDumper):
  
  
  @cython.final
-cdef class StrBinaryDumper(_StrDumper):
+cdef class StrBinaryDumper(_BaseStrDumper):
  
      format = PQ_BINARY
  
@@ -78,8 +78,7 @@ cdef class StrBinaryDumper(_StrDumper):
          self.oid = oids.TEXT_OID
  
  
-@cython.final
-cdef class StrDumper(_StrDumper):
+cdef class _StrDumper(_BaseStrDumper):
  
      format = PQ_TEXT
  
@@ -95,6 +94,18 @@ cdef class StrDumper(_StrDumper):
          return size
  
  
+@cython.final
+cdef class StrDumper(_StrDumper):
+
+    def __cinit__(self):
+        self.oid = oids.TEXT_OID
+
+
+@cython.final
+cdef class StrDumperUnknown(_StrDumper):
+    pass
+
+
  cdef class _TextLoader(CLoader):
  
      format = PQ_TEXT
diff --git a/tests/test_adapt.py b/tests/test_adapt.py

index 88033fc8a8b593ef419ec50e4d50d1d3cf11461a..9480147b60f6b4b913a9389b7fe080487acfc17e 100644 (file)
--- a/tests/test_adapt.py
+++ b/tests/test_adapt.py
@@ -5,6 +5,7 @@ import pytest
  
  import psycopg
  from psycopg import pq, sql, postgres
+from psycopg import errors as e
  from psycopg.adapt import Transformer, PyFormat as Format, Dumper, Loader
  from psycopg._cmodule import _psycopg
  from psycopg.postgres import types as builtins, TEXT_OID
@@ -357,12 +358,19 @@ def test_return_untyped(conn, fmt_in):
      # Currently strings are passed as unknown oid to libpq. This is because
      # unknown is more easily cast by postgres to different types (see jsonb
      # later).
-    cur.execute("select %s, %s", ["hello", 10])
+    cur.execute(f"select %{fmt_in}, %{fmt_in}", ["hello", 10])
      assert cur.fetchone() == ("hello", 10)
  
      cur.execute("create table testjson(data jsonb)")
-    cur.execute("insert into testjson (data) values (%s)", ["{}"])
-    assert cur.execute("select data from testjson").fetchone() == ({},)
+    if fmt_in != Format.BINARY:
+        cur.execute(f"insert into testjson (data) values (%{fmt_in})", ["{}"])
+        assert cur.execute("select data from testjson").fetchone() == ({},)
+    else:
+        # Binary types cannot be passed as unknown oids.
+        with pytest.raises(e.DatatypeMismatch):
+            cur.execute(
+                f"insert into testjson (data) values (%{fmt_in})", ["{}"]
+            )
  
  
  @pytest.mark.parametrize("fmt_in", [Format.AUTO, Format.TEXT, Format.BINARY])
diff --git a/tests/types/test_string.py b/tests/types/test_string.py

index 37fc064c73152c377a4a9282507f6aab9d95fdb7..0fa7a6bd503ef8f9e855e3762dff0ff730e5e6e1 100644 (file)
--- a/tests/types/test_string.py
+++ b/tests/types/test_string.py
@@ -3,6 +3,7 @@ import pytest
  import psycopg
  from psycopg import pq
  from psycopg import sql
+from psycopg import errors as e
  from psycopg.adapt import PyFormat as Format
  from psycopg import Binary
  
@@ -128,6 +129,17 @@ def test_dump_enum(conn, fmt_in):
      assert res == "foo"
  
  
+@pytest.mark.parametrize("fmt_in", [Format.AUTO, Format.TEXT])
+def test_dump_text_oid(conn, fmt_in):
+    conn.autocommit = True
+
+    with pytest.raises(e.IndeterminateDatatype):
+        conn.execute(f"select concat(%{fmt_in}, %{fmt_in})", ["foo", "bar"])
+    conn.adapters.register_dumper(str, psycopg.types.string.StrDumper)
+    cur = conn.execute(f"select concat(%{fmt_in}, %{fmt_in})", ["foo", "bar"])
+    assert cur.fetchone()[0] == "foobar"
+
+
  @pytest.mark.parametrize("fmt_out", [pq.Format.TEXT, pq.Format.BINARY])
  @pytest.mark.parametrize("encoding", ["utf8", "latin9"])
  @pytest.mark.parametrize("typename", ["text", "varchar", "name", "bpchar"])
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>
	Fri, 30 Jul 2021 22:56:49 +0000 (00:56 +0200)
psycopg/psycopg/types/string.py		patch \| blob \| blame \| history
psycopg_c/psycopg_c/types/string.pyx		patch \| blob \| blame \| history
tests/test_adapt.py		patch \| blob \| blame \| history
tests/types/test_string.py		patch \| blob \| blame \| history