From: Daniele Varrazzo <daniele.varrazzo@gmail.com>
Date: Sat, 1 May 2021 23:26:35 +0000 (+0200)
Subject: Use an explicit dumper map for the auto format.
X-Git-Tag: 3.0.dev0~61
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9d49bf880183073eb86bb9db14fb4b5a353350d9;p=thirdparty%2Fpsycopg.git

Use an explicit dumper map for the auto format.

The dumper chosen in auto format is the last one registered, if both the
text and binary ones are available. For all the builtin types the
preferred format is binary, except for strings and for json.

Dumping strings as text by default was handled by a special case; now
they are just handled by registering the text dumper after the binary
one. Json is now text by default, because it's marginally more
efficient.
---

diff --git a/docs/advanced/adapt.rst b/docs/advanced/adapt.rst
index 34f0f2d65..3025dde16 100644
--- a/docs/advanced/adapt.rst
+++ b/docs/advanced/adapt.rst
@@ -85,6 +85,11 @@ right instance.
   (for instance, a Python `int` might be better dumped as a PostgreSQL
   :sql:`integer`, :sql:`bigint`, :sql:`smallint` according to its value).
 
+- According to the placeholder used (``%s``, ``%b``, ``%t``), psycopg3 may
+  pick a binary or a text dumper. When using the ``%s`` "`~Format.AUTO`"
+  format, if the same type has both a text and a binary dumper registered, the
+  last one registered (using `Dumper.register()`) will be selected.
+
 - For every OID returned by the query, the `!Transformer` will instantiate a
   `!Loader`. All the values with the same OID will be converted by the same
   loader.
@@ -169,6 +174,11 @@ Objects involved in types adaptation
         You should call this method on the `Dumper` subclass you create,
         passing the Python type you want to dump as *cls*.
 
+        If two dumpers of different `format` are registered for the same type,
+        the last one registered will be chosen by default when the query
+        doesn't specify a format (i.e. when the value is used with a ``%s``
+        "`~Format.AUTO`" placeholder).
+
         :param cls: The type to manage.
         :type cls: `!type` or `!str`
         :param context: Where the dumper should be used. If `!None` the dumper
diff --git a/docs/basic/params.rst b/docs/basic/params.rst
index 27556a21a..5379ff6b4 100644
--- a/docs/basic/params.rst
+++ b/docs/basic/params.rst
@@ -199,10 +199,10 @@ PostgreSQL has two different ways to represent data type on the wire:
 `~psycopg3.pq.Format.BINARY`, available most of the times. Usually the binary
 format is more efficient to use.
 
-`~psycopg3` can support both the formats of each data type, and normally will
-use the binary type if available, falling back on the text type. The selection
-is automatic and will be performed whenever a query uses a ``%s`` placeholder
-to specify the value.
+`~psycopg3` can support both the formats of each data type. Whenever a value
+is passed to a query using the normal ``%s`` placeholder, the best format
+available is chosen (often, but not always, the binary format is picked as the
+best choice).
 
 If you have a reason to select explicitly the binary format or the text format
 for a value you can use respectively a ``%b`` placeholder or a ``%t``
diff --git a/psycopg3/psycopg3/adapt.py b/psycopg3/psycopg3/adapt.py
index 6d6f5c189..6ba0f798a 100644
--- a/psycopg3/psycopg3/adapt.py
+++ b/psycopg3/psycopg3/adapt.py
@@ -139,7 +139,7 @@ class AdaptersMap(AdaptContext):
     is cheap: a copy is made only on customisation.
     """
 
-    _dumpers: List[Dict[Union[type, str], Type["Dumper"]]]
+    _dumpers: Dict[Format, Dict[Union[type, str], Type["Dumper"]]]
     _loaders: List[Dict[int, Type["Loader"]]]
     types: TypesRegistry
 
@@ -152,14 +152,14 @@ class AdaptersMap(AdaptContext):
         types: Optional[TypesRegistry] = None,
     ):
         if template:
-            self._dumpers = template._dumpers[:]
-            self._own_dumpers = [False, False]
+            self._dumpers = template._dumpers.copy()
+            self._own_dumpers = dict.fromkeys(Format, False)
             self._loaders = template._loaders[:]
             self._own_loaders = [False, False]
             self.types = TypesRegistry(template.types)
         else:
-            self._dumpers = [{}, {}]
-            self._own_dumpers = [True, True]
+            self._dumpers = {fmt: {} for fmt in Format}
+            self._own_dumpers = dict.fromkeys(Format, True)
             self._loaders = [{}, {}]
             self._own_loaders = [True, True]
             self.types = types or TypesRegistry()
@@ -185,12 +185,13 @@ class AdaptersMap(AdaptContext):
             )
 
         dumper = self._get_optimised(dumper)
-        fmt = dumper.format
-        if not self._own_dumpers[fmt]:
-            self._dumpers[fmt] = self._dumpers[fmt].copy()
-            self._own_dumpers[fmt] = True
+        # Register the dumper both as its format and as default
+        for fmt in (Format.from_pq(dumper.format), Format.AUTO):
+            if not self._own_dumpers[fmt]:
+                self._dumpers[fmt] = self._dumpers[fmt].copy()
+                self._own_dumpers[fmt] = True
 
-        self._dumpers[fmt][cls] = dumper
+            self._dumpers[fmt][cls] = dumper
 
     def register_loader(
         self, oid: Union[int, str], loader: Type[Loader]
@@ -219,36 +220,22 @@ class AdaptersMap(AdaptContext):
 
         Raise ProgrammingError if a class is not available.
         """
-        if format == Format.AUTO:
-            # When dumping a string with %s we may refer to any type actually,
-            # but the user surely passed a text format
-            if issubclass(cls, str):
-                dmaps = [self._dumpers[pq.Format.TEXT]]
-            else:
-                dmaps = [
-                    self._dumpers[pq.Format.BINARY],
-                    self._dumpers[pq.Format.TEXT],
-                ]
-        elif format == Format.BINARY:
-            dmaps = [self._dumpers[pq.Format.BINARY]]
-        elif format == Format.TEXT:
-            dmaps = [self._dumpers[pq.Format.TEXT]]
-        else:
+        try:
+            dmap = self._dumpers[format]
+        except KeyError:
             raise ValueError(f"bad dumper format: {format}")
 
         # Look for the right class, including looking at superclasses
         for scls in cls.__mro__:
-            for dmap in dmaps:
-                if scls in dmap:
-                    return dmap[scls]
+            if scls in dmap:
+                return dmap[scls]
 
             # If the adapter is not found, look for its name as a string
             fqn = scls.__module__ + "." + scls.__qualname__
-            for dmap in dmaps:
-                if fqn in dmap:
-                    # Replace the class name with the class itself
-                    d = dmap[scls] = dmap.pop(fqn)
-                    return d
+            if fqn in dmap:
+                # Replace the class name with the class itself
+                d = dmap[scls] = dmap.pop(fqn)
+                return d
 
         raise e.ProgrammingError(
             f"cannot adapt type {cls.__name__}"
diff --git a/psycopg3/psycopg3/types/__init__.py b/psycopg3/psycopg3/types/__init__.py
index 0228f81ee..4ebf711a0 100644
--- a/psycopg3/psycopg3/types/__init__.py
+++ b/psycopg3/psycopg3/types/__init__.py
@@ -137,8 +137,12 @@ from .composite import (
 
 
 def register_default_globals(ctx: AdaptContext) -> None:
-    StringDumper.register(str, ctx)
+    # NOTE: the order the dumpers are registered is relevant.
+    # The last one registered becomes the default for each type.
+    # Normally, binary is the default dumper, except for text (which plays
+    # the role of unknown, so it can be cast automatically to other types).
     StringBinaryDumper.register(str, ctx)
+    StringDumper.register(str, ctx)
     TextLoader.register(INVALID_OID, ctx)
     TextLoader.register("bpchar", ctx)
     TextLoader.register("name", ctx)
@@ -204,10 +208,12 @@ def register_default_globals(ctx: AdaptContext) -> None:
     TimestamptzLoader.register("timestamptz", ctx)
     IntervalLoader.register("interval", ctx)
 
-    JsonDumper.register(Json, ctx)
+    # Currently json binary format is nothing different than text, maybe with
+    # an extra memcopy we can avoid.
     JsonBinaryDumper.register(Json, ctx)
-    JsonbDumper.register(Jsonb, ctx)
+    JsonDumper.register(Json, ctx)
     JsonbBinaryDumper.register(Jsonb, ctx)
+    JsonbDumper.register(Jsonb, ctx)
     JsonLoader.register("json", ctx)
     JsonbLoader.register("jsonb", ctx)
     JsonBinaryLoader.register("json", ctx)
diff --git a/tests/fix_faker.py b/tests/fix_faker.py
index 17cc5803b..0120a7cae 100644
--- a/tests/fix_faker.py
+++ b/tests/fix_faker.py
@@ -155,7 +155,7 @@ class Faker:
             m(spec, g, w)
 
     def get_supported_types(self):
-        dumpers = self.conn.adapters._dumpers[Format.as_pq(self.format)]
+        dumpers = self.conn.adapters._dumpers[self.format]
         rv = set()
         for cls in dumpers.keys():
             if isinstance(cls, str):
diff --git a/tests/test_adapt.py b/tests/test_adapt.py
index 1465803bc..7e1c05f84 100644
--- a/tests/test_adapt.py
+++ b/tests/test_adapt.py
@@ -43,8 +43,8 @@ def test_quote(data, result):
 
 
 def test_dump_connection_ctx(conn):
-    make_dumper("t").register(MyStr, conn)
     make_bin_dumper("b").register(MyStr, conn)
+    make_dumper("t").register(MyStr, conn)
 
     cur = conn.cursor()
     cur.execute("select %s", [MyStr("hello")])
@@ -56,12 +56,12 @@ def test_dump_connection_ctx(conn):
 
 
 def test_dump_cursor_ctx(conn):
-    make_dumper("t").register(str, conn)
     make_bin_dumper("b").register(str, conn)
+    make_dumper("t").register(str, conn)
 
     cur = conn.cursor()
-    make_dumper("tc").register(str, cur)
     make_bin_dumper("bc").register(str, cur)
+    make_dumper("tc").register(str, cur)
 
     cur.execute("select %s", [MyStr("hello")])
     assert cur.fetchone() == ("hellotc",)
@@ -210,17 +210,20 @@ def test_array_dumper(conn, fmt_out):
         assert t.get_dumper(L, fmt_in)
 
 
-def test_string_connection_ctx(conn):
-    make_dumper("t").register(str, conn)
-    make_bin_dumper("b").register(str, conn)
-
+def test_last_dumper_registered_ctx(conn):
     cur = conn.cursor()
-    cur.execute("select %s", ["hello"])
-    assert cur.fetchone() == ("hellot",)  # str prefers text
-    cur.execute("select %t", ["hello"])
-    assert cur.fetchone() == ("hellot",)
-    cur.execute("select %b", ["hello"])
-    assert cur.fetchone() == ("hellob",)
+
+    bd = make_bin_dumper("b")
+    bd.register(str, cur)
+    td = make_dumper("t")
+    td.register(str, cur)
+
+    assert cur.execute("select %s", ["hello"]).fetchone()[0] == "hellot"
+    assert cur.execute("select %t", ["hello"]).fetchone()[0] == "hellot"
+    assert cur.execute("select %b", ["hello"]).fetchone()[0] == "hellob"
+
+    bd.register(str, cur)
+    assert cur.execute("select %s", ["hello"]).fetchone()[0] == "hellob"
 
 
 @pytest.mark.parametrize("fmt_in", [Format.TEXT, Format.BINARY])
@@ -279,9 +282,10 @@ def test_optimised_adapters():
     # All the registered adapters
     reg_adapters = set()
     adapters = (
-        psycopg3.global_adapters._dumpers + psycopg3.global_adapters._loaders
+        list(psycopg3.global_adapters._dumpers.values())
+        + psycopg3.global_adapters._loaders
     )
-    assert len(adapters) == 4
+    assert len(adapters) == 5
     for m in adapters:
         reg_adapters |= set(m.values())