]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
improve detection / errors for unknown hashability w/ unique
authorMike Bayer <mike_mp@zzzcomputing.com>
Wed, 11 Oct 2023 22:06:17 +0000 (18:06 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Thu, 12 Oct 2023 14:58:00 +0000 (10:58 -0400)
Fixed issue where calling :meth:`_engine.Result.unique` with a new-style
:func:`.select` query in the ORM, where one or more columns yields values
that are of "unknown hashability", typically when using JSON functions like
``func.json_build_object()`` without providing a type, would fail
internally when the returned values were not actually hashable. The
behavior is repaired to test the objects as they are received for
hashability in this case, raising an informative error message if not. Note
that for values of "known unhashability", such as when the :class:`.JSON`
or :class:`.ARRAY` types are used directly, an informative error message
was already raised.

The "hashabiltiy testing" fix here is applied to legacy :class:`.Query` as
well, however in the legacy case, :meth:`_engine.Result.unique` is used for
nearly all queries, so no new warning is emitted here; the legacy behavior
of falling back to using ``id()`` in this case is maintained, with the
improvement that an unknown type that turns out to be hashable will now be
uniqufied, whereas previously it would not.

Fixes: #10459
Change-Id: I7317d7ee10c129fc1ab3beeb17c66f34eb063d17

doc/build/changelog/unreleased_20/10459.rst [new file with mode: 0644]
lib/sqlalchemy/orm/loading.py
test/orm/test_query.py

diff --git a/doc/build/changelog/unreleased_20/10459.rst b/doc/build/changelog/unreleased_20/10459.rst
new file mode 100644 (file)
index 0000000..8c2cb0d
--- /dev/null
@@ -0,0 +1,21 @@
+.. change::
+    :tags: bug, orm
+    :tickets: 10459
+
+    Fixed issue where calling :meth:`_engine.Result.unique` with a new-style
+    :func:`.select` query in the ORM, where one or more columns yields values
+    that are of "unknown hashability", typically when using JSON functions like
+    ``func.json_build_object()`` without providing a type, would fail
+    internally when the returned values were not actually hashable. The
+    behavior is repaired to test the objects as they are received for
+    hashability in this case, raising an informative error message if not. Note
+    that for values of "known unhashability", such as when the :class:`.JSON`
+    or :class:`.ARRAY` types are used directly, an informative error message
+    was already raised.
+
+    The "hashabiltiy testing" fix here is applied to legacy :class:`.Query` as
+    well, however in the legacy case, :meth:`_engine.Result.unique` is used for
+    nearly all queries, so no new warning is emitted here; the legacy behavior
+    of falling back to using ``id()`` in this case is maintained, with the
+    improvement that an unknown type that turns out to be hashable will now be
+    uniqufied, whereas previously it would not.
index fb62c2f46edcc652e0056914bc225cae6d9af9b4..cae6f0be21cf5e069eb8de1fcab997794bd3c5e2 100644 (file)
@@ -137,40 +137,64 @@ def instances(cursor: CursorResult[Any], context: QueryContext) -> Result[Any]:
             "Can't use the ORM yield_per feature in conjunction with unique()"
         )
 
-    def _not_hashable(datatype):
-        def go(obj):
-            raise sa_exc.InvalidRequestError(
-                "Can't apply uniqueness to row tuple containing value of "
-                "type %r; this datatype produces non-hashable values"
-                % datatype
-            )
+    def _not_hashable(datatype, *, legacy=False, uncertain=False):
+        if not legacy:
 
-        return go
-
-    if context.load_options._legacy_uniquing:
-        unique_filters = [
-            _no_unique
-            if context.yield_per
-            else id
-            if (
-                ent.use_id_for_hash
-                or ent._non_hashable_value
-                or ent._null_column_type
-            )
-            else None
-            for ent in context.compile_state._entities
-        ]
-    else:
-        unique_filters = [
-            _no_unique
-            if context.yield_per
-            else _not_hashable(ent.column.type)  # type: ignore
-            if (not ent.use_id_for_hash and ent._non_hashable_value)
-            else id
-            if ent.use_id_for_hash
-            else None
-            for ent in context.compile_state._entities
-        ]
+            def go(obj):
+                if uncertain:
+                    try:
+                        return hash(obj)
+                    except:
+                        pass
+
+                raise sa_exc.InvalidRequestError(
+                    "Can't apply uniqueness to row tuple containing value of "
+                    f"""type {datatype!r}; {'the values returned appear to be'
+                    if uncertain else 'this datatype produces'} """
+                    "non-hashable values"
+                )
+
+            return go
+        elif not uncertain:
+            return id
+        else:
+            _use_id = False
+
+            def go(obj):
+                nonlocal _use_id
+
+                if not _use_id:
+                    try:
+                        return hash(obj)
+                    except:
+                        pass
+
+                    # in #10459, we considered using a warning here, however
+                    # as legacy query uses result.unique() in all cases, this
+                    # would lead to too many warning cases.
+                    _use_id = True
+
+                return id(obj)
+
+            return go
+
+    unique_filters = [
+        _no_unique
+        if context.yield_per
+        else _not_hashable(
+            ent.column.type,  # type: ignore
+            legacy=context.load_options._legacy_uniquing,
+            uncertain=ent._null_column_type,
+        )
+        if (
+            not ent.use_id_for_hash
+            and (ent._non_hashable_value or ent._null_column_type)
+        )
+        else id
+        if ent.use_id_for_hash
+        else None
+        for ent in context.compile_state._entities
+    ]
 
     row_metadata = SimpleResultMetaData(
         labels, extra, _unique_filters=unique_filters
index 85307add814a80dc78f3fa55a9d496c78ac459e8..3057087e43b6c83a280bd21b96de5cf2042b3f3b 100644 (file)
@@ -624,15 +624,11 @@ class RowTupleTest(QueryTest, AssertsCompiledSQL):
 
         eq_(q.column_descriptions, asserted)
 
-    def test_unhashable_type_legacy(self):
-        class MyType(TypeDecorator):
-            impl = Integer
-            hashable = False
-            cache_ok = True
-
-            def process_result_value(self, value, dialect):
-                return [value]
-
+    @testing.variation("use_future", [True, False])
+    @testing.variation(
+        "type_style", ["unhashable", "unknown_hashable", "unknown_unhashable"]
+    )
+    def test_unhashable_types(self, use_future, type_style):
         User, users = self.classes.User, self.tables.users
         Address, addresses = self.classes.Address, self.tables.addresses
         self.mapper_registry.map_imperatively(
@@ -640,158 +636,72 @@ class RowTupleTest(QueryTest, AssertsCompiledSQL):
         )
         self.mapper_registry.map_imperatively(Address, addresses)
 
-        s = fixture_session()
-        q = (
-            s.query(User, type_coerce(users.c.id, MyType).label("foo"))
-            .filter(User.id.in_([7, 8]))
-            .join(User.addresses)
-            .order_by(User.id)
-        )
-
-        result = q.all()
+        if type_style.unknown_hashable:
+            expression = type_coerce("Some Unique String", NullType)
+        else:
 
-        # uniquing basically does not occur because we can't hash on
-        # MyType
-        eq_(
-            result,
-            [
-                (User(id=7), [7]),
-                (User(id=8), [8]),
-                (User(id=8), [8]),
-                (User(id=8), [8]),
-            ],
-        )
+            class MyType(TypeDecorator):
+                impl = Integer
 
-    def test_unhashable_type_future(self):
-        class MyType(TypeDecorator):
-            impl = Integer
-            hashable = False
-            cache_ok = True
+                if type_style.unhashable:
+                    hashable = False
+                elif type_style.unknown_unhashable:
+                    _isnull = True
+                cache_ok = True
 
-            def process_result_value(self, value, dialect):
-                return [value]
+                def process_result_value(self, value, dialect):
+                    return [value]
 
-        User, users = self.classes.User, self.tables.users
-        Address, addresses = self.classes.Address, self.tables.addresses
-        self.mapper_registry.map_imperatively(
-            User, users, properties={"addresses": relationship(Address)}
-        )
-        self.mapper_registry.map_imperatively(Address, addresses)
+            expression = type_coerce(users.c.id, MyType)
 
         s = fixture_session()
 
-        stmt = (
-            select(User, type_coerce(users.c.id, MyType).label("foo"))
-            .filter(User.id.in_([7, 8]))
-            .join(User.addresses)
-            .order_by(User.id)
-        )
-
-        result = s.execute(stmt).unique()
-
-        with expect_raises_message(
-            sa_exc.InvalidRequestError,
-            r"Can't apply uniqueness to row tuple "
-            r"containing value of type MyType\(\)",
-        ):
-            result.all()
-
-    def test_unknown_type_assume_not_hashable_legacy(self):
-        User, users = self.classes.User, self.tables.users
-
-        User, users = self.classes.User, self.tables.users
-        Address, addresses = self.classes.Address, self.tables.addresses
-        self.mapper_registry.map_imperatively(
-            User, users, properties={"addresses": relationship(Address)}
-        )
-        self.mapper_registry.map_imperatively(Address, addresses)
-
-        s = fixture_session()
+        if not use_future:
+            q = s.query(User, expression.label("foo"))
+        else:
+            q = select(User, expression.label("foo"))
 
         q = (
-            s.query(
-                User, type_coerce("Some Unique String", NullType).label("foo")
-            )
-            .filter(User.id.in_([7, 8]))
+            q.filter(User.id.in_([7, 8]))
             .join(User.addresses)
             .order_by(User.id)
         )
 
-        result = q.all()
-
-        eq_(
-            result,
-            [
-                (User(id=7, name="jack"), "Some Unique String"),
-                (User(id=8, name="ed"), "Some Unique String"),
-                (User(id=8, name="ed"), "Some Unique String"),
-                (User(id=8, name="ed"), "Some Unique String"),
-            ],
-        )
-
-    def test_unknown_type_assume_hashable_future(self):
-        User, users = self.classes.User, self.tables.users
-
-        User, users = self.classes.User, self.tables.users
-        Address, addresses = self.classes.Address, self.tables.addresses
-        self.mapper_registry.map_imperatively(
-            User, users, properties={"addresses": relationship(Address)}
-        )
-        self.mapper_registry.map_imperatively(Address, addresses)
-
-        s = fixture_session()
-
-        # TODO: it's also unusual I need a label() for type_coerce
-        stmt = (
-            select(
-                User, type_coerce("Some Unique String", NullType).label("foo")
+        if type_style.unknown_hashable:
+            if not use_future:
+                result = q.all()
+            else:
+                result = s.execute(q).unique().all()
+            eq_(
+                result,
+                [
+                    (User(id=7, name="jack"), "Some Unique String"),
+                    (User(id=8, name="ed"), "Some Unique String"),
+                ],
             )
-            .filter(User.id.in_([7, 8]))
-            .join(User.addresses)
-            .order_by(User.id)
-        )
-
-        result = s.execute(stmt).unique()
-
-        eq_(
-            result.all(),
-            [
-                (User(id=7, name="jack"), "Some Unique String"),
-                (User(id=8, name="ed"), "Some Unique String"),
-            ],
-        )
-
-    def test_unknown_type_truly_not_hashable_future(self):
-        User, users = self.classes.User, self.tables.users
-
-        User, users = self.classes.User, self.tables.users
-        Address, addresses = self.classes.Address, self.tables.addresses
-        self.mapper_registry.map_imperatively(
-            User, users, properties={"addresses": relationship(Address)}
-        )
-        self.mapper_registry.map_imperatively(Address, addresses)
-
-        class MyType(TypeDecorator):
-            impl = Integer
-            hashable = True  # which is wrong
-            cache_ok = True
-
-            def process_result_value(self, value, dialect):
-                return [value]
-
-        s = fixture_session()
-
-        stmt = (
-            select(User, type_coerce(User.id, MyType).label("foo"))
-            .filter(User.id.in_([7, 8]))
-            .join(User.addresses)
-            .order_by(User.id)
-        )
-
-        result = s.execute(stmt).unique()
-
-        with expect_raises_message(TypeError, "unhashable type"):
-            result.all()
+        else:
+            uncertain = bool(type_style.unknown_unhashable)
+            if not use_future:
+                result = q.all()
+                eq_(
+                    result,
+                    [
+                        (User(id=7, name="jack"), [7]),
+                        (User(id=8, name="ed"), [8]),
+                        (User(id=8, name="ed"), [8]),
+                        (User(id=8, name="ed"), [8]),
+                    ],
+                )
+            else:
+                with expect_raises_message(
+                    sa_exc.InvalidRequestError,
+                    r"Can't apply uniqueness to row tuple "
+                    r"containing value of type MyType\(\); "
+                    rf"""{'the values returned appear to be'
+                        if uncertain else 'this datatype produces'} """
+                    r"non-hashable values",
+                ):
+                    result = s.execute(q).unique().all()
 
 
 class RowLabelingTest(QueryTest):