From: Mike Bayer Date: Wed, 11 Oct 2023 22:06:17 +0000 (-0400) Subject: improve detection / errors for unknown hashability w/ unique X-Git-Tag: rel_2_0_22~3^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1f388a199ecbce596382b7a5d52c67a785503dd8;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git improve detection / errors for unknown hashability w/ unique Fixed issue where calling :meth:`_engine.Result.unique` with a new-style :func:`.select` query in the ORM, where one or more columns yields values that are of "unknown hashability", typically when using JSON functions like ``func.json_build_object()`` without providing a type, would fail internally when the returned values were not actually hashable. The behavior is repaired to test the objects as they are received for hashability in this case, raising an informative error message if not. Note that for values of "known unhashability", such as when the :class:`.JSON` or :class:`.ARRAY` types are used directly, an informative error message was already raised. The "hashabiltiy testing" fix here is applied to legacy :class:`.Query` as well, however in the legacy case, :meth:`_engine.Result.unique` is used for nearly all queries, so no new warning is emitted here; the legacy behavior of falling back to using ``id()`` in this case is maintained, with the improvement that an unknown type that turns out to be hashable will now be uniqufied, whereas previously it would not. Fixes: #10459 Change-Id: I7317d7ee10c129fc1ab3beeb17c66f34eb063d17 --- diff --git a/doc/build/changelog/unreleased_20/10459.rst b/doc/build/changelog/unreleased_20/10459.rst new file mode 100644 index 0000000000..8c2cb0d47c --- /dev/null +++ b/doc/build/changelog/unreleased_20/10459.rst @@ -0,0 +1,21 @@ +.. change:: + :tags: bug, orm + :tickets: 10459 + + Fixed issue where calling :meth:`_engine.Result.unique` with a new-style + :func:`.select` query in the ORM, where one or more columns yields values + that are of "unknown hashability", typically when using JSON functions like + ``func.json_build_object()`` without providing a type, would fail + internally when the returned values were not actually hashable. The + behavior is repaired to test the objects as they are received for + hashability in this case, raising an informative error message if not. Note + that for values of "known unhashability", such as when the :class:`.JSON` + or :class:`.ARRAY` types are used directly, an informative error message + was already raised. + + The "hashabiltiy testing" fix here is applied to legacy :class:`.Query` as + well, however in the legacy case, :meth:`_engine.Result.unique` is used for + nearly all queries, so no new warning is emitted here; the legacy behavior + of falling back to using ``id()`` in this case is maintained, with the + improvement that an unknown type that turns out to be hashable will now be + uniqufied, whereas previously it would not. diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py index fb62c2f46e..cae6f0be21 100644 --- a/lib/sqlalchemy/orm/loading.py +++ b/lib/sqlalchemy/orm/loading.py @@ -137,40 +137,64 @@ def instances(cursor: CursorResult[Any], context: QueryContext) -> Result[Any]: "Can't use the ORM yield_per feature in conjunction with unique()" ) - def _not_hashable(datatype): - def go(obj): - raise sa_exc.InvalidRequestError( - "Can't apply uniqueness to row tuple containing value of " - "type %r; this datatype produces non-hashable values" - % datatype - ) + def _not_hashable(datatype, *, legacy=False, uncertain=False): + if not legacy: - return go - - if context.load_options._legacy_uniquing: - unique_filters = [ - _no_unique - if context.yield_per - else id - if ( - ent.use_id_for_hash - or ent._non_hashable_value - or ent._null_column_type - ) - else None - for ent in context.compile_state._entities - ] - else: - unique_filters = [ - _no_unique - if context.yield_per - else _not_hashable(ent.column.type) # type: ignore - if (not ent.use_id_for_hash and ent._non_hashable_value) - else id - if ent.use_id_for_hash - else None - for ent in context.compile_state._entities - ] + def go(obj): + if uncertain: + try: + return hash(obj) + except: + pass + + raise sa_exc.InvalidRequestError( + "Can't apply uniqueness to row tuple containing value of " + f"""type {datatype!r}; {'the values returned appear to be' + if uncertain else 'this datatype produces'} """ + "non-hashable values" + ) + + return go + elif not uncertain: + return id + else: + _use_id = False + + def go(obj): + nonlocal _use_id + + if not _use_id: + try: + return hash(obj) + except: + pass + + # in #10459, we considered using a warning here, however + # as legacy query uses result.unique() in all cases, this + # would lead to too many warning cases. + _use_id = True + + return id(obj) + + return go + + unique_filters = [ + _no_unique + if context.yield_per + else _not_hashable( + ent.column.type, # type: ignore + legacy=context.load_options._legacy_uniquing, + uncertain=ent._null_column_type, + ) + if ( + not ent.use_id_for_hash + and (ent._non_hashable_value or ent._null_column_type) + ) + else id + if ent.use_id_for_hash + else None + for ent in context.compile_state._entities + ] row_metadata = SimpleResultMetaData( labels, extra, _unique_filters=unique_filters diff --git a/test/orm/test_query.py b/test/orm/test_query.py index 85307add81..3057087e43 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -624,15 +624,11 @@ class RowTupleTest(QueryTest, AssertsCompiledSQL): eq_(q.column_descriptions, asserted) - def test_unhashable_type_legacy(self): - class MyType(TypeDecorator): - impl = Integer - hashable = False - cache_ok = True - - def process_result_value(self, value, dialect): - return [value] - + @testing.variation("use_future", [True, False]) + @testing.variation( + "type_style", ["unhashable", "unknown_hashable", "unknown_unhashable"] + ) + def test_unhashable_types(self, use_future, type_style): User, users = self.classes.User, self.tables.users Address, addresses = self.classes.Address, self.tables.addresses self.mapper_registry.map_imperatively( @@ -640,158 +636,72 @@ class RowTupleTest(QueryTest, AssertsCompiledSQL): ) self.mapper_registry.map_imperatively(Address, addresses) - s = fixture_session() - q = ( - s.query(User, type_coerce(users.c.id, MyType).label("foo")) - .filter(User.id.in_([7, 8])) - .join(User.addresses) - .order_by(User.id) - ) - - result = q.all() + if type_style.unknown_hashable: + expression = type_coerce("Some Unique String", NullType) + else: - # uniquing basically does not occur because we can't hash on - # MyType - eq_( - result, - [ - (User(id=7), [7]), - (User(id=8), [8]), - (User(id=8), [8]), - (User(id=8), [8]), - ], - ) + class MyType(TypeDecorator): + impl = Integer - def test_unhashable_type_future(self): - class MyType(TypeDecorator): - impl = Integer - hashable = False - cache_ok = True + if type_style.unhashable: + hashable = False + elif type_style.unknown_unhashable: + _isnull = True + cache_ok = True - def process_result_value(self, value, dialect): - return [value] + def process_result_value(self, value, dialect): + return [value] - User, users = self.classes.User, self.tables.users - Address, addresses = self.classes.Address, self.tables.addresses - self.mapper_registry.map_imperatively( - User, users, properties={"addresses": relationship(Address)} - ) - self.mapper_registry.map_imperatively(Address, addresses) + expression = type_coerce(users.c.id, MyType) s = fixture_session() - stmt = ( - select(User, type_coerce(users.c.id, MyType).label("foo")) - .filter(User.id.in_([7, 8])) - .join(User.addresses) - .order_by(User.id) - ) - - result = s.execute(stmt).unique() - - with expect_raises_message( - sa_exc.InvalidRequestError, - r"Can't apply uniqueness to row tuple " - r"containing value of type MyType\(\)", - ): - result.all() - - def test_unknown_type_assume_not_hashable_legacy(self): - User, users = self.classes.User, self.tables.users - - User, users = self.classes.User, self.tables.users - Address, addresses = self.classes.Address, self.tables.addresses - self.mapper_registry.map_imperatively( - User, users, properties={"addresses": relationship(Address)} - ) - self.mapper_registry.map_imperatively(Address, addresses) - - s = fixture_session() + if not use_future: + q = s.query(User, expression.label("foo")) + else: + q = select(User, expression.label("foo")) q = ( - s.query( - User, type_coerce("Some Unique String", NullType).label("foo") - ) - .filter(User.id.in_([7, 8])) + q.filter(User.id.in_([7, 8])) .join(User.addresses) .order_by(User.id) ) - result = q.all() - - eq_( - result, - [ - (User(id=7, name="jack"), "Some Unique String"), - (User(id=8, name="ed"), "Some Unique String"), - (User(id=8, name="ed"), "Some Unique String"), - (User(id=8, name="ed"), "Some Unique String"), - ], - ) - - def test_unknown_type_assume_hashable_future(self): - User, users = self.classes.User, self.tables.users - - User, users = self.classes.User, self.tables.users - Address, addresses = self.classes.Address, self.tables.addresses - self.mapper_registry.map_imperatively( - User, users, properties={"addresses": relationship(Address)} - ) - self.mapper_registry.map_imperatively(Address, addresses) - - s = fixture_session() - - # TODO: it's also unusual I need a label() for type_coerce - stmt = ( - select( - User, type_coerce("Some Unique String", NullType).label("foo") + if type_style.unknown_hashable: + if not use_future: + result = q.all() + else: + result = s.execute(q).unique().all() + eq_( + result, + [ + (User(id=7, name="jack"), "Some Unique String"), + (User(id=8, name="ed"), "Some Unique String"), + ], ) - .filter(User.id.in_([7, 8])) - .join(User.addresses) - .order_by(User.id) - ) - - result = s.execute(stmt).unique() - - eq_( - result.all(), - [ - (User(id=7, name="jack"), "Some Unique String"), - (User(id=8, name="ed"), "Some Unique String"), - ], - ) - - def test_unknown_type_truly_not_hashable_future(self): - User, users = self.classes.User, self.tables.users - - User, users = self.classes.User, self.tables.users - Address, addresses = self.classes.Address, self.tables.addresses - self.mapper_registry.map_imperatively( - User, users, properties={"addresses": relationship(Address)} - ) - self.mapper_registry.map_imperatively(Address, addresses) - - class MyType(TypeDecorator): - impl = Integer - hashable = True # which is wrong - cache_ok = True - - def process_result_value(self, value, dialect): - return [value] - - s = fixture_session() - - stmt = ( - select(User, type_coerce(User.id, MyType).label("foo")) - .filter(User.id.in_([7, 8])) - .join(User.addresses) - .order_by(User.id) - ) - - result = s.execute(stmt).unique() - - with expect_raises_message(TypeError, "unhashable type"): - result.all() + else: + uncertain = bool(type_style.unknown_unhashable) + if not use_future: + result = q.all() + eq_( + result, + [ + (User(id=7, name="jack"), [7]), + (User(id=8, name="ed"), [8]), + (User(id=8, name="ed"), [8]), + (User(id=8, name="ed"), [8]), + ], + ) + else: + with expect_raises_message( + sa_exc.InvalidRequestError, + r"Can't apply uniqueness to row tuple " + r"containing value of type MyType\(\); " + rf"""{'the values returned appear to be' + if uncertain else 'this datatype produces'} """ + r"non-hashable values", + ): + result = s.execute(q).unique().all() class RowLabelingTest(QueryTest):