From: Mike Bayer Date: Tue, 6 Apr 2021 20:47:00 +0000 (-0400) Subject: Disable and disallow Result.unique() with yield_per X-Git-Tag: rel_1_4_6~5^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5ef1b89d865679fa2ca4bb3e3c1892bdd966ad89;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git Disable and disallow Result.unique() with yield_per Fixed critical regression where the :meth:`_orm.Query.yield_per` method in the ORM would set up the internal :class:`_engine.Result` to yield chunks at a time, however made use of the new :meth:`_engine.Result.unique` method which uniques across the entire result. This would lead to lost rows since the ORM is using ``id(obj)`` as the uniquing function, which leads to repeated identifiers for new objects as already-seen objects are garbage collected. 1.3's behavior here was to "unique" across each chunk, which does not actually produce "uniqued" results when results are yielded in chunks. As the :meth:`_orm.Query.yield_per` method is already explicitly disallowed when joined eager loading is in place, which is the primary rationale for the "uniquing" feature, the "uniquing" feature is now turned off entirely when :meth:`_orm.Query.yield_per` is used. This regression only applies to the legacy :class:`_orm.Query` object; when using :term:`2.0 style` execution, "uniquing" is not automatically applied. To prevent the issue from arising from explicit use of :meth:`_engine.Result.unique`, an error is now raised if rows are fetched from a "uniqued" ORM-level :class:`_engine.Result` if any :ref:`yield per ` API is also in use, as the purpose of ``yield_per`` is to allow for arbitrarily large numbers of rows, which cannot be uniqued in memory without growing the number of entries to fit the complete result size. Fixes: #6206 Change-Id: I3770d1f2e9be44d82c83ca992afb912dcc17af05 --- diff --git a/doc/build/changelog/unreleased_14/6206.rst b/doc/build/changelog/unreleased_14/6206.rst new file mode 100644 index 0000000000..c2452e0615 --- /dev/null +++ b/doc/build/changelog/unreleased_14/6206.rst @@ -0,0 +1,27 @@ +.. change:: + :tags: bug, regression, orm + :tickets: 6206 + + Fixed critical regression where the :meth:`_orm.Query.yield_per` method in + the ORM would set up the internal :class:`_engine.Result` to yield chunks + at a time, however made use of the new :meth:`_engine.Result.unique` method + which uniques across the entire result. This would lead to lost rows since + the ORM is using ``id(obj)`` as the uniquing function, which leads to + repeated identifiers for new objects as already-seen objects are garbage + collected. 1.3's behavior here was to "unique" across each chunk, which + does not actually produce "uniqued" results when results are yielded in + chunks. As the :meth:`_orm.Query.yield_per` method is already explicitly + disallowed when joined eager loading is in place, which is the primary + rationale for the "uniquing" feature, the "uniquing" feature is now turned + off entirely when :meth:`_orm.Query.yield_per` is used. + + This regression only applies to the legacy :class:`_orm.Query` object; when + using :term:`2.0 style` execution, "uniquing" is not automatically applied. + To prevent the issue from arising from explicit use of + :meth:`_engine.Result.unique`, an error is now raised if rows are fetched + from a "uniqued" ORM-level :class:`_engine.Result` if any + :ref:`yield per ` API is also in use, as the + purpose of ``yield_per`` is to allow for arbitrarily large numbers of rows, + which cannot be uniqued in memory without growing the number of entries to + fit the complete result size. + diff --git a/doc/build/orm/queryguide.rst b/doc/build/orm/queryguide.rst index d15b69be2c..58c2c4bc34 100644 --- a/doc/build/orm/queryguide.rst +++ b/doc/build/orm/queryguide.rst @@ -869,12 +869,22 @@ When ``yield_per`` is used, the set for the Core execution, so that a streaming / server side cursor will be used if the backend supports it [1]_ - The ``yield_per`` execution option **is not compatible with subqueryload eager loading or joinedload eager loading when using collections**. It is potentially compatible with selectinload eager loading, **provided the database driver supports multiple, independent cursors** [2]_ . +Additionally, the ``yield_per`` execution option is not compatible +with the :meth:`_engine.Result.unique` method; as this method relies upon +storing a complete set of identities for all rows, it would necessarily +defeat the purpose of using ``yield_per`` which is to handle an arbitrarily +large number of rows. + +.. versionchanged:: 1.4.6 An exception is raised when ORM rows are fetched + from a :class:`_engine.Result` object that makes use of the + :meth:`_engine.Result.unique` filter, at the same time as the ``yield_per`` + execution option is used. + The ``yield_per`` execution option is equvialent to the :meth:`_orm.Query.yield_per` method in :term:`1.x style` ORM queries. diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py index 4a90caeff7..9dcaca0ea0 100644 --- a/lib/sqlalchemy/orm/loading.py +++ b/lib/sqlalchemy/orm/loading.py @@ -87,11 +87,20 @@ def instances(cursor, context): with util.safe_reraise(): cursor.close() + def _no_unique(entry): + raise sa_exc.InvalidRequestError( + "Can't use the ORM yield_per feature in conjunction with unique()" + ) + row_metadata = SimpleResultMetaData( labels, extra, _unique_filters=[ - id if ent.use_id_for_hash else None + _no_unique + if context.yield_per + else id + if ent.use_id_for_hash + else None for ent in context.compile_state._entities ], ) diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 38f1d26b4b..9348d7d48f 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -2827,7 +2827,10 @@ class Query( if result._attributes.get("is_single_entity", False): result = result.scalars() - if result._attributes.get("filtered", False): + if ( + result._attributes.get("filtered", False) + and not self.load_options._yield_per + ): result = result.unique() return result diff --git a/test/orm/test_query.py b/test/orm/test_query.py index 0613cfe06c..9abe20e8a4 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -4878,7 +4878,7 @@ class YieldTest(_fixtures.FixtureTest): q = sess.query(User).yield_per(15) q = q.execution_options(foo="bar") - q.all() + eq_(len(q.all()), 4) def test_yield_per_and_execution_options(self): self._eagerload_mappings() @@ -4904,7 +4904,8 @@ class YieldTest(_fixtures.FixtureTest): ) stmt = select(User).execution_options(yield_per=15) - sess.execute(stmt) + result = sess.execute(stmt) + eq_(len(result.all()), 4) def test_no_joinedload_opt(self): self._eagerload_mappings() @@ -4950,7 +4951,7 @@ class YieldTest(_fixtures.FixtureTest): Address = self.classes.Address sess = fixture_session() q = sess.query(Address).yield_per(1) - q.all() + eq_(len(q.all()), 5) def test_eagerload_opt_disable(self): self._eagerload_mappings() @@ -4963,7 +4964,7 @@ class YieldTest(_fixtures.FixtureTest): .enable_eagerloads(False) .yield_per(1) ) - q.all() + eq_(len(q.all()), 4) q = ( sess.query(User) @@ -4971,7 +4972,7 @@ class YieldTest(_fixtures.FixtureTest): .enable_eagerloads(False) .yield_per(1) ) - q.all() + eq_(len(q.all()), 4) def test_m2o_joinedload_not_others(self): self._eagerload_mappings(addresses_lazy="joined") @@ -4990,6 +4991,97 @@ class YieldTest(_fixtures.FixtureTest): self.assert_sql_count(testing.db, go, 1) + def test_no_unique_w_yield_per(self): + self._eagerload_mappings() + + User = self.classes.User + + sess = fixture_session() + stmt = select(User).execution_options(yield_per=10) + + result = sess.execute(stmt).unique() + + with expect_raises_message( + sa_exc.InvalidRequestError, + r"Can't use the ORM yield_per feature in " + r"conjunction with unique\(\)", + ): + next(result) + + +class YieldIterationTest(_fixtures.FixtureTest): + run_inserts = "once" + run_setup_mappers = "once" + run_deletes = None + + @classmethod + def setup_mappers(cls): + User = cls.classes.User + users = cls.tables.users + mapper(User, users) + + @classmethod + def fixtures(cls): + rows = [(i, "user %d" % (i)) for i in range(1, 21)] + return dict(users=(("id", "name"),) + tuple(rows)) + + @testing.combinations( + (0,), + (1,), + (5,), + (20,), + argnames="num_rows", + ) + @testing.combinations( + ("all",), + ("allquery",), + ("fetchone",), + ("iter",), + ("iterquery",), + ("iternosavequery",), + argnames="method", + ) + @testing.combinations((1,), (10,), (30,), argnames="yield_per") + def test_iter_combinations(self, num_rows, method, yield_per): + User = self.classes.User + + s = fixture_session() + + if method.endswith("query"): + q = s.query(User).limit(num_rows) + + if yield_per is not None: + q = q.yield_per(yield_per) + + else: + q = select(User).limit(num_rows) + + if yield_per is not None: + q = q.execution_options(yield_per=yield_per) + + result = s.execute(q) + + if method == "allquery": + rows = q.all() + elif method == "iterquery": + rows = [row for row in q] + elif method == "iternosavequery": + rows = [None for row in q] + elif method == "all": + rows = result.all() + elif method == "fetchone": + rows = [] + while True: + row = result.fetchone() + if row is None: + break + else: + rows.append(row) + elif method == "iter": + rows = [r for r in result] + + eq_(len(rows), num_rows) + class HintsTest(QueryTest, AssertsCompiledSQL): __dialect__ = "default"