From 46f34449f389a8aa994485f36e99f95275a19170 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Fri, 16 Apr 2021 23:58:48 -0400 Subject: [PATCH] Uniquify FROMs when traversing through select Fixed a critical performance issue where the traversal of a :func:`_sql.select` construct would traverse a repetitive product of the represented FROM clauses as they were each referred towards by columns in the columns clause; for a series of nested subqueries with lots of columns this could cause a large delay and significant memory growth. This traversal is used by a wide variety of SQL and ORM functions, including by the ORM :class:`_orm.Session` when it's configured to have "table-per-bind", which while this is not a common use case, it seems to be what Flask-SQLAlchemy is hardcoded as using, so the issue impacts Flask-SQLAlchemy users. The traversal has been repaired to uniqify on FROM clauses which was effectively what would happen implicitly with the pre-1.4 architecture. Fixes: #6304 Change-Id: I43497e943db4065deab0bfc830fbb68c17b80a53 --- doc/build/changelog/unreleased_14/6304.rst | 16 +++++++++ lib/sqlalchemy/sql/selectable.py | 27 ++++++++++----- test/sql/test_external_traversal.py | 38 ++++++++++++++++++++++ 3 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 doc/build/changelog/unreleased_14/6304.rst diff --git a/doc/build/changelog/unreleased_14/6304.rst b/doc/build/changelog/unreleased_14/6304.rst new file mode 100644 index 0000000000..860c0c20f6 --- /dev/null +++ b/doc/build/changelog/unreleased_14/6304.rst @@ -0,0 +1,16 @@ +.. change:: + :tags: bug, regression, orm, performance, sql + :tickets: 6304 + + Fixed a critical performance issue where the traversal of a + :func:`_sql.select` construct would traverse a repetitive product of the + represented FROM clauses as they were each referred towards by columns in + the columns clause; for a series of nested subqueries with lots of columns + this could cause a large delay and significant memory growth. This + traversal is used by a wide variety of SQL and ORM functions, including by + the ORM :class:`_orm.Session` when it's configured to have + "table-per-bind", which while this is not a common use case, it seems to be + what Flask-SQLAlchemy is hardcoded as using, so the issue impacts + Flask-SQLAlchemy users. The traversal has been repaired to uniqify on FROM + clauses which was effectively what would happen implicitly with the pre-1.4 + architecture. diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 2a221cdf0b..3a90f77fb4 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -4428,15 +4428,24 @@ class _SelectFromElements(object): # note this does not include elements # in _setup_joins or _legacy_setup_joins - return itertools.chain( - itertools.chain.from_iterable( - [element._from_objects for element in self._raw_columns] - ), - itertools.chain.from_iterable( - [element._from_objects for element in self._where_criteria] - ), - self._from_obj, - ) + seen = set() + for element in self._raw_columns: + for fr in element._from_objects: + if fr in seen: + continue + seen.add(fr) + yield fr + for element in self._where_criteria: + for fr in element._from_objects: + if fr in seen: + continue + seen.add(fr) + yield fr + for element in self._from_obj: + if element in seen: + continue + seen.add(element) + yield element class Select( diff --git a/test/sql/test_external_traversal.py b/test/sql/test_external_traversal.py index e1490adfd3..3021ce5206 100644 --- a/test/sql/test_external_traversal.py +++ b/test/sql/test_external_traversal.py @@ -225,6 +225,44 @@ class TraversalTest( dialect="default", ) + def test_traversal_size(self): + """Test :ticket:`6304`. + + Testing that _iterate_from_elements returns only unique FROM + clauses; overall traversal should be short and all items unique. + + """ + + t = table("t", *[column(x) for x in "pqrxyz"]) + + s1 = select(t.c.p, t.c.q, t.c.r, t.c.x, t.c.y, t.c.z).subquery() + + s2 = ( + select(s1.c.p, s1.c.q, s1.c.r, s1.c.x, s1.c.y, s1.c.z) + .select_from(s1) + .subquery() + ) + + s3 = ( + select(s2.c.p, s2.c.q, s2.c.r, s2.c.x, s2.c.y, s2.c.z) + .select_from(s2) + .subquery() + ) + + tt = list(s3.element._iterate_from_elements()) + eq_(tt, [s2]) + + total = list(visitors.iterate(s3)) + # before the bug was fixed, this was 750 + eq_(len(total), 25) + + seen = set() + for elem in visitors.iterate(s3): + assert elem not in seen + seen.add(elem) + + eq_(len(seen), 25) + def test_change_in_place(self): struct = B( A("expr1"), A("expr2"), B(A("expr1b"), A("expr2b")), A("expr3") -- 2.47.3