--- /dev/null
+.. change::
+ :tags: bug, regression, orm, performance, sql
+ :tickets: 6304
+
+ Fixed a critical performance issue where the traversal of a
+ :func:`_sql.select` construct would traverse a repetitive product of the
+ represented FROM clauses as they were each referred towards by columns in
+ the columns clause; for a series of nested subqueries with lots of columns
+ this could cause a large delay and significant memory growth. This
+ traversal is used by a wide variety of SQL and ORM functions, including by
+ the ORM :class:`_orm.Session` when it's configured to have
+ "table-per-bind", which while this is not a common use case, it seems to be
+ what Flask-SQLAlchemy is hardcoded as using, so the issue impacts
+ Flask-SQLAlchemy users. The traversal has been repaired to uniqify on FROM
+ clauses which was effectively what would happen implicitly with the pre-1.4
+ architecture.
# note this does not include elements
# in _setup_joins or _legacy_setup_joins
- return itertools.chain(
- itertools.chain.from_iterable(
- [element._from_objects for element in self._raw_columns]
- ),
- itertools.chain.from_iterable(
- [element._from_objects for element in self._where_criteria]
- ),
- self._from_obj,
- )
+ seen = set()
+ for element in self._raw_columns:
+ for fr in element._from_objects:
+ if fr in seen:
+ continue
+ seen.add(fr)
+ yield fr
+ for element in self._where_criteria:
+ for fr in element._from_objects:
+ if fr in seen:
+ continue
+ seen.add(fr)
+ yield fr
+ for element in self._from_obj:
+ if element in seen:
+ continue
+ seen.add(element)
+ yield element
class Select(
dialect="default",
)
+ def test_traversal_size(self):
+ """Test :ticket:`6304`.
+
+ Testing that _iterate_from_elements returns only unique FROM
+ clauses; overall traversal should be short and all items unique.
+
+ """
+
+ t = table("t", *[column(x) for x in "pqrxyz"])
+
+ s1 = select(t.c.p, t.c.q, t.c.r, t.c.x, t.c.y, t.c.z).subquery()
+
+ s2 = (
+ select(s1.c.p, s1.c.q, s1.c.r, s1.c.x, s1.c.y, s1.c.z)
+ .select_from(s1)
+ .subquery()
+ )
+
+ s3 = (
+ select(s2.c.p, s2.c.q, s2.c.r, s2.c.x, s2.c.y, s2.c.z)
+ .select_from(s2)
+ .subquery()
+ )
+
+ tt = list(s3.element._iterate_from_elements())
+ eq_(tt, [s2])
+
+ total = list(visitors.iterate(s3))
+ # before the bug was fixed, this was 750
+ eq_(len(total), 25)
+
+ seen = set()
+ for elem in visitors.iterate(s3):
+ assert elem not in seen
+ seen.add(elem)
+
+ eq_(len(seen), 25)
+
def test_change_in_place(self):
struct = B(
A("expr1"), A("expr2"), B(A("expr1b"), A("expr2b")), A("expr3")