From 3e22b7655a3aca24c993fc70e0caa1cb11205704 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 30 Mar 2020 11:04:24 -0400 Subject: [PATCH] Remove ORDER BY pk from subqueryload, selectinload Modified the queries used by subqueryload and selectinload to no longer ORDER BY the primary key of the parent entity; this ordering was there to allow the rows as they come in to be copied into lists directly with a minimal level of Python-side collation. However, these ORDER BY clauses can negatively impact the performance of the query as in many scenarios these columns are derived from a subquery or are otherwise not actual primary key columns such that SQL planners cannot make use of indexes. The Python-side collation uses the native itertools.group_by() to collate the incoming rows, and has been modified to allow multiple row-groups-per-parent to be assembled together using list.extend(), which should still allow for relatively fast Python-side performance. There will still be an ORDER BY present for a relationship that includes an explicit order_by parameter, however this is the only ORDER BY that will be added to the query for both kinds of loading. Fixes: #5162 Change-Id: I8befd1303c1af7cc24cbf005f39bc01c8b2745f3 (cherry picked from commit f86ee556add28afd4de31c10fce56b00a0014a4e) --- doc/build/changelog/unreleased_13/5162.rst | 18 ++++++++++++ doc/build/orm/tutorial.rst | 2 +- lib/sqlalchemy/orm/strategies.py | 24 +++++++--------- test/orm/inheritance/test_poly_loading.py | 7 ++--- test/orm/inheritance/test_polymorphic_rel.py | 2 +- test/orm/inheritance/test_single.py | 3 +- test/orm/test_ac_relationships.py | 2 +- test/orm/test_deferred.py | 2 +- test/orm/test_of_type.py | 8 ++---- test/orm/test_relationships.py | 2 +- test/orm/test_selectin_relations.py | 26 ++++++++--------- test/orm/test_subquery_relations.py | 30 ++++++++------------ 12 files changed, 66 insertions(+), 60 deletions(-) create mode 100644 doc/build/changelog/unreleased_13/5162.rst diff --git a/doc/build/changelog/unreleased_13/5162.rst b/doc/build/changelog/unreleased_13/5162.rst new file mode 100644 index 0000000000..61ff4a1eda --- /dev/null +++ b/doc/build/changelog/unreleased_13/5162.rst @@ -0,0 +1,18 @@ +.. change:: + :tags: orm, performance + :tickets: 5162 + + Modified the queries used by subqueryload and selectinload to no longer + ORDER BY the primary key of the parent entity; this ordering was there to + allow the rows as they come in to be copied into lists directly with a + minimal level of Python-side collation. However, these ORDER BY clauses + can negatively impact the performance of the query as in many scenarios + these columns are derived from a subquery or are otherwise not actual + primary key columns such that SQL planners cannot make use of indexes. The + Python-side collation uses the native itertools.group_by() to collate the + incoming rows, and has been modified to allow multiple + row-groups-per-parent to be assembled together using list.extend(), which + should still allow for relatively fast Python-side performance. There will + still be an ORDER BY present for a relationship that includes an explicit + order_by parameter, however this is the only ORDER BY that will be added to + the query for both kinds of loading. diff --git a/doc/build/orm/tutorial.rst b/doc/build/orm/tutorial.rst index 09b6cd7e35..05d49c8aea 100644 --- a/doc/build/orm/tutorial.rst +++ b/doc/build/orm/tutorial.rst @@ -1698,7 +1698,7 @@ at once: addresses.email_address AS addresses_email_address FROM addresses WHERE addresses.user_id IN (?) - ORDER BY addresses.user_id, addresses.id + ORDER BY addresses.id (5,) {stop}>>> jack diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 22976321aa..3c1de20689 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -1090,7 +1090,7 @@ class SubqueryLoader(AbstractRelationshipLoader): to_join, local_attr, parent_alias = self._prep_for_joins( left_alias, subq_path ) - q = q.order_by(*local_attr) + q = q.add_columns(*local_attr) q = self._apply_joins( q, to_join, left_alias, parent_alias, effective_entity @@ -1333,10 +1333,9 @@ class SubqueryLoader(AbstractRelationshipLoader): return self._data.get(key, default) def _load(self): - self._data = dict( - (k, [vv[0] for vv in v]) - for k, v in itertools.groupby(self.subq, lambda x: x[1:]) - ) + self._data = collections.defaultdict(list) + for k, v in itertools.groupby(self.subq, lambda x: x[1:]): + self._data[k].extend(vv[0] for vv in v) def loader(self, state, dict_, row): if self._data is None: @@ -2357,7 +2356,7 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots): q.add_criteria( lambda q: q.filter( in_expr.in_(sql.bindparam("primary_keys", expanding=True)) - ).order_by(*pk_cols) + ) ) orig_query = context.query @@ -2454,13 +2453,12 @@ class SelectInLoader(AbstractRelationshipLoader, util.MemoizedSlots): for key, state, state_dict, overwrite in chunk ] - data = { - k: [vv[1] for vv in v] - for k, v in itertools.groupby( - q(context.session).params(primary_keys=primary_keys), - lambda x: x[0], - ) - } + data = collections.defaultdict(list) + for k, v in itertools.groupby( + q(context.session).params(primary_keys=primary_keys), + lambda x: x[0], + ): + data[k].extend(vv[1] for vv in v) for key, state, state_dict, overwrite in chunk: diff --git a/test/orm/inheritance/test_poly_loading.py b/test/orm/inheritance/test_poly_loading.py index 8f1dc01888..f450b760df 100644 --- a/test/orm/inheritance/test_poly_loading.py +++ b/test/orm/inheritance/test_poly_loading.py @@ -121,14 +121,13 @@ class BaseAndSubFixture(object): "SELECT c.a_sub_id AS c_a_sub_id, " "c.id AS c_id " "FROM c WHERE c.a_sub_id " - "IN ([EXPANDING_primary_keys]) ORDER BY c.a_sub_id", + "IN ([EXPANDING_primary_keys])", {"primary_keys": [2]}, ), ), CompiledSQL( "SELECT b.a_id AS b_a_id, b.id AS b_id FROM b " - "WHERE b.a_id IN ([EXPANDING_primary_keys]) " - "ORDER BY b.a_id", + "WHERE b.a_id IN ([EXPANDING_primary_keys])", {"primary_keys": [1, 2]}, ), ), @@ -255,7 +254,7 @@ class FixtureLoadTest(_Polymorphic, testing.AssertsExecutionResults): "people.name AS people_name, people.type AS people_type " "FROM people WHERE people.company_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY people.company_id, people.person_id", + "ORDER BY people.person_id", {"primary_keys": [1, 2]}, ), AllOf( diff --git a/test/orm/inheritance/test_polymorphic_rel.py b/test/orm/inheritance/test_polymorphic_rel.py index 5003c61dce..8e09dfc147 100644 --- a/test/orm/inheritance/test_polymorphic_rel.py +++ b/test/orm/inheritance/test_polymorphic_rel.py @@ -1934,7 +1934,7 @@ class PolymorphicUnionsTest(_PolymorphicTestBase, _PolymorphicUnions): "JOIN managers ON people.person_id = managers.person_id) " "AS pjoin WHERE pjoin.name = :name_1) AS anon_1 JOIN " "machines ON anon_1.pjoin_person_id = machines.engineer_id " - "ORDER BY anon_1.pjoin_person_id, machines.machine_id", + "ORDER BY machines.machine_id", params=[{"name_1": "dilbert"}], ), ) diff --git a/test/orm/inheritance/test_single.py b/test/orm/inheritance/test_single.py index 100d6d017c..a33ea5ecb9 100644 --- a/test/orm/inheritance/test_single.py +++ b/test/orm/inheritance/test_single.py @@ -568,8 +568,7 @@ class RelationshipFromSingleTest( "employee.id AS employee_id FROM employee " "WHERE employee.type IN (:type_1)) AS anon_1 " "JOIN employee_stuff ON anon_1.employee_id " - "= employee_stuff.employee_id ORDER BY " - "anon_1.employee_id", + "= employee_stuff.employee_id", use_default_dialect=True, ) diff --git a/test/orm/test_ac_relationships.py b/test/orm/test_ac_relationships.py index d5f9b013d8..aec9ee5e46 100644 --- a/test/orm/test_ac_relationships.py +++ b/test/orm/test_ac_relationships.py @@ -274,7 +274,7 @@ class AltSelectableTest( "SELECT a_1.id AS a_1_id, b.id AS b_id FROM a AS a_1 " "JOIN (b JOIN d ON d.b_id = b.id JOIN c ON c.id = d.c_id) " "ON a_1.b_id = b.id WHERE a_1.id " - "IN ([EXPANDING_primary_keys]) ORDER BY a_1.id", + "IN ([EXPANDING_primary_keys])", [{"primary_keys": [1]}], ), ) diff --git a/test/orm/test_deferred.py b/test/orm/test_deferred.py index d1d3aaadc2..5b97308beb 100644 --- a/test/orm/test_deferred.py +++ b/test/orm/test_deferred.py @@ -661,7 +661,7 @@ class DeferredOptionsTest(AssertsCompiledSQL, _fixtures.FixtureTest): "FROM (SELECT users.id AS " "users_id FROM users WHERE users.id = :id_1) AS anon_1 " "JOIN orders ON anon_1.users_id = orders.user_id ORDER BY " - "anon_1.users_id, orders.id", + "orders.id", [{"id_1": 7}], ), ], diff --git a/test/orm/test_of_type.py b/test/orm/test_of_type.py index e49ea34495..59391b0fc6 100644 --- a/test/orm/test_of_type.py +++ b/test/orm/test_of_type.py @@ -1058,8 +1058,7 @@ class SubclassRelationshipTest2( "anon_1.t_a_id AS anon_1_t_a_id FROM " "(SELECT t_a.id AS t_a_id FROM t_a) AS anon_1 " "JOIN (t_b AS t_b_1 LEFT OUTER JOIN t_b2 AS t_b2_1 " - "ON t_b_1.id = t_b2_1.id) ON anon_1.t_a_id = t_b_1.a_id " - "ORDER BY anon_1.t_a_id", + "ON t_b_1.id = t_b2_1.id) ON anon_1.t_a_id = t_b_1.a_id", {}, ), CompiledSQL( @@ -1069,8 +1068,7 @@ class SubclassRelationshipTest2( "AS anon_1 JOIN (t_b AS t_b_1 LEFT OUTER JOIN t_b2 AS t_b2_1 " "ON t_b_1.id = t_b2_1.id) ON anon_1.t_a_id = t_b_1.a_id " "JOIN (t_c AS t_c_1 LEFT OUTER JOIN t_c2 AS t_c2_1 ON " - "t_c_1.id = t_c2_1.id) ON t_b_1.id = t_c_1.b_id " - "ORDER BY t_b_1.id", + "t_c_1.id = t_c2_1.id) ON t_b_1.id = t_c_1.b_id", {}, ), CompiledSQL( @@ -1083,7 +1081,7 @@ class SubclassRelationshipTest2( "JOIN (t_c AS t_c_1 LEFT OUTER JOIN t_c2 AS t_c2_1 " "ON t_c_1.id = t_c2_1.id) " "ON t_b_1.id = t_c_1.b_id " - "JOIN t_d ON t_c_1.id = t_d.c_id ORDER BY t_c_1.id", + "JOIN t_d ON t_c_1.id = t_d.c_id", {}, ), ) diff --git a/test/orm/test_relationships.py b/test/orm/test_relationships.py index fdfde2cb4a..4b7d4d18e5 100644 --- a/test/orm/test_relationships.py +++ b/test/orm/test_relationships.py @@ -5609,7 +5609,7 @@ class SecondaryIncludesLocalColsTest(fixtures.MappedTest): "(SELECT a.id AS aid, b.id AS id FROM a JOIN b ON a.b_ids " "LIKE :id_1 || b.id || :param_1) AS anon_1 " "ON a_1.id = anon_1.aid JOIN b ON b.id = anon_1.id " - "WHERE a_1.id IN ([EXPANDING_primary_keys]) ORDER BY a_1.id", + "WHERE a_1.id IN ([EXPANDING_primary_keys])", params=[{"id_1": "%", "param_1": "%", "primary_keys": [2]}], ), ) diff --git a/test/orm/test_selectin_relations.py b/test/orm/test_selectin_relations.py index 021566dfec..8bf4054f01 100644 --- a/test/orm/test_selectin_relations.py +++ b/test/orm/test_selectin_relations.py @@ -1724,7 +1724,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description AS paperwork_description " "FROM paperwork WHERE paperwork.person_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY paperwork.person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", [{"primary_keys": [1]}], ), ) @@ -1774,7 +1774,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description AS paperwork_description " "FROM paperwork WHERE paperwork.person_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY paperwork.person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", [{"primary_keys": [1]}], ), ) @@ -1820,7 +1820,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description AS paperwork_description " "FROM paperwork WHERE paperwork.person_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY paperwork.person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", [{"primary_keys": [1]}], ), ) @@ -1874,7 +1874,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description AS paperwork_description " "FROM paperwork WHERE paperwork.person_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY paperwork.person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", [{"primary_keys": [1]}], ), ) @@ -1922,7 +1922,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description AS paperwork_description " "FROM paperwork WHERE paperwork.person_id " "IN ([EXPANDING_primary_keys]) " - "ORDER BY paperwork.person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", [{"primary_keys": [1]}], ), ) @@ -2142,7 +2142,7 @@ class TupleTest(fixtures.DeclarativeMappedTest): CompiledSQL( "SELECT b.a_id1 AS b_a_id1, b.a_id2 AS b_a_id2, b.id AS b_id " "FROM b WHERE (b.a_id1, b.a_id2) IN " - "([EXPANDING_primary_keys]) ORDER BY b.a_id1, b.a_id2, b.id", + "([EXPANDING_primary_keys]) ORDER BY b.id", [{"primary_keys": [(i, i + 2) for i in range(1, 20)]}], ), ) @@ -2247,19 +2247,19 @@ class ChunkingTest(fixtures.DeclarativeMappedTest): CompiledSQL( "SELECT b.a_id AS b_a_id, b.id AS b_id " "FROM b WHERE b.a_id IN " - "([EXPANDING_primary_keys]) ORDER BY b.a_id, b.id", + "([EXPANDING_primary_keys]) ORDER BY b.id", {"primary_keys": list(range(1, 48))}, ), CompiledSQL( "SELECT b.a_id AS b_a_id, b.id AS b_id " "FROM b WHERE b.a_id IN " - "([EXPANDING_primary_keys]) ORDER BY b.a_id, b.id", + "([EXPANDING_primary_keys]) ORDER BY b.id", {"primary_keys": list(range(48, 95))}, ), CompiledSQL( "SELECT b.a_id AS b_a_id, b.id AS b_id " "FROM b WHERE b.a_id IN " - "([EXPANDING_primary_keys]) ORDER BY b.a_id, b.id", + "([EXPANDING_primary_keys]) ORDER BY b.id", {"primary_keys": list(range(95, 101))}, ), ) @@ -3037,7 +3037,7 @@ class SingleInhSubclassTest( CompiledSQL( "SELECT role.user_id AS role_user_id, role.id AS role_id " "FROM role WHERE role.user_id " - "IN ([EXPANDING_primary_keys]) ORDER BY role.user_id", + "IN ([EXPANDING_primary_keys])", {"primary_keys": [1]}, ), ) @@ -3195,7 +3195,7 @@ class M2OWDegradeTest( "SELECT a_1.id AS a_1_id, b.id AS b_id, b.x AS b_x, " "b.y AS b_y " "FROM a AS a_1 JOIN b ON b.id = a_1.b_id " - "WHERE a_1.id IN ([EXPANDING_primary_keys]) ORDER BY a_1.id", + "WHERE a_1.id IN ([EXPANDING_primary_keys])", [{"primary_keys": [1, 3]}], ), ) @@ -3251,7 +3251,7 @@ class M2OWDegradeTest( CompiledSQL( "SELECT a_1.id AS a_1_id, b.id AS b_id, b.x AS b_x, " "b.y AS b_y FROM a AS a_1 JOIN b ON b.id = a_1.b_id " - "WHERE a_1.id IN ([EXPANDING_primary_keys]) ORDER BY a_1.id", + "WHERE a_1.id IN ([EXPANDING_primary_keys])", [{"primary_keys": [1, 2, 3, 4, 5]}], ), ) @@ -3286,7 +3286,7 @@ class M2OWDegradeTest( "SELECT a_1.id AS a_1_id, b.id AS b_id, b.x AS b_x, " "b.y AS b_y " "FROM a AS a_1 JOIN b ON b.id = a_1.b_id " - "WHERE a_1.id IN ([EXPANDING_primary_keys]) ORDER BY a_1.id", + "WHERE a_1.id IN ([EXPANDING_primary_keys])", [{"primary_keys": [1, 2, 3, 4, 5]}], ), ) diff --git a/test/orm/test_subquery_relations.py b/test/orm/test_subquery_relations.py index 1c2f720467..ddd8306775 100644 --- a/test/orm/test_subquery_relations.py +++ b/test/orm/test_subquery_relations.py @@ -1706,7 +1706,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): ":primary_language_1) AS anon_1 " "JOIN paperwork " "ON anon_1.people_person_id = paperwork.person_id " - "ORDER BY anon_1.people_person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", {"primary_language_1": "java"}, ), ) @@ -1763,7 +1763,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "paperwork.description = :description_1) AS anon_1 " "JOIN paperwork ON anon_1.people_person_id = " "paperwork.person_id " - "ORDER BY anon_1.people_person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", { "primary_language_1": "java", "description_1": "tps report #2", @@ -1828,7 +1828,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "WHERE engineers.primary_language = :primary_language_1) " "AS anon_1 JOIN paperwork " "ON anon_1.people_person_id = paperwork.person_id " - "ORDER BY anon_1.people_person_id, paperwork.paperwork_id", + "ORDER BY paperwork.paperwork_id", {"primary_language_1": "java"}, ), CompiledSQL( @@ -1843,7 +1843,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "AS anon_1 JOIN paperwork AS paperwork_1 " "ON anon_1.people_person_id = paperwork_1.person_id " "JOIN pages ON paperwork_1.paperwork_id = pages.paperwork_id " - "ORDER BY paperwork_1.paperwork_id, pages.page_id", + "ORDER BY pages.page_id", {"primary_language_1": "java"}, ), ) @@ -1893,7 +1893,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "engineers.engineer_id ORDER BY engineers.primary_language " "DESC LIMIT :param_1) AS anon_1 JOIN paperwork " "ON anon_1.people_person_id = paperwork.person_id " - "ORDER BY anon_1.people_person_id, paperwork.paperwork_id" + "ORDER BY paperwork.paperwork_id" ), ) @@ -1960,8 +1960,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "DESC LIMIT :param_1) AS anon_1 " "JOIN paperwork " "ON anon_1.anon_2_people_person_id = paperwork.person_id " - "ORDER BY anon_1.anon_2_people_person_id, " - "paperwork.paperwork_id" + "ORDER BY paperwork.paperwork_id" ), ) @@ -2013,8 +2012,7 @@ class BaseRelationFromJoinedSubclassTest(_Polymorphic): "ON people_1.person_id = engineers_1.engineer_id " "ORDER BY engineers_1.primary_language DESC LIMIT :param_1) " "AS anon_1 JOIN paperwork ON anon_1.people_1_person_id = " - "paperwork.person_id ORDER BY anon_1.people_1_person_id, " - "paperwork.paperwork_id" + "paperwork.person_id ORDER BY paperwork.paperwork_id" ), ) @@ -2712,8 +2710,7 @@ class CyclicalInheritingEagerTestTwo( "ON persistent.id = director.id) AS anon_1 " "JOIN (persistent JOIN movie " "ON persistent.id = movie.id) " - "ON anon_1.director_id = movie.director_id " - "ORDER BY anon_1.director_id", + "ON anon_1.director_id = movie.director_id", dialect="default", ) @@ -2838,8 +2835,7 @@ class SubqueryloadDistinctTest( "anon_1.movie_director_id AS anon_1_movie_director_id " "FROM (SELECT%s movie.director_id AS movie_director_id " "FROM movie) AS anon_1 " - "JOIN director ON director.id = anon_1.movie_director_id " - "ORDER BY anon_1.movie_director_id" + "JOIN director ON director.id = anon_1.movie_director_id" % (" DISTINCT" if expect_distinct else ""), ) @@ -2867,8 +2863,7 @@ class SubqueryloadDistinctTest( "JOIN director AS director_1 " "ON director_1.id = anon_1.movie_director_id " "JOIN director_photo " - "ON director_1.id = director_photo.director_id " - "ORDER BY director_1.id" + "ON director_1.id = director_photo.director_id" % (" DISTINCT" if expect_distinct else ""), ) result = s.execute(q3) @@ -3046,8 +3041,7 @@ class SelfRefInheritanceAliasedTest( "anon_1.foo_foo_id AS anon_1_foo_foo_id " "FROM (SELECT DISTINCT foo.foo_id AS foo_foo_id " "FROM foo WHERE foo.id = :id_1) AS anon_1 " - "JOIN foo AS foo_1 ON foo_1.id = anon_1.foo_foo_id " - "ORDER BY anon_1.foo_foo_id", + "JOIN foo AS foo_1 ON foo_1.id = anon_1.foo_foo_id", {"id_1": 2}, ), CompiledSQL( @@ -3056,7 +3050,7 @@ class SelfRefInheritanceAliasedTest( "FROM (SELECT DISTINCT foo.foo_id AS foo_foo_id FROM foo " "WHERE foo.id = :id_1) AS anon_1 " "JOIN foo AS foo_1 ON foo_1.id = anon_1.foo_foo_id " - "JOIN foo ON foo.id = foo_1.foo_id ORDER BY foo_1.foo_id", + "JOIN foo ON foo.id = foo_1.foo_id", {"id_1": 2}, ), ) -- 2.47.2