From cad7e3ceca55f4b1937e77f6ad93b1da88899446 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Sun, 18 Oct 2009 20:28:19 +0000 Subject: [PATCH] - added a flag to relation(), eagerload(), and eagerload_all() called 'innerjoin'. Specify `True` or `False` to control whether an eager join is constructed as an INNER or OUTER join. Default is `False` as always. The mapper options will override whichever setting is specified on relation(). Should generally be set for many-to-one, not nullable foreign key relations to allow improved join performance. [ticket:1544] --- CHANGES | 9 ++++ lib/sqlalchemy/orm/__init__.py | 59 ++++++++++++++++++++--- lib/sqlalchemy/orm/properties.py | 3 +- lib/sqlalchemy/orm/strategies.py | 51 +++++++++++++------- test/orm/test_eager_relations.py | 82 +++++++++++++++++++++++++++++++- 5 files changed, 178 insertions(+), 26 deletions(-) diff --git a/CHANGES b/CHANGES index 339fa838b6..2f09040686 100644 --- a/CHANGES +++ b/CHANGES @@ -32,6 +32,15 @@ CHANGES - the "named tuple" objects returned when iterating a Query() are now pickleable. + - added a flag to relation(), eagerload(), and eagerload_all() + called 'innerjoin'. Specify `True` or `False` to control + whether an eager join is constructed as an INNER or OUTER + join. Default is `False` as always. The mapper options + will override whichever setting is specified on relation(). + Should generally be set for many-to-one, not nullable + foreign key relations to allow improved join performance. + [ticket:1544] + - mapping to a select() construct now requires that you make an alias() out of it distinctly. This to eliminate confusion over such issues as [ticket:1542] diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 7d54c73201..dc212539f9 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -268,6 +268,16 @@ def relation(argument, secondary=None, **kwargs): ForeignKey's are present in the join condition, or to override the table-defined foreign keys. + :param innerjoin=False: + when ``True``, eager loads will use an inner join to join + against related tables instead of an outer join. The purpose + of this option is strictly one of performance, as inner joins + generally perform better than outer joins. This flag can + be set to ``True`` when the relation references an object + via many-to-one using local foreign keys that are not nullable, + or when the reference is one-to-one or a collection that is + guaranteed to have one or at least one entry. + :param join_depth: when non-``None``, an integer value indicating how many levels deep eagerload joins should be constructed on a self-referring @@ -849,31 +859,68 @@ def extension(ext): return ExtensionOption(ext) @sa_util.accepts_a_list_as_starargs(list_deprecation='deprecated') -def eagerload(*keys): +def eagerload(*keys, **kw): """Return a ``MapperOption`` that will convert the property of the given name into an eager load. Used with ``query.options()``. + examples:: + + # eagerload the "orders" colleciton on "User" + query(User).options(eagerload(User.orders)) + + # eagerload the "keywords" collection on each "Item", + # but not the "items" collection on "Order" - those + # remain lazily loaded. + query(Order).options(eagerload(Order.items, Item.keywords)) + + # to eagerload across both, use eagerload_all() + query(Order).options(eagerload_all(Order.items, Item.keywords)) + + The keyword arguments accept a flag `innerjoin=True|False` which will + override the value of the `innerjoin` flag specified on the relation(). + """ - return strategies.EagerLazyOption(keys, lazy=False) + innerjoin = kw.pop('innerjoin', None) + if innerjoin is not None: + return ( + strategies.EagerLazyOption(keys, lazy=False), + strategies.EagerJoinOption(keys, innerjoin) + ) + else: + return strategies.EagerLazyOption(keys, lazy=False) @sa_util.accepts_a_list_as_starargs(list_deprecation='deprecated') -def eagerload_all(*keys): +def eagerload_all(*keys, **kw): """Return a ``MapperOption`` that will convert all properties along the given dot-separated path into an eager load. - For example, this:: + Used with ``query.options()``. + + For example:: query.options(eagerload_all('orders.items.keywords'))... will set all of 'orders', 'orders.items', and 'orders.items.keywords' to load in one eager load. - Used with ``query.options()``. + Individual descriptors are accepted as arguments as well:: + + query.options(eagerload_all(User.orders, Order.items, Item.keywords)) + + The keyword arguments accept a flag `innerjoin=True|False` which will + override the value of the `innerjoin` flag specified on the relation(). """ - return strategies.EagerLazyOption(keys, lazy=False, chained=True) + innerjoin = kw.pop('innerjoin', None) + if innerjoin is not None: + return ( + strategies.EagerLazyOption(keys, lazy=False, chained=True), + strategies.EagerJoinOption(keys, innerjoin, chained=True) + ) + else: + return strategies.EagerLazyOption(keys, lazy=False, chained=True) @sa_util.accepts_a_list_as_starargs(list_deprecation='deprecated') def lazyload(*keys): diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py index 1def36c19f..9f9bbd62a7 100644 --- a/lib/sqlalchemy/orm/properties.py +++ b/lib/sqlalchemy/orm/properties.py @@ -357,7 +357,7 @@ class RelationProperty(StrategizedProperty): passive_updates=True, remote_side=None, enable_typechecks=True, join_depth=None, comparator_factory=None, - single_parent=False, + single_parent=False, innerjoin=False, strategy_class=None, _local_remote_pairs=None, query_class=None): self.uselist = uselist @@ -377,6 +377,7 @@ class RelationProperty(StrategizedProperty): self.remote_side = remote_side self.enable_typechecks = enable_typechecks self.query_class = query_class + self.innerjoin = innerjoin self.join_depth = join_depth self.local_remote_pairs = _local_remote_pairs diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index a1369fa6b3..a9fca91796 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -676,16 +676,10 @@ class EagerLoader(AbstractRelationLoader): # and then attach eager load joins to that (i.e., in the case of LIMIT/OFFSET etc.) should_nest_selectable = context.query._should_nest_selectable - if entity in context.eager_joins: - entity_key, default_towrap = entity, entity.selectable - - elif should_nest_selectable or not context.from_clause: - # if no from_clause, or a subquery is going to be generated, - # store eager joins per _MappedEntity; Query._compile_context will - # add them as separate selectables to the select(), or splice them together - # after the subquery is generated - entity_key, default_towrap = entity, entity.selectable - else: + entity_key = None + if entity not in context.eager_joins and \ + not should_nest_selectable and \ + context.from_clause: index, clause = sql_util.find_join_source(context.from_clause, entity.selectable) if clause is not None: # join to an existing FROM clause on the query. @@ -693,11 +687,9 @@ class EagerLoader(AbstractRelationLoader): # Query._compile_context will adapt as needed and append to the # FROM clause of the select(). entity_key, default_towrap = index, clause - else: - # if no from_clause to join to, - # store eager joins per _MappedEntity - entity_key, default_towrap = entity, entity.selectable - + if entity_key is None: + entity_key, default_towrap = entity, entity.selectable + towrap = context.eager_joins.setdefault(entity_key, default_towrap) # create AliasedClauses object to build up the eager query. @@ -717,8 +709,16 @@ class EagerLoader(AbstractRelationLoader): join_to_left = True else: onclause = self.parent_property - - context.eager_joins[entity_key] = eagerjoin = mapperutil.outerjoin(towrap, clauses.aliased_class, onclause, join_to_left=join_to_left) + + innerjoin = context.attributes.get(("eager_join_type", path), self.parent_property.innerjoin) + + context.eager_joins[entity_key] = eagerjoin = mapperutil.join( + towrap, + clauses.aliased_class, + onclause, + join_to_left=join_to_left, + isouter=not innerjoin + ) # send a hint to the Query as to where it may "splice" this join eagerjoin.stop_on = entity.selectable @@ -836,6 +836,23 @@ class EagerLazyOption(StrategizedOption): elif self.lazy is None: return NoLoader +class EagerJoinOption(PropertyOption): + + def __init__(self, key, innerjoin, chained=False): + super(EagerJoinOption, self).__init__(key) + self.innerjoin = innerjoin + self.chained = chained + + def is_chained(self): + return self.chained + + def process_query_property(self, query, paths, mappers): + if self.is_chained(): + for path in paths: + query._attributes[("eager_join_type", path)] = self.innerjoin + else: + query._attributes[("eager_join_type", paths[-1])] = self.innerjoin + class LoadEagerFromAliasOption(PropertyOption): def __init__(self, key, alias=None): diff --git a/test/orm/test_eager_relations.py b/test/orm/test_eager_relations.py index c7a04c888c..f70e9c3b8d 100644 --- a/test/orm/test_eager_relations.py +++ b/test/orm/test_eager_relations.py @@ -3,7 +3,7 @@ from sqlalchemy.test.testing import eq_, is_, is_not_ import sqlalchemy as sa from sqlalchemy.test import testing -from sqlalchemy.orm import eagerload, deferred, undefer +from sqlalchemy.orm import eagerload, deferred, undefer, eagerload_all, backref from sqlalchemy import Integer, String, Date, ForeignKey, and_, select, func from sqlalchemy.test.schema import Table, Column from sqlalchemy.orm import mapper, relation, create_session, lazyload, aliased @@ -12,7 +12,7 @@ from sqlalchemy.test.assertsql import CompiledSQL from test.orm import _base, _fixtures import datetime -class EagerTest(_fixtures.FixtureTest): +class EagerTest(_fixtures.FixtureTest, testing.AssertsCompiledSQL): run_inserts = 'once' run_deletes = None @@ -725,6 +725,84 @@ class EagerTest(_fixtures.FixtureTest): Address.user_id==User.id).order_by(User.id) eq_(self.static.user_address_result[1:2], l.all()) + @testing.resolve_artifact_names + def test_inner_join(self): + mapper(User, users, properties = dict( + addresses = relation(mapper(Address, addresses), lazy=False, innerjoin=True, order_by=addresses.c.id) + )) + sess = create_session() + eq_( + [User(id=7, addresses=[ Address(id=1) ]), + User(id=8, + addresses=[ Address(id=2, email_address='ed@wood.com'), + Address(id=3, email_address='ed@bettyboop.com'), + Address(id=4, email_address='ed@lala.com'), ]), + User(id=9, addresses=[ Address(id=5) ])] + ,sess.query(User).all() + ) + self.assert_compile(sess.query(User), + "SELECT users.id AS users_id, users.name AS users_name, " + "addresses_1.id AS addresses_1_id, addresses_1.user_id AS addresses_1_user_id, " + "addresses_1.email_address AS addresses_1_email_address FROM users JOIN " + "addresses AS addresses_1 ON users.id = addresses_1.user_id ORDER BY addresses_1.id" + , use_default_dialect=True) + + @testing.resolve_artifact_names + def test_inner_join_options(self): + mapper(User, users, properties = dict( + orders =relation(Order, backref=backref('user', innerjoin=True)) + )) + mapper(Order, orders, properties=dict( + items=relation(Item, secondary=order_items) + )) + mapper(Item, items) + sess = create_session() + self.assert_compile(sess.query(User).options(eagerload(User.orders, innerjoin=True)), + "SELECT users.id AS users_id, users.name AS users_name, orders_1.id AS orders_1_id, " + "orders_1.user_id AS orders_1_user_id, orders_1.address_id AS orders_1_address_id, " + "orders_1.description AS orders_1_description, orders_1.isopen AS orders_1_isopen " + "FROM users JOIN orders AS orders_1 ON users.id = orders_1.user_id" + , use_default_dialect=True) + + self.assert_compile(sess.query(User).options(eagerload_all(User.orders, Order.items, innerjoin=True)), + "SELECT users.id AS users_id, users.name AS users_name, items_1.id AS items_1_id, " + "items_1.description AS items_1_description, orders_1.id AS orders_1_id, " + "orders_1.user_id AS orders_1_user_id, orders_1.address_id AS orders_1_address_id, " + "orders_1.description AS orders_1_description, orders_1.isopen AS orders_1_isopen " + "FROM users JOIN orders AS orders_1 ON users.id = orders_1.user_id JOIN order_items AS " + "order_items_1 ON orders_1.id = order_items_1.order_id JOIN items AS items_1 ON " + "items_1.id = order_items_1.item_id" + , use_default_dialect=True) + + def go(): + eq_( + sess.query(User).options( + eagerload(User.orders, innerjoin=True), + eagerload(User.orders, Order.items, innerjoin=True)).all(), + + [User(id=7, + orders=[ + Order(id=1, items=[ Item(id=1), Item(id=2), Item(id=3)]), + Order(id=3, items=[ Item(id=3), Item(id=4), Item(id=5)]), + Order(id=5, items=[Item(id=5)])]), + User(id=9, orders=[ + Order(id=2, items=[ Item(id=1), Item(id=2), Item(id=3)]), + Order(id=4, items=[ Item(id=1), Item(id=5)])]) + ] + ) + self.assert_sql_count(testing.db, go, 1) + + # test that default innerjoin setting is used for options + self.assert_compile( + sess.query(Order).options(eagerload(Order.user)).filter(Order.description == 'foo'), + "SELECT orders.id AS orders_id, orders.user_id AS orders_user_id, orders.address_id AS " + "orders_address_id, orders.description AS orders_description, orders.isopen AS " + "orders_isopen, users_1.id AS users_1_id, users_1.name AS users_1_name " + "FROM orders JOIN users AS users_1 ON users_1.id = orders.user_id " + "WHERE orders.description = :description_1", + use_default_dialect=True + ) + class AddEntityTest(_fixtures.FixtureTest): run_inserts = 'once' run_deletes = None -- 2.47.2