From 51ece86c136a9f1295eb6754cb0299b7e2d3c52b Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Tue, 12 Dec 2006 19:22:28 +0000 Subject: [PATCH] - fixes to passive_deletes flag, lazy=None (noload) flag - added example/docs for dealing with large collections - added object_session() method to sqlalchemy namespace --- CHANGES | 3 + doc/build/content/adv_datamapping.txt | 48 +++++++++++++ lib/sqlalchemy/orm/__init__.py | 3 +- lib/sqlalchemy/orm/dependency.py | 99 ++++++++++++++------------- lib/sqlalchemy/orm/strategies.py | 2 +- test/orm/mapper.py | 36 +++++++++- 6 files changed, 142 insertions(+), 49 deletions(-) diff --git a/CHANGES b/CHANGES index 24506d3e54..48100428e8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,7 @@ - string-based FROM clauses fixed, i.e. select(..., from_obj=["sometext"]) +- fixes to passive_deletes flag, lazy=None (noload) flag +- added example/docs for dealing with large collections +- added object_session() method to sqlalchemy namespace 0.3.2 - major connection pool bug fixed. fixes MySQL out of sync diff --git a/doc/build/content/adv_datamapping.txt b/doc/build/content/adv_datamapping.txt index b16b1853d8..50e8980d56 100644 --- a/doc/build/content/adv_datamapping.txt +++ b/doc/build/content/adv_datamapping.txt @@ -251,6 +251,54 @@ Deferred columns can be placed into groups so that they load together: 'photo3' : deferred(book_excerpts.c.photo3, group='photos') }) +#### Working with Large Collections + +(requires some bugfixes released as of version 0.3.3) + +SQLAlchemy relations are generally simplistic; the lazy loader loads in the full list of child objects when accessed, and the eager load builds a query that loads the full list of child objects. Additionally, when you are deleting a parent object, SQLAlchemy insures that it has loaded the full list of child objects so that it can mark them as deleted as well (or to update their parent foreign key to NULL). It does not issue an en-masse "delete from table where parent_id=?" type of statement in such a scenario. This is because the child objects themselves may also have further dependencies, and additionally may also exist in the current session in which case SA needs to know their identity so that their state can be properly updated. + +So there are several techniques that can be used individually or combined together to address these issues, in the context of a large collection where you normally would not want to load the full list of relationships: + +* Use `lazy=None` to disable child object loading (i.e. noload) + + {python} + mapper(MyClass, table, properties=relation{ + 'children':relation(MyObj, lazy=None) + }) + +* To load child objects, just use a query: + + {python} + class Organization(object): + def __init__(self, name): + self.name = name + def find_members(self, criterion): + """locate a subset of the members associated with this Organization""" + return object_session(self).query(Member).select(and_(member_table.c.name.like(criterion), org_table.c.org_id==self.org_id), from_obj=[org_table.join(member_table)]) + +* Use `passive_updates=True` to disable child object loading on a DELETE operation (noload also accomplishes this) +* Use "ON DELETE (CASCADE|SET NULL)" on your database to automatically cascade deletes to child objects (this is the best way to do it) + + {python} + mytable = Table('sometable', meta, + Column('id', Integer, primary_key=True), + Column('parent_id', Integer), + ForeignKeyConstraint(['parent_id'],['parenttable.id'], ondelete="CASCADE"), + ) + +* Alternatively, you can create a simple `MapperExtension` that will issue a DELETE for child objects: + + {python} + class DeleteMemberExt(MapperExtension): + def before_delete(self, mapper, connection, instance): + connection.execute(member_table.delete(member_table.c.org_id==instance.org_id)) + + mapper(Organization, org_table, extension=DeleteMemberExt(), properties = { + 'members' : relation(Member, lazy=None, passive_deletes=True, cascade="all, delete-orphan") + }) + +The latest distribution includes an example `examples/collection/large_collection.py` which illustrates most of these techniques. + #### Relation Options {@name=relationoptions} Keyword options to the `relation` function include: diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 2ae9bd3aa5..8f224da1d1 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -15,10 +15,11 @@ from sqlalchemy.orm.query import Query from sqlalchemy.orm.util import polymorphic_union from sqlalchemy.orm import properties, strategies from sqlalchemy.orm.session import Session as create_session +from sqlalchemy.orm.session import object_session __all__ = ['relation', 'backref', 'eagerload', 'lazyload', 'noload', 'deferred', 'defer', 'undefer', 'extension', 'mapper', 'clear_mappers', 'clear_mapper', 'class_mapper', 'object_mapper', 'MapperExtension', 'Query', - 'cascade_mappers', 'polymorphic_union', 'create_session', 'synonym', 'contains_eager', 'EXT_PASS' + 'cascade_mappers', 'polymorphic_union', 'create_session', 'synonym', 'contains_eager', 'EXT_PASS', 'object_session' ] def relation(*args, **kwargs): diff --git a/lib/sqlalchemy/orm/dependency.py b/lib/sqlalchemy/orm/dependency.py index 5b95b4d7ac..4106d5c373 100644 --- a/lib/sqlalchemy/orm/dependency.py +++ b/lib/sqlalchemy/orm/dependency.py @@ -130,15 +130,16 @@ class OneToManyDP(DependencyProcessor): # the child objects have to have their foreign key to the parent set to NULL if not self.cascade.delete_orphan or self.post_update: for obj in deplist: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items(): - if child is not None and childlist.hasparent(child) is False: - self._synchronize(obj, child, None, True) - self._conditional_post_update(child, uowcommit, [obj]) - for child in childlist.unchanged_items(): - if child is not None: - self._synchronize(obj, child, None, True) - self._conditional_post_update(child, uowcommit, [obj]) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items(): + if child is not None and childlist.hasparent(child) is False: + self._synchronize(obj, child, None, True) + self._conditional_post_update(child, uowcommit, [obj]) + for child in childlist.unchanged_items(): + if child is not None: + self._synchronize(obj, child, None, True) + self._conditional_post_update(child, uowcommit, [obj]) else: for obj in deplist: childlist = self.get_object_dependencies(obj, uowcommit, passive=True) @@ -160,26 +161,28 @@ class OneToManyDP(DependencyProcessor): pass elif self.cascade.delete_orphan: for obj in deplist: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items(): - if child is not None and childlist.hasparent(child) is False: - uowcommit.register_object(child, isdelete=True) - for c in self.mapper.cascade_iterator('delete', child): - uowcommit.register_object(c, isdelete=True) - for child in childlist.unchanged_items(): - if child is not None: - uowcommit.register_object(child, isdelete=True) - for c in self.mapper.cascade_iterator('delete', child): - uowcommit.register_object(c, isdelete=True) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items(): + if child is not None and childlist.hasparent(child) is False: + uowcommit.register_object(child, isdelete=True) + for c in self.mapper.cascade_iterator('delete', child): + uowcommit.register_object(c, isdelete=True) + for child in childlist.unchanged_items(): + if child is not None: + uowcommit.register_object(child, isdelete=True) + for c in self.mapper.cascade_iterator('delete', child): + uowcommit.register_object(c, isdelete=True) else: for obj in deplist: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items(): - if child is not None and childlist.hasparent(child) is False: - uowcommit.register_object(child) - for child in childlist.unchanged_items(): - if child is not None: - uowcommit.register_object(child) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items(): + if child is not None and childlist.hasparent(child) is False: + uowcommit.register_object(child) + for child in childlist.unchanged_items(): + if child is not None: + uowcommit.register_object(child) else: for obj in deplist: childlist = self.get_object_dependencies(obj, uowcommit, passive=True) @@ -221,8 +224,9 @@ class ManyToOneDP(DependencyProcessor): # before we can DELETE the row for obj in deplist: self._synchronize(obj, None, None, True) - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - self._conditional_post_update(obj, uowcommit, childlist.deleted_items() + childlist.unchanged_items() + childlist.added_items()) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + self._conditional_post_update(obj, uowcommit, childlist.deleted_items() + childlist.unchanged_items() + childlist.added_items()) else: for obj in deplist: childlist = self.get_object_dependencies(obj, uowcommit, passive=True) @@ -238,22 +242,24 @@ class ManyToOneDP(DependencyProcessor): if delete: if self.cascade.delete: for obj in deplist: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items() + childlist.unchanged_items(): - if child is not None and childlist.hasparent(child) is False: - uowcommit.register_object(child, isdelete=True) - for c in self.mapper.cascade_iterator('delete', child): - uowcommit.register_object(c, isdelete=True) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items() + childlist.unchanged_items(): + if child is not None and childlist.hasparent(child) is False: + uowcommit.register_object(child, isdelete=True) + for c in self.mapper.cascade_iterator('delete', child): + uowcommit.register_object(c, isdelete=True) else: for obj in deplist: uowcommit.register_object(obj) if self.cascade.delete_orphan: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items(): - if childlist.hasparent(child) is False: - uowcommit.register_object(child, isdelete=True) - for c in self.mapper.cascade_iterator('delete', child): - uowcommit.register_object(c, isdelete=True) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items(): + if childlist.hasparent(child) is False: + uowcommit.register_object(child, isdelete=True) + for c in self.mapper.cascade_iterator('delete', child): + uowcommit.register_object(c, isdelete=True) def _synchronize(self, obj, child, associationrow, clearkeys): source = child @@ -286,11 +292,12 @@ class ManyToManyDP(DependencyProcessor): secondary_insert = [] if delete: for obj in deplist: - childlist = self.get_object_dependencies(obj, uowcommit, passive=False) - for child in childlist.deleted_items() + childlist.unchanged_items(): - associationrow = {} - self._synchronize(obj, child, associationrow, False) - secondary_delete.append(associationrow) + childlist = self.get_object_dependencies(obj, uowcommit, passive=self.passive_deletes) + if childlist is not None: + for child in childlist.deleted_items() + childlist.unchanged_items(): + associationrow = {} + self._synchronize(obj, child, associationrow, False) + secondary_delete.append(associationrow) else: for obj in deplist: childlist = self.get_object_dependencies(obj, uowcommit) diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 82234c34a8..78a873c704 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -147,7 +147,7 @@ class AbstractRelationLoader(LoaderStrategy): class NoLoader(AbstractRelationLoader): def init_class_attribute(self): - self.parent_property._get_strategy(LazyLoader).init_class_attribute() + self._register_attribute(self.parent.class_) def process_row(self, selectcontext, instance, row, identitykey, isnew): if isnew: if not self.is_default or len(selectcontext.options): diff --git a/test/orm/mapper.py b/test/orm/mapper.py index 3a56ff6619..533faed6de 100644 --- a/test/orm/mapper.py +++ b/test/orm/mapper.py @@ -701,7 +701,41 @@ class DeferredTest(MapperSuperTest): self.assert_sql_count(db, go, 0) self.assert_(item.item_name == 'item 4') - + +class NoLoadTest(MapperSuperTest): + def testbasic(self): + """tests a basic one-to-many lazy load""" + m = mapper(User, users, properties = dict( + addresses = relation(mapper(Address, addresses), lazy=None) + )) + q = create_session().query(m) + l = [None] + def go(): + x = q.select(users.c.user_id == 7) + x[0].addresses + l[0] = x + self.assert_sql_count(testbase.db, go, 1) + + self.assert_result(l[0], User, + {'user_id' : 7, 'addresses' : (Address, [])}, + ) + def testoptions(self): + m = mapper(User, users, properties = dict( + addresses = relation(mapper(Address, addresses), lazy=None) + )) + q = create_session().query(m).options(lazyload('addresses')) + l = [None] + def go(): + x = q.select(users.c.user_id == 7) + x[0].addresses + l[0] = x + self.assert_sql_count(testbase.db, go, 2) + + self.assert_result(l[0], User, + {'user_id' : 7, 'addresses' : (Address, [{'address_id' : 1}])}, + ) + + class LazyTest(MapperSuperTest): def testbasic(self): -- 2.47.2