From: Mike Bayer Date: Sun, 12 Dec 2010 06:35:37 +0000 (-0500) Subject: - refactor query._get() into two methods - a static one that does X-Git-Tag: rel_0_7b1~173 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6d5dd2214a4cc6340d8f07147a43fac03a12b040;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - refactor query._get() into two methods - a static one that does just the identity map lookup + expired check, the other which does the load unconditionally. All the refresh/deferred load calls use the unconditional load method, query.get() and LoadLazyAttribute call the identity check by itself first. m2o lazyloads for object already in the identity map callcounts are now cut in half, since no Query object is created. --- diff --git a/lib/sqlalchemy/orm/attributes.py b/lib/sqlalchemy/orm/attributes.py index 6b235e5277..4d03795036 100644 --- a/lib/sqlalchemy/orm/attributes.py +++ b/lib/sqlalchemy/orm/attributes.py @@ -844,7 +844,7 @@ def backref_listeners(attribute, key, uselist): # present when updating via a backref. old_state, old_dict = instance_state(oldchild),\ instance_dict(oldchild) - impl = old_state.get_impl(key) + impl = old_state.manager[key].impl try: impl.remove(old_state, old_dict, @@ -856,7 +856,7 @@ def backref_listeners(attribute, key, uselist): if child is not None: child_state, child_dict = instance_state(child),\ instance_dict(child) - child_state.get_impl(key).append( + child_state.manager[key].impl.append( child_state, child_dict, state.obj(), @@ -867,7 +867,7 @@ def backref_listeners(attribute, key, uselist): def append(state, child, initiator): child_state, child_dict = instance_state(child), \ instance_dict(child) - child_state.get_impl(key).append( + child_state.manager[key].impl.append( child_state, child_dict, state.obj(), @@ -879,7 +879,7 @@ def backref_listeners(attribute, key, uselist): if child is not None: child_state, child_dict = instance_state(child),\ instance_dict(child) - child_state.get_impl(key).remove( + child_state.manager[key].impl.remove( child_state, child_dict, state.obj(), @@ -1077,7 +1077,7 @@ def get_all_pending(state, dict_, key): """ - return state.manager.get_impl(key).get_all_pending(state, dict_) + return state.manager[key].impl.get_all_pending(state, dict_) def has_parent(cls, obj, key, optimistic=False): @@ -1172,7 +1172,7 @@ def init_collection(obj, key): def init_state_collection(state, dict_, key): """Initialize a collection attribute and return the collection adapter.""" - attr = state.get_impl(key) + attr = state.manager[key].impl user_data = attr.initialize(state, dict_) return attr.get_collection(state, dict_, user_data) @@ -1192,7 +1192,7 @@ def set_committed_value(instance, key, value): """ state, dict_ = instance_state(instance), instance_dict(instance) - state.get_impl(key).set_committed_value(state, dict_, value) + state.manager[key].impl.set_committed_value(state, dict_, value) def set_attribute(instance, key, value): """Set the value of an attribute, firing history events. @@ -1205,7 +1205,7 @@ def set_attribute(instance, key, value): """ state, dict_ = instance_state(instance), instance_dict(instance) - state.get_impl(key).set(state, dict_, value, None) + state.manager[key].impl.set(state, dict_, value, None) def get_attribute(instance, key): """Get the value of an attribute, firing any callables required. @@ -1218,7 +1218,7 @@ def get_attribute(instance, key): """ state, dict_ = instance_state(instance), instance_dict(instance) - return state.get_impl(key).get(state, dict_) + return state.manager[key].impl.get(state, dict_) def del_attribute(instance, key): """Delete the value of an attribute, firing history events. @@ -1231,5 +1231,5 @@ def del_attribute(instance, key): """ state, dict_ = instance_state(instance), instance_dict(instance) - state.get_impl(key).delete(state, dict_) + state.manager[key].impl.delete(state, dict_) diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 20242c97c0..cd9f01f38e 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -1232,7 +1232,7 @@ class Mapper(object): A list of values indicating the identifier. """ - return self._identity_class, tuple(util.to_list(primary_key)) + return self._identity_class, tuple(primary_key) def identity_key_from_instance(self, instance): """Return the identity key for the given instance, based on @@ -1910,7 +1910,7 @@ class Mapper(object): # refresh whatever has been expired. if self.eager_defaults and state.unloaded: state.key = self._identity_key_from_state(state) - uowtransaction.session.query(self)._get( + uowtransaction.session.query(self)._load_on_ident( state.key, refresh_state=state, only_load_props=state.unloaded) @@ -2511,7 +2511,7 @@ def _load_scalar_attributes(state, attribute_names): statement = mapper._optimized_get_statement(state, attribute_names) if statement is not None: result = session.query(mapper).from_statement(statement).\ - _get(None, + _load_on_ident(None, only_load_props=attribute_names, refresh_state=state) @@ -2539,7 +2539,7 @@ def _load_scalar_attributes(state, attribute_names): % state_str(state)) return - result = session.query(mapper)._get( + result = session.query(mapper)._load_on_ident( identity_key, refresh_state=state, only_load_props=attribute_names) diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 20f71bb51d..58c2246364 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -612,18 +612,43 @@ class Query(object): given identifier, or None if not found. The `ident` argument is a scalar or tuple of primary key column values - in the order of the table def's primary key columns. + in the order of the mapper's "priamry key" setting, which + defaults to the list of primary key columns for the + mapped :class:`.Table`. """ # convert composite types to individual args if hasattr(ident, '__composite_values__'): ident = ident.__composite_values__() - - key = self._only_mapper_zero( + + ident = util.to_list(ident) + + mapper = self._only_mapper_zero( "get() can only be used against a single mapped class." - ).identity_key_from_primary_key(ident) - return self._get(key, ident) + ) + + if len(ident) != len(mapper.primary_key): + raise sa_exc.InvalidRequestError( + "Incorrect number of values in identifier to formulate " + "primary key for query.get(); primary key columns are %s" % + ','.join("'%s'" % c for c in mapper.primary_key)) + + key = mapper.identity_key_from_primary_key(ident) + + if not self._populate_existing and \ + not mapper.always_refresh and \ + self._lockmode is None: + + instance = self._get_from_identity(self.session, key, False) + if instance is not None: + # reject calls for id in identity map but class + # mismatch. + if not issubclass(instance.__class__, mapper.class_): + return None + return instance + + return self._load_on_ident(key) @_generative() def correlate(self, *args): @@ -1880,43 +1905,42 @@ class Query(object): finally: session.autoflush = autoflush + @classmethod + def _get_from_identity(cls, session, key, passive): + """Look up the given key in the given session's identity map, + check the object for expired state if found. - def _get(self, key=None, ident=None, refresh_state=None, lockmode=None, - only_load_props=None, passive=None): - lockmode = lockmode or self._lockmode - - mapper = self._mapper_zero() - if not self._populate_existing and \ - not refresh_state and \ - not mapper.always_refresh and \ - lockmode is None: - instance = self.session.identity_map.get(key) - if instance: - # item present in identity map with a different class - if not issubclass(instance.__class__, mapper.class_): + """ + instance = session.identity_map.get(key) + if instance: + + state = attributes.instance_state(instance) + + # expired - ensure it still exists + if state.expired: + if passive is attributes.PASSIVE_NO_FETCH: + # TODO: no coverage here + return attributes.PASSIVE_NO_RESULT + try: + state() + except orm_exc.ObjectDeletedError: + session._remove_newly_deleted(state) return None - - state = attributes.instance_state(instance) - - # expired - ensure it still exists - if state.expired: - if passive is attributes.PASSIVE_NO_FETCH: - return attributes.PASSIVE_NO_RESULT - try: - state() - except orm_exc.ObjectDeletedError: - self.session._remove_newly_deleted(state) - return None - return instance - elif passive is attributes.PASSIVE_NO_FETCH: - return attributes.PASSIVE_NO_RESULT - - if ident is None: - if key is not None: - ident = key[1] + return instance else: - ident = util.to_list(ident) + return None + + def _load_on_ident(self, key, refresh_state=None, lockmode=None, + only_load_props=None): + """Load the given identity key from the database.""" + + lockmode = lockmode or self._lockmode + if key is not None: + ident = key[1] + else: + ident = None + if refresh_state is None: q = self._clone() q._get_condition() @@ -1924,11 +1948,7 @@ class Query(object): q = self._clone() if ident is not None: - if len(ident) != len(mapper.primary_key): - raise sa_exc.InvalidRequestError( - "Incorrect number of values in identifier to formulate " - "primary key for query.get(); primary key columns are %s" % - ','.join("'%s'" % c for c in mapper.primary_key)) + mapper = self._mapper_zero() (_get_clause, _get_params) = mapper._get_clause diff --git a/lib/sqlalchemy/orm/session.py b/lib/sqlalchemy/orm/session.py index 30a84bf1ab..e2c1308b89 100644 --- a/lib/sqlalchemy/orm/session.py +++ b/lib/sqlalchemy/orm/session.py @@ -906,7 +906,7 @@ class Session(object): self._expire_state(state, attribute_names) - if self.query(_object_mapper(instance))._get( + if self.query(_object_mapper(instance))._load_on_ident( state.key, refresh_state=state, lockmode=lockmode, only_load_props=attribute_names) is None: diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 1cea5349a9..a6711ae261 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -12,12 +12,14 @@ from sqlalchemy import sql, util, log from sqlalchemy.sql import util as sql_util from sqlalchemy.sql import visitors, expression, operators from sqlalchemy.orm import mapper, attributes, interfaces, exc as orm_exc +from sqlalchemy.orm.mapper import _none_set from sqlalchemy.orm.interfaces import ( LoaderStrategy, StrategizedOption, MapperOption, PropertyOption, serialize_path, deserialize_path, StrategizedProperty ) from sqlalchemy.orm import session as sessionlib from sqlalchemy.orm import util as mapperutil +from sqlalchemy.orm.query import Query import itertools def _register_attribute(strategy, mapper, useobject, @@ -282,13 +284,6 @@ class LoadDeferredColumns(object): # narrow the keys down to just those which have no history group = [k for k in toload if k in state.unmodified] - if strategy._should_log_debug(): - strategy.logger.debug( - "deferred load %s group %s", - (mapperutil.state_attribute_str(state, self.key), - group and ','.join(group) or 'None') - ) - session = sessionlib._state_session(state) if session is None: raise orm_exc.DetachedInstanceError( @@ -298,8 +293,7 @@ class LoadDeferredColumns(object): ) query = session.query(localparent) - ident = state.key[1] - query._get(None, ident=ident, + query._load_on_ident(state.key, only_load_props=group, refresh_state=state) return attributes.ATTR_WAS_SET @@ -588,11 +582,6 @@ class LoadLazyAttribute(object): ): return attributes.PASSIVE_NO_RESULT - if strategy._should_log_debug(): - strategy.logger.debug("loading %s", - mapperutil.state_attribute_str( - state, self.key)) - session = sessionlib._state_session(state) if session is None: raise orm_exc.DetachedInstanceError( @@ -600,52 +589,50 @@ class LoadLazyAttribute(object): "lazy load operation of attribute '%s' cannot proceed" % (mapperutil.state_str(state), self.key) ) - - q = session.query(prop.mapper)._adapt_all_clauses() - - # don't autoflush on pending - # this would be something that's prominent in the - # docs and such - if pending: - q = q.autoflush(False) - - if state.load_path: - q = q._with_current_path(state.load_path + (self.key,)) - # if we have a simple primary key load, use mapper.get() - # to possibly save a DB round trip + # if we have a simple primary key load, check the + # identity map without generating a Query at all if strategy.use_get: - ident = [] - allnulls = True if session._flushing: get_attr = instance_mapper._get_committed_state_attr_by_column else: get_attr = instance_mapper._get_state_attr_by_column - - # The many-to-one get is intended to be very fast. Note - # that we don't want to autoflush() if the get() doesn't - # actually have to hit the DB. It is now not necessary - # now that we use the pending attribute state. - for primary_key in prop.mapper.primary_key: - val = get_attr( - state, - state.dict, - strategy._equated_columns[primary_key], - passive=passive) - if val is attributes.PASSIVE_NO_RESULT: - return val - allnulls = allnulls and val is None - ident.append(val) + + ident = [ + get_attr( + state, + state.dict, + strategy._equated_columns[pk], + passive=passive) + for pk in prop.mapper.primary_key + ] + if attributes.PASSIVE_NO_RESULT in ident: + return attributes.PASSIVE_NO_RESULT - if allnulls: + if _none_set.issuperset(ident): return None - if state.load_options: - q = q._conditional_options(*state.load_options) - key = prop.mapper.identity_key_from_primary_key(ident) - return q._get(key, ident, passive=passive) + instance = Query._get_from_identity(session, key, passive) + if instance is not None: + return instance + elif passive is attributes.PASSIVE_NO_FETCH: + return attributes.PASSIVE_NO_RESULT + + q = session.query(prop.mapper)._adapt_all_clauses() + + # don't autoflush on pending + if pending: + q = q.autoflush(False) + if state.load_path: + q = q._with_current_path(state.load_path + (self.key,)) + + if state.load_options: + q = q._conditional_options(*state.load_options) + + if strategy.use_get: + return q._load_on_ident(key) if prop.order_by: q = q.order_by(*util.to_list(prop.order_by)) @@ -658,9 +645,6 @@ class LoadLazyAttribute(object): not isinstance(rev.strategy, LazyLoader): q = q.options(EagerLazyOption((rev.key,), lazy='select')) - if state.load_options: - q = q._conditional_options(*state.load_options) - lazy_clause = strategy.lazy_clause(state) if pending: diff --git a/test/aaa_profiling/test_orm.py b/test/aaa_profiling/test_orm.py index a3bf816317..41d15d5cea 100644 --- a/test/aaa_profiling/test_orm.py +++ b/test/aaa_profiling/test_orm.py @@ -2,11 +2,11 @@ from test.lib.testing import eq_, assert_raises, \ assert_raises_message from sqlalchemy import exc as sa_exc, util, Integer, String, ForeignKey from sqlalchemy.orm import exc as orm_exc, mapper, relationship, \ - sessionmaker + sessionmaker, Session from test.lib import testing, profiling from test.orm import _base from test.lib.schema import Table, Column - +import sys class MergeTest(_base.MappedTest): @@ -80,7 +80,9 @@ class MergeTest(_base.MappedTest): # (py2.6) @profiling.function_call_count(1257, - versions={'2.6+cextension':1194, '2.4': 807} + versions={'2.5':1191, '2.6':1191, + '2.6+cextension':1194, + '2.4': 807} ) def go(): p2 = sess2.merge(p1) @@ -90,3 +92,86 @@ class MergeTest(_base.MappedTest): sess2 = sessionmaker()() self.assert_sql_count(testing.db, go, 2) + +class LoadManyToOneFromIdentityTest(_base.MappedTest): + """test overhead associated with many-to-one fetches. + + Prior to the refactor of LoadLazyAttribute and + query._get(), the load from identity map took 2x + as many calls (65K calls here instead of around 33K) + to load 1000 related objects from the identity map. + + """ + + # 2.4's profiler has different callcounts + __skip_if__ = lambda : sys.version_info < (2, 5), + + @classmethod + def define_tables(cls, metadata): + parent = Table('parent', metadata, + Column('id', Integer, primary_key=True), + Column('data', String(20)), + Column('child_id', Integer, ForeignKey('child.id')) + ) + + child = Table('child', metadata, + Column('id', Integer,primary_key=True), + Column('data', String(20)) + ) + + @classmethod + def setup_classes(cls): + class Parent(_base.BasicEntity): + pass + + class Child(_base.BasicEntity): + pass + + @classmethod + @testing.resolve_artifact_names + def setup_mappers(cls): + mapper(Parent, parent, properties={ + 'child': relationship(Child)}) + mapper(Child, child) + + @classmethod + @testing.resolve_artifact_names + def insert_data(cls): + child.insert().execute([ + {'id':i, 'data':'c%d' % i} + for i in xrange(1, 251) + ]) + parent.insert().execute([ + { + 'id':i, + 'data':'p%dc%d' % (i, (i % 250) + 1), + 'child_id':(i % 250) + 1 + } + for i in xrange(1, 1000) + ]) + + @testing.resolve_artifact_names + def test_many_to_one_load_no_identity(self): + sess = Session() + parents = sess.query(Parent).all() + + + @profiling.function_call_count(138289, variance=.2) + def go(): + for p in parents: + p.child + go() + + @testing.resolve_artifact_names + def test_many_to_one_load_identity(self): + sess = Session() + parents = sess.query(Parent).all() + children = sess.query(Child).all() + + @profiling.function_call_count(33977) + def go(): + for p in parents: + p.child + go() + + diff --git a/test/lib/requires.py b/test/lib/requires.py index 08fde66c30..d43b601a45 100644 --- a/test/lib/requires.py +++ b/test/lib/requires.py @@ -303,7 +303,7 @@ def python25(fn): "Python version 2.5 or greater is required" ) ) - + def _has_cextensions(): try: from sqlalchemy import cresultproxy, cprocessors diff --git a/test/orm/test_utils.py b/test/orm/test_utils.py index 1ee34c50fd..e196a96172 100644 --- a/test/orm/test_utils.py +++ b/test/orm/test_utils.py @@ -193,9 +193,9 @@ class IdentityKeyTest(_fixtures.FixtureTest): def test_identity_key_1(self): mapper(User, users) - key = util.identity_key(User, 1) + key = util.identity_key(User, [1]) eq_(key, (User, (1,))) - key = util.identity_key(User, ident=1) + key = util.identity_key(User, ident=[1]) eq_(key, (User, (1,))) @testing.resolve_artifact_names