From: Mike Bayer Date: Thu, 28 Aug 2014 16:25:21 +0000 (-0400) Subject: - A new implementation for :class:`.KeyedTuple` used by the X-Git-Tag: rel_1_0_0b1~205^2~39 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=685a014c644477a7e7cdb6aad4436d4422167209;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - A new implementation for :class:`.KeyedTuple` used by the :class:`.Query` object offers dramatic speed improvements when fetching large numbers of column-oriented rows. fixes #3176 --- diff --git a/doc/build/changelog/changelog_10.rst b/doc/build/changelog/changelog_10.rst index b0ace0d1de..fe17957911 100644 --- a/doc/build/changelog/changelog_10.rst +++ b/doc/build/changelog/changelog_10.rst @@ -22,6 +22,18 @@ on compatibility concerns, see :doc:`/changelog/migration_10`. + .. change:: + :tags: feature, orm + :tickets: 3176 + + A new implementation for :class:`.KeyedTuple` used by the + :class:`.Query` object offers dramatic speed improvements when + fetching large numbers of column-oriented rows. + + .. seealso:: + + :ref:`feature_3176` + .. change:: :tags: feature, orm :tickets: 3008 diff --git a/doc/build/changelog/migration_10.rst b/doc/build/changelog/migration_10.rst index 3fb1b87636..c5b214efe0 100644 --- a/doc/build/changelog/migration_10.rst +++ b/doc/build/changelog/migration_10.rst @@ -284,6 +284,55 @@ based on the following criteria: operation; most DBAPIs support this correctly now. +.. _feature_3176: + +New KeyedTuple implementation dramatically faster +------------------------------------------------- + +We took a look into the :class:`.KeyedTuple` implementation in the hopes +of improving queries like this:: + + rows = sess.query(Foo.a, Foo.b, Foo.c).all() + +The :class:`.KeyedTuple` class is used rather than Python's +``collections.namedtuple()``, because the latter has a very complex +type-creation routine that benchmarks much slower than :class:`.KeyedTuple`. 
+However, when fetching hundreds of thousands of rows, +``collections.namedtuple()`` quickly overtakes :class:`.KeyedTuple`, which +becomes dramatically slower as the number of instances created goes up. What to do? +A new type that hedges between the approaches of both. Benching +all three types for "size" (number of rows returned) and "num" +(number of distinct queries), the new "lightweight keyed tuple" either +outperforms both, or lags very slightly behind the faster object, depending on +the scenario. In the "sweet spot", where we are both creating a good number +of new types as well as fetching a good number of rows, the lightweight +object totally smokes both namedtuple and KeyedTuple:: + + ----------------- + size=10 num=10000 # few rows, lots of queries + namedtuple: 3.60302400589 # namedtuple falls over + keyedtuple: 0.255059957504 # KeyedTuple very fast + lw keyed tuple: 0.582715034485 # lw keyed trails right on KeyedTuple + ----------------- + size=100 num=1000 # <--- sweet spot + namedtuple: 0.365247011185 + keyedtuple: 0.24896979332 + lw keyed tuple: 0.0889317989349 # lw keyed blows both away! + ----------------- + size=10000 num=100 + namedtuple: 0.572599887848 + keyedtuple: 2.54251694679 + lw keyed tuple: 0.613876104355 + ----------------- + size=1000000 num=10 # few queries, lots of rows + namedtuple: 5.79669594765 # namedtuple very fast + keyedtuple: 28.856498003 # KeyedTuple falls over + lw keyed tuple: 6.74346804619 # lw keyed trails right on namedtuple + + +:ticket:`3176` + + + .. 
_feature_2963: .info dictionary improvements diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py index 232eb89de4..934967b272 100644 --- a/lib/sqlalchemy/orm/loading.py +++ b/lib/sqlalchemy/orm/loading.py @@ -54,6 +54,9 @@ def instances(query, cursor, context): for query_entity in query._entities ])) + if not custom_rows and not single_entity: + keyed_tuple = util.lightweight_named_tuple('result', labels) + while True: context.progress = {} context.partials = {} @@ -72,8 +75,8 @@ def instances(query, cursor, context): elif single_entity: rows = [process[0](row, None) for row in fetch] else: - rows = [util.KeyedTuple([proc(row, None) for proc in process], - labels) for row in fetch] + rows = [keyed_tuple([proc(row, None) for proc in process]) + for row in fetch] if filtered: rows = util.unique_list(rows, filter_fn) @@ -126,6 +129,7 @@ def merge_result(querylib, query, iterator, load=True): if isinstance(e, querylib._MapperEntity)] result = [] keys = [ent._label_name for ent in query._entities] + keyed_tuple = util.lightweight_named_tuple('result', keys) for row in iterator: newrow = list(row) for i in mapped_entities: @@ -134,7 +138,7 @@ def merge_result(querylib, query, iterator, load=True): attributes.instance_state(newrow[i]), attributes.instance_dict(newrow[i]), load=load, _recursive={}) - result.append(util.KeyedTuple(newrow, keys)) + result.append(keyed_tuple(newrow)) return iter(result) finally: diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 12e11b26cd..15e0aa8810 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -3275,9 +3275,10 @@ class Bundle(object): :ref:`bundles` - includes an example of subclassing. 
""" + keyed_tuple = util.lightweight_named_tuple('result', labels) + def proc(row, result): - return util.KeyedTuple( - [proc(row, None) for proc in procs], labels) + return keyed_tuple([proc(row, None) for proc in procs]) return proc diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py index 15b2ac38ee..d882c26565 100644 --- a/lib/sqlalchemy/util/__init__.py +++ b/lib/sqlalchemy/util/__init__.py @@ -21,7 +21,7 @@ from ._collections import KeyedTuple, ImmutableContainer, immutabledict, \ UniqueAppender, PopulateDict, EMPTY_SET, to_list, to_set, \ to_column_set, update_copy, flatten_iterator, \ LRUCache, ScopedRegistry, ThreadLocalRegistry, WeakSequence, \ - coerce_generator_arg + coerce_generator_arg, lightweight_named_tuple from .langhelpers import iterate_attributes, class_hierarchy, \ portable_instancemethod, unbound_method_to_callable, \ diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py index 0904d454eb..a1fbc0fa09 100644 --- a/lib/sqlalchemy/util/_collections.py +++ b/lib/sqlalchemy/util/_collections.py @@ -17,7 +17,20 @@ import types EMPTY_SET = frozenset() -class KeyedTuple(tuple): +class AbstractKeyedTuple(tuple): + def keys(self): + """Return a list of string key names for this :class:`.KeyedTuple`. + + .. seealso:: + + :attr:`.KeyedTuple._fields` + + """ + + return list(self._fields) + + +class KeyedTuple(AbstractKeyedTuple): """``tuple`` subclass that adds labeled names. E.g.:: @@ -56,23 +69,13 @@ class KeyedTuple(tuple): def __new__(cls, vals, labels=None): t = tuple.__new__(cls, vals) - t._labels = [] if labels: t.__dict__.update(zip(labels, vals)) - t._labels = labels + else: + labels = [] + t.__dict__['_labels'] = labels return t - def keys(self): - """Return a list of string key names for this :class:`.KeyedTuple`. - - .. 
seealso:: - - :attr:`.KeyedTuple._fields` - - """ - - return [l for l in self._labels if l is not None] - @property def _fields(self): """Return a tuple of string key names for this :class:`.KeyedTuple`. @@ -86,7 +89,10 @@ class KeyedTuple(tuple): :meth:`.KeyedTuple.keys` """ - return tuple(self.keys()) + return tuple([l for l in self._labels if l is not None]) + + def __setattr__(self, key, value): + raise AttributeError("Can't set attribute: %s" % key) def _asdict(self): """Return the contents of this :class:`.KeyedTuple` as a dictionary. @@ -100,6 +106,40 @@ class KeyedTuple(tuple): return dict((key, self.__dict__[key]) for key in self.keys()) +class _LW(AbstractKeyedTuple): + __slots__ = () + + def __new__(cls, vals): + return tuple.__new__(cls, vals) + + def __reduce__(self): + # for pickling, degrade down to the regular + # KeyedTuple, thus avoiding anonymous class pickling + # difficulties + return KeyedTuple, (list(self), self._real_fields) + + def _asdict(self): + """Return the contents of this :class:`.KeyedTuple` as a dictionary.""" + + d = dict(zip(self._real_fields, self)) + d.pop(None, None) + return d + + +def lightweight_named_tuple(name, fields): + + tp_cls = type(name, (_LW,), {}) + for idx, field in enumerate(fields): + if field is None: + continue + setattr(tp_cls, field, property(operator.itemgetter(idx))) + + tp_cls._real_fields = fields + tp_cls._fields = tuple([f for f in fields if f is not None]) + + return tp_cls + + class ImmutableContainer(object): def _immutable(self, *arg, **kw): raise TypeError("%s object is immutable" % self.__class__.__name__) diff --git a/test/base/test_utils.py b/test/base/test_utils.py index 4a53aa54f0..a378b0160c 100644 --- a/test/base/test_utils.py +++ b/test/base/test_utils.py @@ -7,26 +7,26 @@ from sqlalchemy.testing.util import picklers, gc_collect from sqlalchemy.util import classproperty, WeakSequence, get_callable_argspec from sqlalchemy.sql import column -class KeyedTupleTest(): + +class 
_KeyedTupleTest(object): + + def _fixture(self, values, labels): + raise NotImplementedError() def test_empty(self): - keyed_tuple = util.KeyedTuple([]) - eq_(type(keyed_tuple), util.KeyedTuple) + keyed_tuple = self._fixture([], []) eq_(str(keyed_tuple), '()') eq_(len(keyed_tuple), 0) - eq_(keyed_tuple.__dict__, {'_labels': []}) eq_(list(keyed_tuple.keys()), []) eq_(keyed_tuple._fields, ()) eq_(keyed_tuple._asdict(), {}) def test_values_but_no_labels(self): - keyed_tuple = util.KeyedTuple([1, 2]) - eq_(type(keyed_tuple), util.KeyedTuple) + keyed_tuple = self._fixture([1, 2], []) eq_(str(keyed_tuple), '(1, 2)') eq_(len(keyed_tuple), 2) - eq_(keyed_tuple.__dict__, {'_labels': []}) eq_(list(keyed_tuple.keys()), []) eq_(keyed_tuple._fields, ()) eq_(keyed_tuple._asdict(), {}) @@ -35,14 +35,14 @@ class KeyedTupleTest(): eq_(keyed_tuple[1], 2) def test_basic_creation(self): - keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b']) + keyed_tuple = self._fixture([1, 2], ['a', 'b']) eq_(str(keyed_tuple), '(1, 2)') eq_(list(keyed_tuple.keys()), ['a', 'b']) eq_(keyed_tuple._fields, ('a', 'b')) eq_(keyed_tuple._asdict(), {'a': 1, 'b': 2}) def test_basic_index_access(self): - keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b']) + keyed_tuple = self._fixture([1, 2], ['a', 'b']) eq_(keyed_tuple[0], 1) eq_(keyed_tuple[1], 2) @@ -51,7 +51,7 @@ class KeyedTupleTest(): assert_raises(IndexError, should_raise) def test_basic_attribute_access(self): - keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b']) + keyed_tuple = self._fixture([1, 2], ['a', 'b']) eq_(keyed_tuple.a, 1) eq_(keyed_tuple.b, 2) @@ -60,12 +60,9 @@ class KeyedTupleTest(): assert_raises(AttributeError, should_raise) def test_none_label(self): - keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', None, 'b']) + keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b']) eq_(str(keyed_tuple), '(1, 2, 3)') - # TODO: consider not allowing None labels - expected = {'a': 1, None: 2, 'b': 3, '_labels': ['a', None, 'b']} - eq_(keyed_tuple.__dict__, 
expected) eq_(list(keyed_tuple.keys()), ['a', 'b']) eq_(keyed_tuple._fields, ('a', 'b')) eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3}) @@ -80,12 +77,9 @@ class KeyedTupleTest(): eq_(keyed_tuple[2], 3) def test_duplicate_labels(self): - keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', 'b', 'b']) + keyed_tuple = self._fixture([1, 2, 3], ['a', 'b', 'b']) eq_(str(keyed_tuple), '(1, 2, 3)') - # TODO: consider not allowing duplicate labels - expected = {'a': 1, 'b': 3, '_labels': ['a', 'b', 'b']} - eq_(keyed_tuple.__dict__, expected) eq_(list(keyed_tuple.keys()), ['a', 'b', 'b']) eq_(keyed_tuple._fields, ('a', 'b', 'b')) eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3}) @@ -100,21 +94,41 @@ class KeyedTupleTest(): eq_(keyed_tuple[2], 3) def test_immutable(self): - keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b']) + keyed_tuple = self._fixture([1, 2], ['a', 'b']) eq_(str(keyed_tuple), '(1, 2)') - # attribute access: mutable eq_(keyed_tuple.a, 1) - keyed_tuple.a = 100 - eq_(keyed_tuple.a, 100) - keyed_tuple.c = 300 - eq_(keyed_tuple.c, 300) - # index access: immutable + assert_raises(AttributeError, setattr, keyed_tuple, "a", 5) + def should_raise(): keyed_tuple[0] = 100 assert_raises(TypeError, should_raise) + def test_serialize(self): + + keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b']) + + for loads, dumps in picklers(): + kt = loads(dumps(keyed_tuple)) + + eq_(str(kt), '(1, 2, 3)') + + eq_(list(kt.keys()), ['a', 'b']) + eq_(kt._fields, ('a', 'b')) + eq_(kt._asdict(), {'a': 1, 'b': 3}) + + +class KeyedTupleTest(_KeyedTupleTest, fixtures.TestBase): + def _fixture(self, values, labels): + return util.KeyedTuple(values, labels) + + +class LWKeyedTupleTest(_KeyedTupleTest, fixtures.TestBase): + def _fixture(self, values, labels): + return util.lightweight_named_tuple('n', labels)(values) + + class WeakSequenceTest(fixtures.TestBase): @testing.requires.predictable_gc def test_cleanout_elements(self):