]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
- A new implementation for :class:`.KeyedTuple` used by the
authorMike Bayer <mike_mp@zzzcomputing.com>
Thu, 28 Aug 2014 16:25:21 +0000 (12:25 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Thu, 28 Aug 2014 16:25:21 +0000 (12:25 -0400)
:class:`.Query` object offers dramatic speed improvements when
fetching large numbers of column-oriented rows.
fixes #3176

doc/build/changelog/changelog_10.rst
doc/build/changelog/migration_10.rst
lib/sqlalchemy/orm/loading.py
lib/sqlalchemy/orm/query.py
lib/sqlalchemy/util/__init__.py
lib/sqlalchemy/util/_collections.py
test/base/test_utils.py

index b0ace0d1de7fc0b3edde69558ca04c5dc5785cdc..fe1795791139c542012afb752f3909cfc6cbecc0 100644 (file)
     on compatibility concerns, see :doc:`/changelog/migration_10`.
 
 
+    .. change::
+        :tags: feature, orm
+        :tickets: 3176
+
+        A new implementation for :class:`.KeyedTuple` used by the
+        :class:`.Query` object offers dramatic speed improvements when
+        fetching large numbers of column-oriented rows.
+
+        .. seealso::
+
+            :ref:`feature_3176`
+
     .. change::
         :tags: feature, orm
         :tickets: 3008
index 3fb1b87636d2b62ac324924238b5bf9e98c3559d..c5b214efe0a631609b794cd4e0e6110a3eb937a4 100644 (file)
@@ -284,6 +284,55 @@ based on the following criteria:
   operation; most DBAPIs support this correctly now.
 
 
+.. _feature_3176:
+
+New KeyedTuple implementation dramatically faster
+-------------------------------------------------
+
+We took a look into the :class:`.KeyedTuple` implementation in the hopes
+of improving queries like this::
+
+       rows = sess.query(Foo.a, Foo.b, Foo.c).all()
+
+The :class:`.KeyedTuple` class is used rather than Python's
+``collections.namedtuple()``, because the latter has a very complex
+type-creation routine that benchmarks much slower than :class:`.KeyedTuple`.
+However, when fetching hundreds of thousands of rows,
+``collections.namedtuple()`` quickly overtakes :class:`.KeyedTuple`, which
+becomes dramatically slower as the number of instances created goes up.
+What to do?  Introduce a new type that hedges between both approaches.
+Benchmarking all three types for "size" (number of rows returned) and "num"
+(number of distinct queries), the new "lightweight keyed tuple" either
+outperforms both, or lags very slightly behind the faster object, depending
+on the scenario.  In the "sweet spot", where we are both creating a good number
+of new types as well as fetching a good number of rows, the lightweight
+object totally smokes both namedtuple and KeyedTuple::
+
+       -----------------
+       size=10 num=10000                 # few rows, lots of queries
+       namedtuple: 3.60302400589         # namedtuple falls over
+       keyedtuple: 0.255059957504        # KeyedTuple very fast
+       lw keyed tuple: 0.582715034485    # lw keyed trails right on KeyedTuple
+       -----------------
+       size=100 num=1000                 # <--- sweet spot
+       namedtuple: 0.365247011185
+       keyedtuple: 0.24896979332
+       lw keyed tuple: 0.0889317989349   # lw keyed blows both away!
+       -----------------
+       size=10000 num=100
+       namedtuple: 0.572599887848
+       keyedtuple: 2.54251694679
+       lw keyed tuple: 0.613876104355
+       -----------------
+       size=1000000 num=10               # few queries, lots of rows
+       namedtuple: 5.79669594765         # namedtuple very fast
+       keyedtuple: 28.856498003          # KeyedTuple falls over
+       lw keyed tuple: 6.74346804619     # lw keyed trails right on namedtuple
+
+
+:ticket:`3176`
+
+
 .. _feature_2963:
 
 .info dictionary improvements
index 232eb89de44bb120d44a8d22389d9032e41cefe2..934967b272519d14a07fcada151e8c23e8d5fdba 100644 (file)
@@ -54,6 +54,9 @@ def instances(query, cursor, context):
             for query_entity in query._entities
         ]))
 
+    if not custom_rows and not single_entity:
+        keyed_tuple = util.lightweight_named_tuple('result', labels)
+
     while True:
         context.progress = {}
         context.partials = {}
@@ -72,8 +75,8 @@ def instances(query, cursor, context):
         elif single_entity:
             rows = [process[0](row, None) for row in fetch]
         else:
-            rows = [util.KeyedTuple([proc(row, None) for proc in process],
-                                    labels) for row in fetch]
+            rows = [keyed_tuple([proc(row, None) for proc in process])
+                    for row in fetch]
 
         if filtered:
             rows = util.unique_list(rows, filter_fn)
@@ -126,6 +129,7 @@ def merge_result(querylib, query, iterator, load=True):
                                if isinstance(e, querylib._MapperEntity)]
             result = []
             keys = [ent._label_name for ent in query._entities]
+            keyed_tuple = util.lightweight_named_tuple('result', keys)
             for row in iterator:
                 newrow = list(row)
                 for i in mapped_entities:
@@ -134,7 +138,7 @@ def merge_result(querylib, query, iterator, load=True):
                             attributes.instance_state(newrow[i]),
                             attributes.instance_dict(newrow[i]),
                             load=load, _recursive={})
-                result.append(util.KeyedTuple(newrow, keys))
+                result.append(keyed_tuple(newrow))
 
         return iter(result)
     finally:
index 12e11b26cd7d407f6e69a1190f43aca0d8924034..15e0aa88102f20302d309041665c2ecb05b21d83 100644 (file)
@@ -3275,9 +3275,10 @@ class Bundle(object):
             :ref:`bundles` - includes an example of subclassing.
 
         """
+        keyed_tuple = util.lightweight_named_tuple('result', labels)
+
         def proc(row, result):
-            return util.KeyedTuple(
-                [proc(row, None) for proc in procs], labels)
+            return keyed_tuple([proc(row, None) for proc in procs])
         return proc
 
 
index 15b2ac38eec0ee8cdde0832a87cc51e2aec995d1..d882c26565db3092bf1614aaeeb4b00a44360693 100644 (file)
@@ -21,7 +21,7 @@ from ._collections import KeyedTuple, ImmutableContainer, immutabledict, \
     UniqueAppender, PopulateDict, EMPTY_SET, to_list, to_set, \
     to_column_set, update_copy, flatten_iterator, \
     LRUCache, ScopedRegistry, ThreadLocalRegistry, WeakSequence, \
-    coerce_generator_arg
+    coerce_generator_arg, lightweight_named_tuple
 
 from .langhelpers import iterate_attributes, class_hierarchy, \
     portable_instancemethod, unbound_method_to_callable, \
index 0904d454ebd5bb81fc39de187f773ec8bb44bd94..a1fbc0fa09bb215a38dfb909762e817f93bfd979 100644 (file)
@@ -17,7 +17,20 @@ import types
 EMPTY_SET = frozenset()
 
 
-class KeyedTuple(tuple):
+class AbstractKeyedTuple(tuple):
+    def keys(self):
+        """Return a list of string key names for this :class:`.KeyedTuple`.
+
+        .. seealso::
+
+            :attr:`.KeyedTuple._fields`
+
+        """
+
+        return list(self._fields)
+
+
+class KeyedTuple(AbstractKeyedTuple):
     """``tuple`` subclass that adds labeled names.
 
     E.g.::
@@ -56,23 +69,13 @@ class KeyedTuple(tuple):
 
     def __new__(cls, vals, labels=None):
         t = tuple.__new__(cls, vals)
-        t._labels = []
         if labels:
             t.__dict__.update(zip(labels, vals))
-            t._labels = labels
+        else:
+            labels = []
+        t.__dict__['_labels'] = labels
         return t
 
-    def keys(self):
-        """Return a list of string key names for this :class:`.KeyedTuple`.
-
-        .. seealso::
-
-            :attr:`.KeyedTuple._fields`
-
-        """
-
-        return [l for l in self._labels if l is not None]
-
     @property
     def _fields(self):
         """Return a tuple of string key names for this :class:`.KeyedTuple`.
@@ -86,7 +89,10 @@ class KeyedTuple(tuple):
             :meth:`.KeyedTuple.keys`
 
         """
-        return tuple(self.keys())
+        return tuple([l for l in self._labels if l is not None])
+
+    def __setattr__(self, key, value):
+        raise AttributeError("Can't set attribute: %s" % key)
 
     def _asdict(self):
         """Return the contents of this :class:`.KeyedTuple` as a dictionary.
@@ -100,6 +106,40 @@ class KeyedTuple(tuple):
         return dict((key, self.__dict__[key]) for key in self.keys())
 
 
+class _LW(AbstractKeyedTuple):
+    __slots__ = ()
+
+    def __new__(cls, vals):
+        return tuple.__new__(cls, vals)
+
+    def __reduce__(self):
+        # for pickling, degrade down to the regular
+        # KeyedTuple, thus avoiding anonymous class pickling
+        # difficulties
+        return KeyedTuple, (list(self), self._real_fields)
+
+    def _asdict(self):
+        """Return the contents of this :class:`.KeyedTuple` as a dictionary."""
+
+        d = dict(zip(self._real_fields, self))
+        d.pop(None, None)
+        return d
+
+
+def lightweight_named_tuple(name, fields):
+
+    tp_cls = type(name, (_LW,), {})
+    for idx, field in enumerate(fields):
+        if field is None:
+            continue
+        setattr(tp_cls, field, property(operator.itemgetter(idx)))
+
+    tp_cls._real_fields = fields
+    tp_cls._fields = tuple([f for f in fields if f is not None])
+
+    return tp_cls
+
+
 class ImmutableContainer(object):
     def _immutable(self, *arg, **kw):
         raise TypeError("%s object is immutable" % self.__class__.__name__)
index 4a53aa54f01317ab3b1cbb7bcbe6831ef2333d06..a378b0160c790144f2525b9a436390998caca72c 100644 (file)
@@ -7,26 +7,26 @@ from sqlalchemy.testing.util import picklers, gc_collect
 from sqlalchemy.util import classproperty, WeakSequence, get_callable_argspec
 from sqlalchemy.sql import column
 
-class KeyedTupleTest():
+
+class _KeyedTupleTest(object):
+
+    def _fixture(self, values, labels):
+        raise NotImplementedError()
 
     def test_empty(self):
-        keyed_tuple = util.KeyedTuple([])
-        eq_(type(keyed_tuple), util.KeyedTuple)
+        keyed_tuple = self._fixture([], [])
         eq_(str(keyed_tuple), '()')
         eq_(len(keyed_tuple), 0)
 
-        eq_(keyed_tuple.__dict__, {'_labels': []})
         eq_(list(keyed_tuple.keys()), [])
         eq_(keyed_tuple._fields, ())
         eq_(keyed_tuple._asdict(), {})
 
     def test_values_but_no_labels(self):
-        keyed_tuple = util.KeyedTuple([1, 2])
-        eq_(type(keyed_tuple), util.KeyedTuple)
+        keyed_tuple = self._fixture([1, 2], [])
         eq_(str(keyed_tuple), '(1, 2)')
         eq_(len(keyed_tuple), 2)
 
-        eq_(keyed_tuple.__dict__, {'_labels': []})
         eq_(list(keyed_tuple.keys()), [])
         eq_(keyed_tuple._fields, ())
         eq_(keyed_tuple._asdict(), {})
@@ -35,14 +35,14 @@ class KeyedTupleTest():
         eq_(keyed_tuple[1], 2)
 
     def test_basic_creation(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(str(keyed_tuple), '(1, 2)')
         eq_(list(keyed_tuple.keys()), ['a', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 2})
 
     def test_basic_index_access(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(keyed_tuple[0], 1)
         eq_(keyed_tuple[1], 2)
 
@@ -51,7 +51,7 @@ class KeyedTupleTest():
         assert_raises(IndexError, should_raise)
 
     def test_basic_attribute_access(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(keyed_tuple.a, 1)
         eq_(keyed_tuple.b, 2)
 
@@ -60,12 +60,9 @@ class KeyedTupleTest():
         assert_raises(AttributeError, should_raise)
 
     def test_none_label(self):
-        keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', None, 'b'])
+        keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b'])
         eq_(str(keyed_tuple), '(1, 2, 3)')
 
-        # TODO: consider not allowing None labels
-        expected = {'a': 1, None: 2, 'b': 3, '_labels': ['a', None, 'b']}
-        eq_(keyed_tuple.__dict__, expected)
         eq_(list(keyed_tuple.keys()), ['a', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3})
@@ -80,12 +77,9 @@ class KeyedTupleTest():
         eq_(keyed_tuple[2], 3)
 
     def test_duplicate_labels(self):
-        keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', 'b', 'b'])
+        keyed_tuple = self._fixture([1, 2, 3], ['a', 'b', 'b'])
         eq_(str(keyed_tuple), '(1, 2, 3)')
 
-        # TODO: consider not allowing duplicate labels
-        expected = {'a': 1, 'b': 3, '_labels': ['a', 'b', 'b']}
-        eq_(keyed_tuple.__dict__, expected)
         eq_(list(keyed_tuple.keys()), ['a', 'b', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3})
@@ -100,21 +94,41 @@ class KeyedTupleTest():
         eq_(keyed_tuple[2], 3)
 
     def test_immutable(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(str(keyed_tuple), '(1, 2)')
 
-        # attribute access: mutable
         eq_(keyed_tuple.a, 1)
-        keyed_tuple.a = 100
-        eq_(keyed_tuple.a, 100)
-        keyed_tuple.c = 300
-        eq_(keyed_tuple.c, 300)
 
-        # index access: immutable
+        assert_raises(AttributeError, setattr, keyed_tuple, "a", 5)
+
         def should_raise():
             keyed_tuple[0] = 100
         assert_raises(TypeError, should_raise)
 
+    def test_serialize(self):
+
+        keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b'])
+
+        for loads, dumps in picklers():
+            kt = loads(dumps(keyed_tuple))
+
+            eq_(str(kt), '(1, 2, 3)')
+
+            eq_(list(kt.keys()), ['a', 'b'])
+            eq_(kt._fields, ('a', 'b'))
+            eq_(kt._asdict(), {'a': 1, 'b': 3})
+
+
+class KeyedTupleTest(_KeyedTupleTest, fixtures.TestBase):
+    def _fixture(self, values, labels):
+        return util.KeyedTuple(values, labels)
+
+
+class LWKeyedTupleTest(_KeyedTupleTest, fixtures.TestBase):
+    def _fixture(self, values, labels):
+        return util.lightweight_named_tuple('n', labels)(values)
+
+
 class WeakSequenceTest(fixtures.TestBase):
     @testing.requires.predictable_gc
     def test_cleanout_elements(self):