From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Thu, 28 Aug 2014 16:25:21 +0000 (-0400)
Subject: - A new implementation for :class:`.KeyedTuple` used by the
X-Git-Tag: rel_1_0_0b1~205^2~39
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=685a014c644477a7e7cdb6aad4436d4422167209;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git

- A new implementation for :class:`.KeyedTuple` used by the
:class:`.Query` object offers dramatic speed improvements when
fetching large numbers of column-oriented rows.
fixes #3176
---

diff --git a/doc/build/changelog/changelog_10.rst b/doc/build/changelog/changelog_10.rst
index b0ace0d1de..fe17957911 100644
--- a/doc/build/changelog/changelog_10.rst
+++ b/doc/build/changelog/changelog_10.rst
@@ -22,6 +22,18 @@
     on compatibility concerns, see :doc:`/changelog/migration_10`.
 
 
+    .. change::
+        :tags: feature, orm
+        :tickets: 3176
+
+        A new implementation for :class:`.KeyedTuple` used by the
+        :class:`.Query` object offers dramatic speed improvements when
+        fetching large numbers of column-oriented rows.
+
+        .. seealso::
+
+            :ref:`feature_3176`
+
     .. change::
         :tags: feature, orm
         :tickets: 3008
diff --git a/doc/build/changelog/migration_10.rst b/doc/build/changelog/migration_10.rst
index 3fb1b87636..c5b214efe0 100644
--- a/doc/build/changelog/migration_10.rst
+++ b/doc/build/changelog/migration_10.rst
@@ -284,6 +284,55 @@ based on the following criteria:
   operation; most DBAPIs support this correctly now.
 
 
+.. _feature_3176:
+
+New KeyedTuple implementation dramatically faster
+-------------------------------------------------
+
+We took a look into the :class:`.KeyedTuple` implementation in the hopes
+of improving queries like this::
+
+	rows = sess.query(Foo.a, Foo.b, Foo.c).all()
+
+The :class:`.KeyedTuple` class is used rather than Python's
+``collections.namedtuple()``, because the latter has a very complex
+type-creation routine that benchmarks much slower than :class:`.KeyedTuple`.
+However, when fetching hundreds of thousands of rows,
+``collections.namedtuple()`` quickly overtakes :class:`.KeyedTuple`, which
+becomes dramatically slower as the number of instances created goes up.
+What to do?  Use a new type that hedges between the approaches of both.
+When benching all three types for "size" (number of rows returned) and "num"
+(number of distinct queries), the new "lightweight keyed tuple" either
+outperforms both, or lags very slightly behind the faster object, depending
+on the scenario.  In the "sweet spot", where we are creating a good number
+of new types as well as fetching a good number of rows, the lightweight
+object totally smokes both namedtuple and KeyedTuple::
+
+	-----------------
+	size=10 num=10000                 # few rows, lots of queries
+	namedtuple: 3.60302400589         # namedtuple falls over
+	keyedtuple: 0.255059957504        # KeyedTuple very fast
+	lw keyed tuple: 0.582715034485    # lw keyed trails right on KeyedTuple
+	-----------------
+	size=100 num=1000                 # <--- sweet spot
+	namedtuple: 0.365247011185
+	keyedtuple: 0.24896979332
+	lw keyed tuple: 0.0889317989349   # lw keyed blows both away!
+	-----------------
+	size=10000 num=100
+	namedtuple: 0.572599887848
+	keyedtuple: 2.54251694679
+	lw keyed tuple: 0.613876104355
+	-----------------
+	size=1000000 num=10               # few queries, lots of rows
+	namedtuple: 5.79669594765         # namedtuple very fast
+	keyedtuple: 28.856498003          # KeyedTuple falls over
+	lw keyed tuple: 6.74346804619     # lw keyed trails right on namedtuple
+
+
+:ticket:`3176`
+
+
 .. _feature_2963:
 
 .info dictionary improvements
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index 232eb89de4..934967b272 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -54,6 +54,9 @@ def instances(query, cursor, context):
             for query_entity in query._entities
         ]))
 
+    if not custom_rows and not single_entity:
+        keyed_tuple = util.lightweight_named_tuple('result', labels)
+
     while True:
         context.progress = {}
         context.partials = {}
@@ -72,8 +75,8 @@ def instances(query, cursor, context):
         elif single_entity:
             rows = [process[0](row, None) for row in fetch]
         else:
-            rows = [util.KeyedTuple([proc(row, None) for proc in process],
-                                    labels) for row in fetch]
+            rows = [keyed_tuple([proc(row, None) for proc in process])
+                    for row in fetch]
 
         if filtered:
             rows = util.unique_list(rows, filter_fn)
@@ -126,6 +129,7 @@ def merge_result(querylib, query, iterator, load=True):
                                if isinstance(e, querylib._MapperEntity)]
             result = []
             keys = [ent._label_name for ent in query._entities]
+            keyed_tuple = util.lightweight_named_tuple('result', keys)
             for row in iterator:
                 newrow = list(row)
                 for i in mapped_entities:
@@ -134,7 +138,7 @@ def merge_result(querylib, query, iterator, load=True):
                             attributes.instance_state(newrow[i]),
                             attributes.instance_dict(newrow[i]),
                             load=load, _recursive={})
-                result.append(util.KeyedTuple(newrow, keys))
+                result.append(keyed_tuple(newrow))
 
         return iter(result)
     finally:
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 12e11b26cd..15e0aa8810 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -3275,9 +3275,10 @@ class Bundle(object):
             :ref:`bundles` - includes an example of subclassing.
 
         """
+        keyed_tuple = util.lightweight_named_tuple('result', labels)
+
         def proc(row, result):
-            return util.KeyedTuple(
-                [proc(row, None) for proc in procs], labels)
+            return keyed_tuple([proc(row, None) for proc in procs])
         return proc
 
 
diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py
index 15b2ac38ee..d882c26565 100644
--- a/lib/sqlalchemy/util/__init__.py
+++ b/lib/sqlalchemy/util/__init__.py
@@ -21,7 +21,7 @@ from ._collections import KeyedTuple, ImmutableContainer, immutabledict, \
     UniqueAppender, PopulateDict, EMPTY_SET, to_list, to_set, \
     to_column_set, update_copy, flatten_iterator, \
     LRUCache, ScopedRegistry, ThreadLocalRegistry, WeakSequence, \
-    coerce_generator_arg
+    coerce_generator_arg, lightweight_named_tuple
 
 from .langhelpers import iterate_attributes, class_hierarchy, \
     portable_instancemethod, unbound_method_to_callable, \
diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py
index 0904d454eb..a1fbc0fa09 100644
--- a/lib/sqlalchemy/util/_collections.py
+++ b/lib/sqlalchemy/util/_collections.py
@@ -17,7 +17,20 @@ import types
 EMPTY_SET = frozenset()
 
 
-class KeyedTuple(tuple):
+class AbstractKeyedTuple(tuple):
+    def keys(self):
+        """Return a list of string key names for this :class:`.KeyedTuple`.
+
+        .. seealso::
+
+            :attr:`.KeyedTuple._fields`
+
+        """
+
+        return list(self._fields)
+
+
+class KeyedTuple(AbstractKeyedTuple):
     """``tuple`` subclass that adds labeled names.
 
     E.g.::
@@ -56,23 +69,13 @@ class KeyedTuple(tuple):
 
     def __new__(cls, vals, labels=None):
         t = tuple.__new__(cls, vals)
-        t._labels = []
         if labels:
             t.__dict__.update(zip(labels, vals))
-            t._labels = labels
+        else:
+            labels = []
+        t.__dict__['_labels'] = labels
         return t
 
-    def keys(self):
-        """Return a list of string key names for this :class:`.KeyedTuple`.
-
-        .. seealso::
-
-            :attr:`.KeyedTuple._fields`
-
-        """
-
-        return [l for l in self._labels if l is not None]
-
     @property
     def _fields(self):
         """Return a tuple of string key names for this :class:`.KeyedTuple`.
@@ -86,7 +89,10 @@ class KeyedTuple(tuple):
             :meth:`.KeyedTuple.keys`
 
         """
-        return tuple(self.keys())
+        return tuple([l for l in self._labels if l is not None])
+
+    def __setattr__(self, key, value):
+        raise AttributeError("Can't set attribute: %s" % key)
 
     def _asdict(self):
         """Return the contents of this :class:`.KeyedTuple` as a dictionary.
@@ -100,6 +106,40 @@ class KeyedTuple(tuple):
         return dict((key, self.__dict__[key]) for key in self.keys())
 
 
+class _LW(AbstractKeyedTuple):
+    __slots__ = ()
+
+    def __new__(cls, vals):
+        return tuple.__new__(cls, vals)
+
+    def __reduce__(self):
+        # for pickling, degrade down to the regular
+        # KeyedTuple, thus avoiding anonymous class pickling
+        # difficulties
+        return KeyedTuple, (list(self), self._real_fields)
+
+    def _asdict(self):
+        """Return the contents of this :class:`.KeyedTuple` as a dictionary."""
+
+        d = dict(zip(self._real_fields, self))
+        d.pop(None, None)
+        return d
+
+
+def lightweight_named_tuple(name, fields):
+
+    tp_cls = type(name, (_LW,), {})
+    for idx, field in enumerate(fields):
+        if field is None:
+            continue
+        setattr(tp_cls, field, property(operator.itemgetter(idx)))
+
+    tp_cls._real_fields = fields
+    tp_cls._fields = tuple([f for f in fields if f is not None])
+
+    return tp_cls
+
+
 class ImmutableContainer(object):
     def _immutable(self, *arg, **kw):
         raise TypeError("%s object is immutable" % self.__class__.__name__)
diff --git a/test/base/test_utils.py b/test/base/test_utils.py
index 4a53aa54f0..a378b0160c 100644
--- a/test/base/test_utils.py
+++ b/test/base/test_utils.py
@@ -7,26 +7,26 @@ from sqlalchemy.testing.util import picklers, gc_collect
 from sqlalchemy.util import classproperty, WeakSequence, get_callable_argspec
 from sqlalchemy.sql import column
 
-class KeyedTupleTest():
+
+class _KeyedTupleTest(object):
+
+    def _fixture(self, values, labels):
+        raise NotImplementedError()
 
     def test_empty(self):
-        keyed_tuple = util.KeyedTuple([])
-        eq_(type(keyed_tuple), util.KeyedTuple)
+        keyed_tuple = self._fixture([], [])
         eq_(str(keyed_tuple), '()')
         eq_(len(keyed_tuple), 0)
 
-        eq_(keyed_tuple.__dict__, {'_labels': []})
         eq_(list(keyed_tuple.keys()), [])
         eq_(keyed_tuple._fields, ())
         eq_(keyed_tuple._asdict(), {})
 
     def test_values_but_no_labels(self):
-        keyed_tuple = util.KeyedTuple([1, 2])
-        eq_(type(keyed_tuple), util.KeyedTuple)
+        keyed_tuple = self._fixture([1, 2], [])
         eq_(str(keyed_tuple), '(1, 2)')
         eq_(len(keyed_tuple), 2)
 
-        eq_(keyed_tuple.__dict__, {'_labels': []})
         eq_(list(keyed_tuple.keys()), [])
         eq_(keyed_tuple._fields, ())
         eq_(keyed_tuple._asdict(), {})
@@ -35,14 +35,14 @@ class KeyedTupleTest():
         eq_(keyed_tuple[1], 2)
 
     def test_basic_creation(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(str(keyed_tuple), '(1, 2)')
         eq_(list(keyed_tuple.keys()), ['a', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 2})
 
     def test_basic_index_access(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(keyed_tuple[0], 1)
         eq_(keyed_tuple[1], 2)
 
@@ -51,7 +51,7 @@ class KeyedTupleTest():
         assert_raises(IndexError, should_raise)
 
     def test_basic_attribute_access(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(keyed_tuple.a, 1)
         eq_(keyed_tuple.b, 2)
 
@@ -60,12 +60,9 @@ class KeyedTupleTest():
         assert_raises(AttributeError, should_raise)
 
     def test_none_label(self):
-        keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', None, 'b'])
+        keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b'])
         eq_(str(keyed_tuple), '(1, 2, 3)')
 
-        # TODO: consider not allowing None labels
-        expected = {'a': 1, None: 2, 'b': 3, '_labels': ['a', None, 'b']}
-        eq_(keyed_tuple.__dict__, expected)
         eq_(list(keyed_tuple.keys()), ['a', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3})
@@ -80,12 +77,9 @@ class KeyedTupleTest():
         eq_(keyed_tuple[2], 3)
 
     def test_duplicate_labels(self):
-        keyed_tuple = util.KeyedTuple([1, 2, 3], ['a', 'b', 'b'])
+        keyed_tuple = self._fixture([1, 2, 3], ['a', 'b', 'b'])
         eq_(str(keyed_tuple), '(1, 2, 3)')
 
-        # TODO: consider not allowing duplicate labels
-        expected = {'a': 1, 'b': 3, '_labels': ['a', 'b', 'b']}
-        eq_(keyed_tuple.__dict__, expected)
         eq_(list(keyed_tuple.keys()), ['a', 'b', 'b'])
         eq_(keyed_tuple._fields, ('a', 'b', 'b'))
         eq_(keyed_tuple._asdict(), {'a': 1, 'b': 3})
@@ -100,21 +94,41 @@ class KeyedTupleTest():
         eq_(keyed_tuple[2], 3)
 
     def test_immutable(self):
-        keyed_tuple = util.KeyedTuple([1, 2], ['a', 'b'])
+        keyed_tuple = self._fixture([1, 2], ['a', 'b'])
         eq_(str(keyed_tuple), '(1, 2)')
 
-        # attribute access: mutable
         eq_(keyed_tuple.a, 1)
-        keyed_tuple.a = 100
-        eq_(keyed_tuple.a, 100)
-        keyed_tuple.c = 300
-        eq_(keyed_tuple.c, 300)
 
-        # index access: immutable
+        assert_raises(AttributeError, setattr, keyed_tuple, "a", 5)
+
         def should_raise():
             keyed_tuple[0] = 100
         assert_raises(TypeError, should_raise)
 
+    def test_serialize(self):
+
+        keyed_tuple = self._fixture([1, 2, 3], ['a', None, 'b'])
+
+        for loads, dumps in picklers():
+            kt = loads(dumps(keyed_tuple))
+
+            eq_(str(kt), '(1, 2, 3)')
+
+            eq_(list(kt.keys()), ['a', 'b'])
+            eq_(kt._fields, ('a', 'b'))
+            eq_(kt._asdict(), {'a': 1, 'b': 3})
+
+
+class KeyedTupleTest(_KeyedTupleTest, fixtures.TestBase):
+    def _fixture(self, values, labels):
+        return util.KeyedTuple(values, labels)
+
+
+class LWKeyedTupleTest(_KeyedTupleTest, fixtures.TestBase):
+    def _fixture(self, values, labels):
+        return util.lightweight_named_tuple('n', labels)(values)
+
+
 class WeakSequenceTest(fixtures.TestBase):
     @testing.requires.predictable_gc
     def test_cleanout_elements(self):