git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
- fix for very large topological sorts, courtesy ants.aasma at gmail [ticket:423]
author Mike Bayer <mike_mp@zzzcomputing.com>
Tue, 13 Feb 2007 01:25:51 +0000 (01:25 +0000)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Tue, 13 Feb 2007 01:25:51 +0000 (01:25 +0000)
CHANGES
lib/sqlalchemy/topological.py
test/base/dependency.py

diff --git a/CHANGES b/CHANGES
index fc59d0b85154cfda0daa16abc16e8aa18323f7b5..61f43d426431cbe99e4acff2f5e6d8b15769650f 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -18,6 +18,7 @@
   Table instance.
   - eager relation to an inheriting mapper wont fail if no rows returned for
   the relationship.
+  - fix for very large topological sorts, courtesy ants.aasma at gmail [ticket:423]
   - eager loading is slightly more strict about detecting "self-referential"
   relationships, specifically between polymorphic mappers.
   - improved support for complex queries embedded into "where" criterion
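diff --git a/lib/sqlalchemy/topological.py b/lib/sqlalchemy/topological.py
index 566a83f92c5a0d59bcca076b30433a8cfb1db014..a1b03b89a2c28712f3d52ffc6853182f70ea36fb 100644 (file)
--- a/lib/sqlalchemy/topological.py
+++ b/lib/sqlalchemy/topological.py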
@@ -53,16 +53,13 @@ class _Node(object):
             string.join([n.safestr(indent + 1) for n in self.children], '')
     def __repr__(self):
         return "%s" % (str(self.item))
-    def is_dependent(self, child):
+    def all_deps(self):
+        """Returns a set of dependencies for this node and all its cycles"""
+        deps = util.Set(self.dependencies)
         if self.cycles is not None:
             for c in self.cycles:
-                if child in c.dependencies:
-                    return True
-        if child.cycles is not None:
-            for c in child.cycles:
-                if c in self.dependencies:
-                    return True
-        return child in self.dependencies
+                deps.update(c.dependencies)
+        return deps
 
 class _EdgeCollection(object):
     """a collection of directed edges."""
@@ -196,38 +193,44 @@ class QueueDependencySorter(object):
     def _create_batched_tree(self, nodes):
         """given a list of nodes from a topological sort, organizes the nodes into a tree structure,
         with as many non-dependent nodes set as silbings to each other as possible."""
-        def sort(index=None, l=None):
-            if index is None:
-                index = 0
-            
-            if index >= len(nodes):
-                return None
-            
-            node = nodes[index]
-            l2 = []
-            sort(index + 1, l2)
-            for n in l2:
-                if l is None or search_dep(node, n):
-                    node.children.append(n)
-                else:
-                    l.append(n)
-            if l is not None:
-                l.append(node)
-            return node
-            
-        def search_dep(parent, child):
-            if child is None:
-                return False
-            elif parent.is_dependent(child):
-                return True
+        if not len(nodes):
+            return None
+        # a list of all currently independent subtrees as a tuple of
+        # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree)
+        # order of the list has no semantic meaning for the algorithm
+        independents = []
+        # in reverse topological order
+        for node in reversed(nodes):
+            # nodes subtree and cycles contain the node itself
+            subtree = util.Set([node])
+            if node.cycles is not None:
+                cycles = util.Set(node.cycles)
             else:
-                for c in child.children:
-                    x = search_dep(parent, c)
-                    if x is True:
-                        return True
-                else:
-                    return False
-        return sort()
+                cycles = util.Set()
+            # get a set of dependent nodes of node and its cycles
+            nodealldeps = node.all_deps()
+            if nodealldeps:
+                # iterate over independent node indexes in reverse order so we can efficiently remove them
+                for index in xrange(len(independents)-1,-1,-1):
+                    child, childsubtree, childcycles = independents[index] 
+                    # if there is a dependency between this node and an independent node
+                    if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)):
+                        # prepend child to nodes children
+                        # (append should be fine, but previous implementation used prepend)
+                        node.children[0:0] = (child,)
+                        # merge childs subtree and cycles
+                        subtree.update(childsubtree)
+                        cycles.update(childcycles)
+                        # remove the child from list of independent subtrees
+                        independents[index:index+1] = []
+            # add node as a new independent subtree
+            independents.append((node,subtree,cycles))
+        # choose an arbitrary node from list of all independent subtrees
+        head = independents.pop()[0]
+        # add all other independent subtrees as a child of the chosen root
+        # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation
+        head.children[0:0] = [i[0] for i in independents]
+        return head
         
     def _find_cycles(self, edges):
         involved_in_cycles = util.Set()
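diff --git a/test/base/dependency.py b/test/base/dependency.py
index 7c6578cb8d96f74aa0c6284f6440b7dfbc0280b2..c5e54fc9fa5630a02a90b1f99c8ec0d552707d95 100644 (file)
--- a/test/base/dependency.py
+++ b/test/base/dependency.py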
@@ -181,6 +181,11 @@ class DependencySortTest(PersistTest):
         head = DependencySorter(tuples, []).sort(allow_all_cycles=True)
         self.assert_sort(tuples, head)
         
+    def testbigsort(self):
+        tuples = []
+        for i in range(0,1500, 2):
+            tuples.append((i, i+1))
+        head = DependencySorter(tuples, []).sort()