]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
get_matching_blocks(): rewrote code & comments so they match; added
authorTim Peters <tim.peters@gmail.com>
Tue, 13 Jun 2006 03:30:07 +0000 (03:30 +0000)
committerTim Peters <tim.peters@gmail.com>
Tue, 13 Jun 2006 03:30:07 +0000 (03:30 +0000)
more comments about why it's this way at all; and removed what looked
like needless expense (sorting (i, j, k) triples directly should give
exactly the same order as sorting (i, (i, j, k)) pairs).

Lib/difflib.py

index 39bb2d9728742fc14198e1b8bc564592bee4bc00..9a9071058857f78dcae2a7f993abc579fa6fe39b 100644 (file)
@@ -474,30 +474,30 @@ class SequenceMatcher:
         if self.matching_blocks is not None:
             return self.matching_blocks
         la, lb = len(self.a), len(self.b)
-
-        indexed_blocks = []
+        self.matching_blocks = matching_blocks = []
+
+        # This is most naturally expressed as a recursive algorithm, but
+        # at least one user bumped into extreme use cases that exceeded
+        # the recursion limit on their box.  So, now we maintain a list
+        # ('queue`) of blocks we still need to look at, and append partial
+        # results to `matching_blocks` in a loop; the matches are sorted
+        # at the end.
         queue = [(0, la, 0, lb)]
         while queue:
-            # builds list of matching blocks covering a[alo:ahi] and
-            # b[blo:bhi], appending them in increasing order to answer
             alo, ahi, blo, bhi = queue.pop()
-
+            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
             # a[alo:i] vs b[blo:j] unknown
             # a[i:i+k] same as b[j:j+k]
             # a[i+k:ahi] vs b[j+k:bhi] unknown
-            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
-
-            if k:
+            if k:   # if k is 0, there was no matching block
+                matching_blocks.append(x)
                 if alo < i and blo < j:
                     queue.append((alo, i, blo, j))
-                indexed_blocks.append((i, x))
                 if i+k < ahi and j+k < bhi:
                     queue.append((i+k, ahi, j+k, bhi))
-        indexed_blocks.sort()
-
-        self.matching_blocks = [elem[1] for elem in indexed_blocks]
-        self.matching_blocks.append( (la, lb, 0) )
-        return self.matching_blocks
+        matching_blocks.sort()
+        matching_blocks.append( (la, lb, 0) )
+        return matching_blocks
 
     def get_opcodes(self):
         """Return list of 5-tuples describing how to turn a into b.