gh-142939: difflib.get_close_matches performance (#142940)

author dgpb <3577712+dg-pb@users.noreply.github.com>

Tue, 30 Dec 2025 07:15:59 +0000 (09:15 +0200)

committer GitHub <noreply@github.com>

Tue, 30 Dec 2025 07:15:59 +0000 (07:15 +0000)
author dgpb <3577712+dg-pb@users.noreply.github.com>
Tue, 30 Dec 2025 07:15:59 +0000 (09:15 +0200)
committer GitHub <noreply@github.com>
Tue, 30 Dec 2025 07:15:59 +0000 (07:15 +0000)
diff --git a/Lib/difflib.py b/Lib/difflib.py

index 4a0600e4ebb01b2008193b84f876368abc1948fb..7c7e233b013a7648860ab1b1f78bb7b097d899f4 100644 (file)
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -638,15 +638,15 @@ class SequenceMatcher:
          # avail[x] is the number of times x appears in 'b' less the
          # number of times we've seen it in 'a' so far ... kinda
          avail = {}
-        availhas, matches = avail.__contains__, 0
+        matches = 0
          for elt in self.a:
-            if availhas(elt):
+            if elt in avail:
                  numb = avail[elt]
              else:
                  numb = fullbcount.get(elt, 0)
              avail[elt] = numb - 1
              if numb > 0:
-                matches = matches + 1
+                matches += 1
          return _calculate_ratio(matches, len(self.a) + len(self.b))
  
      def real_quick_ratio(self):
@@ -702,10 +702,12 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6):
      s.set_seq2(word)
      for x in possibilities:
          s.set_seq1(x)
-        if s.real_quick_ratio() >= cutoff and \
-           s.quick_ratio() >= cutoff and \
-           s.ratio() >= cutoff:
-            result.append((s.ratio(), x))
+        if s.real_quick_ratio() < cutoff or s.quick_ratio() < cutoff:
+            continue
+
+        ratio = s.ratio()
+        if ratio >= cutoff:
+            result.append((ratio, x))
  
      # Move the best scorers to head of list
      result = _nlargest(n, result)
diff --git a/Misc/NEWS.d/next/Library/2025-12-29-21-12-12.gh-issue-142939.OyQQr5.rst b/Misc/NEWS.d/next/Library/2025-12-29-21-12-12.gh-issue-142939.OyQQr5.rst

new file mode 100644 (file)

index 0000000..65523f0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-29-21-12-12.gh-issue-142939.OyQQr5.rst
@@ -0,0 +1 @@
+Performance optimisations for :func:`difflib.get_close_matches`
author	dgpb <3577712+dg-pb@users.noreply.github.com>
	Tue, 30 Dec 2025 07:15:59 +0000 (09:15 +0200)
committer	GitHub <noreply@github.com>
	Tue, 30 Dec 2025 07:15:59 +0000 (07:15 +0000)
Lib/difflib.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2025-12-29-21-12-12.gh-issue-142939.OyQQr5.rst	[new file with mode: 0644]	patch | blob