git.ipfire.org Git - thirdparty/babel.git/commitdiff
In fuzzy matching, also .lower().strip() fuzzy candidates
author: Jean Abou Samra <jean@abou-samra.fr>
Sun, 12 Feb 2023 01:46:30 +0000 (02:46 +0100)
committer: Jean Abou Samra <jean@abou-samra.fr>
Sun, 12 Feb 2023 01:54:21 +0000 (02:54 +0100)
The existing normalization seems intended to ease fuzzy matching across
trivial edits in the msgstr (changing case and adding whitespace), but it
was only applied to the new msgstr, not to the old msgstr candidates, so
merging catalogs could miss messages.

babel/messages/catalog.py
tests/messages/test_catalog.py

index dead4aac7f95ccb589942ec0c2500b584c6ba806..190264346133c98dfccb475b7292579953c6f87a 100644 (file)
@@ -803,10 +803,13 @@ class Catalog:
         # Prepare for fuzzy matching
         fuzzy_candidates = []
         if not no_fuzzy_matching:
-            fuzzy_candidates = {
-                self._key_for(msgid): messages[msgid].context
-                for msgid in messages if msgid and messages[msgid].string
-            }
+            fuzzy_candidates = {}
+            for msgid in messages:
+                if msgid and messages[msgid].string:
+                    key = self._key_for(msgid)
+                    ctxt = messages[msgid].context
+                    modified_key = key.lower().strip()
+                    fuzzy_candidates[modified_key] = (key, ctxt)
         fuzzy_matches = set()
 
         def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
@@ -861,8 +864,8 @@ class Catalog:
                         matches = get_close_matches(matchkey.lower().strip(),
                                                     fuzzy_candidates.keys(), 1)
                         if matches:
-                            newkey = matches[0]
-                            newctxt = fuzzy_candidates[newkey]
+                            modified_key = matches[0]
+                            newkey, newctxt = fuzzy_candidates[modified_key]
                             if newctxt is not None:
                                 newkey = newkey, newctxt
                             _merge(message, newkey, key)
index 273c83f51c8bba0bffa1d753cc8fac0ed83b9a07..c2e7aeda0474cf7ceed757f830813f9e3f3cf201 100644 (file)
@@ -121,16 +121,16 @@ class CatalogTestCase(unittest.TestCase):
 
     def test_update_fuzzy_matching_with_case_change(self):
         cat = catalog.Catalog()
-        cat.add('foo', 'Voh')
+        cat.add('FOO', 'Voh')
         cat.add('bar', 'Bahr')
         tmpl = catalog.Catalog()
-        tmpl.add('Foo')
+        tmpl.add('foo')
         cat.update(tmpl)
         assert len(cat.obsolete) == 1
-        assert 'foo' not in cat
+        assert 'FOO' not in cat
 
-        assert cat['Foo'].string == 'Voh'
-        assert cat['Foo'].fuzzy is True
+        assert cat['foo'].string == 'Voh'
+        assert cat['foo'].fuzzy is True
 
     def test_update_fuzzy_matching_with_char_change(self):
         cat = catalog.Catalog()