git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Test languages exemplars canonical duplicates
author Denis Moyogo Jacquerye <moyogo@gmail.com>
Tue, 1 Nov 2022 09:14:58 +0000 (10:14 +0100)
committer Denis Moyogo Jacquerye <moyogo@gmail.com>
Fri, 28 Apr 2023 19:18:15 +0000 (21:18 +0200)
tests/test_data_languages.py

index 9c30318be6a71d9ee4b5098581737868e13d4d12..b15262ae029060740a18fefbe5d4b716140685af 100644 (file)
@@ -14,8 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from collections import Counter
+from collections import defaultdict, Counter
 import re
+import unicodedata
 
 from gflanguages import LoadLanguages, languages_public_pb2, LoadScripts
 import pytest
@@ -41,6 +42,25 @@ SKIP_EXEMPLARS = {
 }
 
 
+@pytest.mark.parametrize("lang_code", LANGUAGES)
+@pytest.mark.parametrize(
+    "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]
+)
+def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name):
+    """Check that no two exemplars of a language share the same NFC form.
+
+    The whitespace-separated exemplars of the ``exemplar_name`` field are
+    grouped by their NFC normalization. The test passes only when every
+    group holds exactly one spelling and that spelling is the NFC form.
+    """
+    lang = LANGUAGES[lang_code]
+    tokens = getattr(lang.exemplar_chars, exemplar_name).split()
+
+    # Maps each NFC form to the set of spellings that normalize to it.
+    spellings_by_nfc = defaultdict(set)
+    for token in tokens:
+        # Drop the surrounding braces so the bare content is what gets
+        # normalized and recorded.
+        if token.startswith("{") and token.endswith("}"):
+            token = token.lstrip("{").rstrip("}")
+        nfc_form = unicodedata.normalize("NFC", token)
+        spellings_by_nfc[nfc_form].add(token)
+
+    # Both lists follow the dict's insertion order, so they line up pairwise.
+    result = [(len(spellings), spellings) for spellings in spellings_by_nfc.values()]
+    expected = [(1, {nfc_form}) for nfc_form in spellings_by_nfc]
+    assert result == expected
+
+
 @pytest.mark.parametrize("lang_code", LANGUAGES)
 @pytest.mark.parametrize(
     "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]