From: Denis Moyogo Jacquerye <moyogo@gmail.com>
Date: Tue, 1 Nov 2022 09:14:58 +0000 (+0100)
Subject: Test languages exemplars canonical duplicates
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0cc89cfc410207dfc4654b0a4a2bb135b3af1231;p=thirdparty%2Fgoogle%2Ffonts.git

Test languages exemplars canonical duplicates
---

diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py
index 9c30318be6..b15262ae02 100644
--- a/tests/test_data_languages.py
+++ b/tests/test_data_languages.py
@@ -14,8 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from collections import Counter
+from collections import defaultdict, Counter
 import re
+import unicodedata
 
 from gflanguages import LoadLanguages, languages_public_pb2, LoadScripts
 import pytest
@@ -41,6 +42,25 @@ SKIP_EXEMPLARS = {
 }
 
 
+@pytest.mark.parametrize("lang_code", LANGUAGES)
+@pytest.mark.parametrize(
+    "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]
+)
+def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name):
+    """Each exemplar must be the only spelling of its NFC form, and equal to it."""
+    tokens = getattr(LANGUAGES[lang_code].exemplar_chars, exemplar_name).split()
+    spellings_by_nfc = defaultdict(set)
+
+    for token in tokens:
+        # A token that starts with "{" and ends with "}" is compared unbraced.
+        wrapped = token[0] == "{" and token[-1] == "}"
+        spelling = token.lstrip("{").rstrip("}") if wrapped else token
+        spellings_by_nfc[unicodedata.normalize("NFC", spelling)].add(spelling)
+
+    result = [(len(found), found) for found in spellings_by_nfc.values()]
+    assert result == [(1, {nfc}) for nfc in spellings_by_nfc]
+
+
 @pytest.mark.parametrize("lang_code", LANGUAGES)
 @pytest.mark.parametrize(
     "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]