git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Expand character class for language names
author nathan-williams <nathan.le.williams@gmail.com>
Mon, 22 Jul 2024 04:43:01 +0000 (00:43 -0400)
committer nathan-williams <nathan.le.williams@gmail.com>
Mon, 22 Jul 2024 04:43:01 +0000 (00:43 -0400)
14 files changed:
Lib/gflanguages/data/languages/bdh_Latn.textproto
Lib/gflanguages/data/languages/bkc_Latn.textproto
Lib/gflanguages/data/languages/bm_Nkoo.textproto
Lib/gflanguages/data/languages/bsq_Bass.textproto
Lib/gflanguages/data/languages/cbk_Latn.textproto
Lib/gflanguages/data/languages/chn_Dupl.textproto
Lib/gflanguages/data/languages/de_Dupl.textproto
Lib/gflanguages/data/languages/dyu_Nkoo.textproto
Lib/gflanguages/data/languages/eto_Latn.textproto
Lib/gflanguages/data/languages/fr_Dupl.textproto
Lib/gflanguages/data/languages/gcf_Latn.textproto
Lib/gflanguages/data/languages/man_Nkoo.textproto
Lib/gflanguages/data/scripts/Beng.textproto
tests/test_data_languages.py

index 7e7ccf6dda5c64ba5125a94df4c39da2d7bf540e..ecbbb850b381ff98d83fe3d3506f625241fef7ff 100644 (file)
@@ -1,7 +1,7 @@
 id: "bdh_Latn"
 language: "bdh"
 script: "Latn"
-name: "Baka (DRC/South Sudan)"
+name: "Baka, DRC/South Sudan"
 autonym: "Tara Baká"
 population: 60000
 region: "CD"
index 02b8bca30d3837be2b026f085f503882a3ce76b8..08da4d26dac927ea029349ae8d63ca391975ce11 100644 (file)
@@ -1,7 +1,7 @@
 id: "bkc_Latn"
 language: "bkc"
 script: "Latn"
-name: "Baka (Cameroon/Gabon)"
+name: "Baka, Cameroon/Gabon"
 population: 71000
 region: "CM"
 region: "GA"
index 2734cdca76635d038ef28011feca96543f9fbce3..1136be516c1cc6fbbd4386f74a6cf137ad929267 100644 (file)
@@ -1,6 +1,6 @@
 id: "bm_Nkoo"
 language: "bm"
 script: "Nkoo"
-name: "Bambara (Nko)"
+name: "Bambara (N’Ko)"
 population: 16000000
 region: "ML"
index 3d76b53b1daca2c82feb889d8e9501d4bdb81155..e2984fefbbe133b9ec8d6849f5827504cccde5cd 100644 (file)
@@ -1,7 +1,7 @@
 id: "bsq_Bass"
 language: "bsq"
 script: "Bass"
-name: "Bassa (Vah)"
+name: "Bassa (Bassa Vah)"
 population: 410000
 region: "LR"
 region: "SL"
index 7053e530f99021fbe050446d917bf4bfd6f9137c..748d555e183541f6d2da5f84232404048ff7401a 100644 (file)
@@ -1,7 +1,7 @@
 id: "cbk_Latn"
 language: "cbk"
 script: "Latn"
-name: "Chavacano, Latin, Philippines"
+name: "Chavacano, Philippines (Latin)"
 region: "PH"
 sample_text {
   masthead_full: "TtOo"
index d402556fcbe747c9a15fc9cf883eec482304fde2..2e8e8213cb7e5da3980f049b2b9f647396fc6eb2 100644 (file)
@@ -1,6 +1,6 @@
 id: "chn_Dupl"
 language: "chn"
 script: "Dupl"
-name: "Chinook Jargon (Duployan shorthand)"
+name: "Chinook Jargon (Duployan)"
 region: "US"
 region: "CA"
index 7910e1de427fe8776e358df4ca45ad946ec47de1..718b6fd74790e4463d7e0626edb0540a4280696c 100644 (file)
@@ -1,5 +1,5 @@
 id: "de_Dupl"
 language: "de"
 script: "Dupl"
-name: "German (Duployan shorthand)"
+name: "German (Duployan)"
 region: "DE"
index 7edc157ecd50897db062eca620de27f492a8357f..eb891abd2d9b0c02caff05aedeaebe555e6e2278 100644 (file)
@@ -1,5 +1,5 @@
 id: "dyu_Nkoo"
 language: "dyu"
 script: "Nkoo"
-name: "Dyula (Nko)"
+name: "Dyula (N’Ko)"
 region: "CI"
index d016fd39f97ed54a2ad1c84de5036eebb970ac35..2da83f6b0784ff210ae1bb964fe35da9103433f2 100644 (file)
@@ -1,7 +1,7 @@
 id: "eto_Latn"
 language: "eto"
 script: "Latn"
-name: "Eton (Cameroon)"
+name: "Eton, Cameroon"
 population: 400000
 region: "CM"
 exemplar_chars {
index 584d4cbda769f8c32280becaedf62b7b2c9bce92..0093d282a836549fd9e379b167b80b3af1fbea56 100644 (file)
@@ -1,5 +1,5 @@
 id: "fr_Dupl"
 language: "fr"
 script: "Dupl"
-name: "French (Duployan shorthand)"
+name: "French (Duployan)"
 historical: true
index 56401d24d837bcf6668c4ee3ddfb332cfe46c3db..b36c6e6044959d63dc84ae9b42b9a4090661139f 100644 (file)
@@ -1,7 +1,7 @@
 id: "gcf_Latn"
 language: "gcf"
 script: "Latn"
-name: "Guadeloupean Creole French, Latin, Martinique"
+name: "Guadeloupean Creole French, Martinique (Latin)"
 region: "GP"
 region: "MQ"
 sample_text {
index 2ba2349b1831130dd2f93baa61192f9d6a04fd95..4e9166d0201ffdeb883196c606f31c21dcba5080 100644 (file)
@@ -1,5 +1,5 @@
 id: "man_Nkoo"
 language: "man"
 script: "Nkoo"
-name: "Mandingo (Nko)"
+name: "Mandingo (N’Ko)"
 region: "GN"
index d583c7ead81f52bb86c6b9a31cf2a9439f95f497..d68be9e4d79d73326c35217fbf160e5bf1f87466 100644 (file)
@@ -1,3 +1,2 @@
 id: "Beng"
-name: "Bangla"
-
+name: "Bengali"
index c05b7d8a5748a3a5cf84f2795a098a7547db206c..a859cf78fe4c0d84bbdf2a15bfa6b180ef27cb15 100644 (file)
@@ -84,7 +84,15 @@ SKIP_REGION = {
 }
 
 # "ʼ" allowed as last character in language name for Metaʼ
-LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$"
+LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$"
+# Some scripts have abbreviated names for reference in language names that are
+# sufficient in context. If an alternate is listed here, it should be used
+# universally and consistently across all language names.
+ALTERNATE_SCRIPT_NAMES = {
+    "Dupl": "Duployan",
+    "Hans": "Simplified",
+    "Hant": "Traditional",
+}
 
 
 @pytest.mark.parametrize("lang_code", LANGUAGES)
@@ -291,7 +299,7 @@ def test_language_uniqueness():
 def test_language_name_structure():
     languages_with_bad_name_structure = {}
     for lang in LANGUAGES.values():
-        script_name = SCRIPTS[lang.script].name
+        script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script]
         names = [["name", lang.name]]
         if lang.preferred_name:
             names += [["preferred_name", lang.preferred_name]]