From: Simon Cozens Date: Thu, 17 Oct 2024 13:25:35 +0000 (+0100) Subject: fix marks in base (#172) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=51880d3ea86d15388c3379c9409757041de115ef;p=thirdparty%2Fgoogle%2Ffonts.git fix marks in base (#172) * Add Todhri description * Update family/description for Duployan * Fix scripts with marks in base exemplars * Script for fixing the above * Test for marks in bases * Tibetan needed a bit of extra help --- diff --git a/Lib/gflanguages/data/languages/as_Beng.textproto b/Lib/gflanguages/data/languages/as_Beng.textproto index 0bafd88c58..6efd717752 100644 --- a/Lib/gflanguages/data/languages/as_Beng.textproto +++ b/Lib/gflanguages/data/languages/as_Beng.textproto @@ -6,9 +6,9 @@ autonym: "অসমীয়া" population: 17239170 region: "IN" exemplar_chars { - base: "় অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ং ঁ ঃ ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত থ দ ধ ন প ফ ব ভ ম য {য়} ৰ ল ৱ শ ষ স হ {ক্ষ} া ি ী ু ূ ৃ ে ৈ ো ৌ ্" + base: "অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত থ দ ধ ন প ফ ব ভ ম য {য়} ৰ ল ৱ শ ষ স হ {ক্ষ}" auxiliary: "‌‍ ৲ ৎ র" - marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ী ◌ু ◌ূ ◌ৃ ◌ৄ ◌ে ◌ৈ ◌্ ◌ৗ ◌ৢ ◌ৣ" + marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ী ◌ু ◌ূ ◌ৃ ◌ৄ ◌ে ◌ৈ ◌্ ◌ৗ ◌ৢ ◌ৣ ◌ো ◌ৌ" numerals: "- , . % + 0০ 1১ 2২ 3৩ 4৪ 5৫ 6৬ 7৭ 8৮ 9৯" punctuation: "- – — , ; : ! ? . … । \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "় অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ং ঃ ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ৎ ত থ দ ধ ন প ফ ব ভ ম য ৰ ল ৱ শ ষ স হ ্" diff --git a/Lib/gflanguages/data/languages/bn_Beng.textproto b/Lib/gflanguages/data/languages/bn_Beng.textproto index 15b609a907..01ed417281 100644 --- a/Lib/gflanguages/data/languages/bn_Beng.textproto +++ b/Lib/gflanguages/data/languages/bn_Beng.textproto @@ -9,9 +9,9 @@ region: "GB" region: "IN" region: "NP" exemplar_chars { - base: "় ৺ অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ ং ঃ ঁ ক {ক্ষ} খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত ৎ থ দ ধ ন প ফ ব ভ ম য {য়} র ল শ ষ স হ ঽ া ি ী ু ূ ৃ ৄ ৢ ৣ ে ৈ ো ৌ ্ ৗ" + base: "৺ অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ এ ঐ ও ঔ ক {ক্ষ} খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত ৎ থ দ ধ ন প ফ ব ভ ম য {য়} র ল শ ষ স হ ঽ" auxiliary: "‌‍ ৲ ৳ ৴ ৵ ৶ ৷ ৸ ৹ ৰ ৱ" - marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ী ◌ু ◌ূ ◌ৃ ◌ৄ ◌ে ◌ৈ ◌্ ◌ৗ ◌ৢ ◌ৣ" + marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ী ◌ু ◌ূ ◌ৃ ◌ৄ ◌ে ◌ৈ ◌্ ◌ৗ ◌ৢ ◌ৣ ◌ো ◌ৌ" numerals: "- , . % + 0০ 1১ 2২ 3৩ 4৪ 5৫ 6৬ 7৭ 8৮ 9৯" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ক {ক্ষ} খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল শ ষ স হ" diff --git a/Lib/gflanguages/data/languages/bo_Tibt.textproto b/Lib/gflanguages/data/languages/bo_Tibt.textproto index 7efe51ea1b..c46864bd1b 100644 --- a/Lib/gflanguages/data/languages/bo_Tibt.textproto +++ b/Lib/gflanguages/data/languages/bo_Tibt.textproto @@ -8,9 +8,9 @@ region: "CN" region: "IN" region: "NP" exemplar_chars { - base: "ཾ ཿ ཀ {ཀྵ} ྐ {ྐྵ} ཁ ྑ ག {གྷ} ྒ {ྒྷ} ང ྔ ཅ ྕ ཆ ྖ ཇ ྗ ཉ ྙ ཊ ྚ ཋ ྛ ཌ {ཌྷ} ྜ {ྜྷ} ཎ ྞ ཏ ྟ ཐ ྠ ད {དྷ} ྡ {ྡྷ} ན ྣ པ ྤ ཕ ྥ བ {བྷ} ྦ {ྦྷ} མ ྨ ཙ ྩ ཚ ྪ ཛ {ཛྷ} ྫ {ྫྷ} ཝ ྭ ྺ ཞ ྮ ཟ ྯ འ ྰ ཡ ྱ ྻ ར ཪ ྲ ྼ ལ ླ ཤ ྴ ཥ ྵ ས ྶ ཧ ྷ ཨ ྸ ི {ཱི} ྀ {ཱྀ} ུ {ཱུ} {ྲྀ} ཷ {ླྀ} ཹ ེ ཻ ོ ཽ ྄" + base: "ཀ {ཀྵ} ཁ ག {གྷ} ང ཅ ཆ ཇ ཉ ཊ ཋ ཌ {ཌྷ} ཎ ཏ ཐ ད {དྷ} ན པ ཕ བ {བྷ} མ ཙ ཚ ཛ {ཛྷ} ཝ ཞ ཟ འ ཡ ར ཪ ལ ཤ ཥ ས ཧ ཨ " auxiliary: "ༀ" - marks: "◌ི ◌ུ ◌ེ ◌ོ" + marks: "◌ི ◌ུ ◌ེ ◌ོ ◌ཾ ◌ཿ ◌ྐ ◌ྑ ◌ྒ ◌ྔ ◌ྕ ◌ྖ ◌ྗ ◌ྙ ◌ྚ ◌ྛ ◌ྜ ◌ྞ ◌ྟ ◌ྠ ◌ྡ ◌ྣ ◌ྤ ◌ྥ ◌ྦ ◌ྨ ◌ྩ ◌ྪ ◌ྫ ◌ྭ ◌ྺ ◌ྮ ◌ྯ ◌ྰ ◌ྱ ◌ྻ ◌ྲ ◌ྼ ◌ླ ◌ྴ ◌ྵ ◌ྶ ◌ྷ ◌ྸ ◌ྀ ◌ཷ ◌ཹ ◌ཻ ◌ཽ ◌྄ {ྐྵ} {ྒྷ} {ྜྷ} {ྡྷ} {ྦྷ} {ྫྷ} {ཱི} {ཱྀ} {ཱུ} {ྲྀ} {ླྀ}" numerals: "- , . % + 0༠ 1༡ 2༢ 3༣ 4༤ 5༥ 6༦ 7༧ 8༨ 9༩" punctuation: ": ་ །" index: "ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཨ" diff --git a/Lib/gflanguages/data/languages/brx_Deva.textproto b/Lib/gflanguages/data/languages/brx_Deva.textproto index 6e9b439598..e8c8124d35 100644 --- a/Lib/gflanguages/data/languages/brx_Deva.textproto +++ b/Lib/gflanguages/data/languages/brx_Deva.textproto @@ -6,9 +6,9 @@ autonym: "बरʼ" population: 1856526 region: "IN" exemplar_chars { - base: "़ ँ ं अ आ इ ई उ ऊ ऍ ए ऐ ऑ ओ औ क ख ग घ च छ ज झ ञ ट ठ ड {ड़} ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ा ि ी ु ू ृ ॅ े ै ॉ ो ौ ्" + base: "अ आ इ ई उ ऊ ऍ ए ऐ ऑ ओ औ क ख ग घ च छ ज झ ञ ट ठ ड {ड़} ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह" auxiliary: "‌‍" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌ॅ ◌ॉ" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "। ॥ ॰" index: "अ आ इ ई उ ऊ ऍ ए ऐ ऑ ओ औ क ख ग घ च छ ज झ ञ ट ठ ड {ड़} ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह" diff --git a/Lib/gflanguages/data/languages/doi_Deva.textproto b/Lib/gflanguages/data/languages/doi_Deva.textproto index a8b825af33..3fc00966ed 100644 --- a/Lib/gflanguages/data/languages/doi_Deva.textproto +++ b/Lib/gflanguages/data/languages/doi_Deva.textproto @@ -6,8 +6,9 @@ autonym: "𑠖𑠵𑠌𑠤𑠮" population: 2652180 region: "IN" exemplar_chars { - base: "॑ ॒ ़ ँ ं ः ॐ अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क {क्ष} ख ग घ ङ च छ ज झ ञ ट ठ ड {ड़} ढ {ढ़} ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ ा ि ी ु ू ृ ॄ ॢ ॣ े ै ो ौ ्" + base: "ॐ अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क {क्ष} ख ग घ ङ च छ ज झ ञ ट ठ ड {ड़} ढ {ढ़} ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ" auxiliary: "‌‍ ऍ ऑ ॅ" + marks: "◌॑ ◌॒ ◌़ ◌ँ ◌ं ◌ः ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌ॄ ◌ॢ ◌ॣ ◌े ◌ै ◌ो ◌ौ ◌्" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "_ – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) @ * / & #" index: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह" diff --git a/Lib/gflanguages/data/languages/dz_Tibt.textproto b/Lib/gflanguages/data/languages/dz_Tibt.textproto index 15d5fa0519..c2448d6618 100644 --- a/Lib/gflanguages/data/languages/dz_Tibt.textproto +++ b/Lib/gflanguages/data/languages/dz_Tibt.textproto @@ -7,9 +7,9 @@ population: 370341 region: "BT" region: "IN" exemplar_chars { - base: "ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཨ ི ུ ེ ོ ྐ ྑ ྒ ྔ ྗ ྙ ྟ ྠ ྡ ྣ ྤ ྥ ྦ ྨ ྩ ྪ ྫ ྭ ྱ ྲ ླ ྵ ྶ ྷ" + base: "ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཨ" auxiliary: "྄ ཊ ཋ ཌ ཎ ཾ ཥ ྀ ཻ ཽ ྚ ྛ ྜ ྞ ྺ ྻ ྼ" - marks: "◌ི ◌ུ ◌ེ ◌ོ ◌ྵ" + marks: "◌ི ◌ུ ◌ེ ◌ོ ◌ྵ ◌ྐ ◌ྑ ◌ྒ ◌ྔ ◌ྗ ◌ྙ ◌ྟ ◌ྠ ◌ྡ ◌ྣ ◌ྤ ◌ྥ ◌ྦ ◌ྨ ◌ྩ ◌ྪ ◌ྫ ◌ྭ ◌ྱ ◌ྲ ◌ླ ◌ྶ ◌ྷ" numerals: "- , . % + 0༠ 1༡ 2༢ 3༣ 4༤ 5༥ 6༦ 7༧ 8༨ 9༩" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] ༼ ༽ @ * / & # ༄ ༅ ༆ ༈ ༉ ༊ ࿐ ࿑ ༒ ࿒ ࿓ ࿔ ༶ ྾ ྿ ༌ ། ༎ ༏ ༐ ༑ ༔ ༴" index: "ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཨ" diff --git a/Lib/gflanguages/data/languages/ff_Adlm.textproto b/Lib/gflanguages/data/languages/ff_Adlm.textproto index 188d95f79d..f482d46315 100644 --- a/Lib/gflanguages/data/languages/ff_Adlm.textproto +++ b/Lib/gflanguages/data/languages/ff_Adlm.textproto @@ -16,7 +16,8 @@ region: "NG" region: "SL" region: "SN" exemplar_chars { - base: "𞥄𞥅𞥆 𞤢 𞤣 𞤤 𞤥 𞤦 𞤧 𞤨 𞤩 𞤪 𞤫 𞤬 𞤭 𞤮 𞤯 𞤰 𞤱 𞤲 𞤳 𞤴 𞤵 𞤶 𞤷 𞤸 𞤹 𞤺 𞤻 𞤼 𞤽 𞥋" + base: "𞤢 𞤣 𞤤 𞤥 𞤦 𞤧 𞤨 𞤩 𞤪 𞤫 𞤬 𞤭 𞤮 𞤯 𞤰 𞤱 𞤲 𞤳 𞤴 𞤵 𞤶 𞤷 𞤸 𞤹 𞤺 𞤻 𞤼 𞤽" + marks: "𞥄 𞥅 𞥆 𞥋" auxiliary: "𞤾 𞤿 𞥀 𞥁 𞥂 𞥃" numerals: "𞥐 𞥑 𞥒 𞥓 𞥔 𞥕 𞥖 𞥗 𞥘 𞥙" punctuation: "- 𞥞 𞥟 . % " diff --git a/Lib/gflanguages/data/languages/hi_Deva.textproto b/Lib/gflanguages/data/languages/hi_Deva.textproto index fb5c6e6c30..af9bd7cae8 100644 --- a/Lib/gflanguages/data/languages/hi_Deva.textproto +++ b/Lib/gflanguages/data/languages/hi_Deva.textproto @@ -10,9 +10,9 @@ region: "NP" region: "UG" region: "ZA" exemplar_chars { - base: "़ ॐ ं ँ ः अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ ा ि ी ु ू ृ ॄ ॅ े ै ॉ ो ौ ्" + base: "ॐ अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ" auxiliary: "‌‍" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌ॄ ◌ॅ ◌ॉ" numerals: "- , . % + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९" punctuation: "- , ; : ! ? . ‘ ’ “ ” ( ) [ ] { } ॰" index: "अ आ इ ई उ ऊ ऋ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह" diff --git a/Lib/gflanguages/data/languages/km_Khmr.textproto b/Lib/gflanguages/data/languages/km_Khmr.textproto index 286f9f9a33..c4ec49e955 100644 --- a/Lib/gflanguages/data/languages/km_Khmr.textproto +++ b/Lib/gflanguages/data/languages/km_Khmr.textproto @@ -6,7 +6,7 @@ autonym: "ភាសាខ្មែរ" population: 15065030 region: "KH" exemplar_chars { - base: "័ ៈ ់ ៉ ៊ ៍ ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ឫ ឬ ល ឭ ឮ វ ស ហ ឡ អ {អា} ឥ ឦ ឧ {ឧក} ឩ ឪ ឯ ឰ ឱ ឲ ឳ ា ិ ី ឹ ឺ ុ ូ ួ ើ ឿ ៀ េ ែ ៃ ោ ៅ ំ ះ ្" + base: "ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ឫ ឬ ល ឭ ឮ វ ស ហ ឡ អ {អា} ឥ ឦ ឧ {ឧក} ឩ ឪ ឯ ឰ ឱ ឲ ឳ" auxiliary: "឴឵​ ៌ ៎ ៏ ៑ ឝ ឞ" marks: "◌឴ ◌឵ ◌ា ◌ិ ◌ី ◌ឹ ◌ឺ ◌ុ ◌ូ ◌ួ ◌ើ ◌ឿ ◌ៀ ◌េ ◌ែ ◌ៃ ◌ោ ◌ៅ ◌ំ ◌ះ ◌ៈ ◌៉ ◌៊ ◌់ ◌៍ ◌័ ◌្" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" diff --git a/Lib/gflanguages/data/languages/kn_Knda.textproto b/Lib/gflanguages/data/languages/kn_Knda.textproto index ddf13ffd2f..17d738208e 100644 --- a/Lib/gflanguages/data/languages/kn_Knda.textproto +++ b/Lib/gflanguages/data/languages/kn_Knda.textproto @@ -6,9 +6,9 @@ autonym: "ಕನ್ನಡ" population: 49065330 region: "IN" exemplar_chars { - base: "಼ ೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ ಂ ಃ ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಱ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಽ ಾ ಿ ೀ ು ೂ ೃ ೄ ೆ ೇ ೈ ೊ ೋ ೌ ್ ೕ ೖ" + base: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಱ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಽ" auxiliary: "‌‍ ೞ" - marks: "◌ಂ ◌ಃ ◌ಾ ◌ಿ ◌ು ◌ೂ ◌ೃ ◌ೄ ◌ೆ ◌ೌ ◌್ ◌ೕ ◌ೖ" + marks: "◌ಂ ◌ಃ ◌ಾ ◌ಿ ◌ು ◌ೂ ◌ೃ ◌ೄ ◌ೆ ◌ೌ ◌್ ◌ೕ ◌ೖ ◌಼ ◌ೀ ◌ೇ ◌ೈ ◌ೊ ◌ೋ" numerals: "- , . % + 0೦ 1೧ 2೨ 3೩ 4೪ 5೫ 6೬ 7೭ 8೮ 9೯" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಱ ಲ ವ ಶ ಷ ಸ ಹ ಳ ೞ" diff --git a/Lib/gflanguages/data/languages/kok_Deva.textproto b/Lib/gflanguages/data/languages/kok_Deva.textproto index d8d5862ad8..cb3c63bdfc 100644 --- a/Lib/gflanguages/data/languages/kok_Deva.textproto +++ b/Lib/gflanguages/data/languages/kok_Deva.textproto @@ -5,8 +5,9 @@ name: "Konkani" population: 4906533 region: "IN" exemplar_chars { - base: "़ ० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ं ँ ः अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क {क़} ख {ख़} ग {ग़} घ ङ च छ ज {ज़} झ ञ ट ठ ड {ड़} ढ {ढ़} ण त थ द ध न प फ {फ़} ब भ म य {य़} र ल व श ष स ह ळ ऽ ा ि ी ु ू ृ ॄ ॅ े ै ॉ ो ौ ्" + base: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क {क़} ख {ख़} ग {ग़} घ ङ च छ ज {ज़} झ ञ ट ठ ड {ड़} ढ {ढ़} ण त थ द ध न प फ {फ़} ब भ म य {य़} र ल व श ष स ह ळ ऽ" auxiliary: "‌‍" + marks: "◌़ ◌ं ◌ँ ◌ः ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌ॄ ◌ॅ ◌े ◌ै ◌ॉ ◌ो ◌ौ ◌्" numerals: "- , . % + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ" diff --git a/Lib/gflanguages/data/languages/lo_Laoo.textproto b/Lib/gflanguages/data/languages/lo_Laoo.textproto index 7a89007c4d..ca4d057f02 100644 --- a/Lib/gflanguages/data/languages/lo_Laoo.textproto +++ b/Lib/gflanguages/data/languages/lo_Laoo.textproto @@ -6,7 +6,7 @@ autonym: "ພາສາລາວ" population: 5138706 region: "LA" exemplar_chars { - base: "່ ້ ໊ ໋ ໌ ໍ ໆ ກ ຂ ຄ ງ ຈ ສ ຊ ຍ ດ ຕ ຖ ທ ນ ບ ປ ຜ ຝ ພ ຟ ມ ຢ ຣ ລ ວ ຫ ໜ ໝ ອ ຮ ຯ ະ ັ າ ຳ ິ ີ ຶ ື ຸ ູ ົ ຼ ຽ ເ ແ ໂ ໃ ໄ" + base: "ໆ ກ ຂ ຄ ງ ຈ ສ ຊ ຍ ດ ຕ ຖ ທ ນ ບ ປ ຜ ຝ ພ ຟ ມ ຢ ຣ ລ ວ ຫ ໜ ໝ ອ ຮ ຯ ະ າ ຳ ຽ ເ ແ ໂ ໃ ໄ" auxiliary: "​ ໐ ໑ ໒ ໓ ໔ ໕ ໖ ໗ ໘ ໙" marks: "◌ັ ◌ິ ◌ີ ◌ຶ ◌ື ◌ຸ ◌ູ ◌ົ ◌ຼ ◌່ ◌້ ◌໊ ◌໋ ◌໌ ◌ໍ" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" diff --git a/Lib/gflanguages/data/languages/mai_Deva.textproto b/Lib/gflanguages/data/languages/mai_Deva.textproto index c46274c1b4..a92af4a9c6 100644 --- a/Lib/gflanguages/data/languages/mai_Deva.textproto +++ b/Lib/gflanguages/data/languages/mai_Deva.textproto @@ -7,7 +7,7 @@ population: 19249149 region: "IN" region: "NP" exemplar_chars { - base: "़ ं ः क {क्ष} ख ग घ च छ ज {ज्ञ} झ ञ ट ठ ड {डं} ढ ण त {त्र} थ द ध न प फ ब भ म य र ल व श {श्र} ष स ह ा ि ी ु ू े ै ो ौ" + base: "क {क्ष} ख ग घ च छ ज {ज्ञ} झ ञ ट ठ ड {डं} ढ ण त {त्र} थ द ध न प फ ब भ म य र ल व श {श्र} ष स ह" auxiliary: "अ {अं} {अः} आ इ ई उ ऊ ऋ ऌ ॡ ए ऐ ओ औ" marks: "◌ँ ◌ं ◌ः ◌ऺ ◌ऻ ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌ॆ ◌े ◌ै ◌ॊ ◌ो ◌ौ ◌् ◌ॏ" numerals: "० १ २ ३ ४ ५ ६ ७ ८ ९ 0 1 2 3 4 5 6 7 8 9" diff --git a/Lib/gflanguages/data/languages/ml_Mlym.textproto b/Lib/gflanguages/data/languages/ml_Mlym.textproto index 6b7e35675e..2cca78678a 100644 --- a/Lib/gflanguages/data/languages/ml_Mlym.textproto +++ b/Lib/gflanguages/data/languages/ml_Mlym.textproto @@ -13,7 +13,7 @@ region: "MY" region: "QA" region: "SG" exemplar_chars { - base: "‌‍ ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ ക ൿ ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ൺ ത ഥ ദ ധ ന ൻ പ ഫ ബ ഭ മ ം യ ര ർ ല ൽ വ ശ ഷ സ ഹ ള ൾ ഴ റ ാ ി ീ ു ൂ ൃ െ േ ൈ ൊ ോ ൌ ൗ ്" + base: "‌‍ ഃ അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ ക ൿ ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ൺ ത ഥ ദ ധ ന ൻ പ ഫ ബ ഭ മ ം യ ര ർ ല ൽ വ ശ ഷ സ ഹ ള ൾ ഴ റ" auxiliary: "" marks: "◌ം ◌ഃ ◌ാ ◌ി ◌ീ ◌ു ◌ൂ ◌ൃ ◌െ ◌േ ◌ൈ ◌് ◌ൗ" numerals: "- , . % + 0൦ 1൧ 2൨ 3൩ 4൪ 5൫ 6൬ 7൭ 8൮ 9൯" diff --git a/Lib/gflanguages/data/languages/mni_Beng.textproto b/Lib/gflanguages/data/languages/mni_Beng.textproto index 4bb9b29264..98861a91a1 100644 --- a/Lib/gflanguages/data/languages/mni_Beng.textproto +++ b/Lib/gflanguages/data/languages/mni_Beng.textproto @@ -7,9 +7,9 @@ population: 1476590 region: "BD" region: "IN" exemplar_chars { - base: "় ঁ ং ঃ অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত থ দ ধ ন প ফ ব ভ ম য {য়} র ল ৱ শ ষ স হ া ি ী ু ূ ৃ ে ৈ ো ৌ ্" + base: "অ আ ই ঈ উ ঊ ঋ এ ঐ ও ঔ ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড {ড়} ঢ {ঢ়} ণ ত থ দ ধ ন প ফ ব ভ ম য {য়} র ল ৱ শ ষ স হ" auxiliary: "‌‍" - marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ু ◌ে ◌ৈ ◌্ ◌ৗ" + marks: "◌ঁ ◌ং ◌ঃ ◌় ◌া ◌ি ◌ু ◌ে ◌ৈ ◌্ ◌ৗ ◌ী ◌ূ ◌ৃ ◌ো ◌ৌ" numerals: "- , . % + 0০ 1১ 2২ 3৩ 4৪ 5৫ 6৬ 7৭ 8৮ 9৯" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" } diff --git a/Lib/gflanguages/data/languages/mr_Deva.textproto b/Lib/gflanguages/data/languages/mr_Deva.textproto index 6d1401f829..9c9423cfcc 100644 --- a/Lib/gflanguages/data/languages/mr_Deva.textproto +++ b/Lib/gflanguages/data/languages/mr_Deva.textproto @@ -6,9 +6,9 @@ autonym: "मराठी" population: 92826300 region: "IN" exemplar_chars { - base: "़ ॐ ं ँ ः अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ऱ ल व श ष स ह ळ ऽ ा ि ी ु ू ृ ॄ ॅ े ै ॉ ो ौ ्" + base: "ॐ अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ऱ ल व श ष स ह ळ ऽ" auxiliary: "‌‍" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌ॄ ◌ॅ ◌ॉ" numerals: "- , . % + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "‍ ॐ ं ः अ आ इ ई उ ऊ ऋ ऌ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ ऽ ॅ ्" diff --git a/Lib/gflanguages/data/languages/my_Mymr.textproto b/Lib/gflanguages/data/languages/my_Mymr.textproto index f142839ef0..ff75fa35fe 100644 --- a/Lib/gflanguages/data/languages/my_Mymr.textproto +++ b/Lib/gflanguages/data/languages/my_Mymr.textproto @@ -7,7 +7,8 @@ population: 36559231 region: "BD" region: "MM" exemplar_chars { - base: "က ခ ဂ ဃ င စ ဆ ဇ ဈ ဉ ည ဋ ဌ ဍ ဎ ဏ တ ထ ဒ ဓ န ပ ဖ ဗ ဘ မ ယ ရ လ ဝ သ ဟ ဠ အ ဣ ၏ ဤ ဥ ဦ ဧ ဩ ဪ ာ ါ ိ ီ ု ူ ေ ဲ ံ ဿ ျ ြ ွ ှ ္ ် ့ း" + base: "က ခ ဂ ဃ င စ ဆ ဇ ဈ ဉ ည ဋ ဌ ဍ ဎ ဏ တ ထ ဒ ဓ န ပ ဖ ဗ ဘ မ ယ ရ လ ဝ သ ဟ ဠ အ ဣ ၏ ဤ ဥ ဦ ဧ ဩ ဪ ာ ါ ေ ဿ" + marks: "'◌ိ ◌ီ ◌ု ◌ူ ◌ဲ ◌ံ ◌ွ ေ ဲ ံ ျ ြ ွ ှ ္ ် ့ း" auxiliary: "၀႐ ၁႑ ၂႒ ၃႓ ၄႔ ၅႕ ၆႖ ၇႗ ၈႘ ၉႙ ၵ ၚ ၽ ၾ ၐ ၑ ၥ ဨ ဢ ၒ ၓ ၔ ၕ ဳ ၖ ၗ ၘ ၙ ဴ ၢ ႆ ၤ ႈ ႊ ႏ" numerals: "- , . % + 0၀ 1၁ 2၂ 3၃ 4၄ 5၅ 6၆ 7၇ 8၈ 9၉" punctuation: "၊ ။ ‘ ’ “ ”" diff --git a/Lib/gflanguages/data/languages/ne_Deva.textproto b/Lib/gflanguages/data/languages/ne_Deva.textproto index c5a655dcc3..bc30c6f05a 100644 --- a/Lib/gflanguages/data/languages/ne_Deva.textproto +++ b/Lib/gflanguages/data/languages/ne_Deva.textproto @@ -8,9 +8,9 @@ region: "BT" region: "IN" region: "NP" exemplar_chars { - base: "़ ँ ं ः ॐ अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ ा ि ी ु ू ृ ॄ ॅ े ै ॉ ो ौ ्" + base: "ॐ अ आ इ ई उ ऊ ऋ ऌ ऍ ए ऐ ऑ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ" auxiliary: "‌‍" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌ॄ ◌ॅ ◌ॉ" numerals: "- , . % + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९" punctuation: "- — , ; ! ? । \' ‘ ’ \" “ ” ( ) [ ] { }" index: "अ आ इ ई उ ऊ ऋ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह" diff --git a/Lib/gflanguages/data/languages/or_Orya.textproto b/Lib/gflanguages/data/languages/or_Orya.textproto index 2d7d7e2d8b..cf5ebeee90 100644 --- a/Lib/gflanguages/data/languages/or_Orya.textproto +++ b/Lib/gflanguages/data/languages/or_Orya.textproto @@ -6,8 +6,9 @@ autonym: "ଓଡ଼ିଆ" population: 42434880 region: "IN" exemplar_chars { - base: "଼ ଅ ଆ ଇ ଈ ଉ ଊ ଋ ଏ ଐ ଓ ଔ ଁ ଂ ଃ କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ {ଡ଼} ଢ {ଢ଼} ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ୟ ର ଲ ଳ ଵ ୱ ଶ ଷ ସ ହ ା ି ୀ ୁ ୂ ୃ େ ୈ ୋ ୌ ୍" + base: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ଏ ଐ ଓ ଔ କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ {ଡ଼} ଢ {ଢ଼} ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ୟ ର ଲ ଳ ଵ ୱ ଶ ଷ ସ ହ" auxiliary: "‌‍" + marks: "◌଼ ◌ଁ ◌ଂ ◌ଃ ◌ା ◌ି ◌ୀ ◌ୁ ◌ୂ ◌ୃ ◌େ ◌ୈ ◌ୋ ◌ୌ ◌୍" numerals: "- , . % + 0୦ 1୧ 2୨ 3୩ 4୪ 5୫ 6୬ 7୭ 8୮ 9୯" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" index: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ଏ ଐ ଓ ଔ କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଳ ଶ ଷ ସ ହ {କ୍ଷ}" diff --git a/Lib/gflanguages/data/languages/pa_Guru.textproto b/Lib/gflanguages/data/languages/pa_Guru.textproto index 234716d05c..d59c44d4ca 100644 --- a/Lib/gflanguages/data/languages/pa_Guru.textproto +++ b/Lib/gflanguages/data/languages/pa_Guru.textproto @@ -9,9 +9,9 @@ region: "IN" region: "KE" region: "SG" exemplar_chars { - base: "ੱ ੰ ਼ ੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ੴ ੳ ਉ ਊ ਓ ਅ ਆ ਐ ਔ ੲ ਇ ਈ ਏ ਸ {ਸ਼} ਹ ਕ ਖ {ਖ਼} ਗ {ਗ਼} ਘ ਙ ਚ ਛ ਜ {ਜ਼} ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ {ਫ਼} ਬ ਭ ਮ ਯ ਰ ਲ ਵ ੜ ੍ ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ" + base: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ੴ ੳ ਉ ਊ ਓ ਅ ਆ ਐ ਔ ੲ ਇ ਈ ਏ ਸ {ਸ਼} ਹ ਕ ਖ {ਖ਼} ਗ {ਗ਼} ਘ ਙ ਚ ਛ ਜ {ਜ਼} ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ {ਫ਼} ਬ ਭ ਮ ਯ ਰ ਲ ਵ ੜ" auxiliary: "‌‍ ਃ ਂ ਁ {ਲ਼}" - marks: "◌਼ ◌ਾ ◌ਿ ◌ੀ ◌ੁ ◌ੂ ◌ੇ ◌ੈ ◌ੋ ◌ੌ" + marks: "◌਼ ◌ਾ ◌ਿ ◌ੀ ◌ੁ ◌ੂ ◌ੇ ◌ੈ ◌ੋ ◌ੌ ੱ ੰ ਼ ੍ ਾ ਿ ੀ ੁ ੂ ੇ ੈ ੋ ੌ" numerals: "- , . % + 0੦ 1੧ 2੨ 3੩ 4੪ 5੫ 6੬ 7੭ 8੮ 9੯" punctuation: "- – — , ; : ! ? . \' ‘ ’ \" “ ” ( ) [ ] / &" index: "ੳ ਅ ੲ ਸ ਹ ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ੜ" diff --git a/Lib/gflanguages/data/languages/sa_Deva.textproto b/Lib/gflanguages/data/languages/sa_Deva.textproto index e95875cdc5..9b7309fad2 100644 --- a/Lib/gflanguages/data/languages/sa_Deva.textproto +++ b/Lib/gflanguages/data/languages/sa_Deva.textproto @@ -6,9 +6,9 @@ autonym: "संस्कृतम्" population: 15913 region: "IN" exemplar_chars { - base: "॑ ॒ ़ ँ ं ः ॐ अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ ा ि ी ु ू ृ ॄ ॢ ॣ े ै ो ौ ्" + base: "ॐ अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह ऽ" auxiliary: "‌‍ ऍ ऑ ॅ ॉ" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌॑ ◌॒ ◌ॄ ◌ॢ ◌ॣ" numerals: "- , . % + 0० 1१ 2२ 3३ 4४ 5५ 6६ 7७ 8८ 9९" punctuation: "_ - – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] { } @ * / \\ & #` + | ~" index: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ए ऐ ओ औ क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह" diff --git a/Lib/gflanguages/data/languages/sd_Deva.textproto b/Lib/gflanguages/data/languages/sd_Deva.textproto index 619932d82c..6accca24f6 100644 --- a/Lib/gflanguages/data/languages/sd_Deva.textproto +++ b/Lib/gflanguages/data/languages/sd_Deva.textproto @@ -6,9 +6,9 @@ autonym: "सिन्धी" population: 344783 region: "IN" exemplar_chars { - base: "़ ं अ आ इ ई उ ऊ ए ऐ ओ औ क ख ग ॻ घ ङ च छ ज ॼ झ ञ ट ठ ड ॾ ढ ण त थ द ध न प फ ब ॿ भ म य र ल व श ष स ह ा ि ी ु ू ृ ॄ ॅ े ै ॉ ो ौ ्" + base: "अ आ इ ई उ ऊ ए ऐ ओ औ क ख ग ॻ घ ङ च छ ज ॼ झ ञ ट ठ ड ॾ ढ ण त थ द ध न प फ ब ॿ भ म य र ल व श ष स ह" auxiliary: "‌‍" - marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌्" + marks: "◌ँ ◌ं ◌ः ◌़ ◌ा ◌ि ◌ी ◌ु ◌ू ◌ृ ◌े ◌ै ◌ो ◌ौ ◌् ◌ॄ ◌ॅ ◌ॉ" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" } diff --git a/Lib/gflanguages/data/languages/si_Sinh.textproto b/Lib/gflanguages/data/languages/si_Sinh.textproto index b9019069f3..9cc0295ee3 100644 --- a/Lib/gflanguages/data/languages/si_Sinh.textproto +++ b/Lib/gflanguages/data/languages/si_Sinh.textproto @@ -6,7 +6,8 @@ autonym: "සිංහල" population: 15564656 region: "LK" exemplar_chars { - base: "අ ආ ඇ ඈ ඉ ඊ උ ඌ ඍ එ ඒ ඓ ඔ ඕ ඖ ං ඃ ක ඛ ග ඝ ඞ ඟ ච ඡ ජ ඣ ඥ ඤ ට ඨ ඩ ඪ ණ ඬ ත ථ ද ධ න ඳ ප ඵ බ භ ම ඹ ය ර ල ව ශ ෂ ස හ ළ ෆ ා ැ ෑ ි ී ු ූ ෘ ෲ ෟ ෙ ේ ෛ ො ෝ ෞ ්" + base: "අ ආ ඇ ඈ ඉ ඊ උ ඌ ඍ එ ඒ ඓ ඔ ඕ ඖ ං ඃ ක ඛ ග ඝ ඞ ඟ ච ඡ ජ ඣ ඥ ඤ ට ඨ ඩ ඪ ණ ඬ ත ථ ද ධ න ඳ ප ඵ බ භ ම ඹ ය ර ල ව ශ ෂ ස හ ළ ෆ" + marks: " ා ැ ෑ ි ී ු ූ ෘ ෲ ෟ ෙ ේ ෛ ො ෝ ෞ ්" auxiliary: "​‌‍ ඎ ඏ ඐ ඦ ෳ" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" diff --git a/Lib/gflanguages/data/languages/ta_Taml.textproto b/Lib/gflanguages/data/languages/ta_Taml.textproto index a64d44b96a..a12cce18cb 100644 --- a/Lib/gflanguages/data/languages/ta_Taml.textproto +++ b/Lib/gflanguages/data/languages/ta_Taml.textproto @@ -11,7 +11,7 @@ region: "MY" region: "RE" region: "SG" exemplar_chars { - base: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ ஃ க ங ச ஞ ட ண த ந ப ம ய ர ல வ ழ ள ற ன ஜ ஷ ஸ ஹ ா ி ீ ு ூ ெ ே ை ொ ோ ௌ ்" + base: "அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ ஃ க ங ச ஞ ட ண த ந ப ம ய ர ல வ ழ ள ற ன ஜ ஷ ஸ ஹ ா ி ு ூ ெ ே ை ொ ோ ௌ " auxiliary: "‌‍" marks: "◌ா ◌ி ◌ீ ◌ு ◌ூ ◌ெ ◌ே ◌ை ◌் ◌ௗ" numerals: "- , . % + 0௦ 1௧ 2௨ 3௩ 4௪ 5௫ 6௬ 7௭ 8௮ 9௯" diff --git a/Lib/gflanguages/data/languages/te_Telu.textproto b/Lib/gflanguages/data/languages/te_Telu.textproto index e1e401f023..cd1575da19 100644 --- a/Lib/gflanguages/data/languages/te_Telu.textproto +++ b/Lib/gflanguages/data/languages/te_Telu.textproto @@ -6,9 +6,9 @@ autonym: "తెలుగు" population: 95478480 region: "IN" exemplar_chars { - base: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ ఁ ం ః క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ఱ ల వ శ ష స హ ళ ా ి ీ ు ూ ృ ౄ ె ే ై ొ ో ౌ ్ ౕ ౖ" + base: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ఱ ల వ శ ష స హ ళ" auxiliary: "‌‍ ౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯" - marks: "◌ఁ ◌ం ◌ః ◌ా ◌ి ◌ీ ◌ు ◌ూ ◌ృ ◌ౄ ◌ె ◌ే ◌ొ ◌ో ◌ౌ ◌్ ◌ౖ ◌ౢ ◌ౣ" + marks: "◌ఁ ◌ం ◌ః ◌ా ◌ి ◌ీ ◌ు ◌ూ ◌ృ ◌ౄ ◌ె ◌ే ◌ొ ◌ో ◌ౌ ◌్ ◌ౖ ◌ౢ ◌ౣ ◌ై ◌ౕ" numerals: "- , . % + 0౦ 1౧ 2౨ 3౩ 4౪ 5౫ 6౬ 7౭ 8౮ 9౯" punctuation: "- , ; : ! ? . \' ‘ ’ \" “ ” ( ) [ ] { }" index: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఎ ఏ ఐ ఒ ఓ ఔ క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ఱ ల వ శ ష స హ ళ" diff --git a/Lib/gflanguages/data/languages/th_Thai.textproto b/Lib/gflanguages/data/languages/th_Thai.textproto index e7e569aafb..99ac6288bc 100644 --- a/Lib/gflanguages/data/languages/th_Thai.textproto +++ b/Lib/gflanguages/data/languages/th_Thai.textproto @@ -6,9 +6,9 @@ autonym: "ภาษาไทย" population: 55181920 region: "TH" exemplar_chars { - base: "ฯ ๆ ๎ ์ ็ ่ ้ ๊ ๋ ก ข ฃ ค ฅ ฆ ง จ ฉ ช ซ ฌ ญ ฎ ฏ ฐ ฑ ฒ ณ ด ต ถ ท ธ น บ ป ผ ฝ พ ฟ ภ ม ย ร ฤ ล ฦ ว ศ ษ ส ห ฬ อ ฮ ํ ะ ั า ๅ ำ ิ ี ึ ื ุ ู เ แ โ ใ ไ ฺ" + base: "ฯ ๆ ก ข ฃ ค ฅ ฆ ง จ ฉ ช ซ ฌ ญ ฎ ฏ ฐ ฑ ฒ ณ ด ต ถ ท ธ น บ ป ผ ฝ พ ฟ ภ ม ย ร ฤ ล ฦ ว ศ ษ ส ห ฬ อ ฮ ะ า ๅ ำ เ แ โ ใ ไ" auxiliary: "​" - marks: "◌ั ◌ิ ◌ุ ◌ู ◌็ ◌ํ" + marks: "◌ั ◌ิ ◌ุ ◌ู ◌็ ◌ํ ◌๎ ◌์ ◌่ ◌้ ◌๊ ◌๋ ◌ี ◌ึ ◌ื ◌ฺ" numerals: "% , - . + 0 1 2 3 4 5 6 7 8 9" punctuation: "! \" # \' ( ) * , - . / : @ [ ] – — ‘ ’ “ ” …" index: "ก ข ฃ ค ฅ ฆ ง จ ฉ ช ซ ฌ ญ ฎ ฏ ฐ ฑ ฒ ณ ด ต ถ ท ธ น บ ป ผ ฝ พ ฟ ภ ม ย ร ฤ ล ฦ ว ศ ษ ส ห ฬ อ ฮ" diff --git a/Lib/gflanguages/data/languages/ti_Ethi.textproto b/Lib/gflanguages/data/languages/ti_Ethi.textproto index 8f55c0080e..0a4761cb59 100644 --- a/Lib/gflanguages/data/languages/ti_Ethi.textproto +++ b/Lib/gflanguages/data/languages/ti_Ethi.textproto @@ -8,7 +8,8 @@ region: "ER" region: "ET" region: "IL" exemplar_chars { - base: "፟ ሀ-ሆ ለ-ቆ ቈ ቊ-ቍ ቐ-ቖ ቘ ቚ-ቝ በ-ኆ ኈ ኊ-ኍ ነ-ኮ ኰ ኲ-ኵ ኸ-ኾ ዀ ዂ-ዅ ወ-ዎ ዐ-ዖ ዘ-ዮ ደ-ዷ ጀ-ጎ ጐ ጒ-ጕ ጠ-ፗ" + base: "ሀ-ሆ ለ-ቆ ቈ ቊ-ቍ ቐ-ቖ ቘ ቚ-ቝ በ-ኆ ኈ ኊ-ኍ ነ-ኮ ኰ ኲ-ኵ ኸ-ኾ ዀ ዂ-ዅ ወ-ዎ ዐ-ዖ ዘ-ዮ ደ-ዷ ጀ-ጎ ጐ ጒ-ጕ ጠ-ፗ" + marks: "፟" auxiliary: "᎐ ᎑ ᎒ ᎓ ᎔ ᎕ ᎖ ᎗ ᎘ ᎙ ሇ ⶀ ᎀ ᎁ ᎂ ᎃ ⶁ ⶂ ⶃ ⶄ ቇ ᎄ ᎅ ᎆ ᎇ ⶅ ⶆ ⶇ ኇ ⶈ ⶉ ⶊ ኯ ዏ ⶋ ዯ ⶌ ዸ ዹ ዺ ዻ ዼ ዽ ዾ ዿ ⶍ ⶎ ጏ ጘ ጙ ጚ ጛ ጜ ጝ ጞ ጟ ⶓ ⶔ ⶕ ⶖ ⶏ ⶐ ⶑ ᎈ ᎉ ᎊ ᎋ ᎌ ᎍ ᎎ ᎏ ⶒ ፘ ፙ ፚ ⶠ ⶡ ⶢ ⶣ ⶤ ⶥ ⶦ ⶨ ⶩ ⶪ ⶫ ⶬ ⶭ ⶮ ⶰ ⶱ ⶲ ⶳ ⶴ ⶵ ⶶ ⶸ ⶹ ⶺ ⶻ ⶼ ⶽ ⶾ ⷀ ⷁ ⷂ ⷃ ⷄ ⷅ ⷆ ⷈ ⷉ ⷊ ⷋ ⷌ ⷍ ⷎ ⷐ ⷑ ⷒ ⷓ ⷔ ⷕ ⷖ ⷘ ⷙ ⷚ ⷛ ⷜ ⷝ ⷞ" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" index: "ሀ ለ ሐ መ ሠ ረ ሰ ሸ ቀ ቈ ቐ ቘ በ ቨ ተ ቸ ኀ ኈ ነ ኘ አ ከ ኰ ኸ ዀ ወ ዐ ዘ ዠ የ ደ ጀ ገ ጐ ጠ ጨ ጰ ጸ ፀ ፈ ፐ" diff --git a/Lib/gflanguages/data/scripts/Dupl.textproto b/Lib/gflanguages/data/scripts/Dupl.textproto index 870ef699f7..5db3999139 100644 --- a/Lib/gflanguages/data/scripts/Dupl.textproto +++ b/Lib/gflanguages/data/scripts/Dupl.textproto @@ -1,4 +1,4 @@ id: "Dupl" name: "Duployan shorthand" -family: "American" -summary: "Duployan shorthand (Sloan-Duployan shorthand, Duployan stenography) is an European alphabet, written left-to-right. Geometric stenography script created in 1860 by Father Émile Duployé for writing French, later expanded and adapted for writing English, Chinook Jargon and many others. Heavily cursive (connected), allows words to be written in a single stroke. Praised for simplicity and speed of writing. Needs software support for complex text layout (shaping)." +family: "European" +summary: "Duployan shorthand (Sloan-Duployan shorthand, Duployan stenography) is a European alphabet, written left-to-right. Geometric stenography script created in 1860 by Father Émile Duployé for writing French, later expanded and adapted for writing English, Chinook Jargon and many others. Heavily cursive (connected), allows words to be written in a single stroke. Praised for simplicity and speed of writing. Needs software support for complex text layout (shaping)." diff --git a/Lib/gflanguages/data/scripts/Todr.textproto b/Lib/gflanguages/data/scripts/Todr.textproto index 4b2d440554..54c4215db5 100644 --- a/Lib/gflanguages/data/scripts/Todr.textproto +++ b/Lib/gflanguages/data/scripts/Todr.textproto @@ -1,3 +1,6 @@ id: "Todr" name: "Todhri" +historical: true +family: "European" +summary: "Todhri is a historical European bicameral alphabet, written left-to-right. Created in the late 18th century by Theodor Haxhifilipi for the Albanian language." diff --git a/snippets/fix-exemplars-bases.py b/snippets/fix-exemplars-bases.py new file mode 100644 index 0000000000..8623a5fd03 --- /dev/null +++ b/snippets/fix-exemplars-bases.py @@ -0,0 +1,56 @@ +from collections import Counter +import unicodedata +from google.protobuf import text_format +from gflanguages import languages_public_pb2 + +ATTRIBUTES = "base auxiliary marks punctuation index".split(" ") + + +def main(args=None): + for path in args: + with open(path, encoding="utf-8") as fp: + language = text_format.Parse( + fp.read(), languages_public_pb2.LanguageProto() + ) + changed = False + exemplar_values = {} + bases = language.exemplar_chars.base.split(" ") + marks = language.exemplar_chars.marks.split(" ") + if not len(bases) or bases == [""]: + continue + new_marks = [] + new_bases = [] + for chars in marks: + if not chars: + continue + if chars[0] != "\u25CC": + chars = "\u25CC" + chars + if chars not in new_marks: + new_marks.append(chars) + + for chars in bases: + if not chars: + continue + if chars[0] == "\u25CC": + chars = chars[1:] + cat = unicodedata.category(chars[0]) + if cat in ["Mn", "Mc"]: + if chars[0] != "\u25CC": + chars = "\u25CC" + chars + if chars not in new_marks: + new_marks.append(chars) + else: + new_bases.append(chars) + + language.exemplar_chars.base = " ".join(new_bases) + language.exemplar_chars.marks = " ".join(new_marks) + + with open(path, "w", encoding="utf-8") as fp: + fp.write(text_format.MessageToString(language, as_utf8=True)) + fp.close() + + +if __name__ == "__main__": + import sys + + main(args=sys.argv[1:]) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 823a5791dd..9b9ef8c84d 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,7 +83,9 @@ SKIP_REGION = { "tlh_Latn": "Klingon is an artifical language.", } -LANGUAGE_NAME_REGEX = regex.compile(r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$") +LANGUAGE_NAME_REGEX = regex.compile( + r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$" +) # Some scripts have abbreviated names for reference in language names that are # sufficient in context. If an alternate is listed here, it should be used # universally and consistently across all language names. @@ -96,8 +98,7 @@ ALTERNATE_SCRIPT_NAMES = { @pytest.mark.parametrize("lang_code", LANGUAGES) @pytest.mark.parametrize( - "exemplar_name", ["base", "auxiliary", "marks", - "numerals", "punctuation", "index"] + "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"] ) def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name): lang = LANGUAGES[lang_code] @@ -116,15 +117,13 @@ def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name): @pytest.mark.parametrize("lang_code", LANGUAGES) @pytest.mark.parametrize( - "exemplar_name", ["base", "auxiliary", "marks", - "numerals", "punctuation", "index"] + "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"] ) def test_languages_exemplars_duplicates(lang_code, exemplar_name): lang = LANGUAGES[lang_code] exemplar = getattr(lang.exemplar_chars, exemplar_name).split() counter = Counter(exemplar) - counts = sorted(counter.most_common(), - key=lambda pair: exemplar.index(pair[0])) + counts = sorted(counter.most_common(), key=lambda pair: exemplar.index(pair[0])) assert counts == [(v, 1) for v in exemplar] @@ -143,6 +142,21 @@ def test_exemplars_bracketed_sequences(lang_code, exemplar_name): assert len(chars[1:-1]) > 1 +@pytest.mark.parametrize("lang_code", LANGUAGES) +def test_languages_exemplars_marks_in_base(lang_code): + lang = LANGUAGES[lang_code] + bases = lang.exemplar_chars.base + problems = [] + for chars in bases.split(): + if len(chars) > 1: + chars = chars.lstrip("{").rstrip("}") + if unicodedata.category(chars[0]) == "Mn": + problems.append("\u25CC" + chars) + if "\u25CC" in chars: + problems.append(chars) + assert not problems, f"Found marks in base: {problems}" + + SampleText = languages_public_pb2.SampleTextProto().DESCRIPTOR ExemplarChars = languages_public_pb2.ExemplarCharsProto().DESCRIPTOR @@ -224,8 +238,7 @@ def test_sample_texts_are_in_script(lang_code): "idu_Latn", "ban_Bali", ]: - pytest.xfail( - "These languages have known issues with their sample text") + pytest.xfail("These languages have known issues with their sample text") return lang = LANGUAGES[lang_code] script_name = SCRIPTS[lang.script].name @@ -244,8 +257,7 @@ def test_sample_texts_are_in_script(lang_code): chars = set(samples) for char in chars: char_script = ( - youseedee.ucd_data(ord(char)).get( - "Script", "").replace("_", " ") + youseedee.ucd_data(ord(char)).get("Script", "").replace("_", " ") ) if char_script == "Common" or char_script == "Inherited": continue @@ -290,29 +302,37 @@ def test_language_uniqueness(): else: names[lang.name] += 1 if any(count > 1 for count in names.values()): - duplicates = {name: count for name, - count in names.items() if count > 1} + duplicates = {name: count for name, count in names.items() if count > 1} pytest.fail(f"Duplicate language names: {duplicates}") def test_language_name_structure(): languages_with_bad_name_structure = {} for lang in LANGUAGES.values(): - script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script] + script_name = ( + SCRIPTS[lang.script].name + if lang.script not in ALTERNATE_SCRIPT_NAMES + else ALTERNATE_SCRIPT_NAMES[lang.script] + ) names = [["name", lang.name]] if lang.preferred_name: names += [["preferred_name", lang.preferred_name]] bad_names = [] for type, name in names: bad_structure = not regex.match(LANGUAGE_NAME_REGEX, name) - bad_script_suffix = name.endswith( - ")") and not name.endswith(f"({script_name})") + bad_script_suffix = name.endswith(")") and not name.endswith( + f"({script_name})" + ) if bad_structure or bad_script_suffix: bad_names.append(type) if len(bad_names) > 0: languages_with_bad_name_structure[lang.id] = bad_names if len(languages_with_bad_name_structure) > 0: - misstructured_language_names = [f"{language_id}" if len( - types) == 1 else f"{language_id}: {types}" for language_id, types in languages_with_bad_name_structure.items() if len(types) > 0] + misstructured_language_names = [ + f"{language_id}" if len(types) == 1 else f"{language_id}: {types}" + for language_id, types in languages_with_bad_name_structure.items() + if len(types) > 0 + ] pytest.fail( - f"Languages names without expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {misstructured_language_names}") + f'Languages names without expected structure ("LANGUAGE, MODIFIER (SCRIPT)"): {misstructured_language_names}' + )