id: "ain_Latn"
language: "ain"
script: "Latn"
-name: "Ainu, Latin"
+name: "Ainu (Latin)"
population: 0
historical: true
id: "ar_Syrc"
language: "ar"
script: "Syrc"
-name: "Arabic, Syriac"
+name: "Arabic (Syriac)"
population: 0
historical: true
id: "arc_Nbat"
language: "arc"
script: "Nbat"
-name: "Aramaic, Nabataean"
+name: "Aramaic (Nabataean)"
population: 0
sample_text {
masthead_full: "𐢅𐢕𐢆𐢚"
id: "arc_Palm"
language: "arc"
script: "Palm"
-name: "Aramaic, Palmyrene"
+name: "Aramaic (Palmyrene)"
population: 0
sample_text {
masthead_full: "𐡲𐡫𐡬𐡩"
id: "art_Latn"
language: "art"
script: "Latn"
-name: "Artificial languages, Latin, World"
+name: "Artificial languages, World (Latin)"
sample_text {
masthead_full: "JjAa"
masthead_partial: "Nn"
id: "azj_Cyrl"
language: "azj"
script: "Cyrl"
-name: "Azerbaijani, North, Cyrillic"
+name: "Azerbaijani, North (Cyrillic)"
autonym: "азәрбајҹан (Кирил, Азәрбајҹан)"
region: "AZ"
sample_text {
id: "ba_Latn"
language: "ba"
script: "Latn"
-name: "Bashkir, Latin"
+name: "Bashkir (Latin)"
region: "RU"
sample_text {
masthead_full: "BbAa"
id: "bal_Latn"
language: "bal"
script: "Latn"
-name: "Baluchi, Latin"
-preferred_name: "Balochi"
+name: "Baluchi (Latin)"
+preferred_name: "Balochi (Latin)"
population: 0
historical: true
id: "ban_Bali"
language: "ban"
script: "Bali"
-name: "Balinese, Balinese"
+name: "Balinese (Balinese)"
autonym: "ᬪᬵᬱᬩᬮᬶ"
population: 0
region: "ID"
id: "bax_Latn"
language: "bax"
script: "Latn"
-name: "Bamun, Latin"
+name: "Bamun (Latin)"
region: "CM"
population: 673000
exemplar_chars {
id: "bdh_Latn"
language: "bdh"
script: "Latn"
-name: "Baka (DRC/South Sudan)"
+name: "Baka, DRC/South Sudan"
autonym: "Tara Baká"
population: 60000
region: "CD"
id: "be_Latn"
language: "be"
script: "Latn"
-name: "Belarusian, Latin"
+name: "Belarusian (Latin)"
region: "BY"
sample_text {
masthead_full: "UuSs"
id: "ber_Latn"
language: "ber"
script: "Latn"
-name: "Berber, Latin"
+name: "Berber (Latin)"
id: "ber_Tfng"
language: "ber"
script: "Tfng"
-name: "Berber, Tifinagh"
+name: "Berber (Tifinagh)"
region: "MA"
id: "bft_Tibt"
language: "bft"
script: "Tibt"
-name: "Balti, Tibetan"
+name: "Balti (Tibetan)"
population: 0
historical: true
language: "bgn"
script: "Arab"
name: "Western Balochi"
-#preferred_name: "Balochi"
population: 2037382
region: "AF"
region: "IR"
id: "bho_Kthi"
language: "bho"
script: "Kthi"
-name: "Bhojpuri, Kaithi"
+name: "Bhojpuri (Kaithi)"
sample_text {
masthead_full: "𑂃𑂮𑂥𑂰𑂥"
masthead_partial: "𑂉𑂍"
id: "bkc_Latn"
language: "bkc"
script: "Latn"
-name: "Baka (Cameroon/Gabon)"
+name: "Baka, Cameroon/Gabon"
population: 71000
region: "CM"
region: "GA"
id: "bku_Buhd"
language: "bku"
script: "Buhd"
-name: "Buhid, Buhid"
+name: "Buhid (Buhid)"
population: 0
historical: true
sample_text {
specimen_32: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ"
specimen_21: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ\nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ"
specimen_16: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ\nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ \nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ \nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ"
- }
\ No newline at end of file
+ }
id: "bm_Nkoo"
language: "bm"
script: "Nkoo"
-name: "Bambara, Nko"
+name: "Bambara (N’Ko)"
population: 16000000
region: "ML"
id: "bn_Newa"
language: "bn"
script: "Newa"
-name: "Bengali, Newa"
+name: "Bengali (Newa)"
population: 6000
region: "NP"
id: "bo_Marc"
language: "bo"
script: "Marc"
-name: "Tibetan, Marchen"
+name: "Tibetan (Marchen)"
region: "CN"
id: "bo_Zanb"
language: "bo"
script: "Zanb"
-name: "Tibetan, Zanabazar"
+name: "Tibetan (Zanabazar)"
id: "brh_Latn"
language: "brh"
script: "Latn"
-name: "Brahui, Latin"
+name: "Brahui (Latin)"
population: 0
region: "PK"
sample_text {
id: "brx_Beng"
language: "brx"
script: "Beng"
-name: "Bodo, Bangla"
+name: "Bodo (Bengali)"
region: "IN"
sample_text {
masthead_full: "গসবঙ"
id: "brx_Latn"
language: "brx"
script: "Latn"
-name: "Bodo, Latin"
+name: "Bodo (Latin)"
region: "IN"
sample_text {
masthead_full: "GgAa"
id: "bsq_Bass"
language: "bsq"
script: "Bass"
-name: "Bassa, Vah"
+name: "Bassa (Bassa Vah)"
population: 410000
region: "LR"
region: "SL"
id: "bsq_Latn"
language: "bsq"
script: "Latn"
-name: "Bassa, Latin"
+name: "Bassa (Latin)"
population: 410000
region: "LR"
region: "SL"
id: "bug_Bugi"
language: "bug"
script: "Bugi"
-name: "Buginese, Buginese"
+name: "Buginese (Buginese)"
population: 0
region: "ID"
sample_text {
id: "byh_Deva"
language: "byh"
script: "Deva"
-name: "Bhujel, Devanagari"
+name: "Bhujel (Devanagari)"
region: "NP"
sample_text {
masthead_full: "सकतब"
id: "cbk_Latn"
language: "cbk"
script: "Latn"
-name: "Chavacano, Latin, Philippines"
+name: "Chavacano, Philippines (Latin)"
region: "PH"
sample_text {
masthead_full: "TtOo"
id: "ccp_Beng"
language: "ccp"
script: "Beng"
-name: "Chakma, Bengali"
+name: "Chakma (Bengali)"
population: 729137
region: "BD"
region: "IN"
id: "chn_Dupl"
language: "chn"
script: "Dupl"
-name: "Chinook Jargon, Duployan"
+name: "Chinook Jargon (Duployan)"
region: "US"
region: "CA"
id: "chx_Deva"
language: "chx"
script: "Deva"
-name: "Chantyal, Devanagari"
+name: "Chantyal (Devanagari)"
region: "NP"
sample_text {
masthead_full: "झउनम"
id: "cop_Copt"
language: "cop"
script: "Copt"
-name: "Coptic, Coptic"
+name: "Coptic (Coptic)"
population: 0
region: "EG"
sample_text {
id: "cop_Grek"
language: "cop"
script: "Grek"
-name: "Coptic, Greek"
+name: "Coptic (Greek)"
population: 0
historical: true
id: "cr_Latn"
language: "cr"
script: "Latn"
-name: "Cree, Latin"
+name: "Cree (Latin)"
population: 96000
region: "CA"
region: "US"
id: "cyo_Latn"
language: "cyo"
script: "Latn"
-name: "Cuyonon, Latin"
+name: "Cuyonon (Latin)"
region: "PH"
sample_text {
masthead_full: "TtAa"
id: "de_Dupl"
language: "de"
script: "Dupl"
-name: "German, Duployan"
+name: "German (Duployan)"
region: "DE"
id: "de_Runr"
language: "de"
script: "Runr"
-name: "German, Runic"
+name: "German (Runic)"
population: 0
historical: true
id: "dhi_Deva"
language: "dhi"
script: "Deva"
-name: "Dhimal, Devanagari"
+name: "Dhimal (Devanagari)"
region: "NP"
sample_text {
masthead_full: "हदबङ"
id: "dhw_Deva"
language: "dhw"
script: "Deva"
-name: "Dhanwar, Nepal, Devanagari"
+name: "Dhanwar, Nepal (Devanagari)"
region: "NP"
sample_text {
masthead_full: "सभमन"
id: "doi_Arab"
language: "doi"
script: "Arab"
-name: "Dogri, Arabic"
+name: "Dogri (Arabic)"
population: 0
historical: true
id: "doi_Dogr"
language: "doi"
script: "Dogr"
-name: "Dogri, Dogra"
+name: "Dogri (Dogra)"
population: 2600000
region: "IN"
sample_text {
id: "doi_Takr"
language: "doi"
script: "Takr"
-name: "Dogri, Takri"
+name: "Dogri (Takri)"
population: 0
historical: true
sample_text {
specimen_32: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 ।,"
specimen_21: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚳 𑚚𑚴𑚖𑚷𑚲 𑚛𑚮𑚘𑚲𑚫 𑚞𑚮𑚐𑚴𑚫 𑚝𑚮𑚊𑚖𑚷𑚳 𑚞𑚰𑚙𑚤𑚳𑚝𑚲 𑚨𑚦𑚊𑚮𑚒 𑚊𑚮𑚕𑚭 𑚊𑚤𑚯 𑚛𑚰𑚤 𑚛𑚲𑚨𑚲𑚛𑚭 𑚞𑚳𑚫𑚖𑚭 𑚊𑚯𑚙𑚭 𑚀𑚙𑚳 𑚄𑚚𑚲𑚫 𑚀𑚞𑚝𑚭 𑚢𑚭𑚥 𑚥𑚰𑚏𑚞𑚘𑚲 𑚊𑚝𑚲 𑚄𑚖𑚭𑚃 𑚛𑚮𑚙𑚭 ।,"
specimen_16: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚳 𑚚𑚴𑚖𑚷𑚲 𑚛𑚮𑚘𑚲𑚫 𑚞𑚮𑚐𑚴𑚫 𑚝𑚮𑚊𑚖𑚷𑚳 𑚞𑚰𑚙𑚤𑚳𑚝𑚲 𑚨𑚦𑚊𑚮𑚒 𑚊𑚮𑚕𑚭 𑚊𑚤𑚯 𑚛𑚰𑚤 𑚛𑚲𑚨𑚲𑚛𑚭 𑚞𑚳𑚫𑚖𑚭 𑚊𑚯𑚙𑚭 𑚀𑚙𑚳 𑚄𑚚𑚲𑚫 𑚀𑚞𑚝𑚭 𑚢𑚭𑚥 𑚥𑚰𑚏𑚞𑚘𑚲 𑚊𑚝𑚲 𑚄𑚖𑚭𑚃 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚲 𑚑𑚛 𑚨𑚦 𑚋𑚤𑚏 𑚊𑚤𑚯 𑚏𑚰𑚊𑚮𑚁 𑚄𑚨 𑚢𑚰𑚥𑚋𑚳 𑚦𑚮𑚏 𑚦𑚖𑚭 𑚊𑚭𑚥 𑚞𑚃 𑚌𑚮𑚁 𑚀𑚙𑚲 𑚈𑚩 𑚀𑚫𑚌𑚭𑚥 𑚩𑚴𑚝 𑚥𑚌𑚮𑚁 ।,"
-}
\ No newline at end of file
+}
id: "dyu_Nkoo"
language: "dyu"
script: "Nkoo"
-name: "Dyula, Nko"
+name: "Dyula (N’Ko)"
region: "CI"
id: "en_Brai"
language: "en"
script: "Brai"
-name: "English, Braille"
+name: "English (Braille)"
sample_text {
masthead_full: "⠉⠽⠑⠝"
masthead_partial: "⠉⠽"
id: "en_Dsrt"
language: "en"
script: "Dsrt"
-name: "English, Deseret"
+name: "English (Deseret)"
population: 0
sample_text {
masthead_full: "𐐃𐐫𐐢𐑊"
id: "en_Shaw"
language: "en"
script: "Shaw"
-name: "English, Shavian"
+name: "English (Shavian)"
population: 0
region: "US"
sample_text {
id: "eto_Latn"
language: "eto"
script: "Latn"
-name: "Eton (Cameroon)"
+name: "Eton, Cameroon"
population: 400000
region: "CM"
exemplar_chars {
id: "ett_Latn"
language: "ett"
script: "Latn"
-name: "Etruscan, Latin"
+name: "Etruscan (Latin)"
population: 0
historical: true
id: "evn_Latn"
language: "evn"
script: "Latn"
-name: "Evenki, Latin"
+name: "Evenki (Latin)"
population: 16000
region: "RU"
region: "CN"
id: "fil_Tglg"
language: "fil"
script: "Tglg"
-name: "Filipino, Tagalog"
+name: "Filipino (Tagalog)"
population: 0
region: "PH"
sample_text {
id: "fr_Dupl"
language: "fr"
script: "Dupl"
-name: "French, Duployan"
+name: "French (Duployan)"
historical: true
id: "fuf_Adlm"
language: "fuf"
script: "Adlm"
-name: "Pular, Adlam"
+name: "Pular (Adlam)"
region: "GN"
sample_text {
masthead_full: "𞤋𞤭𞤐𞤲"
id: "gag_Cyrl"
language: "gag"
script: "Cyrl"
-name: "Gagauz, Cyrillic"
+name: "Gagauz (Cyrillic)"
population: 0
region: "MD"
exemplar_chars {
id: "gcf_Latn"
language: "gcf"
script: "Latn"
-name: "Guadeloupean Creole French, Latin, Martinique"
+name: "Guadeloupean Creole French, Martinique (Latin)"
region: "GP"
region: "MQ"
sample_text {
id: "gju_Deva"
language: "gju"
script: "Deva"
-name: "Gujari, Devanagari"
+name: "Gujari (Devanagari)"
region: "IN"
sample_text {
masthead_full: "सगलम"
id: "gon_Telu"
language: "gon"
script: "Telu"
-name: "Gondi, Telugu"
+name: "Gondi (Telugu)"
population: 0
region: "IN"
id: "got_Runr"
language: "got"
script: "Runr"
-name: "Gothic, Runic"
+name: "Gothic (Runic)"
region: "UA"
sample_text {
masthead_full: "ᚨᛚᛁᛗ"
language: "grc"
script: "Cprt"
name: "Ancient Greek"
-preferred_name: "Ancient Greek"
population: 0
sample_text {
masthead_full: "𐠀𐠜𐠍𐠚"
id: "grc_Grek"
language: "grc"
script: "Grek"
-name: "Ancient Greek, Greek"
-preferred_name: "Ancient Greek"
+name: "Ancient Greek (Greek)"
autonym: "Ελληνικά"
population: 0
exemplar_chars {
id: "grc_Linb"
language: "grc"
script: "Linb"
-name: "Ancient Greek, Linear B"
-preferred_name: "Ancient Greek"
+name: "Ancient Greek (Linear B)"
population: 0
sample_text {
masthead_full: "𐀐𐀮𐀝𐀹"
id: "hi_Mahj"
language: "hi"
script: "Mahj"
-name: "Hindi, Mahajani"
+name: "Hindi (Mahajani)"
population: 0
historical: true
specimen_21: "𑅕𑅑𑅰𑅑 𑅯𑅛𑅕𑅣𑅑 𑅕𑅑 𑅓𑅕𑅐𑅧𑅣𑅣𑅐, 𑅨𑅭𑅑𑅯𑅐𑅭, 𑅘𑅭 𑅛𑅐 𑅨𑅣𑅭𑅯𑅛𑅯𑅱𑅐𑅭 𑅕𑅓 𑅨𑅭𑅣𑅑 𑅕𑅔𑅑 𑅬𑅧𑅬𑅐𑅧𑅐 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅧 𑅕𑅑𑅛𑅐 𑅛𑅐𑅓𑅗𑅐, 𑅧 𑅕𑅑𑅰𑅑 𑅕𑅓 𑅰𑅬𑅬𑅐𑅧 𑅒𑅭 𑅖𑅛𑅐𑅣𑅑 𑅨𑅭 𑅕𑅔𑅑 𑅐𑅕𑅖𑅳𑅓𑅨 𑅱𑅔 𑅰𑅕𑅓𑅗𑅐 । 𑅑𑅰𑅓 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅛𑅐 𑅐𑅦𑅓𑅨𑅔𑅧 𑅕𑅓 𑅯𑅑𑅭𑅒𑅥𑅦 𑅨𑅭𑅣𑅛𑅓𑅕 𑅕𑅔 𑅕𑅳𑅐𑅧𑅒𑅧𑅑 𑅭𑅕𑅖𑅳𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅨𑅭𑅐𑅨𑅣 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅙𑅐𑅭, 𑅐𑅧𑅣𑅭𑅐𑅣𑅬𑅐 𑅒𑅭 𑅦𑅭𑅬 𑅕𑅑 𑅐𑅛𑅳𑅐𑅥𑅑 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰 𑅐𑅦𑅑𑅕𑅐𑅭 𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅐𑅨𑅧𑅐 𑅦𑅭𑅬 𑅛𑅐 𑅯𑅑𑅰𑅳𑅯𑅐𑅰 𑅪𑅥𑅮𑅧𑅓 𑅒𑅭 𑅐𑅕𑅓𑅮𑅓 𑅛𑅐 𑅥𑅒𑅰𑅭𑅔𑅧 𑅕𑅓 𑅰𑅐𑅤 𑅬𑅑𑅮𑅕𑅭 𑅣𑅤𑅐 𑅰𑅐𑅭𑅯𑅛𑅧𑅑𑅕 𑅭𑅒𑅨 𑅬𑅓𑅧 𑅐𑅤𑅯𑅐 𑅧𑅑𑅛𑅑 𑅣𑅔𑅭 𑅨𑅭 𑅐𑅨𑅧𑅓 𑅦𑅭𑅬 𑅛𑅐 𑅯𑅑𑅰𑅳𑅯𑅐𑅰 𑅕𑅔 𑅰𑅳𑅑𑅕𑅖𑅳𑅐, 𑅕𑅭𑅑𑅛𑅐, 𑅒𑅨𑅐𑅰𑅧𑅐, 𑅣𑅤𑅐 𑅯𑅛𑅯𑅱𑅐𑅭 𑅕𑅓 𑅥𑅯𑅐𑅭𑅐 𑅨𑅭𑅕𑅞 𑅕𑅭𑅧𑅓 𑅕𑅑 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐 𑅱𑅑 ।"
specimen_16: "𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅙𑅐𑅭 𑅒𑅭 𑅒𑅰𑅕𑅑 𑅐𑅫𑅑𑅯𑅛𑅕𑅣𑅑 𑅕𑅑 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅪𑅑𑅧𑅐 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅕𑅓 𑅕𑅔𑅑 𑅭𑅐𑅛 𑅭𑅖𑅧𑅐 𑅒𑅭 𑅕𑅑𑅰𑅑 𑅫𑅑 𑅬𑅐𑅦𑅛𑅬 𑅕𑅓 𑅛𑅳𑅭𑅑𑅓 𑅰𑅓 𑅣𑅤𑅐 𑅰𑅑𑅬𑅐𑅔𑅧 𑅕𑅑 𑅨𑅭𑅯𑅐𑅱 𑅧 𑅕𑅭 𑅕𑅓 𑅕𑅑𑅰𑅑 𑅕𑅑 𑅬𑅒𑅙𑅧𑅐 𑅒𑅭 𑅦𑅐𑅭𑅢𑅐 𑅕𑅐 𑅐𑅧𑅯𑅓𑅖𑅳𑅢, 𑅨𑅭𑅱𑅢 𑅣𑅤𑅐 𑅨𑅭𑅥𑅐𑅧 𑅰𑅬𑅬𑅑𑅮𑅑𑅣 𑅱𑅑 ।\n𑅰𑅬𑅐𑅛 𑅕𑅓 𑅓𑅕 𑅰𑅥𑅰𑅛 𑅕𑅓 𑅭𑅒𑅨 𑅬𑅓𑅧 𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅰𑅐𑅬𑅐𑅛𑅑𑅕 𑅰𑅒𑅭𑅕𑅖𑅳𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 𑅒𑅭 𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅐𑅨𑅧𑅓 𑅯𑅛𑅕𑅣𑅑𑅣𑅯 𑅕𑅓 𑅒𑅰 𑅰𑅯𑅣𑅧𑅣𑅭 𑅯𑅑𑅕𑅐𑅰 𑅣𑅤𑅐 𑅗𑅔𑅭𑅯 𑅕𑅓 𑅮𑅑𑅓—𑅛𑅔 𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅨𑅭𑅛𑅣𑅧 𑅛𑅐 𑅐𑅧𑅣𑅭𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅰𑅱𑅛𑅔𑅗 𑅣𑅤𑅐 𑅨𑅭𑅣𑅛𑅓𑅕 𑅭𑅐𑅛𑅛 𑅕𑅓 𑅰𑅧𑅗𑅟𑅧 𑅓𑅯𑅧 𑅰𑅐𑅦𑅧𑅔𑅧 𑅕𑅓 𑅐𑅧𑅒𑅕𑅒𑅮 𑅱𑅔—𑅐𑅧𑅑𑅕𑅐𑅭𑅛𑅣𑅱 𑅐𑅯𑅰𑅳𑅛𑅕 𑅐𑅭𑅤𑅑𑅕, 𑅰𑅐𑅬𑅐𑅛𑅑𑅕, 𑅒𑅭 𑅰𑅐𑅧𑅰𑅕𑅭𑅒𑅣𑅑𑅕 𑅐𑅦𑅑𑅕𑅐𑅭𑅔𑅧 𑅕𑅑 𑅨𑅭𑅐𑅨𑅣𑅑 𑅕𑅐 𑅱𑅕𑅳 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅰𑅳𑅭𑅐𑅬 𑅒𑅭 𑅐𑅯𑅕𑅐𑅰𑅳 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅕𑅐𑅬 𑅕𑅓 𑅘𑅧𑅞𑅔𑅧 𑅕𑅑 𑅒𑅙𑅑𑅣 𑅱𑅥𑅪𑅧𑅥𑅑 𑅒𑅭 𑅰𑅬𑅛𑅰𑅬𑅛 𑅨𑅭 𑅬𑅛𑅳𑅥𑅒𑅭𑅑 𑅰𑅱𑅑𑅣 𑅚𑅒𑅞𑅞𑅑𑅛𑅐𑅧 𑅰𑅬𑅬𑅑𑅮𑅑𑅣 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅑𑅰𑅑 𑅰𑅐𑅬𑅐𑅛𑅑𑅕 𑅒𑅭 𑅐𑅧𑅣𑅭𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅯𑅛𑅯𑅰𑅤𑅐 𑅕𑅑 𑅨𑅭𑅐𑅨𑅣𑅑 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 𑅛𑅑𑅰𑅬𑅓𑅧 𑅑𑅰 𑅘𑅔𑅖𑅳𑅢𑅐 𑅬𑅓𑅧 𑅒𑅮𑅮𑅑𑅖𑅑𑅣 𑅐𑅦𑅑𑅕𑅐𑅭𑅔𑅧 𑅒𑅭 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐𑅔𑅧 𑅕𑅔 𑅨𑅒𑅭𑅢𑅣𑅱 𑅨𑅭𑅐𑅨𑅣 𑅕𑅑𑅛𑅐 𑅛𑅐 𑅰𑅕𑅓 ।"
}
-
\ No newline at end of file
id: "hi_Newa"
language: "hi"
script: "Newa"
-name: "Hindi, Newa"
+name: "Hindi (Newa)"
region: "IN"
id: "hnn_Hano"
language: "hnn"
script: "Hano"
-name: "Hanunoo, Hanunoo"
+name: "Hanunoo (Hanunoo)"
autonym: "ᜱᜨᜳᜨᜳᜢ"
population: 13000
exemplar_chars {
id: "id_Arab"
language: "id"
script: "Arab"
-name: "Indonesian, Arabic"
+name: "Indonesian (Arabic)"
population: 0
historical: true
id: "ie_Latn"
language: "ie"
script: "Latn"
-name: "Interlingue, Latin"
+name: "Interlingue (Latin)"
sample_text {
masthead_full: "OoMm"
masthead_partial: "Nn"
language: "ii"
script: "Latn"
name: "Sichuan Yi, Latin"
-preferred_name: "Nuosu"
+preferred_name: "Nuosu (Latin)"
population: 0
historical: true
id: "inh_Arab"
language: "inh"
script: "Arab"
-name: "Ingush, Arabic"
+name: "Ingush (Arabic)"
population: 0
historical: true
id: "inh_Latn"
language: "inh"
script: "Latn"
-name: "Ingush, Latin"
+name: "Ingush (Latin)"
population: 0
historical: true
id: "ja_Hira"
language: "ja"
script: "Hira"
-name: "Japanese, Hiragana"
+name: "Japanese (Hiragana)"
autonym: "日本語"
region: "BR"
region: "JP"
id: "ja_Kana"
language: "ja"
script: "Kana"
-name: "Japanese, Katakana"
+name: "Japanese (Katakana)"
autonym: "日本語"
region: "BR"
region: "JP"
id: "jbo_Latn"
language: "jbo"
script: "Latn"
-name: "Lojban, Latin"
+name: "Lojban (Latin)"
autonym: "lojban (Latin)"
sample_text {
masthead_full: "RrOo"
id: "jiv_Latn"
language: "jiv"
script: "Latn"
-name: "Shuar, Latin, Ecuador"
+name: "Shuar, Ecuador (Latin)"
region: "EC"
sample_text {
masthead_full: "PpEe"
id: "jra_Latn"
language: "jra"
script: "Latn"
-name: "Jarai, Latin"
+name: "Jarai (Latin)"
population: 530000
region: "VN"
exemplar_chars {
specimen_21: "Abih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi."
specimen_16: "Abih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi."
}
-source: "Lap Minh Siu, Developing the First Preliminary Dictionary of North American Jarai, Texas Tech University, 2009"
\ No newline at end of file
+source: "Lap Minh Siu, Developing the First Preliminary Dictionary of North American Jarai, Texas Tech University, 2009"
id: "jv_Java"
language: "jv"
script: "Java"
-name: "Javanese, Javanese"
+name: "Javanese (Javanese)"
autonym: "ꦧꦱꦗꦮ"
population: 0
region: "ID"
id: "kab_Tfng"
language: "kab"
script: "Tfng"
-name: "Kabyle, Tifinagh"
+name: "Kabyle (Tifinagh)"
region: "DZ"
sample_text {
masthead_full: "ⵉⵎⴷⴰ"
id: "kek_Latn"
language: "kek"
script: "Latn"
-name: "Q\'eqchi\'"
+name: "Qʼeqchiʼ"
region: "GT"
region: "BZ"
sample_text {
id: "kha_Beng"
language: "kha"
script: "Beng"
-name: "Khasi, Bengali"
+name: "Khasi (Bengali)"
population: 0
historical: true
id: "khr_Beng"
language: "khr"
script: "Beng"
-name: "Kharia, Bangla"
+name: "Kharia (Bengali)"
region: "IN"
sample_text {
masthead_full: "সউবম"
id: "khr_Deva"
language: "khr"
script: "Deva"
-name: "Kharia, Devanagari"
+name: "Kharia (Devanagari)"
region: "IN"
sample_text {
masthead_full: "सउबम"
id: "khr_Orya"
language: "khr"
script: "Orya"
-name: "Kharia, Odia"
+name: "Kharia (Odia)"
region: "IN"
sample_text {
masthead_full: "ସଉବମ"
id: "khw_Latn"
language: "khw"
script: "Latn"
-name: "Khowar, Latin"
+name: "Khowar (Latin)"
region: "PK"
sample_text {
masthead_full: "SsAa"
id: "kjg_Latn"
language: "kjg"
script: "Latn"
-name: "Khmu, Latin"
+name: "Khmu (Latin)"
population: 0
historical: true
id: "kk_Latn"
language: "kk"
script: "Latn"
-name: "Kazakh, Latin"
+name: "Kazakh (Latin)"
region: "TR"
sample_text {
masthead_full: "BbAa"
id: "kr_Arab"
language: "kr"
script: "Arab"
-name: "Kanuri, Arabic"
+name: "Kanuri (Arabic)"
id: "krs_Latn"
language: "krs"
script: "Latn"
-name: "Gbaya (Sudan)"
+name: "Gbaya, Sudan"
population: 47000
region: "SD"
region: "SS"
base: "a A b B c C d D e E f F g G h H i I j J k K l L ḷ Ḷ m M n N ŋ Ŋ o O p P r R s S t T u U v V w W y Y z Z"
marks: "◌̣"
auxiliary: "q Q x X"
-}
\ No newline at end of file
+}
id: "ks_Deva"
language: "ks"
script: "Deva"
-name: "Kashmiri, Devanagari"
+name: "Kashmiri (Devanagari)"
population: 0
region: "BT"
region: "FJ"
id: "ksw_Mymr"
language: "ksw"
script: "Mymr"
-name: "S\'gaw Karen, Myanmar"
+name: "S’gaw Karen, Myanmar"
region: "MM"
sample_text {
masthead_full: "ဟခပက"
id: "ku_Latn"
language: "ku"
script: "Latn"
-name: "Kurdish, Latin"
+name: "Kurdish (Latin)"
autonym: "Kurmancî"
population: 25000000
region: "TR"
id: "ku_Yezi"
language: "ku"
script: "Yezi"
-name: "Kurdish, Yezidi"
+name: "Kurdish (Yezidi)"
region: "GE"
sample_text {
masthead_full: "𐺍𐺁𐺄𐺀"
id: "kyw_Beng"
language: "kyw"
script: "Beng"
-name: "Kudmali, Bangla"
+name: "Kudmali (Bengali)"
region: "IN"
sample_text {
masthead_full: "সভমন"
id: "kyw_Orya"
language: "kyw"
script: "Orya"
-name: "Kudmali, Odia"
+name: "Kudmali (Odia)"
region: "IN"
sample_text {
masthead_full: "ସଭମନ"
id: "lad_Latn"
language: "lad"
script: "Latn"
-name: "Ladino, Latin"
+name: "Ladino (Latin)"
region: "IL"
sample_text {
masthead_full: "TtOo"
id: "laj_Latn"
language: "laj"
script: "Latn"
-name: "Lango [Uganda]"
+name: "Lango, Uganda"
population: 1643614
region: "UG"
exemplar_chars {
id: "las_Latn"
language: "las"
script: "Latn"
-name: "Lama (Togo)"
+name: "Lama, Togo"
autonym: "lamʋ"
population: 260000
region: "TG"
marks: "◌̃"
}
source: "Wikipedia"
-source: "https://www.webonary.org/lama"
\ No newline at end of file
+source: "https://www.webonary.org/lama"
id: "lhm_Deva"
language: "lhm"
script: "Deva"
-name: "Lhomi, Devanagari"
+name: "Lhomi (Devanagari)"
region: "NP"
sample_text {
masthead_full: "वङजय"
id: "lif_Limb"
language: "lif"
script: "Limb"
-name: "Limbu, Limbu"
+name: "Limbu (Limbu)"
population: 0
# This sample text was taken from
id: "lus_Latn"
language: "lus"
script: "Latn"
-name: "Mizo, Latin"
+name: "Mizo (Latin)"
region: "IN"
sample_text {
masthead_full: "MmIi"
id: "lzz_Latn"
language: "lzz"
script: "Latn"
-name: "Laz, Latin"
+name: "Laz (Latin)"
population: 22000
region: "GE"
region: "TR"
auxiliary: ""
}
source: "İsmail Avci, Lazuri 5: doguroni materyali – lazca öğretim matryali, Ankara, Milli Eğitim Bakanlığı Yayınları, 2019"
-source: "K’lemurişi Ramazan Kosanoğlu, “Lazla ve Lazca”, Kiana, April 2016"
\ No newline at end of file
+source: "K’lemurişi Ramazan Kosanoğlu, “Lazla ve Lazca”, Kiana, April 2016"
id: "mai_Newa"
language: "mai"
script: "Newa"
-name: "Maithili, Newa"
+name: "Maithili (Newa)"
region: "IN"
id: "mai_Tirh"
language: "mai"
script: "Tirh"
-name: "Maithili, Tirhuta"
+name: "Maithili (Tirhuta)"
population: 0
historical: true
id: "mak_Bugi"
language: "mak"
script: "Bugi"
-name: "Makasar, Buginese"
+name: "Makasar (Buginese)"
population: 0
historical: true
id: "man_Nkoo"
language: "man"
script: "Nkoo"
-name: "Mandingo, Nko"
+name: "Mandingo (N’Ko)"
region: "GN"
id: "mcf_Latn"
language: "mcf"
script: "Latn"
-name: "Matsés, Latin, Peru"
+name: "Matsés (Latin)"
region: "PE"
region: "BR"
sample_text {
id: "mdr_Bugi"
language: "mdr"
script: "Bugi"
-name: "Mandar, Buginese"
+name: "Mandar (Buginese)"
population: 0
historical: true
id: "men_Mend"
language: "men"
script: "Mend"
-name: "Mende, Mende"
+name: "Mende (Mende)"
population: 0
sample_text {
masthead_full: "𞡥𞠖𞢻𞠢"
id: "mic_Latn"
language: "mic"
script: "Latn"
-name: "Mi\'kmaq"
+name: "Mi'kmaq"
autonym: "Míkmawísimk"
population: 7140
region: "CA"
id: "min_Arab"
language: "min"
script: "Arab"
-name: "Minangkabau, Arabic"
+name: "Minangkabau (Arabic)"
region: "ID"
sample_text {
masthead_full: "سادو"
id: "mis_Latn"
language: "mis"
script: "Latn"
-name: "Uncoded languages, Latin, World"
+name: "Uncoded languages, World (Latin)"
region: "ES"
region: "FR"
sample_text {
id: "mn_Zanb"
language: "mn"
script: "Zanb"
-name: "Mongolian, Zanabazar"
+name: "Mongolian (Zanabazar)"
id: "mr_Modi"
language: "mr"
script: "Modi"
-name: "Marathi, Modi"
+name: "Marathi (Modi)"
population: 0
region: "IN"
sample_text {
id: "mro_Mroo"
language: "mro"
script: "Mroo"
-name: "Mru, Mro"
+name: "Mru (Mro)"
population: 0
historical: true
sample_text {
specimen_21: "𖩏𖩖𖩔𖩆𖩊 𖩗𖩖𖩊 𖩍𖩖𖩌 𖩎𖩆𖩁 𖩋𖩖 𖩍𖩖𖩌𖩯 𖩏𖩖𖩎𖩊 𖩏𖩖𖩔𖩆𖩊 𖩌𖩖 𖩐𖩓𖩆𖩎 𖩖𖩂𖩑𖩌 𖩎𖩖𖩯 𖩌𖩍𖩖𖩁𖩐𖩖 𖩂𖩑𖩌 𖩎𖩖 𖩖𖩎𖩆𖩁 𖩀𖩑𖩖𖩏 𖩈𖩝𖩐 𖩐𖩖𖩮 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩌𖩓𖩑𖩖𖩗 𖩌𖩖 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩈𖩝𖩆 𖩀𖩐𖩘𖩅 𖩐𖩓𖩆𖩁𖩮\n𖩍𖩖𖩁𖩔𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩗𖩖𖩊 𖩀𖩔𖩆𖩎 𖩈𖩘𖩒 𖩌𖩖 𖩖𖩌𖩆𖩓 𖩎𖩊 𖩌𖩆𖩓 𖩅𖩖𖩌 𖩖𖩊 𖩌𖩆𖩓 𖩔𖩘 𖩍𖩖𖩎𖩊 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩆𖩁𖩊𖩁 𖩌𖩖𖩁 𖩈𖩖𖩄𖩖𖩅𖩯 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩘 𖩗𖩆𖩁 𖩍𖩝𖩁 𖩄𖩑𖩖𖩗 𖩅𖩊𖩂𖩯 𖩎𖩊𖩂𖩐𖩖 𖩌𖩖 𖩍𖩝𖩁 𖩌𖩖 𖩈𖩖𖩁 𖩖𖩌𖩖𖩎 𖩎𖩊 𖩗𖩜 𖩅𖩖 𖩌𖩄𖩑𖩖𖩗 𖩌𖩖𖩯 𖩈𖩖𖩅𖩏𖩖 𖩐𖩆𖩗 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩀𖩑𖩅 𖩘 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩌𖩖 𖩌𖩖𖩊𖩯"
specimen_16: "𖩏𖩖𖩔𖩆𖩊 𖩗𖩖𖩊 𖩍𖩖𖩌 𖩎𖩆𖩁 𖩋𖩖 𖩍𖩖𖩌𖩯 𖩏𖩖𖩎𖩊 𖩏𖩖𖩔𖩆𖩊 𖩌𖩖 𖩐𖩓𖩆𖩎 𖩖𖩂𖩑𖩌 𖩎𖩖𖩯 𖩌𖩍𖩖𖩁𖩐𖩖 𖩂𖩑𖩌 𖩎𖩖 𖩖𖩎𖩆𖩁 𖩀𖩑𖩖𖩏 𖩈𖩝𖩐 𖩐𖩖𖩮 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩌𖩓𖩑𖩖𖩗 𖩌𖩖 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩈𖩝𖩆 𖩀𖩐𖩘𖩅 𖩐𖩓𖩆𖩁𖩮\n𖩍𖩖𖩁𖩔𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩗𖩖𖩊 𖩀𖩔𖩆𖩎 𖩈𖩘𖩒 𖩌𖩖 𖩖𖩌𖩆𖩓 𖩎𖩊 𖩌𖩆𖩓 𖩅𖩖𖩌 𖩖𖩊 𖩌𖩆𖩓 𖩔𖩘 𖩍𖩖𖩎𖩊 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩆𖩁𖩊𖩁 𖩌𖩖𖩁 𖩈𖩖𖩄𖩖𖩅𖩯 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩘 𖩗𖩆𖩁 𖩍𖩝𖩁 𖩄𖩑𖩖𖩗 𖩅𖩊𖩂𖩯 𖩎𖩊𖩂𖩐𖩖 𖩌𖩖 𖩍𖩝𖩁 𖩌𖩖 𖩈𖩖𖩁 𖩖𖩌𖩖𖩎 𖩎𖩊 𖩗𖩜 𖩅𖩖 𖩌𖩄𖩑𖩖𖩗 𖩌𖩖𖩯 𖩈𖩖𖩅𖩏𖩖 𖩐𖩆𖩗 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩀𖩑𖩅 𖩘 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩌𖩖 𖩌𖩖𖩊𖩯\n𖩄𖩖𖩌 𖩄𖩖𖩌 𖩍𖩆𖩊 𖩌𖩍𖩖 𖩀𖩆 𖩀𖩖𖩏𖩖 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩏𖩖𖩎 𖩏𖩖𖩎 𖩀𖩗𖩆𖩌 𖩌𖩓𖩆𖩅𖩯 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩌𖩖 𖩉𖩝 𖩖 𖩏𖩖 𖩀𖩘𖩌 𖩐𖩘𖩊 𖩎𖩊 𖩀𖩆𖩊𖩏𖩆𖩗 𖩍𖩖𖩁 𖩖𖩒 𖩘𖩏 𖩀𖩝𖩁 𖩐𖩖 𖩉𖩆𖩁 𖩌𖩖 𖩋𖩆𖩁𖩯 𖩍𖩖𖩁 𖩎𖩊 𖩒𖩝𖩕 𖩓𖩝𖩕 𖩌𖩖 𖩓𖩘𖩏 𖩓𖩘𖩏 𖩔𖩘 𖩀𖩘𖩌 𖩍𖩖𖩕𖩊, 𖩆𖩁 𖩖𖩌𖩖 𖩌𖩖𖩊 𖩖𖩄𖩖𖩅 𖩏𖩖𖩔𖩊 𖩀𖩑𖩅 𖩘 𖩈𖩖𖩅𖩏𖩖 𖩍𖩖𖩁 𖩎𖩊 𖩔𖩆𖩔𖩆 𖩋𖩖 𖩌𖩖𖩊 𖩖𖩏𖩖 𖩏𖩖𖩔𖩆𖩊 𖩐𖩓𖩆𖩎 𖩍𖩆𖩌 𖩌𖩖𖩌𖩖 𖩌𖩑𖩐 𖩏𖩖𖩔𖩆𖩊 𖩀𖩘𖩌 𖩕𖩊, 𖩕𖩊𖩂𖩋𖩖? 𖩘𖩏 𖩆𖩁 𖩐𖩓𖩆𖩎 𖩌𖩖 𖩗𖩆𖩁 𖩐𖩍𖩆𖩏 𖩌𖩑𖩐 𖩀𖩆𖩌𖩯"
}
-
\ No newline at end of file
id: "mrw_Arab"
language: "mrw"
script: "Arab"
-name: "Maranao, Arabic"
+name: "Maranao (Arabic)"
region: "PH"
sample_text {
masthead_full: "لاڠو"
id: "mrw_Latn"
language: "mrw"
script: "Latn"
-name: "Maranao, Latin"
+name: "Maranao (Latin)"
region: "PH"
sample_text {
masthead_full: "LlAa"
+++ /dev/null
-id: "ms_Arab"
-language: "ms"
-script: "Arab"
-name: "Malay (Arabic)"
-population: 3228030
-region: "BN"
-region: "CC"
-region: "ID"
-region: "MY"
-sample_text {
- masthead_full: "سموا"
- masthead_partial: "أن"
- styles: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي"
- tester: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك"
- poster_sm: "سموا مأنسي"
- poster_md: "سموا مأنسي"
- poster_lg: "سموا"
- specimen_48: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن"
- specimen_36: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن."
- specimen_32: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن."
- specimen_21: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن."
- specimen_16: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن."
-}
+++ /dev/null
-id: "ms_Latn"
-language: "ms"
-script: "Latn"
-name: "Malay"
-autonym: "Malaysia"
-population: 34869275
-region: "BN"
-region: "ID"
-region: "MY"
-region: "SG"
-exemplar_chars {
- base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z"
- numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9"
- punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #"
- index: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"
-}
id: "mui_Latn"
language: "mui"
script: "Latn"
-name: "Musi, Latin"
+name: "Musi (Latin)"
region: "ID"
sample_text {
masthead_full: "GgAa"
id: "ne_Newa"
language: "ne"
script: "Newa"
-name: "Nepali, Newa"
+name: "Nepali (Newa)"
region: "NP"
id: "new_Newa"
language: "new"
script: "Newa"
-name: "Newari, Newa"
+name: "Newari (Newa)"
region: "NP"
sample_text {
masthead_full: "𑐳𑐎𑐮𑐩"
id: "non_Latn"
language: "non"
script: "Latn"
-name: "Old Norse, Latin, Sweden"
+name: "Old Norse, Sweden (Latin)"
region: "SE"
sample_text {
masthead_full: "AaLl"
id: "nsk_Latn"
language: "nsk"
script: "Latn"
-name: "Naskapi, Latin"
+name: "Naskapi (Latin)"
population: 0
historical: true
id: "oj_Latn"
language: "oj"
script: "Latn"
-name: "Ojibwa, Latin"
+name: "Ojibwa (Latin)"
population: 0
historical: true
id: "ojb_Cans"
language: "ojb"
script: "Cans"
-name: "Northwestern Ojibwa, Unified Canadian Aboriginal Syllabics, Canada"
+name: "Northwestern Ojibwa, Canada (Unified Canadian Aboriginal Syllabics)"
region: "CA"
sample_text {
masthead_full: "ᑲᐦᑭᓇ"
id: "om_Ethi"
language: "om"
script: "Ethi"
-name: "Oromo, Ethiopic"
+name: "Oromo (Ethiopic)"
population: 0
historical: true
id: "osa_Latn"
language: "osa"
script: "Latn"
-name: "Osage, Latin"
+name: "Osage (Latin)"
population: 0
historical: true
id: "osc_Latn"
language: "osc"
script: "Latn"
-name: "Oscan, Latin"
+name: "Oscan (Latin)"
population: 0
historical: true
id: "pi_Brah"
language: "pi"
script: "Brah"
-name: "Pali, Brahmi"
+name: "Pali (Brahmi)"
historical: true
id: "pi_Sinh"
language: "pi"
script: "Sinh"
-name: "Pali, Sinhala"
+name: "Pali (Sinhala)"
population: 0
historical: true
id: "pi_Thai"
language: "pi"
script: "Thai"
-name: "Pali, Thai"
+name: "Pali (Thai)"
population: 0
historical: true
id: "pnt_Grek"
language: "pnt"
script: "Grek"
-name: "Pontic, Greek"
+name: "Pontic (Greek)"
population: 0
id: "pnt_Latn"
language: "pnt"
script: "Latn"
-name: "Pontic, Latin"
+name: "Pontic (Latin)"
population: 0
id: "pnz_Latn"
language: "pnz"
script: "Latn"
-name: "Pana (Central African Republic)"
+name: "Pana, Central African Republic"
population: 153000
region: "CF"
region: "CM"
id: "qud_Latn"
language: "qud"
script: "Latn"
-name: "Quechua, Unified Quichua, old Hispanic orthography"
+name: "Kichwa, Unified Quichua"
region: "PE"
sample_text {
masthead_full: "TtUu"
id: "rab_Deva"
language: "rab"
script: "Deva"
-name: "Camling, Devanagari"
+name: "Camling (Devanagari)"
region: "NP"
sample_text {
masthead_full: "झरमन"
id: "ray_Latn"
language: "ray"
script: "Latn"
-name: "Rapa, Latin"
+name: "Rapa (Latin)"
region: "PF"
sample_text {
masthead_full: "TtEe"
id: "rej_Rjng"
language: "rej"
script: "Rjng"
-name: "Rejang, Rejang"
+name: "Rejang (Rejang)"
population: 0
sample_text {
masthead_full: "ꤰꤳꤾꥁ"
id: "rhg_Latn"
language: "rhg"
script: "Latn"
-name: "Rohingya, Latin"
+name: "Rohingya (Latin)"
region: "MM"
sample_text {
masthead_full: "MmAa"
id: "ria_Latn"
language: "ria"
script: "Latn"
-name: "Riang [India]"
+name: "Riang, India"
population: 172391
region: "IN"
id: "ro_Cyrl"
language: "ro"
script: "Cyrl"
-name: "Romanian, Cyrillic"
+name: "Romanian (Cyrillic)"
autonym: "Молдовеняскэ"
population: 0
region: "MD"
id: "rob_Latn"
language: "rob"
script: "Latn"
-name: "Tae\'"
+name: "Taeʼ"
population: 293728
region: "ID"
id: "rom_Cyrl"
language: "rom"
script: "Cyrl"
-name: "Romany, Cyrillic"
+name: "Romany (Cyrillic)"
population: 0
historical: true
id: "sa_Ahom"
language: "sa"
script: "Ahom"
-name: "Sanskrit, Ahom"
+name: "Sanskrit (Ahom)"
region: "IN"
sample_text {
masthead_full: "𑜏𑜍𑜈𑜉"
id: "sa_Bali"
language: "sa"
script: "Bali"
-name: "Sanskrit, Balinese"
+name: "Sanskrit (Balinese)"
region: "IN"
sample_text {
masthead_full: "ᬲᬯᬫᬦ"
id: "sa_Bhks"
language: "sa"
script: "Bhks"
-name: "Sanskrit, Bhaiksuki"
+name: "Sanskrit (Bhaiksuki)"
region: "IN"
sample_text {
masthead_full: "𑰭𑰨𑰪𑰦"
id: "sa_Brah"
language: "sa"
script: "Brah"
-name: "Sanskrit, Brahmi"
+name: "Sanskrit (Brahmi)"
region: "IN"
sample_text {
masthead_full: "𑀲𑀭𑀯𑀫"
id: "sa_Bugi"
language: "sa"
script: "Bugi"
-name: "Sanskrit, Buginese"
+name: "Sanskrit (Buginese)"
region: "IN"
sample_text {
masthead_full: "ᨔᨑᨓᨆ"
id: "sa_Cham"
language: "sa"
script: "Cham"
-name: "Sanskrit, Cham"
+name: "Sanskrit (Cham)"
region: "IN"
sample_text {
masthead_full: "ꨧꨣꨠꨘ"
id: "sa_Gran"
language: "sa"
script: "Gran"
-name: "Sanskrit, Grantha"
+name: "Sanskrit (Grantha)"
population: 0
region: "IN"
sample_text {
id: "sa_Khar"
language: "sa"
script: "Khar"
-name: "Sanskrit, Kharoshthi"
+name: "Sanskrit (Kharoshthi)"
region: "IN"
sample_text {
masthead_full: "𐨯𐨪𐨬𐨨"
id: "sa_Marc"
language: "sa"
script: "Marc"
-name: "Sanskrit, Marchen"
+name: "Sanskrit (Marchen)"
region: "IN"
sample_text {
masthead_full: "𑲍𑲊𑲁𑱽"
id: "sa_Mong"
language: "sa"
script: "Mong"
-name: "Sanskrit, Mongolian"
+name: "Sanskrit (Mongolian)"
sample_text {
masthead_full: "ᠰᠠᠷᠸ"
masthead_partial: "ᠧᠮ"
id: "sa_Mroo"
language: "sa"
script: "Mroo"
-name: "Sanskrit, Mro"
+name: "Sanskrit (Mro)"
region: "IN"
sample_text {
masthead_full: "𖩔𖩒𖩓𖩗"
id: "sa_Mult"
language: "sa"
script: "Mult"
-name: "Sanskrit, Multani"
+name: "Sanskrit (Multani)"
region: "IN"
sample_text {
masthead_full: "𑊥𑊢𑊤𑊠"
id: "sa_Nand"
language: "sa"
script: "Nand"
-name: "Sanskrit, Nandinagari"
+name: "Sanskrit (Nandinagari)"
autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠"
region: "IN"
exemplar_chars {
id: "sa_Newa"
language: "sa"
script: "Newa"
-name: "Sanskrit, Newa"
+name: "Sanskrit (Newa)"
region: "IN"
sample_text {
masthead_full: "𑐳𑐬𑐰𑐩"
id: "sa_Rjng"
language: "sa"
script: "Rjng"
-name: "Sanskrit, Rejang"
+name: "Sanskrit (Rejang)"
region: "IN"
sample_text {
masthead_full: "ꤼꥀꤸꤵ"
id: "sa_Shrd"
language: "sa"
script: "Shrd"
-name: "Sanskrit, Sharada"
+name: "Sanskrit (Sharada)"
population: 0
region: "IN"
sample_text {
id: "sa_Sidd"
language: "sa"
script: "Sidd"
-name: "Sanskrit, Siddham"
+name: "Sanskrit (Siddham)"
population: 0
region: "IN"
sample_text {
id: "sa_Sinh"
language: "sa"
script: "Sinh"
-name: "Sanskrit, Sinhala"
+name: "Sanskrit (Sinhala)"
population: 0
sample_text {
masthead_full: "සරවම"
id: "sa_Soyo"
language: "sa"
script: "Soyo"
-name: "Sanskrit, Soyombo"
+name: "Sanskrit (Soyombo)"
region: "IN"
sample_text {
masthead_full: "𑪁𑩖𑩥𑪖"
id: "sa_Tagb"
language: "sa"
script: "Tagb"
-name: "Sanskrit, Tagbanwa"
+name: "Sanskrit (Tagbanwa)"
region: "IN"
sample_text {
masthead_full: "ᝰᝮᝯᝫ"
id: "sa_Tirh"
language: "sa"
script: "Tirh"
-name: "Sanskrit, Tirhuta"
+name: "Sanskrit (Tirhuta)"
region: "IN"
sample_text {
masthead_full: "𑒮𑒩𑒫𑒧"
id: "sa_Wcho"
language: "sa"
script: "Wcho"
-name: "Sanskrit, Wancho"
+name: "Sanskrit (Wancho)"
region: "IN"
sample_text {
masthead_full: "𞋃𞋁𞋆𞋀"
id: "sa_Zanb"
language: "sa"
script: "Zanb"
-name: "Sanskrit, Zanabazar"
+name: "Sanskrit (Zanabazar)"
region: "IN"
sample_text {
masthead_full: "𑨒𑨍𑨙𑨁"
id: "sat_Beng"
language: "sat"
script: "Beng"
-name: "Santali, Bengali"
+name: "Santali (Bengali)"
population: 0
historical: true
id: "sat_Deva"
language: "sat"
script: "Deva"
-name: "Santali, Devanagari"
+name: "Santali (Devanagari)"
population: 0
region: "NP"
sample_text {
id: "sat_Latn"
language: "sat"
script: "Latn"
-name: "Santali, Latin"
+name: "Santali (Latin)"
population: 0
historical: true
id: "sat_Orya"
language: "sat"
script: "Orya"
-name: "Santali, Odia"
+name: "Santali (Odia)"
population: 0
historical: true
id: "sd_Khoj"
language: "sd"
script: "Khoj"
-name: "Sindhi, Khojki"
+name: "Sindhi (Khojki)"
historical: true
population: 0
region: "IN"
id: "sd_Sind"
language: "sd"
script: "Sind"
-name: "Sindhi, Khudawadi"
+name: "Sindhi (Khudawadi)"
population: 0
region: "IN"
region: "PK"
specimen_21: "𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘."
specimen_16: "𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।"
}
-
+
id: "shi_Tfng"
language: "shi"
script: "Tfng"
-name: "Tachelhit, Tifinagh"
+name: "Tachelhit (Tifinagh)"
population: 0
region: "MA"
exemplar_chars {
id: "sja_Latn"
language: "sja"
script: "Latn"
-name: "Epena, Latin"
+name: "Epena (Latin)"
region: "CO"
sample_text {
masthead_full: "ŨũMm"
id: "skr_Mult"
language: "skr"
script: "Mult"
-name: "Saraiki, Multani"
+name: "Saraiki (Multani)"
historical: true
sample_text {
masthead_full: "𑊦𑊄𑊙𑊀"
specimen_32: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩"
specimen_21: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 \n𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩 𑊟𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊒𑊀 𑊄𑊀𑊣𑊛𑊀 𑊀𑊟𑊀 𑊦𑊂𑊕 𑊠𑊂𑊗𑊌 𑊗𑊁𑊤𑊕 𑊣𑊆𑊀𑊩 𑊟𑊊𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊙𑊀 𑊦𑊄 𑊢𑊁𑊥 𑊙𑊀 𑊄𑊂𑊣 𑊤𑊏 𑊐𑊄𑊀 𑊌𑊀 𑊀𑊛𑊕𑊁𑊀 𑊢𑊦𑊢𑊁 𑊤𑊊 𑊂𑊄𑊂 𑊥𑊂𑊦𑊢 𑊊𑊢𑊣𑊕 𑊌𑊐𑊀𑊩"
specimen_16: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 \n𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩 𑊟𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊒𑊀 𑊄𑊀𑊣𑊛𑊀 𑊀𑊟𑊀 𑊦𑊂𑊕 𑊠𑊂𑊗𑊌 𑊗𑊁𑊤𑊕 𑊣𑊆𑊀𑊩 𑊟𑊊𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊙𑊀 𑊦𑊄 𑊢𑊁𑊥 𑊙𑊀 𑊄𑊂𑊣 𑊤𑊏 𑊐𑊄𑊀 𑊌𑊀 𑊀𑊛𑊕𑊁𑊀 𑊢𑊦𑊢𑊁 𑊤𑊊 𑊂𑊄𑊂 𑊥𑊂𑊦𑊢 𑊊𑊢𑊣𑊕 𑊌𑊐𑊀𑊩 𑊀𑊟𑊀 𑊂𑊥𑊙𑊁 𑊠𑊢𑊌𑊁 𑊦𑊁𑊩 𑊌𑊂 𑊂𑊚𑊦𑊀 𑊋𑊣𑊢𑊀 𑊚𑊀𑊣 𑊌𑊂 𑊥𑊂𑊦𑊢 𑊅𑊙𑊀 𑊦𑊚 𑊀𑊛𑊕 𑊔𑊔 𑊟𑊢𑊀𑊩 \n𑊛𑊢 𑊄𑊦𑊁 𑊂𑊄𑊂 𑊒𑊖𑊁𑊩 𑊖𑊒𑊀 𑊦𑊂𑊥 𑊤𑊊 𑊀 𑊄𑊀 𑊀𑊅𑊂𑊥 𑊠𑊒𑊀 𑊛𑊂 𑊙𑊀 𑊄𑊖𑊣𑊀 𑊄𑊠𑊁𑊀 𑊄𑊂 𑊢𑊌 𑊢𑊂𑊐𑊁 𑊛𑊁 𑊠𑊣𑊙𑊁 𑊦𑊀𑊩"
-}
\ No newline at end of file
+}
id: "so_Arab"
language: "so"
script: "Arab"
-name: "Somali, Arabic"
+name: "Somali (Arabic)"
population: 0
historical: true
id: "so_Osma"
language: "so"
script: "Osma"
-name: "Somali, Osmanya"
+name: "Somali (Osmanya)"
population: 0
region: "SO"
sample_text {
id: "sq_Elba"
language: "sq"
script: "Elba"
-name: "Albanian, Elbasan"
+name: "Albanian (Elbasan)"
population: 0
sample_text {
masthead_full: "𐔟𐔁𐔀𐔒"
id: "sq_Vith"
language: "sq"
script: "Vith"
-name: "Albanian, Vithkuqi"
+name: "Albanian (Vithkuqi)"
population: 0
sample_text {
masthead_full: "𐖎𐖵𐕸𐖟"
id: "su_Sund"
language: "su"
script: "Sund"
-name: "Sundanese, Sundanese"
+name: "Sundanese (Sundanese)"
autonym: "ᮘᮞ ᮞᮥᮔ᮪ᮓ"
population: 0
region: "ID"
id: "sus_Arab"
language: "sus"
script: "Arab"
-name: "Susu, Arabic"
+name: "Susu (Arabic)"
population: 0
historical: true
id: "taq_Latn"
language: "taq"
script: "Latn"
-name: "Tamasheq, Latin"
+name: "Tamasheq (Latin)"
region: "ML"
population: 914000
exemplar_chars {
id: "taq_Tfng"
language: "taq"
script: "Tfng"
-name: "Tamasheq, Tifinagh"
+name: "Tamasheq (Tifinagh)"
region: "ML"
sample_text {
masthead_full: "ⵎⴸⴰⵏ"
id: "tbw_Tagb"
language: "tbw"
script: "Tagb"
-name: "Tagbanwa, Tagbanwa"
+name: "Tagbanwa (Tagbanwa)"
population: 0
sample_text {
masthead_full: "ᝣᝧᝮᝤ"
id: "tg_Latn"
language: "tg"
script: "Latn"
-name: "Tajik, Latin"
+name: "Tajik (Latin)"
population: 0
region: "TJ"
sample_text {
id: "thf_Deva"
language: "thf"
script: "Deva"
-name: "Thangmi, Devanagari"
+name: "Thangmi (Devanagari)"
region: "NP"
sample_text {
masthead_full: "सकखम"
id: "ths_Deva"
language: "ths"
script: "Deva"
-name: "Thakali, Devanagari"
+name: "Thakali (Devanagari)"
region: "NP"
sample_text {
masthead_full: "हयमच"
id: "tiw_Latn"
language: "tiw"
script: "Latn"
-name: "Tiwi, Latin"
+name: "Tiwi (Latin)"
region: "AU"
sample_text {
masthead_full: "TtAa"
id: "tk_Cyrl"
language: "tk"
script: "Cyrl"
-name: "Turkmen, Cyrillic"
+name: "Turkmen (Cyrillic)"
autonym: "Түркменче"
population: 0
region: "TM"
id: "tk_Latn"
language: "tk"
script: "Latn"
-name: "Turkmen, Latin"
+name: "Turkmen (Latin)"
autonym: "Türkmençe"
population: 11000000
region: "TM"
id: "tkr_Latn"
language: "tkr"
script: "Latn"
-name: "Tsakhur, Latin"
+name: "Tsakhur (Latin)"
autonym: "Ts‘əxna miz"
population: 22300
region: "AZ"
id: "tlh_Latn"
language: "tlh"
script: "Latn"
-name: "Klingon, Latin"
+name: "Klingon (Latin)"
id: "tly_Cyrl"
language: "tly"
script: "Cyrl"
-name: "Talysh, Cyrillic"
+name: "Talysh (Cyrillic)"
autonym: "Толыши"
population: 0
exemplar_chars {
id: "tly_Latn"
language: "tly"
script: "Latn"
-name: "Talysh, Latin"
+name: "Talysh (Latin)"
autonym: "Tolışi"
population: 229590
region: "AZ"
id: "tr_Arab"
language: "tr"
script: "Arab"
-name: "Turkish, Arabic"
+name: "Turkish (Arabic)"
population: 0
historical: true
id: "tru_Syrc"
language: "tru"
script: "Syrc"
-name: "Turoyo, Syriac"
+name: "Turoyo (Syriac)"
population: 0
sample_text {
masthead_full: "ܐܘܢܫ"
id: "tt_Arab"
language: "tt"
script: "Arab"
-name: "Tatar, Arabic"
+name: "Tatar (Arabic)"
region: "RU"
sample_text {
masthead_full: "نارل"
id: "tt_Latn"
language: "tt"
script: "Latn"
-name: "Tatar, Latin"
+name: "Tatar (Latin)"
region: "RU"
sample_text {
masthead_full: "BbAa"
id: "udm_Latn"
language: "udm"
script: "Latn"
-name: "Udmurt, Latin"
+name: "Udmurt (Latin)"
population: 0
historical: true
id: "ug_Latn"
language: "ug"
script: "Latn"
-name: "Uyghur, Latin"
+name: "Uyghur (Latin)"
autonym: "Uighur"
population: 0
region: "CN"
id: "unr_Orya"
language: "unr"
script: "Orya"
-name: "Mundari, Odia"
+name: "Mundari (Odia)"
region: "IN"
sample_text {
masthead_full: "ସବନହ"
id: "unx_Deva"
language: "unx"
script: "Deva"
-name: "Munda, Devanagari"
+name: "Munda (Devanagari)"
population: 0
region: "BT"
region: "FJ"
id: "vi_Hani"
language: "vi"
script: "Hani"
-name: "Vietnamese, Han"
+name: "Vietnamese (Han)"
population: 0
region: "VN"
sample_text {
id: "wal_Ethi"
language: "wal"
script: "Ethi"
-name: "Wolaytta, Ethiopic"
+name: "Wolaytta (Ethiopic)"
population: 1946034
region: "ET"
id: "wal_Latn"
language: "wal"
script: "Latn"
-name: "Wolaytta, Latin"
+name: "Wolaytta (Latin)"
population: 7000000
region: "ET"
exemplar_chars {
id: "wo_Arab"
language: "wo"
script: "Arab"
-name: "Wolof, Arabic"
+name: "Wolof (Arabic)"
population: 0
historical: true
id: "xum_Latn"
language: "xum"
script: "Latn"
-name: "Umbrian, Latin"
+name: "Umbrian (Latin)"
population: 0
historical: true
id: "ybh_Deva"
language: "ybh"
script: "Deva"
-name: "Yakha, Devanagari"
+name: "Yakha (Devanagari)"
region: "NP"
sample_text {
masthead_full: "घकओथ"
language: "yue"
script: "Hani"
name: "Yue Chinese"
-preferred_name: "Cantonese"
+preferred_name: "Cantonese (Han)"
autonym: "粵語"
region: "CN"
region: "HK"
specimen_32: "鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護,"
specimen_21: "鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護,\n鑑於有必要促進各國間友好關係嘅發展,\n鑑於各聯合國國家嘅人民已經喺聯合國憲章中重申佢哋對基本人權、人格尊嚴同價值以及男女平等權利嘅信念,並決心促成較大自由中嘅社會進步同生活水平嘅改善,\n鑑於各會員國業已誓願同聯合國合作以促進對人權同基本自由嘅普遍尊重同遵行,\n鑑於對呢啲權利同自由嘅普遍了解對於呢個誓願嘅充分實現具有好大嘅重要性,"
specimen_16: "鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護,\n鑑於有必要促進各國間友好關係嘅發展,\n鑑於各聯合國國家嘅人民已經喺聯合國憲章中重申佢哋對基本人權、人格尊嚴同價值以及男女平等權利嘅信念,並決心促成較大自由中嘅社會進步同生活水平嘅改善,\n鑑於各會員國業已誓願同聯合國合作以促進對人權同基本自由嘅普遍尊重同遵行,\n鑑於對呢啲權利同自由嘅普遍了解對於呢個誓願嘅充分實現具有好大嘅重要性,\n因此而家,\n大會,\n發布呢一個世界人權宣言,作為所有人民同所有國家努力實現嘅共同標準,以期每一個人同社會機構經常銘念本宣言,努力通過教誨同教育促進對權利同自由嘅尊重,並通過國家嘅和國際嘅漸進措施,令呢啲權利同自由喺各會員國本身人民及喺佢管轄下領土嘅人民中得到普遍同有效嘅承認同遵行;\n 人人生而自由,喺尊嚴同權利上一律平等。佢哋賦有理性同良心,並應以兄弟關係嘅精神相對待。\n人人有資格享有本宣言所載嘅一切權利同自由,唔分種族、膚色、性別、語言、宗教、政治或其他見解、國籍或社會出身、財產、出生或其他身分等任何區別。"
-}
\ No newline at end of file
+}
language: "yue"
script: "Hant"
name: "Yue Chinese"
-preferred_name: "Cantonese"
+preferred_name: "Cantonese (Traditional)"
autonym: "粵語"
population: 6524919
region: "CN"
id: "zh_Hans"
language: "zh"
script: "Hans"
-name: "Simplified Chinese"
+name: "Chinese (Simplified)"
autonym: "中文(简体,中国)"
population: 1265387866
region: "CN"
id: "zh_Hebr"
language: "zh"
script: "Hebr"
-name: "Chinese, Hebrew"
+name: "Chinese (Hebrew)"
region: "CN"
sample_text {
masthead_full: "עיןז"
id: "zlm_Arab"
language: "zlm"
script: "Arab"
-name: "Malay, Arabic"
+name: "Malay (Arabic)"
region: "BN"
region: "ID"
region: "MY"
id: "zlm_Latn"
language: "zlm"
script: "Latn"
-name: "Malay, Latin"
+name: "Malay (Latin)"
region: "BN"
region: "ID"
region: "MY"
id: "Beng"
-name: "Bangla"
-
+name: "Bengali"
youseedee
black
isort
-pytest
\ No newline at end of file
+pytest
+regex
name = "gflanguages"
description = "A python API for evaluating language support in the Google Fonts collection."
readme = "README.md"
-authors = [
+authors = [
{ name = "Simon Cozens", email = "simon@simon-cozens.org" }
]
dev = [
"uharfbuzz",
"youseedee",
- "pytest"
+ "pytest",
+ "regex"
]
# limitations under the License.
#
from collections import defaultdict, Counter
-import re
+import regex
import unicodedata
from gflanguages import (
"tlh_Latn": "Klingon is an artifical language.",
}
+# Expected shape of a language name: "LANGUAGE", optionally followed by
+# ", MODIFIER" and then optionally by " (SCRIPT)". Every part may contain
+# letters (\p{L}), spaces, hyphens and the three apostrophe variants listed
+# in the character classes; only the MODIFIER part may also contain "/".
+LANGUAGE_NAME_REGEX = regex.compile(r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$")
+# Some scripts have abbreviated names for reference in language names that are
+# sufficient in context. If an alternate is listed here, it should be used
+# universally and consistently across all language names.
+# Keys are script codes as stored in lang.script; values are the text that
+# test_language_name_structure expects inside the trailing parentheses.
+ALTERNATE_SCRIPT_NAMES = {
+ "Dupl": "Duployan",
+ "Hans": "Simplified",
+ "Hant": "Traditional",
+}
+
@pytest.mark.parametrize("lang_code", LANGUAGES)
@pytest.mark.parametrize(
- "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]
+ "exemplar_name", ["base", "auxiliary", "marks",
+ "numerals", "punctuation", "index"]
)
def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name):
lang = LANGUAGES[lang_code]
@pytest.mark.parametrize("lang_code", LANGUAGES)
@pytest.mark.parametrize(
- "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]
+ "exemplar_name", ["base", "auxiliary", "marks",
+ "numerals", "punctuation", "index"]
)
def test_languages_exemplars_duplicates(lang_code, exemplar_name):
lang = LANGUAGES[lang_code]
exemplar = getattr(lang.exemplar_chars, exemplar_name).split()
counter = Counter(exemplar)
- counts = sorted(counter.most_common(), key=lambda pair: exemplar.index(pair[0]))
+ counts = sorted(counter.most_common(),
+ key=lambda pair: exemplar.index(pair[0]))
assert counts == [(v, 1) for v in exemplar]
if field.name == "auxiliary" or field.name == "index":
continue
exemplars = getattr(lang.exemplar_chars, field.name)
- group_of_chars = re.findall(r"(\{[^}]+\}|\S+)", exemplars)
+ group_of_chars = regex.findall(r"(\{[^}]+\}|\S+)", exemplars)
for chars in group_of_chars:
for char in chars:
char_script = youseedee.ucd_data(ord(char)).get("Script")
"idu_Latn",
"ban_Bali",
]:
- pytest.xfail("These languages have known issues with their sample text")
+ pytest.xfail(
+ "These languages have known issues with their sample text")
return
lang = LANGUAGES[lang_code]
script_name = SCRIPTS[lang.script].name
chars = set(samples)
for char in chars:
char_script = (
- youseedee.ucd_data(ord(char)).get("Script", "").replace("_", " ")
+ youseedee.ucd_data(ord(char)).get(
+ "Script", "").replace("_", " ")
)
if char_script == "Common" or char_script == "Inherited":
continue
def test_language_uniqueness():
names = Counter([])
for lang in LANGUAGES.values():
- # We check that names are unique *within a script* since
- # when we display them in a menu we segment that menu by
- # script and then by language
+ # Names must be unique across all languages, not just within a script:
+ # the preferred name is counted when set, otherwise the plain name.
if lang.preferred_name:
- names[lang.script + "/" + lang.preferred_name] += 1
+ names[lang.preferred_name] += 1
else:
- names[lang.name + "/" + lang.preferred_name] += 1
+ names[lang.name] += 1
if any(count > 1 for count in names.values()):
- duplicates = {name: count for name, count in names.items() if count > 1}
+ duplicates = {name: count for name,
+ count in names.items() if count > 1}
pytest.fail(f"Duplicate language names: {duplicates}")
+
+
+def test_language_name_structure():
+    """Check every language name follows "LANGUAGE, MODIFIER (SCRIPT)".
+
+    For each entry in LANGUAGES, both ``name`` and (when set)
+    ``preferred_name`` must match LANGUAGE_NAME_REGEX in full, and a
+    trailing parenthetical must be exactly the language's script name
+    (or its ALTERNATE_SCRIPT_NAMES override). All offenders are collected
+    and reported in a single pytest failure.
+    """
+    languages_with_bad_name_structure = {}
+    for lang in LANGUAGES.values():
+        # Prefer the abbreviated script name when one is registered.
+        if lang.script in ALTERNATE_SCRIPT_NAMES:
+            script_name = ALTERNATE_SCRIPT_NAMES[lang.script]
+        else:
+            script_name = SCRIPTS[lang.script].name
+        names = [("name", lang.name)]
+        if lang.preferred_name:
+            names.append(("preferred_name", lang.preferred_name))
+        bad_names = []
+        for field_name, name in names:
+            # fullmatch, not match: the pattern's trailing "$" alone would
+            # still accept a name that ends in a stray newline.
+            bad_structure = LANGUAGE_NAME_REGEX.fullmatch(name) is None
+            bad_script_suffix = name.endswith(")") and not name.endswith(
+                f"({script_name})"
+            )
+            if bad_structure or bad_script_suffix:
+                bad_names.append(field_name)
+        if bad_names:
+            languages_with_bad_name_structure[lang.id] = bad_names
+    if languages_with_bad_name_structure:
+        # A language with a single offending field is reported by id only;
+        # with several, the offending fields are listed next to the id.
+        misstructured_language_names = [
+            f"{language_id}" if len(types) == 1 else f"{language_id}: {types}"
+            for language_id, types in languages_with_bad_name_structure.items()
+        ]
+        pytest.fail(
+            f"Languages names without expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {misstructured_language_names}")