--- /dev/null
+id: "zag_Berf"
+language: "zag"
+script: "Berf"
+name: "Zaghawa (Beria Erfe)"
+population: 350000
+region: "TD"
+region: "SD"
+sample_text {
+ masthead_full: "π»π»π»π»"
+ masthead_partial: "π»π»"
+ styles: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ tester: "πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ poster_sm: "πΊ½π»π»π» πΊΌπΊ»Μπ»π»π»"
+ poster_md: "π»πΊΎπ»π»π»π»π»"
+ poster_lg: "π»πΊΎπ»πΊΎπΊΏ"
+ specimen_48: "πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π»."
+ specimen_36: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π»."
+ specimen_32: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π»."
+ specimen_21: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ specimen_16: "πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+}
\ No newline at end of file
--- /dev/null
+import requests
+from bs4 import BeautifulSoup as Soup
+from gflanguages import LoadScripts, languages_public_pb2
+from gftools.util.google_fonts import WriteProto
+from google.protobuf.json_format import ParseDict
+
+SCRIPT_CODE_URL = "https://www.unicode.org/iso15924/iso15924-codes.html"
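+# ISO 15924 codes that are intentionally not added: historic forms, script
+# combinations, and subsets already covered by other script entries.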
+SKIP_SCRIPTS = {
+ "Cyrs",
+ "Egyd",
+ "Egyh",
+ "Hanb",
+ "Hang",
+ "Hntl",
+ "Hrkt",
+ "Jamo",
+ "Seal",
+}
+
+
+def fetch_script_codes():
+    """Scrape the ISO 15924 code table and return (code, English name) pairs."""
+    response = requests.get(SCRIPT_CODE_URL)
+    response.raise_for_status()
+    soup = Soup(response.text, "html.parser")
+
+    script_codes = []
+    table = soup.find("table", {"class": "simple"})
+    for row in table.find_all("tr")[1:]:  # Skip the header row
+        cols = row.find_all("td")
+        # Column 0 holds the four-letter code and column 2 the English name,
+        # so a data row needs at least three cells.
+        if len(cols) >= 3:
+            code = cols[0].text.strip()
+            name = cols[2].text.strip()
+            script_codes.append((code, name))
+
+    return script_codes
+
+
+known_scripts = LoadScripts()
+for code, name in fetch_script_codes():
+    # Skip special-purpose ranges (Q* private use, Z* special codes),
+    # explicitly excluded codes, and script variants.
+    if code[0] in ("Q", "Z") or code in SKIP_SCRIPTS or "variant" in name:
+        continue
+    # Only write a proto for scripts gflanguages does not know about yet.
+    if code not in known_scripts:
+        print(f"Adding script {code} - {name}")
+        message = ParseDict(
+            {"id": code, "name": name},
+            languages_public_pb2.ScriptProto(),
+        )
+        WriteProto(message, f"Lib/gflanguages/data/scripts/{code}.textproto")
--- /dev/null
+import re
+import random
+
+from gflanguages import languages_public_pb2
+from google.protobuf.json_format import ParseDict
+from google.protobuf.text_format import MessageToString
+
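+# Source corpus in the Beria Erfe (Zaghawa) script; every sample string below
+# is derived from this text.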
+text = "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π»."
+
+# Split the corpus into sentences (on either an ideographic or a Latin full
+# stop), then collect the words and the set of distinct glyphs.
+sentences = re.split(r"(?<=[。\.])\s+", text)
+sentences = [s for s in sentences if s.strip()]
+
+words = re.findall(r"\S+", text.replace(".", ""))
+glyphs = set(re.findall(r"\S", text))
+
+
+def random_phrase(inputs, min_len, max_len, separator=" "):
+    """Build a phrase of at least min_len characters from randomly drawn inputs.
+
+    Inputs are drawn without replacement, aiming for roughly max_len characters;
+    if one pass through the bag is too short to reach min_len, the bag is
+    refilled with an extra copy of the inputs and the draw continues.
+    """
+    phrase = ""
+    satisfied = False
+    repetitions = 1
+    while not satisfied:
+        sentence_bag = list(inputs) * repetitions
+        remaining = max_len
+        while remaining > 0 and sentence_bag:
+            new_sentence = random.choice(sentence_bag)
+            sentence_bag.remove(new_sentence)
+            phrase += new_sentence + separator
+            remaining -= len(new_sentence) + len(separator)
+            if len(phrase) >= min_len:
+                satisfied = True
+                break
+        if not satisfied:
+            print("Trying again with a new bag of sentences")
+            repetitions += 1
+    return phrase
+
+
+sample_text = {
+ "masthead_full": "".join(random.choices(list(glyphs), k=4)),
+ "masthead_partial": "".join(random.choices(list(glyphs), k=2)),
+ "styles": random_phrase(sentences, 40, 60, ". "),
+ "tester": random_phrase(sentences, 60, 90, ". "),
+ "poster_sm": random_phrase(words, 10, 17, " "),
+ "poster_md": random_phrase(words, 6, 12, " "),
+ "poster_lg": random_phrase(words, 3, 8, " "),
+ "specimen_48": random_phrase(sentences, 50, 80, ". "),
+ "specimen_36": random_phrase(sentences, 100, 120, ". "),
+ "specimen_32": random_phrase(sentences, 140, 180, ". "),
+ "specimen_21": random_phrase(sentences, 300, 500, ". "),
+ "specimen_16": random_phrase(sentences, 550, 750, ". "),
+}
+# print(words)
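+# Wrap the generated samples in a SampleTextProto and print it in text format,
+# ready to paste into the language's .textproto file.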
+message = ParseDict(
+ sample_text,
+ languages_public_pb2.SampleTextProto(),
+)
+print(MessageToString(message, as_utf8=True))