--- /dev/null
+id: "zag_Berf"
+language: "zag"
+script: "Berf"
+name: "Zaghawa (Beria Erfe)"
+population: 350000
+region: "TD"
+region: "SD"
+sample_text {
+ masthead_full: "π»π»π»π»"
+ masthead_partial: "π»π»"
+ styles: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ tester: "πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ poster_sm: "πΊ½π»π»π» πΊΌπΊ»Μπ»π»π»"
+ poster_md: "π»πΊΎπ»π»π»π»π»"
+ poster_lg: "π»πΊΎπ»πΊΎπΊΏ"
+ specimen_48: "πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π»."
+ specimen_36: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π»."
+ specimen_32: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π»."
+ specimen_21: "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+ specimen_16: "πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π»."
+}
\ No newline at end of file
--- /dev/null
+import requests
+from bs4 import BeautifulSoup as Soup
+from gflanguages import LoadScripts, languages_public_pb2
+from gftools.util.google_fonts import WriteProto
+from google.protobuf.json_format import ParseDict
+
+SCRIPT_CODE_URL = "https://www.unicode.org/iso15924/iso15924-codes.html"
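+# ISO 15924 codes that are intentionally not added: historic forms, script
+# combinations, and subsets already covered by other script entries.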
+SKIP_SCRIPTS = {
+ "Cyrs",
+ "Egyd",
+ "Egyh",
+ "Hanb",
+ "Hang",
+ "Hntl",
+ "Hrkt",
+ "Jamo",
+ "Seal",
+}
+
+
+def fetch_script_codes():
+    """Scrape the ISO 15924 code table and return (code, English name) pairs."""
+    response = requests.get(SCRIPT_CODE_URL)
+    response.raise_for_status()
+    soup = Soup(response.text, "html.parser")
+
+    script_codes = []
+    table = soup.find("table", {"class": "simple"})
+    for row in table.find_all("tr")[1:]:  # Skip the header row
+        cols = row.find_all("td")
+        # Column 0 holds the four-letter code and column 2 the English name,
+        # so a data row needs at least three cells.
+        if len(cols) >= 3:
+            code = cols[0].text.strip()
+            name = cols[2].text.strip()
+            script_codes.append((code, name))
+
+    return script_codes
+
+
+known_scripts = LoadScripts()
+for code, name in fetch_script_codes():
+    # Skip special-purpose ranges (Q* private use, Z* special codes),
+    # explicitly excluded codes, and script variants.
+    if code[0] in ("Q", "Z") or code in SKIP_SCRIPTS or "variant" in name:
+        continue
+    # Only write a proto for scripts gflanguages does not know about yet.
+    if code not in known_scripts:
+        print(f"Adding script {code} - {name}")
+        message = ParseDict(
+            {"id": code, "name": name},
+            languages_public_pb2.ScriptProto(),
+        )
+        WriteProto(message, f"Lib/gflanguages/data/scripts/{code}.textproto")
--- /dev/null
+import re
+import random
+
+from gflanguages import languages_public_pb2
+from google.protobuf.json_format import ParseDict
+from google.protobuf.text_format import MessageToString
+
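+# Source corpus in the Beria Erfe (Zaghawa) script; every sample string below
+# is derived from this text.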
+text = "πΊ― π»π»π»π»π»π» π»πΊΎπ»πΊΎπΊΏ πΊΌπΊΎΜπ»π»π»πΊ» π»πΊ»π»πΊ½π» πΊΌπΊΎΜπ»π»π»πΊ» π»π»π»πΊΎπ» π»πΊΎπ»π»π»π»π». πΊ°πΊΎπΊΎπ» πΊΌπΊ»Μπ»π»π» πΊΎπ»πΊΎπ»π»π»πΊΎπΊΎ π»π»π»πΊΎπ»πΊΎ πΊ»π»π»π»π»πΊ» π»π»πΊ½π»π»πΊΎπ»π». πΊ΅π»π»πΊΎ π»π»π»π» π»π» π»π»π»πΊΎπ»π» πΊΌπΊ»π»πΊ»π»πΊ»πΊ»π»π» π»π»π»πΊΎπ» π»π»π»π»π»π»π»π» πΊ½π»π»π»."
+
+# Split the corpus into sentences (on either an ideographic or a Latin full
+# stop), then collect the words and the set of distinct glyphs.
+sentences = re.split(r"(?<=[。\.])\s+", text)
+sentences = [s for s in sentences if s.strip()]
+
+words = re.findall(r"\S+", text.replace(".", ""))
+glyphs = set(re.findall(r"\S", text))
+
+
+def random_phrase(inputs, min_len, max_len, separator=" "):
+    """Build a phrase of at least min_len characters from randomly drawn inputs.
+
+    Inputs are drawn without replacement, aiming for roughly max_len characters;
+    if one pass through the bag is too short to reach min_len, the bag is
+    refilled with an extra copy of the inputs and the draw continues.
+    """
+    phrase = ""
+    satisfied = False
+    repetitions = 1
+    while not satisfied:
+        sentence_bag = list(inputs) * repetitions
+        remaining = max_len
+        while remaining > 0 and sentence_bag:
+            new_sentence = random.choice(sentence_bag)
+            sentence_bag.remove(new_sentence)
+            phrase += new_sentence + separator
+            remaining -= len(new_sentence) + len(separator)
+            if len(phrase) >= min_len:
+                satisfied = True
+                break
+        if not satisfied:
+            print("Trying again with a new bag of sentences")
+            repetitions += 1
+    return phrase
+
+
+sample_text = {
+ "masthead_full": "".join(random.choices(list(glyphs), k=4)),
+ "masthead_partial": "".join(random.choices(list(glyphs), k=2)),
+ "styles": random_phrase(sentences, 40, 60, ". "),
+ "tester": random_phrase(sentences, 60, 90, ". "),
+ "poster_sm": random_phrase(words, 10, 17, " "),
+ "poster_md": random_phrase(words, 6, 12, " "),
+ "poster_lg": random_phrase(words, 3, 8, " "),
+ "specimen_48": random_phrase(sentences, 50, 80, ". "),
+ "specimen_36": random_phrase(sentences, 100, 120, ". "),
+ "specimen_32": random_phrase(sentences, 140, 180, ". "),
+ "specimen_21": random_phrase(sentences, 300, 500, ". "),
+ "specimen_16": random_phrase(sentences, 550, 750, ". "),
+}
+# print(words)
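+# Wrap the generated samples in a SampleTextProto and print it in text format,
+# ready to paste into the language's .textproto file.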
+message = ParseDict(
+ sample_text,
+ languages_public_pb2.SampleTextProto(),
+)
+print(MessageToString(message, as_utf8=True))