]> git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Add our own parse function for exemplars
author Simon Cozens <simon@simon-cozens.org>
Wed, 1 May 2024 08:24:32 +0000 (09:24 +0100)
committer Simon Cozens <simon@simon-cozens.org>
Wed, 1 May 2024 08:24:32 +0000 (09:24 +0100)
Lib/gflanguages/__init__.py

index 2957742e93f3858dee6cdea1074b3e722116b31c..c7b0bc61817a8b2466504419f6394cdb31257e55 100644 (file)
@@ -21,6 +21,7 @@ data on the Google Fonts collection.
 """
 import glob
 import os
+import unicodedata
 
 from gflanguages import languages_public_pb2
 from google.protobuf import text_format
@@ -71,3 +72,18 @@ def LoadRegions(base_dir=DATA_DIR):
             region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
             regions[region.id] = region
     return regions
+
+
+def parse(exemplars: str):
+    """Collect the characters used in a whitespace-separated exemplar list.
+
+    Tokens longer than one character lose leading "{" and trailing "}"; each
+    token then contributes its own characters plus those of its NFC form.
+    """
+    found = set()
+    for token in exemplars.split():
+        if len(token) > 1:
+            token = token.lstrip("{").rstrip("}")
+        found.update(token)
+        found.update(unicodedata.normalize("NFC", token))
+    return found