From b57fdba5341a7f65e284583412d463baf56ff3ca Mon Sep 17 00:00:00 2001
From: Simon Cozens
Date: Wed, 1 May 2024 09:24:32 +0100
Subject: [PATCH] Add our own parse function for exemplars

---
Notes (this area is not part of the commit message):
  * Adds `import unicodedata` to the module imports.
  * Adds a module-level parse(exemplars) that splits the exemplar string
    on whitespace, strips leading "{" and trailing "}" from tokens longer
    than one character, and returns a set containing every character of
    each token plus, when NFC normalization changes the token, every
    character of the normalized form as well.

 Lib/gflanguages/__init__.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Lib/gflanguages/__init__.py b/Lib/gflanguages/__init__.py
index 2957742e93..c7b0bc6181 100644
--- a/Lib/gflanguages/__init__.py
+++ b/Lib/gflanguages/__init__.py
@@ -21,6 +21,7 @@ data on the Google Fonts collection.
 """
 import glob
 import os
+import unicodedata
 
 from gflanguages import languages_public_pb2
 from google.protobuf import text_format
@@ -71,3 +72,18 @@ def LoadRegions(base_dir=DATA_DIR):
             region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
             regions[region.id] = region
     return regions
+
+
+def parse(exemplars: str):
+    """Parses a list of exemplar characters into a set of codepoints."""
+    codepoints = set()
+    for chars in exemplars.split():
+        if len(chars) > 1:
+            chars = chars.lstrip("{").rstrip("}")
+        normalized_chars = unicodedata.normalize("NFC", chars)
+        if normalized_chars != chars:
+            for char in normalized_chars:
+                codepoints.add(char)
+        for char in chars:
+            codepoints.add(char)
+    return codepoints
-- 
2.47.3