git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Add snippets/fix-exemplars-duplicates.py
author: Denis Moyogo Jacquerye <moyogo@gmail.com>
Mon, 31 Oct 2022 16:32:48 +0000 (17:32 +0100)
committer: Denis Moyogo Jacquerye <moyogo@gmail.com>
Mon, 31 Oct 2022 17:01:08 +0000 (18:01 +0100)
snippets/fix-exemplars-duplicates.py [new file with mode: 0644]

diff --git a/snippets/fix-exemplars-duplicates.py b/snippets/fix-exemplars-duplicates.py
new file mode 100644 (file)
index 0000000..fdc0f6a
--- /dev/null
@@ -0,0 +1,63 @@
+from collections import Counter
+from google.protobuf import text_format
+from gflanguages import languages_public_pb2
+
+ATTRIBUTES = "base auxiliary marks punctuation index".split(" ")
+
+
+def main(args=None):
+    """Remove duplicate entries from the exemplar_chars fields of language files.
+
+    Each path in ``args`` (default: ``sys.argv[1:]``) is parsed as a text-format
+    LanguageProto; changed files are rewritten in place and reported on stdout.
+    """
+    import sys  # local: the module only imports sys under the __main__ guard
+
+    if args is None:
+        args = sys.argv[1:]
+    for path in args:
+        with open(path, encoding="utf-8") as fp:
+            language = text_format.Parse(
+                fp.read(), languages_public_pb2.LanguageProto()
+            )
+        if not hasattr(language, "exemplar_chars"):
+            continue  # skip this file only; the remaining paths still get fixed
+        exemplar_values = {}
+        for attr in ATTRIBUTES:
+            if not hasattr(language.exemplar_chars, attr):
+                continue
+            values = getattr(language.exemplar_chars, attr).split(" ")
+            # dict.fromkeys keeps the first occurrence of each value, in order.
+            clean_values = list(dict.fromkeys(values))
+            if clean_values == values:
+                continue
+            # Sanity check: deduplication must not add or lose distinct values.
+            if set(values) != set(clean_values):
+                print("before: " + " ".join(values))
+                print("after: " + " ".join(clean_values))
+                sys.exit("Failed fixing exemplar.")
+            setattr(language.exemplar_chars, attr, " ".join(clean_values))
+            exemplar_values[attr] = {"before": values, "after": clean_values}
+
+        if not exemplar_values:
+            continue  # nothing changed: leave the file untouched
+        # Report every changed exemplar, not just the last one processed.
+        for exemplar, values in exemplar_values.items():
+            before, after = values["before"], values["after"]
+            counter = Counter(before)
+            duplicates = [(g, c - 1) for g, c in counter.most_common() if c > 1]
+            print(
+                f"Changed {path} {exemplar} exemplar:\n"
+                f"- from {len(before)} ({len(set(before))} as set) "
+                f"to {len(after)} elements\n"
+                f"- removing {len(before) - len(after)} duplicate(s):\n"
+                f"  {duplicates}\n"
+            )
+        with open(path, "w", encoding="utf-8") as fp:
+            fp.write(text_format.MessageToString(language, as_utf8=True))
+
+
+if __name__ == "__main__":
+    import sys
+    # Pass only the command-line arguments, dropping the script name in argv[0].
+    main(args=sys.argv[1:])