git.ipfire.org Git - thirdparty/google/fonts.git/commitdiff
Tag scripts (#8275)
author Rod <rsheeter@google.com>
Fri, 4 Oct 2024 19:21:49 +0000 (12:21 -0700)
committer GitHub <noreply@github.com>
Fri, 4 Oct 2024 19:21:49 +0000 (12:21 -0700)
tags/all/lowtags.py [new file with mode: 0644]
tags/all/stats.py

diff --git a/tags/all/lowtags.py b/tags/all/lowtags.py
new file mode 100644 (file)
index 0000000..4a1cde6
--- /dev/null
@@ -0,0 +1,65 @@
+"""
+Lists families with fewer than the median number of tags.
+
+Pass a number N to list families with <= N tags. By default the cutoff
+is median - 1 (fewer than the median when it is a whole number).
+
+Usage:
+
+       # One-time: set up a venv
+
+       $ python3 -m venv venv
+       $ source venv/bin/activate
+       $ pip install requests
+
+       # Once you have a venv
+
+       # list families with < median tags
+       $ python3 lowtags.py
+       # list families with 0 tags
+       $ python3 lowtags.py 0
+"""
+
+import collections
+import csv
+from pathlib import Path
+import requests
+from statistics import mean, median, stdev
+import sys
+
+def main():
+       args = sys.argv[1:]
+       if len(args) > 1:
+               sys.exit("Too many args")
+
+       with open('families.csv') as f:
+               reader = csv.DictReader(f)
+               records = [r for r in reader]
+
+       count_by_family = collections.defaultdict(int)
+       tags_by_family = collections.defaultdict(list)
+
+       # we want a result for every public family, even if it's 0 tags
+       resp = requests.get("https://fonts.google.com/metadata/fonts")
+       resp.raise_for_status()
+       for family in resp.json()["familyMetadataList"]:
+               count_by_family[family["family"]] = 0
+
+       for r in records:
+               count_by_family[r["Family"]] += 1
+               tags_by_family[r["Family"]].append(r["Group/Tag"])
+       counts = sorted(count_by_family.values())
+
+       if len(args) == 1:
+               cutoff = int(args[0])
+       else:
+               cutoff = median(counts) - 1
+
+       for (family, count) in sorted(count_by_family.items()):
+               if count > cutoff:
+                       continue
+               print(family, count, sorted(tags_by_family[family]))
+
+
+if __name__ == '__main__':
+       main()
\ No newline at end of file
index dfa2fa14fc6daf70b125763f565e332a288c4c89..babe7136312c72bafc8be4b53c3f6d822a47bc4e 100644 (file)
@@ -1,17 +1,14 @@
 """
-Result 5/29/2024:
-
-Num tags 9125
-Mean tags  5.086399108138239
-Median tags 5.0
-Stdev tags 2.474364733745681
-Max tags 27
+Gives basic stats about tags.
 
 Usage:
 
-       clone https://github.com/google/fonts
-       write to a file in tags/all called stats.py
-       python3 stats.py
+       $ python3 stats.py
+       Num tags 9125
+       Mean tags  5.086399108138239
+       Median tags 5.0
+       Stdev tags 2.474364733745681
+       Max tags 27
 """
 
 import collections