Use importlib API (#186)

author Simon Cozens <simon@simon-cozens.org>

Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)

committer GitHub <noreply@github.com>

Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)
author Simon Cozens <simon@simon-cozens.org>
Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)
committer GitHub <noreply@github.com>
Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)
diff --git a/Lib/gflanguages/__init__.py b/Lib/gflanguages/__init__.py

index 0754d6c135547cd140115f6e63f85a8fc16dba6b..aa6f565d9f0f7c7d7e95e7f3e52254db9bac0cde 100644 (file)
--- a/Lib/gflanguages/__init__.py
+++ b/Lib/gflanguages/__init__.py
@@ -25,56 +25,46 @@ import unicodedata
  
  from gflanguages import languages_public_pb2
  from google.protobuf import text_format
-from pkg_resources import resource_filename
+from importlib_resources import files
  
  try:
      from ._version import version as __version__  # type: ignore
  except ImportError:
      __version__ = "0.0.0+unknown"
  
-DATA_DIR = resource_filename("gflanguages", "data")
  
+def _load_thing(thing_type, proto_class, base_dir=None):
+    things = {}
  
-def LoadLanguages(base_dir=DATA_DIR):
-    if base_dir is None:
-        base_dir = DATA_DIR
+    def read_a_thing(contents):
+        proto = proto_class()
+        thing = text_format.Parse(contents, proto)
+        assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}"
+        things[thing.id] = thing
  
-    languages_dir = os.path.join(base_dir, "languages")
-    langs = {}
-    for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")):
-        with open(textproto_file, "r", encoding="utf-8") as f:
-            language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
-            assert language.id not in langs, f"Duplicate language id: {language.id}"
-            langs[language.id] = language
-    return langs
+    if base_dir is not None:
+        thing_dir = os.path.join(base_dir, thing_type)
+        for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")):
+            with open(textproto_file, "r", encoding="utf-8") as f:
+                read_a_thing(f.read())
+    else:
+        for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir():
+            if not textproto_file.name.endswith(".textproto"):
+                continue
+            read_a_thing(textproto_file.read_text(encoding="utf-8"))
+    return things
  
  
-def LoadScripts(base_dir=DATA_DIR):
-    if base_dir is None:
-        base_dir = DATA_DIR
+def LoadLanguages(base_dir=None):
+    return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir)
  
-    scripts_dir = os.path.join(base_dir, "scripts")
-    scripts = {}
-    for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")):
-        with open(textproto_file, "r", encoding="utf-8") as f:
-            script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto())
-            assert script.id not in scripts, f"Duplicate script id: {script.id}"
-            scripts[script.id] = script
-    return scripts
  
+def LoadScripts(base_dir=None):
+    return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir)
  
-def LoadRegions(base_dir=DATA_DIR):
-    if base_dir is None:
-        base_dir = DATA_DIR
  
-    regions_dir = os.path.join(base_dir, "regions")
-    regions = {}
-    for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")):
-        with open(textproto_file, "r", encoding="utf-8") as f:
-            region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
-            assert region.id not in regions, f"Duplicate region id: {region.id}"
-            regions[region.id] = region
-    return regions
+def LoadRegions(base_dir=None):
+    return _load_thing("regions", languages_public_pb2.RegionProto, base_dir)
  
  
  def parse(exemplars: str):
diff --git a/pyproject.toml b/pyproject.toml

index 61546649abbf8e72dfdcdc2470948da49daffa8e..f0866ccc6756045a77fdda340aad0ea33e0c9ca4 100644 (file)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,8 @@ authors = [
  ]
  
  dependencies = [
-   "protobuf>=3.7.0, <4"
+   "protobuf>=3.7.0, <4",
+   "importlib_resources",  # Needed for 3.9 and below
  ]
  
  [project.optional-dependencies]
diff --git a/tests/test_parsable.py b/tests/test_parsable.py

index f209bc3befc95bd679caa4cb946df52709a95fc0..aeaf3f3a8ee4a9088ee2610e7a605a3f3e0e0f75 100644 (file)
--- a/tests/test_parsable.py
+++ b/tests/test_parsable.py
@@ -1,4 +1,4 @@
-from gflanguages import DATA_DIR
+from importlib_resources import files
  import glob
  import os
  import pytest
@@ -6,17 +6,19 @@ from gflanguages import languages_public_pb2
  from google.protobuf import text_format
  
  
-languages_dir = os.path.join(DATA_DIR, "languages")
+languages_dir = files("gflanguages.data").joinpath("languages")
  textproto_files = [
-    os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))
+    file.name for file in languages_dir.iterdir() if file.name.endswith(".textproto")
  ]
  
  
  @pytest.mark.parametrize("lang_code", textproto_files)
  def test_parsable(lang_code):
-    with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f:
-        msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
-        assert msg.id
-        assert msg.language
-        assert msg.script
-        assert msg.population is not None
+    f = languages_dir.joinpath(lang_code)
+    msg = text_format.Parse(
+        f.read_text(encoding="utf-8"), languages_public_pb2.LanguageProto()
+    )
+    assert msg.id
+    assert msg.language
+    assert msg.script
+    assert msg.population is not None
author	Simon Cozens <simon@simon-cozens.org>
	Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)
committer	GitHub <noreply@github.com>
	Fri, 1 Nov 2024 10:36:11 +0000 (10:36 +0000)
Lib/gflanguages/__init__.py		patch | blob | blame | history
pyproject.toml		patch | blob | blame | history
tests/test_parsable.py		patch | blob | blame | history