From a495359cdf3c536688f79f712554d55afb902615 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Fri, 1 Nov 2024 10:36:11 +0000 Subject: [PATCH] Use importlib API (#186) * Don't repeat yourself * Use the new-fangled importlib.resources * Use backport until 3.9 is dead --- Lib/gflanguages/__init__.py | 60 ++++++++++++++++--------------------- pyproject.toml | 3 +- tests/test_parsable.py | 20 +++++++------ 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/Lib/gflanguages/__init__.py b/Lib/gflanguages/__init__.py index 0754d6c135..aa6f565d9f 100644 --- a/Lib/gflanguages/__init__.py +++ b/Lib/gflanguages/__init__.py @@ -25,56 +25,46 @@ import unicodedata from gflanguages import languages_public_pb2 from google.protobuf import text_format -from pkg_resources import resource_filename +from importlib_resources import files try: from ._version import version as __version__ # type: ignore except ImportError: __version__ = "0.0.0+unknown" -DATA_DIR = resource_filename("gflanguages", "data") +def _load_thing(thing_type, proto_class, base_dir=None): + things = {} -def LoadLanguages(base_dir=DATA_DIR): - if base_dir is None: - base_dir = DATA_DIR + def read_a_thing(contents): + proto = proto_class() + thing = text_format.Parse(contents, proto) + assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}" + things[thing.id] = thing - languages_dir = os.path.join(base_dir, "languages") - langs = {} - for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")): - with open(textproto_file, "r", encoding="utf-8") as f: - language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) - assert language.id not in langs, f"Duplicate language id: {language.id}" - langs[language.id] = language - return langs + if base_dir is not None: + thing_dir = os.path.join(base_dir, thing_type) + for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")): + with open(textproto_file, "r", encoding="utf-8") as f: + read_a_thing(f.read()) + else: + for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir(): + if not textproto_file.name.endswith(".textproto"): + continue + read_a_thing(textproto_file.read_text(encoding="utf-8")) + return things -def LoadScripts(base_dir=DATA_DIR): - if base_dir is None: - base_dir = DATA_DIR +def LoadLanguages(base_dir=None): + return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir) - scripts_dir = os.path.join(base_dir, "scripts") - scripts = {} - for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")): - with open(textproto_file, "r", encoding="utf-8") as f: - script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto()) - assert script.id not in scripts, f"Duplicate script id: {script.id}" - scripts[script.id] = script - return scripts +def LoadScripts(base_dir=None): + return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir) -def LoadRegions(base_dir=DATA_DIR): - if base_dir is None: - base_dir = DATA_DIR - regions_dir = os.path.join(base_dir, "regions") - regions = {} - for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")): - with open(textproto_file, "r", encoding="utf-8") as f: - region = text_format.Parse(f.read(), languages_public_pb2.RegionProto()) - assert region.id not in regions, f"Duplicate region id: {region.id}" - regions[region.id] = region - return regions +def LoadRegions(base_dir=None): + return _load_thing("regions", languages_public_pb2.RegionProto, base_dir) def parse(exemplars: str): diff --git a/pyproject.toml b/pyproject.toml index 61546649ab..f0866ccc67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,8 @@ authors = [ ] dependencies = [ - "protobuf>=3.7.0, <4" + "protobuf>=3.7.0, <4", + "importlib_resources", # Needed for 3.9 and below ] [project.optional-dependencies] diff --git a/tests/test_parsable.py b/tests/test_parsable.py index f209bc3bef..aeaf3f3a8e 100644 --- a/tests/test_parsable.py +++ b/tests/test_parsable.py @@ -1,4 +1,4 @@ -from gflanguages import DATA_DIR +from importlib_resources import files import glob import os import pytest @@ -6,17 +6,19 @@ from gflanguages import languages_public_pb2 from google.protobuf import text_format -languages_dir = os.path.join(DATA_DIR, "languages") +languages_dir = files("gflanguages.data").joinpath("languages") textproto_files = [ - os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto")) + file.name for file in languages_dir.iterdir() if file.name.endswith(".textproto") ] @pytest.mark.parametrize("lang_code", textproto_files) def test_parsable(lang_code): - with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f: - msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) - assert msg.id - assert msg.language - assert msg.script - assert msg.population is not None + f = languages_dir.joinpath(lang_code) + msg = text_format.Parse( + f.read_text(encoding="utf-8"), languages_public_pb2.LanguageProto() + ) + assert msg.id + assert msg.language + assert msg.script + assert msg.population is not None -- 2.47.3