from gflanguages import languages_public_pb2
from google.protobuf import text_format
-from pkg_resources import resource_filename
+from importlib_resources import files
try:
from ._version import version as __version__ # type: ignore
except ImportError:
__version__ = "0.0.0+unknown"
-DATA_DIR = resource_filename("gflanguages", "data")
+def _load_thing(thing_type, proto_class, base_dir=None):
+ things = {}
-def LoadLanguages(base_dir=DATA_DIR):
- if base_dir is None:
- base_dir = DATA_DIR
+ def read_a_thing(contents):
+ proto = proto_class()
+ thing = text_format.Parse(contents, proto)
+ assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}"
+ things[thing.id] = thing
- languages_dir = os.path.join(base_dir, "languages")
- langs = {}
- for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")):
- with open(textproto_file, "r", encoding="utf-8") as f:
- language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
- assert language.id not in langs, f"Duplicate language id: {language.id}"
- langs[language.id] = language
- return langs
+ if base_dir is not None:
+ thing_dir = os.path.join(base_dir, thing_type)
+ for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")):
+ with open(textproto_file, "r", encoding="utf-8") as f:
+ read_a_thing(f.read())
+ else:
+ for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir():
+ if not textproto_file.name.endswith(".textproto"):
+ continue
+ read_a_thing(textproto_file.read_text(encoding="utf-8"))
+ return things
-def LoadScripts(base_dir=DATA_DIR):
- if base_dir is None:
- base_dir = DATA_DIR
+def LoadLanguages(base_dir=None):
+ return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir)
- scripts_dir = os.path.join(base_dir, "scripts")
- scripts = {}
- for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")):
- with open(textproto_file, "r", encoding="utf-8") as f:
- script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto())
- assert script.id not in scripts, f"Duplicate script id: {script.id}"
- scripts[script.id] = script
- return scripts
+def LoadScripts(base_dir=None):
+ return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir)
-def LoadRegions(base_dir=DATA_DIR):
- if base_dir is None:
- base_dir = DATA_DIR
- regions_dir = os.path.join(base_dir, "regions")
- regions = {}
- for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")):
- with open(textproto_file, "r", encoding="utf-8") as f:
- region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
- assert region.id not in regions, f"Duplicate region id: {region.id}"
- regions[region.id] = region
- return regions
+def LoadRegions(base_dir=None):
+ return _load_thing("regions", languages_public_pb2.RegionProto, base_dir)
def parse(exemplars: str):
-from gflanguages import DATA_DIR
+from importlib_resources import files
import glob
import os
import pytest
from google.protobuf import text_format
-languages_dir = os.path.join(DATA_DIR, "languages")
+languages_dir = files("gflanguages.data").joinpath("languages")
textproto_files = [
- os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))
+ file.name for file in languages_dir.iterdir() if file.name.endswith(".textproto")
]
@pytest.mark.parametrize("lang_code", textproto_files)
def test_parsable(lang_code):
- with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f:
- msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
- assert msg.id
- assert msg.language
- assert msg.script
- assert msg.population is not None
+ f = languages_dir.joinpath(lang_code)
+ msg = text_format.Parse(
+ f.read_text(encoding="utf-8"), languages_public_pb2.LanguageProto()
+ )
+ assert msg.id
+ assert msg.language
+ assert msg.script
+ assert msg.population is not None