git.ipfire.org Git - thirdparty/babel.git/commitdiff
Initial support for reading mapping configuration as TOML (#1108)
author: Aarni Koskela <akx@iki.fi>
Wed, 7 Aug 2024 09:46:55 +0000 (12:46 +0300)
committer: GitHub <noreply@github.com>
Wed, 7 Aug 2024 09:46:55 +0000 (12:46 +0300)
* Rename parse_mapping to parse_mapping_cfg and remove duplicated test
* Add initial support for TOML mapping configuration (prefer tomllib to tomli)

---------

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Tomas R <tomas.roun8@gmail.com>
15 files changed:
babel/messages/frontend.py
tests/messages/test_frontend.py
tests/messages/test_toml_config.py [new file with mode: 0644]
tests/messages/toml-test-cases/bad.extractor.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.just-a-mapping.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.mappings-not-a-list.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.missing-extraction-method.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.non-string-extraction-method.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.pattern-type-2.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.pattern-type.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml [new file with mode: 0644]
tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml [new file with mode: 0644]

diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py
index 65ff661a02dcb0974f926d87c1fa5253cfe82b1e..7a9ce385f4368a1b56c9ffb4d08a473e7f786daa 100644 (file)
@@ -19,10 +19,11 @@ import re
 import shutil
 import sys
 import tempfile
+import warnings
 from collections import OrderedDict
 from configparser import RawConfigParser
 from io import StringIO
-from typing import Iterable
+from typing import BinaryIO, Iterable, Literal
 
 from babel import Locale, localedata
 from babel import __version__ as VERSION
@@ -53,6 +54,12 @@ class SetupError(BaseError):
     pass
 
 
+class ConfigurationError(BaseError):
+    """
+    Raised for errors in configuration files.
+    """
+
+
 def listify_value(arg, split=None):
     """
     Make a list out of an argument.
@@ -534,8 +541,21 @@ class ExtractMessages(CommandMixin):
         mappings = []
 
         if self.mapping_file:
-            with open(self.mapping_file) as fileobj:
-                method_map, options_map = parse_mapping(fileobj)
+            if self.mapping_file.endswith(".toml"):
+                with open(self.mapping_file, "rb") as fileobj:
+                    file_style = (
+                        "pyproject.toml"
+                        if os.path.basename(self.mapping_file) == "pyproject.toml"
+                        else "standalone"
+                    )
+                    method_map, options_map = _parse_mapping_toml(
+                        fileobj,
+                        filename=self.mapping_file,
+                        style=file_style,
+                    )
+            else:
+                with open(self.mapping_file) as fileobj:
+                    method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file)
             for path in self.input_paths:
                 mappings.append((path, method_map, options_map))
 
@@ -543,7 +563,7 @@ class ExtractMessages(CommandMixin):
             message_extractors = self.distribution.message_extractors
             for path, mapping in message_extractors.items():
                 if isinstance(mapping, str):
-                    method_map, options_map = parse_mapping(StringIO(mapping))
+                    method_map, options_map = parse_mapping_cfg(StringIO(mapping))
                 else:
                     method_map, options_map = [], {}
                     for pattern, method, options in mapping:
@@ -980,53 +1000,19 @@ def main():
 
 
 def parse_mapping(fileobj, filename=None):
-    """Parse an extraction method mapping from a file-like object.
+    warnings.warn(
+        "parse_mapping is deprecated, use parse_mapping_cfg instead",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return parse_mapping_cfg(fileobj, filename)
 
-    >>> buf = StringIO('''
-    ... [extractors]
-    ... custom = mypackage.module:myfunc
-    ...
-    ... # Python source files
-    ... [python: **.py]
-    ...
-    ... # Genshi templates
-    ... [genshi: **/templates/**.html]
-    ... include_attrs =
-    ... [genshi: **/templates/**.txt]
-    ... template_class = genshi.template:TextTemplate
-    ... encoding = latin-1
-    ...
-    ... # Some custom extractor
-    ... [custom: **/custom/*.*]
-    ... ''')
-
-    >>> method_map, options_map = parse_mapping(buf)
-    >>> len(method_map)
-    4
-
-    >>> method_map[0]
-    ('**.py', 'python')
-    >>> options_map['**.py']
-    {}
-    >>> method_map[1]
-    ('**/templates/**.html', 'genshi')
-    >>> options_map['**/templates/**.html']['include_attrs']
-    ''
-    >>> method_map[2]
-    ('**/templates/**.txt', 'genshi')
-    >>> options_map['**/templates/**.txt']['template_class']
-    'genshi.template:TextTemplate'
-    >>> options_map['**/templates/**.txt']['encoding']
-    'latin-1'
-
-    >>> method_map[3]
-    ('**/custom/*.*', 'mypackage.module:myfunc')
-    >>> options_map['**/custom/*.*']
-    {}
+
+def parse_mapping_cfg(fileobj, filename=None):
+    """Parse an extraction method mapping from a file-like object.
 
     :param fileobj: a readable file-like object containing the configuration
                     text to parse
-    :see: `extract_from_directory`
     """
     extractors = {}
     method_map = []
@@ -1053,6 +1039,94 @@ def parse_mapping(fileobj, filename=None):
     return method_map, options_map
 
 
+def _parse_config_object(config: dict, *, filename="(unknown)"):
+    extractors = {}
+    method_map = []
+    options_map = {}
+
+    extractors_read = config.get("extractors", {})
+    if not isinstance(extractors_read, dict):
+        raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}")
+    for method, callable_spec in extractors_read.items():
+        if not isinstance(method, str):
+            # Impossible via TOML, but could happen with a custom object.
+            raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}")
+        if not isinstance(callable_spec, str):
+            raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}")
+        extractors[method] = callable_spec
+
+    if "mapping" in config:
+        raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?")
+
+    mappings_read = config.get("mappings", [])
+    if not isinstance(mappings_read, list):
+        raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}")
+    for idx, entry in enumerate(mappings_read):
+        if not isinstance(entry, dict):
+            raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}")
+        entry = entry.copy()
+
+        method = entry.pop("method", None)
+        if not isinstance(method, str):
+            raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}")
+        method = extractors.get(method, method)  # Map the extractor name to the callable now
+
+        pattern = entry.pop("pattern", None)
+        if not isinstance(pattern, (list, str)):
+            raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}")
+        if not isinstance(pattern, list):
+            pattern = [pattern]
+
+        for pat in pattern:
+            if not isinstance(pat, str):
+                raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}")
+            method_map.append((pat, method))
+            options_map[pat] = entry
+
+    return method_map, options_map
+
+
+def _parse_mapping_toml(
+    fileobj: BinaryIO,
+    filename: str = "(unknown)",
+    style: Literal["standalone", "pyproject.toml"] = "standalone",
+):
+    """Parse an extraction method mapping from a binary file-like object.
+
+    .. warning:: As of this version of Babel, this is a private API subject to change.
+
+    :param fileobj: a readable binary file-like object containing the configuration TOML to parse
+    :param filename: the name of the file being parsed, for error messages
+    :param style: whether the file is in the style of a `pyproject.toml` file, i.e. whether to look for `tool.babel`.
+    """
+    try:
+        import tomllib
+    except ImportError:
+        try:
+            import tomli as tomllib
+        except ImportError as ie:  # pragma: no cover
+            raise ImportError("tomli or tomllib is required to parse TOML files") from ie
+
+    try:
+        parsed_data = tomllib.load(fileobj)
+    except tomllib.TOMLDecodeError as e:
+        raise ConfigurationError(f"{filename}: Error parsing TOML file: {e}") from e
+
+    if style == "pyproject.toml":
+        try:
+            babel_data = parsed_data["tool"]["babel"]
+        except (TypeError, KeyError) as e:
+            raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e
+    elif style == "standalone":
+        babel_data = parsed_data
+        if "babel" in babel_data:
+            raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file")
+    else:  # pragma: no cover
+        raise ValueError(f"Unknown TOML style {style!r}")
+
+    return _parse_config_object(babel_data, filename=filename)
+
+
 def _parse_spec(s: str) -> tuple[int | None, tuple[int | tuple[int, str], ...]]:
     inds = []
     number = None
diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py
index 45638b3b179eedefac36eb096993a7eba7ddf58a..7a6b08c4418906e71d0afe325c602d8899391a32 100644 (file)
 # history and logs, available at http://babel.edgewall.org/log/.
 import logging
 import os
+import re
 import shlex
 import shutil
 import sys
 import time
 import unittest
 from datetime import datetime, timedelta
+from functools import partial
 from io import BytesIO, StringIO
 from typing import List
 
@@ -1388,25 +1390,86 @@ msgstr[2] ""
             assert len(catalog) == 4  # Catalog was updated
 
 
-def test_parse_mapping():
-    buf = StringIO(
-        '[extractors]\n'
-        'custom = mypackage.module:myfunc\n'
-        '\n'
-        '# Python source files\n'
-        '[python: **.py]\n'
-        '\n'
-        '# Genshi templates\n'
-        '[genshi: **/templates/**.html]\n'
-        'include_attrs =\n'
-        '[genshi: **/templates/**.txt]\n'
-        'template_class = genshi.template:TextTemplate\n'
-        'encoding = latin-1\n'
-        '\n'
-        '# Some custom extractor\n'
-        '[custom: **/custom/*.*]\n')
-
-    method_map, options_map = frontend.parse_mapping(buf)
+mapping_cfg = """
+[extractors]
+custom = mypackage.module:myfunc
+
+# Python source files
+[python: **.py]
+
+# Genshi templates
+[genshi: **/templates/**.html]
+include_attrs =
+
+[genshi: **/templates/**.txt]
+template_class = genshi.template:TextTemplate
+encoding = latin-1
+
+# Some custom extractor
+[custom: **/custom/*.*]
+"""
+
+mapping_toml = """
+[extractors]
+custom = "mypackage.module:myfunc"
+
+# Python source files
+[[mappings]]
+method = "python"
+pattern = "**.py"
+
+# Genshi templates
+[[mappings]]
+method = "genshi"
+pattern = "**/templates/**.html"
+include_attrs = ""
+
+[[mappings]]
+method = "genshi"
+pattern = "**/templates/**.txt"
+template_class = "genshi.template:TextTemplate"
+encoding = "latin-1"
+
+# Some custom extractor
+[[mappings]]
+method = "custom"
+pattern = "**/custom/*.*"
+"""
+
+
+@pytest.mark.parametrize(
+    ("data", "parser", "preprocess", "is_toml"),
+    [
+        (
+            mapping_cfg,
+            frontend.parse_mapping_cfg,
+            None,
+            False,
+        ),
+        (
+            mapping_toml,
+            frontend._parse_mapping_toml,
+            None,
+            True,
+        ),
+        (
+            mapping_toml,
+            partial(frontend._parse_mapping_toml, style="pyproject.toml"),
+            lambda s: re.sub(r"^(\[+)", r"\1tool.babel.", s, flags=re.MULTILINE),
+            True,
+        ),
+    ],
+    ids=("cfg", "toml", "pyproject-toml"),
+)
+def test_parse_mapping(data: str, parser, preprocess, is_toml):
+    if preprocess:
+        data = preprocess(data)
+    if is_toml:
+        buf = BytesIO(data.encode())
+    else:
+        buf = StringIO(data)
+
+    method_map, options_map = parser(buf)
     assert len(method_map) == 4
 
     assert method_map[0] == ('**.py', 'python')
diff --git a/tests/messages/test_toml_config.py b/tests/messages/test_toml_config.py
new file mode 100644 (file)
index 0000000..6a3c157
--- /dev/null
@@ -0,0 +1,38 @@
+import pathlib
+from io import BytesIO
+
+import pytest
+
+from babel.messages import frontend
+
+toml_test_cases_path = pathlib.Path(__file__).parent / "toml-test-cases"
+assert toml_test_cases_path.is_dir(), "toml-test-cases directory not found"
+
+
+def test_toml_mapping_multiple_patterns():
+    """
+    Test that patterns may be specified as a list in TOML,
+    and are expanded to multiple entries in the method map.
+    """
+    method_map, options_map = frontend._parse_mapping_toml(BytesIO(b"""
+[[mappings]]
+method = "python"
+pattern = ["xyz/**.py", "foo/**.py"]
+"""))
+    assert len(method_map) == 2
+    assert method_map[0] == ('xyz/**.py', 'python')
+    assert method_map[1] == ('foo/**.py', 'python')
+
+
+@pytest.mark.parametrize("test_case", toml_test_cases_path.glob("bad.*.toml"), ids=lambda p: p.name)
+def test_bad_toml_test_case(test_case: pathlib.Path):
+    """
+    Test that bad TOML files raise a ConfigurationError.
+    """
+    with pytest.raises(frontend.ConfigurationError):
+        with test_case.open("rb") as f:
+            frontend._parse_mapping_toml(
+                f,
+                filename=test_case.name,
+                style="pyproject.toml" if "pyproject" in test_case.name else "standalone",
+            )
diff --git a/tests/messages/toml-test-cases/bad.extractor.toml b/tests/messages/toml-test-cases/bad.extractor.toml
new file mode 100644 (file)
index 0000000..9992684
--- /dev/null
@@ -0,0 +1,2 @@
+[extractors]
+custom = { module = "mypackage.module", func = "myfunc" }
diff --git a/tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml b/tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml
new file mode 100644 (file)
index 0000000..92d7678
--- /dev/null
@@ -0,0 +1 @@
+[[extractors]]
diff --git a/tests/messages/toml-test-cases/bad.just-a-mapping.toml b/tests/messages/toml-test-cases/bad.just-a-mapping.toml
new file mode 100644 (file)
index 0000000..40006f5
--- /dev/null
@@ -0,0 +1,3 @@
+[mapping]
+method = "jinja2"
+pattern = "**.html"
diff --git a/tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml b/tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml
new file mode 100644 (file)
index 0000000..f22367f
--- /dev/null
@@ -0,0 +1 @@
+mappings = [8]
diff --git a/tests/messages/toml-test-cases/bad.mappings-not-a-list.toml b/tests/messages/toml-test-cases/bad.mappings-not-a-list.toml
new file mode 100644 (file)
index 0000000..f308741
--- /dev/null
@@ -0,0 +1 @@
+mappings = "python"
diff --git a/tests/messages/toml-test-cases/bad.missing-extraction-method.toml b/tests/messages/toml-test-cases/bad.missing-extraction-method.toml
new file mode 100644 (file)
index 0000000..69a2be2
--- /dev/null
@@ -0,0 +1,2 @@
+[[mappings]]
+pattern = ["xyz/**.py", "foo/**.py"]
diff --git a/tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml b/tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml
new file mode 100644 (file)
index 0000000..48c1513
--- /dev/null
@@ -0,0 +1,10 @@
+[mappings]
+method = "genshi"
+pattern = "**/templates/**.html"
+include_attrs = ""
+
+[mappings]
+method = "genshi"
+pattern = "**/templates/**.txt"
+template_class = "genshi.template:TextTemplate"
+encoding = "latin-1"
diff --git a/tests/messages/toml-test-cases/bad.non-string-extraction-method.toml b/tests/messages/toml-test-cases/bad.non-string-extraction-method.toml
new file mode 100644 (file)
index 0000000..19c3ece
--- /dev/null
@@ -0,0 +1,2 @@
+[[mappings]]
+method = 42
diff --git a/tests/messages/toml-test-cases/bad.pattern-type-2.toml b/tests/messages/toml-test-cases/bad.pattern-type-2.toml
new file mode 100644 (file)
index 0000000..d6c1382
--- /dev/null
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = [42]
diff --git a/tests/messages/toml-test-cases/bad.pattern-type.toml b/tests/messages/toml-test-cases/bad.pattern-type.toml
new file mode 100644 (file)
index 0000000..598b722
--- /dev/null
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = 2048
diff --git a/tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml b/tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml
new file mode 100644 (file)
index 0000000..598b722
--- /dev/null
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = 2048
diff --git a/tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml b/tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml
new file mode 100644 (file)
index 0000000..cbc1d8d
--- /dev/null
@@ -0,0 +1,2 @@
+[babel.extractors]
+custom = "mypackage.module:myfunc"