From: Aleš Mrázek Date: Thu, 19 Mar 2026 14:56:17 +0000 (+0100) Subject: python: modeling: parsing: added support for YAML '!include' tag X-Git-Url: http://git.ipfire.org/index.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fpython-refactoring-modeling;p=thirdparty%2Fknot-resolver.git python: modeling: parsing: added support for YAML '!include' tag --- diff --git a/python/knot_resolver/utils/modeling/errors.py b/python/knot_resolver/utils/modeling/errors.py index 133ec005d..16f18c69d 100644 --- a/python/knot_resolver/utils/modeling/errors.py +++ b/python/knot_resolver/utils/modeling/errors.py @@ -31,6 +31,14 @@ class DataAnnotationError(DataModelingError): super().__init__(msg, error_pointer) +class DataReadingError(DataModelingError): + """Exception class for data reading errors.""" + + def __init__(self, msg: str, error_pointer: str = "") -> None: + msg = f"reading error: {msg}" + super().__init__(msg, error_pointer) + + class DataParsingError(DataModelingError): """Exception class for data parsing errors.""" diff --git a/python/knot_resolver/utils/modeling/parsing.py b/python/knot_resolver/utils/modeling/parsing.py index d57000c69..f9df10e88 100644 --- a/python/knot_resolver/utils/modeling/parsing.py +++ b/python/knot_resolver/utils/modeling/parsing.py @@ -2,43 +2,68 @@ from __future__ import annotations import json from enum import Enum, auto -from typing import TYPE_CHECKING, Any +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Hashable, List, Union import yaml from yaml.constructor import ConstructorError -from knot_resolver.utils.modeling.errors import DataParsingError +from knot_resolver.utils.modeling.errors import DataParsingError, DataReadingError, DataTypeError if TYPE_CHECKING: from yaml.nodes import MappingNode +_YAML_INCLUDE_KEY = "include" +_YAML_INCLUDE_TAG = "!include" -def _json_raise_duplicates(pairs: list[tuple[Any, Any]]) -> dict[Any, Any]: - """ - JSON hook used in 'json.loads()' that detects duplicate keys in the parsed data. - The code for this hook was highly inspired by: https://stackoverflow.com/q/14902299/12858520 +class ParsedDataWrapper: + """A wrapper for included files and their data. + + Attributes: + data (ParsedData): Data that has been read and parsed from the file. + file (str | Path): The path to the file containing the data. + """ - mapping: dict[Any, Any] = {} - for key, value in pairs: - if key in mapping: - msg = f"duplicate key detected: {key}" - raise DataParsingError(msg) - mapping[key] = value - return mapping + def __init__(self, data: ParsedData, file: str | Path): + self.data = data + self.file = Path(file) + + +ParsedData = Union[Dict[str, "ParsedData"], List["ParsedData"], ParsedDataWrapper, str, int, float, bool, None] -class _YAMLRaiseDuplicatesLoader(yaml.SafeLoader): + +def _yaml_include_constructor(self: _YAMLRaiseDuplicatesIncludeLoader, node: MappingNode) -> ParsedDataWrapper: + """Construct include wrapper for detected '!include' keys. + + The code for this constructor was highly inspired by: + https://gist.github.com/joshbode/569627ced3076931b02f """ - YAML loader used in 'yaml.loads()' that detects duplicate keys in the parsed data. + file_path = Path(self.construct_scalar(node)) + if not file_path.is_absolute() and self.stream_path: + file_path = self.stream_path.parent / file_path + return try_to_parse_file(file_path) + + +class _YAMLRaiseDuplicatesIncludeLoader(yaml.SafeLoader): + """Custom YAML loader used in 'yaml.loads()'. + + The loader detects duplicate keys in the parsed data. + It also detects '!include' keys and loads data from included files. The code for this loader was highly inspired by: https://gist.github.com/pypt/94d747fe5180851196eb The loader extends yaml.SafeLoader, so it should be safe, even though the linter reports unsafe-yaml-load (S506). More about safe loader: https://python.land/data-processing/python-yaml#PyYAML_safe_load_vs_load """ - def construct_mapping(self, node: MappingNode, deep: bool = False) -> dict[Any, Any]: - mapping: dict[Any, Any] = {} + def __init__(self, stream: str, stream_path: str | Path | None = None) -> None: + self.stream_path = Path(stream_path) if stream_path else None + self.add_constructor(_YAML_INCLUDE_TAG, _yaml_include_constructor) + super().__init__(stream) + + def construct_mapping(self, node: MappingNode, deep: bool = False) -> dict[Hashable, Any]: + mapping: dict[Hashable, Any] = {} for key_node, value_node in node.value: key = self.construct_object(key_node, deep=deep) # we need to check, that the key object can be used in a hash table @@ -58,47 +83,121 @@ class _YAMLRaiseDuplicatesLoader(yaml.SafeLoader): return mapping +def _json_raise_duplicates(pairs: list[tuple[str, ParsedData]]) -> dict[str, ParsedData]: + """JSON hook used in 'json.loads()' that detects duplicate keys in the parsed data. + + The code for this hook was highly inspired by: https://stackoverflow.com/q/14902299/12858520 + """ + mapping: dict[str, ParsedData] = {} + for key, value in pairs: + if key in mapping: + msg = f"duplicate key detected: {key}" + raise DataParsingError(msg) + mapping[key] = value + return mapping + + +def _include_key_root(parsed_data: ParsedDataWrapper) -> ParsedDataWrapper: + data = parsed_data.data + base_path = parsed_data.file.parent + + if isinstance(data, ParsedDataWrapper): + parsed_data.data = _include_key_root(data) + + elif isinstance(data, dict) and _YAML_INCLUDE_KEY in data: + files = data[_YAML_INCLUDE_KEY] + parsed_files: list[ParsedData] = [] + + if isinstance(files, str): + file_path = Path(files) + if not file_path.is_absolute(): + file_path = base_path / file_path + parsed_files.append(try_to_parse_file(file_path)) + + elif isinstance(files, list): + for file in files: + if isinstance(file, str): + file_path = Path(file) + if not file_path.is_absolute(): + file_path = base_path / file_path + parsed_files.append(try_to_parse_file(file_path)) + else: + msg = "" + pointer = f"{parsed_data.file}:/{_YAML_INCLUDE_KEY}" + raise DataTypeError(msg, pointer) + + else: + msg = f"expected string or list, got {type(files)}" + pointer = f"{parsed_data.file}:/{_YAML_INCLUDE_KEY}" + raise DataTypeError(msg, pointer) + + data[_YAML_INCLUDE_KEY] = parsed_files + + return parsed_data + + class DataFormat(Enum): YAML = auto() JSON = auto() - def loads(self, text: str) -> dict[Any, Any]: + def load_file(self, file: str | Path) -> ParsedData: + """Read and parse data from file in data format and return the data in dictionary.""" + file_path = Path(file) + text = file_path.read_text() + if self is DataFormat.YAML: + loader = _YAMLRaiseDuplicatesIncludeLoader(text, file) + try: + return loader.get_single_data() + finally: + loader.dispose() + return self.load_str(text) + + def load_str(self, text: str) -> ParsedData: """Load data from string in data format and return the data in dictionary.""" if self is DataFormat.YAML: - return yaml.load(text, Loader=_YAMLRaiseDuplicatesLoader) # noqa: S506 + return yaml.load(text, Loader=_YAMLRaiseDuplicatesIncludeLoader) # noqa: S506 if self is DataFormat.JSON: return json.loads(text, object_pairs_hook=_json_raise_duplicates) msg = f"parsing data from '{self}' format is not implemented" raise NotImplementedError(msg) - def dumps(self, data: dict[Any, Any], indent: int | None = None) -> str: - """Dump dictionary data to string in required data format.""" - if self is DataFormat.YAML: - return yaml.safe_dump(data, indent=indent) - if self is DataFormat.JSON: - return json.dumps(data, indent=indent) - msg = f"exporting data to '{self}' format is not implemented" - raise NotImplementedError(msg) + # def dump_str(self, data: ParsedData, indent: int | None = None) -> str: + # """Dump the parsed(dict) data into a string in the required format.""" + # if self is DataFormat.YAML: + # return yaml.safe_dump(data, indent=indent) + # if self is DataFormat.JSON: + # return json.dumps(data, indent=indent) + # msg = f"exporting data to '{self}' format is not implemented" + # raise NotImplementedError(msg) + + +def parse_json_str(data: str) -> ParsedData: + """Parse the JSON string, and return its parsed(dict) data.""" + return DataFormat.JSON.load_str(data) -def parse_yaml(data: str) -> dict[Any, Any]: - """Parse YAML string and return the data in dictionary.""" - return DataFormat.YAML.loads(data) +def parse_json_file(file: str | Path) -> ParsedDataWrapper: + """Read the JSON file, parse its data string, and return its parsed(dict) data.""" + data = DataFormat.JSON.load_file(file) + return ParsedDataWrapper(data, file) -def parse_json(data: str) -> dict[Any, Any]: - """Parse JSON string and return the data in dictionary.""" - return DataFormat.JSON.loads(data) +def parse_yaml_file(file: str | Path) -> ParsedDataWrapper: + """Read the YAML file, parse its data string, and return its parsed(dict) data.""" + data = DataFormat.YAML.load_file(file) + return _include_key_root(ParsedDataWrapper(data, file)) -def try_to_parse(data: str) -> dict[Any, Any]: - """Attempt to parse data string as a JSON or YAML and return it's dictionary.""" +def try_to_parse_file(file: str | Path) -> ParsedDataWrapper: + """Attempt to read the file and parse its data string as JSON or YAML, then return its parsed(dict) data.""" try: - return parse_json(data) + return parse_json_file(file) + except OSError as e: + raise DataReadingError(str(e), str(file)) from e except json.JSONDecodeError: try: - return parse_yaml(data) + return parse_yaml_file(file) except yaml.YAMLError as e: # YAML parsing error should be sufficient because the JSON can be parsed by the YAML parser. # We should receive a helpful error message for JSON as well. - raise DataParsingError(e) from e + raise DataParsingError(str(e), str(file)) from e diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing.py b/tests/python/knot_resolver/utils/modeling/test_parsing.py index 63fe122da..18c96558e 100644 --- a/tests/python/knot_resolver/utils/modeling/test_parsing.py +++ b/tests/python/knot_resolver/utils/modeling/test_parsing.py @@ -1,68 +1,14 @@ +from pathlib import Path + import pytest from knot_resolver.utils.modeling.errors import DataParsingError -from knot_resolver.utils.modeling.parsing import parse_json, parse_yaml, try_to_parse +from knot_resolver.utils.modeling.parsing import ParsedDataWrapper, parse_json_file, parse_yaml_file, try_to_parse_file -json_data = """ -{ - "none": null, - "boolean": false, - "number": 2026, - "string": "this is string", - "object": { - "number": 5000, - "string": "this is object string" - }, - "array": [ - "item1", - "item2", - "item3" - ] -} -""" +base_path = Path(__file__).parent / "test_parsing" -json_data_duplicates = """ -{ - "duplicity-key": 1, - "duplicity-key": 2 -} -""" -json_data_duplicates_inner = """ -{ - "object": { - "duplicity-key": 1, - "duplicity-key": 2 - } -} -""" - -yaml_data = """ -none: null -boolean: false -number: 2026 -string: this is string -object: - number: 5000 - string: this is object string -array: - - item1 - - item2 - - item3 -""" - -yaml_data_duplicates = """ -duplicity-key: 1 -duplicity-key: 2 -""" - -yaml_data_duplicates_inner = """ -object: - duplicity-key: 1 - duplicity-key: 2 -""" - -data_dict = { +result_dict = { "none": None, "boolean": False, "number": 2026, @@ -79,44 +25,83 @@ data_dict = { } -def test_parse_json() -> None: - data = parse_json(json_data) - assert data == data_dict +@pytest.mark.parametrize("file", ["data.json"]) +def test_parse_json_file(file: str) -> None: + file_path = base_path / file + wrapped_data = parse_json_file(file_path) + assert wrapped_data.file == file_path + assert wrapped_data.data == result_dict -@pytest.mark.parametrize("data", [json_data, yaml_data]) -def test_parse_yaml(data: str) -> None: - data = parse_yaml(data) - assert data == data_dict +@pytest.mark.parametrize("file", ["data.json", "data.yaml"]) +def test_parse_yaml_file(file: str) -> None: + file_path = base_path / file + wrapped_data = parse_yaml_file(file_path) + assert wrapped_data.file == file_path + assert wrapped_data.data == result_dict -@pytest.mark.parametrize( - "data", - [ - json_data_duplicates, - json_data_duplicates_inner, - ], -) -def test_parse_json_duplicates(data: str) -> None: +@pytest.mark.parametrize("file", ["duplicity.json", "duplicity.inner.json"]) +def test_parse_json_file_duplicity(file: str) -> None: + file_path = base_path / file with pytest.raises(DataParsingError): - parse_json(data) + parse_json_file(file_path) @pytest.mark.parametrize( - "data", + "file", [ - json_data_duplicates, - json_data_duplicates_inner, - yaml_data_duplicates, - yaml_data_duplicates_inner, + "duplicity.json", + "duplicity.inner.json", + "duplicity.yaml", + "duplicity.inner.yaml", ], ) -def test_parse_yaml_duplicates(data: str) -> None: +def test_parse_yaml_file_duplicity(file: str) -> None: + file_path = base_path / file with pytest.raises(DataParsingError): - parse_yaml(data) - - -@pytest.mark.parametrize("data", [json_data, yaml_data]) -def test_try_to_parse(data: str) -> None: - data = try_to_parse(data) - assert data == data_dict + parse_yaml_file(file_path) + + +@pytest.mark.parametrize("file", ["data.json", "data.yaml"]) +def test_try_to_parse_file(file: str) -> None: + file_path = base_path / file + wrapped_data = try_to_parse_file(file_path) + assert wrapped_data.file == file_path + assert wrapped_data.data == result_dict + + +@pytest.mark.parametrize("file", ["include.root.yaml"]) +def test_try_to_parse_file_yaml_include_tag(file: str) -> None: + file_path = base_path / file + wrapped_data = try_to_parse_file(file_path) + assert wrapped_data.file == file_path + assert wrapped_data.data.file.parent == base_path + assert wrapped_data.data.data == result_dict + + +@pytest.mark.parametrize("file", ["include.inner.yaml"]) +def test_try_to_parse_file_yaml_include_tag_inner(file: str) -> None: + file_path = base_path / file + wrapped_data = try_to_parse_file(file_path) + assert wrapped_data.file == file_path + assert wrapped_data.data["object"].data == result_dict["object"] + + +@pytest.mark.parametrize("file", ["include-key.yaml"]) +def test_try_to_parse_file_yaml_include_key(file: str) -> None: + file_path = base_path / file + wrapped_data = try_to_parse_file(file_path) + assert wrapped_data.file == file_path + for key in ["none", "boolean", "number", "string"]: + assert wrapped_data.data[key] == result_dict[key] + for include in wrapped_data.data["include"]: + print(include.data) + assert isinstance(include, ParsedDataWrapper) + data = include.data + if "object" in data: + assert data["object"] == result_dict["object"] + elif "array" in data: + assert data["array"] == result_dict["array"] + else: + assert False diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/data.json b/tests/python/knot_resolver/utils/modeling/test_parsing/data.json new file mode 100644 index 000000000..9ab07e576 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/data.json @@ -0,0 +1,15 @@ +{ + "none": null, + "boolean": false, + "number": 2026, + "string": "this is string", + "object": { + "number": 5000, + "string": "this is object string" + }, + "array": [ + "item1", + "item2", + "item3" + ] +} diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/data.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/data.yaml new file mode 100644 index 000000000..a70348b20 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/data.yaml @@ -0,0 +1,11 @@ +none: null +boolean: false +number: 2026 +string: this is string +object: + number: 5000 + string: this is object string +array: + - item1 + - item2 + - item3 diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.json b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.json new file mode 100644 index 000000000..d1d41e8be --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.json @@ -0,0 +1,6 @@ +{ + "object": { + "duplicity-key": 1, + "duplicity-key": 2 + } +} diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.yaml new file mode 100644 index 000000000..49bf64323 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.inner.yaml @@ -0,0 +1,3 @@ +object: + duplicity-key: 1 + duplicity-key: 2 diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.json b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.json new file mode 100644 index 000000000..70bb2dda5 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.json @@ -0,0 +1,4 @@ +{ + "duplicity-key": 1, + "duplicity-key": 2 +} \ No newline at end of file diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.yaml new file mode 100644 index 000000000..4219b67d3 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/duplicity.yaml @@ -0,0 +1,2 @@ +duplicity-key: 1 +duplicity-key: 2 diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include-key.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include-key.yaml new file mode 100644 index 000000000..ca53bc014 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include-key.yaml @@ -0,0 +1,7 @@ +none: null +boolean: false +number: 2026 +string: this is string +include: + - include1.yaml + - include2.yaml \ No newline at end of file diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include.inner.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include.inner.yaml new file mode 100644 index 000000000..50c0a61da --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include.inner.yaml @@ -0,0 +1,9 @@ +none: null +boolean: false +number: 2026 +string: this is string +object: !include include.object.yaml +array: + - item1 + - item2 + - item3 \ No newline at end of file diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include.object.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include.object.yaml new file mode 100644 index 000000000..4dd62b573 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include.object.yaml @@ -0,0 +1,2 @@ +number: 5000 +string: this is object string diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include.root.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include.root.yaml new file mode 100644 index 000000000..a461a0edb --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include.root.yaml @@ -0,0 +1 @@ +!include data.yaml diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include1.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include1.yaml new file mode 100644 index 000000000..1f7820401 --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include1.yaml @@ -0,0 +1,4 @@ +array: + - item1 + - item2 + - item3 diff --git a/tests/python/knot_resolver/utils/modeling/test_parsing/include2.yaml b/tests/python/knot_resolver/utils/modeling/test_parsing/include2.yaml new file mode 100644 index 000000000..15972a33d --- /dev/null +++ b/tests/python/knot_resolver/utils/modeling/test_parsing/include2.yaml @@ -0,0 +1,3 @@ +object: + number: 5000 + string: this is object string