From eecf8b3b5c43e4832a11c8718ae43e559a755829 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 17 Feb 2025 18:29:06 +0100 Subject: [PATCH] Rework module-list settings to use globs instead of regexps MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit "If you have a problem, and use a regexp, now you have two problems." I don't think this quip applies in all cases, but the existing interface with regexps was problematic for a few reasons: - users usually want to match at a word boundary, but regexps apply anywhere in the string, so to actually match correctly, the regexp has to be carefully constructed with word boundary assertions. Even our own config for initrds included some modules by mistake, e.g. drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/ch_ipsec.ko.xz was matched by crypto/. - once the regexps include the word boundary assertions, they became quite complex and hard to read. - because of the separate evaluation of include and exlude patterns, we need to have exclude patterns in the include patterns. The example given in the review: ^drivers/net/(?!wireless/) But this means that we can do exclusions in two places, making the whole scheme very complex. - the default to include all modules by default goes against the general design of mkosi, where only things that are explicitly configured are put in the image. That default is only useful when trying to build a "maximal" image that matches the current machine. In most uses, the default of including only requested modules makes more sense (initrd, addons, any not-host-only images). A new setting that takes glob patterns is added. There is only one setting instead of a pair, exclusion patterns are prefixed by '-'. The last matching glob wins. The details of how those globs are interpreted is crafted to match our particular use cases. For a single glob, the rules are: - 'foo' matches the basename, /some/path/foo.ko - 'bar/foo' matches the last component of the path, bar/foo.ko - '/full/path/bar' only matches /full/path/bar.ko. - crypto/* or crypto/ match all modules any crypto/ directory. - /crypto/* matches the modules in the top-level directory. This might seem complicated at first glance, but apart from the special handling of the suffix, those rules mostly match how 'ls' would handle a local path argument. Suffixes are not specified in the globs. '-' and '_' are treated as equivalent, except when part of special glob syntax with […]. New settings are added: KernelModules=GLOB KernelInitrdModules=GLOB (KernelModulesInitrd=BOOL already exists and specifies whether to create a separate initrd.) I think the new syntax is more pleasant to read and write. Backward compatibility is maintained by keeping the old options in place. The change to exclude all modules by default is a breaking change, but in most uses both options were used in combination anyway, so I think this should be fine. The example given in the review: KernelModulesInclude= ^drivers/net/(?!wireless/) becomes KernelModules= /drivers/net/ -/drivers/net/wireless/ --- mkosi/config.py | 81 ++++++++++++++++++++++++++++++++---------- mkosi/kmod.py | 87 +++++++++++++++++++++++++++++++++++++++------- tests/test_json.py | 8 ++--- tests/test_kmod.py | 51 +++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 36 deletions(-) create mode 100644 tests/test_kmod.py diff --git a/mkosi/config.py b/mkosi/config.py index 7ebb3daa0..26d14f14a 100644 --- a/mkosi/config.py +++ b/mkosi/config.py @@ -2426,6 +2426,12 @@ def parse_ini(path: Path, only_sections: Collection[str] = ()) -> Iterator[tuple yield section, "", "" +def parse_kernel_module_filter_regexp(p: str) -> str: + if p in ("default", "host"): + return p + return f"re:{p}" + + UKI_PROFILE_SETTINGS: list[ConfigSetting[Any]] = [ ConfigSetting( dest="profile", @@ -3044,22 +3050,37 @@ SETTINGS: list[ConfigSetting[Any]] = [ dest="kernel_modules_include", metavar="REGEX", section="Content", - parse=config_make_list_parser(delimiter=","), + parse=config_make_list_parser( + delimiter=",", + parse=parse_kernel_module_filter_regexp, + ), help="Include the specified kernel modules in the image", ), ConfigSetting( - dest="kernel_modules_include_host", - metavar="BOOL", + dest="kernel_modules_exclude", + metavar="REGEX", section="Content", - parse=config_parse_boolean, - help="Include the currently loaded modules on the host in the image", + parse=config_make_list_parser( + delimiter=",", + parse=parse_kernel_module_filter_regexp, + ), + help="Exclude the specified kernel modules from the image", ), ConfigSetting( - dest="kernel_modules_exclude", - metavar="REGEX", + dest="kernel_modules_include", + name="KernelModules", + long="--kernel-modules", + metavar="GLOB", section="Content", parse=config_make_list_parser(delimiter=","), - help="Exclude the specified kernel modules from the image", + help="Include/exclude the specified kernel modules in the image", + ), + ConfigSetting( + dest="kernel_modules_include_host", + metavar="BOOL", + section="Content", + parse=config_parse_boolean, + help="Include the currently loaded modules on the host in the image", ), ConfigSetting( dest="kernel_modules_initrd", @@ -3073,9 +3094,31 @@ SETTINGS: list[ConfigSetting[Any]] = [ dest="kernel_modules_initrd_include", metavar="REGEX", section="Content", - parse=config_make_list_parser(delimiter=","), + parse=config_make_list_parser( + delimiter=",", + parse=parse_kernel_module_filter_regexp, + ), help="When building a kernel modules initrd, include the specified kernel modules", ), + ConfigSetting( + dest="kernel_modules_initrd_exclude", + metavar="REGEX", + section="Content", + parse=config_make_list_parser( + delimiter=",", + parse=parse_kernel_module_filter_regexp, + ), + help="When building a kernel modules initrd, exclude the specified kernel modules", + ), + ConfigSetting( + dest="kernel_modules_initrd_include", + name="KernelInitrdModules", + long="--kernel-initrd-modules", + metavar="GLOB", + section="Content", + parse=config_make_list_parser(delimiter=","), + help="Include/exclude modules in the initrd", + ), ConfigSetting( dest="kernel_modules_initrd_include_host", metavar="BOOL", @@ -3084,13 +3127,6 @@ SETTINGS: list[ConfigSetting[Any]] = [ help="When building a kernel modules initrd, include the currently loaded modules " "on the host in the image", ), - ConfigSetting( - dest="kernel_modules_initrd_exclude", - metavar="REGEX", - section="Content", - parse=config_make_list_parser(delimiter=","), - help="When building a kernel modules initrd, exclude the specified kernel modules", - ), ConfigSetting( dest="firmware_include", metavar="REGEX", @@ -4023,6 +4059,11 @@ SETTINGS: list[ConfigSetting[Any]] = [ ] SETTINGS_LOOKUP_BY_NAME = {name: s for s in SETTINGS for name in [s.name, *s.compat_names]} SETTINGS_LOOKUP_BY_DEST = {s.dest: s for s in SETTINGS} +SETTINGS_LOOKUP_BY_OPTION = { + name: s + for s in SETTINGS + for name in [s.long, *s.compat_longs, s.short] if name +} # fmt: skip SETTINGS_LOOKUP_BY_SPECIFIER = {s.specifier: s for s in SETTINGS if s.specifier} MATCHES = ( @@ -4324,7 +4365,9 @@ class ConfigAction(argparse.Action): ) -> None: assert option_string is not None - s = SETTINGS_LOOKUP_BY_DEST[self.dest] + # For options that have the same dest, try to figure out the right + # option by matching the option name + s = SETTINGS_LOOKUP_BY_OPTION[self.option_strings[0]] if values is None or isinstance(values, str): values = [values] @@ -5070,14 +5113,14 @@ def summary(config: Config) -> str: Devicetree: {none_to_none(config.devicetree)} Splash: {none_to_none(config.splash)} Kernel Command Line: {line_join_list(config.kernel_command_line)} - Kernel Modules Include: {line_join_list(config.kernel_modules_include)} + Kernel Modules: {line_join_list(config.kernel_modules_include)} Kernel Modules Exclude: {line_join_list(config.kernel_modules_exclude)} Kernel Modules Include Host: {yes_no(config.kernel_modules_include_host)} Firmware Include: {line_join_list(config.firmware_include)} Firmware Exclude: {line_join_list(config.firmware_exclude)} Kernel Modules Initrd: {yes_no(config.kernel_modules_initrd)} - Kernel Modules Initrd Include: {line_join_list(config.kernel_modules_initrd_include)} + Kernel Initrd Modules: {line_join_list(config.kernel_modules_initrd_include)} Kernel Modules Initrd Exclude: {line_join_list(config.kernel_modules_initrd_exclude)} Kernel Modules Initrd Include Host: {yes_no(config.kernel_modules_initrd_include_host)} diff --git a/mkosi/kmod.py b/mkosi/kmod.py index 8d7b9f3e4..1e6d27d39 100644 --- a/mkosi/kmod.py +++ b/mkosi/kmod.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-2.1-or-later +import fnmatch import itertools import logging import os @@ -18,11 +19,37 @@ from mkosi.util import chdir, parents_below def loaded_modules() -> list[str]: # Loaded modules are listed with underscores but the filenames might use dashes instead. return [ - rf"/{line.split()[0].replace('_', '[_-]')}\.ko" - for line in Path("/proc/modules").read_text().splitlines() + normalize_module_name(line.split()[0]) for line in Path("/proc/modules").read_text().splitlines() ] +def globs_match_module(name, globs) -> bool: + # Strip '.ko' suffix and an optional compression suffix + name = re.sub(r"\.ko(\.(gz|xz|zst))?$", "", name) + # Check whether the suffixless-path matches any of the globs + + for glob in reversed(globs): + # Patterns are evaluated in order and last matching one wins. + # Patterns may be prefixed with '-' to exclude modules. + if negative := glob.startswith("-"): + glob = glob[1:] + # As a special case, if a directory is specified, all items + # below that directory are matched. + if glob.endswith("/"): + glob += "*" + + if ( + # match the full path + (glob.startswith("/") and fnmatch.fnmatch(f"/{name}", glob)) + # match a subset of the path, at path element boundary + or ("/" in glob and fnmatch.fnmatch(f"/{name}", f"*/{glob}")) + # match the basename + or fnmatch.fnmatch(name.split("/")[-1], glob) + ): + return not negative + return False + + def filter_kernel_modules( root: Path, kver: str, @@ -30,29 +57,49 @@ def filter_kernel_modules( include: Iterable[str], exclude: Iterable[str], ) -> list[Path]: + logging.debug(f"Kernel modules include: {' '.join(include)}") + logging.debug(f"Kernel modules exclude: {' '.join(exclude)}") + modulesd = Path("usr/lib/modules") / kver with chdir(root): - modules = set(modulesd.rglob("*.ko*")) + # The glob may match additional paths. + # Narrow this down to *.ko, *.ko.gz, *.ko.xz, *.ko.zst. + modules = { + m for m in modulesd.rglob("*.ko*") if m.name.endswith((".ko", ".ko.gz", ".ko.xz", ".ko.zst")) + } + + n_modules = len(modules) keep = set() + if include: - regex = re.compile("|".join(include)) + patterns = [p[3:] for p in include if p.startswith("re:")] + regex = re.compile("|".join(patterns)) + + globs = [normalize_module_glob(p) for p in include if not p.startswith("re:")] + for m in modules: rel = os.fspath(Path(*m.parts[5:])) - if regex.search(rel): - keep.add(rel) + + if (patterns and regex.search(rel)) or globs_match_module(normalize_module_name(rel), globs): + keep.add(m) if exclude: + assert all(p.startswith("re:") for p in exclude) + patterns = [p[3:] for p in exclude] + regex = re.compile("|".join(patterns)) + remove = set() - regex = re.compile("|".join(exclude)) - for m in modules: + for m in keep: rel = os.fspath(Path(*m.parts[5:])) - if rel not in keep and regex.search(rel): + if regex.search(rel): remove.add(m) - modules -= remove + keep -= remove + + logging.debug(f'Including {len(keep)}/{n_modules} kernel modules.') - return sorted(modules) + return sorted(keep) def filter_firmware( @@ -89,9 +136,23 @@ def filter_firmware( def normalize_module_name(name: str) -> str: + # Replace '_' by '-' return name.replace("_", "-") +def normalize_module_glob(name: str) -> str: + # We want to replace '_' by '-', except when used in […] + ans = "" + while name: + i = (name + "[").index("[") + ans += name[:i].replace("_", "-") + name = name[i:] + i = (name + "]").index("]") + ans += name[: i + 1] + name = name[i + 1 :] + return ans + + def module_path_to_name(path: Path) -> str: return normalize_module_name(path.name.partition(".")[0]) @@ -221,7 +282,7 @@ def gen_required_kernel_modules( # There is firmware in /usr/lib/firmware that is not depended on by any modules so if any firmware was # installed we have to take the slow path to make sure we don't copy firmware into the initrd that is not # depended on by any kernel modules. - if modules_exclude or (context.root / firmwared).glob("*"): + if modules_include or modules_exclude or (context.root / firmwared).glob("*"): modules = filter_kernel_modules(context.root, kver, include=modules_include, exclude=modules_exclude) names = [module_path_to_name(m) for m in modules] mods, firmware = resolve_module_dependencies(context, kver, names) @@ -278,7 +339,7 @@ def process_kernel_modules( firmware_include: Iterable[str], firmware_exclude: Iterable[str], ) -> None: - if not modules_exclude and not firmware_exclude: + if not (modules_include or modules_exclude or firmware_exclude): return modulesd = Path("usr/lib/modules") / kver diff --git a/tests/test_json.py b/tests/test_json.py index ff5d4bf4d..efa186b8a 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -227,16 +227,16 @@ def test_config() -> None: "command", "line" ], + "KernelInitrdModules": [], + "KernelModules": [ + "loop" + ], "KernelModulesExclude": [ "nvidia" ], - "KernelModulesInclude": [ - "loop" - ], "KernelModulesIncludeHost": true, "KernelModulesInitrd": true, "KernelModulesInitrdExclude": [], - "KernelModulesInitrdInclude": [], "KernelModulesInitrdIncludeHost": true, "Key": null, "Keymap": "wow, so much keymap", diff --git a/tests/test_kmod.py b/tests/test_kmod.py new file mode 100644 index 000000000..d1b3a324a --- /dev/null +++ b/tests/test_kmod.py @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +from mkosi import kmod + + +def test_globs_match_module(): + assert kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["ahci"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz.2", ["ahci"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["ata"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["drivers"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["/drivers"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["/drivers"]) + assert not kmod.globs_match_module("drivers/ata/ahci-2.ko.xz", ["ahci"]) + assert not kmod.globs_match_module("drivers/ata/ahci2.ko.zst", ["ahci"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["ata/*"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["/ata/*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["drivers/*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["/drivers/*"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko", ["ahci/*"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko", ["bahci*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.zst", ["ahci*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["ahc*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["ah*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["ata/"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["drivers/"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["drivers/ata/"]) + + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["-ahci", "*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko", ["-ahci", "*", "ahciahci"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["-ahci", "*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.zst", ["-ahci", "*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.gz", ["-ahci", "*"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.gz", ["-ahci", "drivers/"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.gz", ["-ahci", "ata/"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.gz", ["-ahci", "ata/ata/"]) + assert kmod.globs_match_module("drivers/ata/ahci.ko.gz", ["-ahci", "drivers/ata/"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko", ["*", "-ahci"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko", ["ahci", "-*"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.zst", ["-*"]) + assert not kmod.globs_match_module("drivers/ata/ahci.ko.xz", ["-*"]) + + +def test_normalize_module_glob(): + assert kmod.normalize_module_glob("raid[0-9]") == "raid[0-9]" + assert kmod.normalize_module_glob("raid[0_9]") == "raid[0_9]" + assert kmod.normalize_module_glob("raid[0_9]a_z") == "raid[0_9]a-z" + assert kmod.normalize_module_glob("0_9") == "0-9" + assert kmod.normalize_module_glob("[0_9") == "[0_9" + assert kmod.normalize_module_glob("0_9]") == "0-9]" + assert kmod.normalize_module_glob("raid[0_9]a_z[a_c]") == "raid[0_9]a-z[a_c]" -- 2.47.3