From: Daan De Meyer Date: Sat, 5 Aug 2023 14:12:43 +0000 (+0200) Subject: Rework tar archive handling X-Git-Tag: v15~35^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F1736%2Fhead;p=thirdparty%2Fmkosi.git Rework tar archive handling - Instead of relying on shutil.unpack_archive(), let's always use tar - Introduce archive_tree() and extract_tree() to abstract tar archives - Make sure tar always uses the user/group information from the root dir - Enable all features - Make sure tar doesn't overwrite directory permissions --- diff --git a/mkosi/__init__.py b/mkosi/__init__.py index f60a5d4d1..028d90395 100644 --- a/mkosi/__init__.py +++ b/mkosi/__init__.py @@ -43,7 +43,7 @@ from mkosi.pager import page from mkosi.qemu import copy_ephemeral, machine_cid, run_qemu from mkosi.run import become_root, bwrap, chroot_cmd, init_mount_namespace, run from mkosi.state import MkosiState -from mkosi.tree import copy_tree, move_tree, rmtree +from mkosi.tree import archive_tree, copy_tree, extract_tree, move_tree, rmtree from mkosi.types import PathString from mkosi.util import ( InvokingUser, @@ -70,7 +70,7 @@ def mount_image(state: MkosiState) -> Iterator[None]: if path.is_dir(): bases += [path] elif path.suffix == ".tar": - shutil.unpack_archive(path, d) + extract_tree(path, d) bases += [d] elif path.suffix == ".raw": run(["systemd-dissect", "-M", path, d]) @@ -524,7 +524,7 @@ def install_base_trees(state: MkosiState) -> None: if path.is_dir(): copy_tree(state.config, path, state.root) elif path.suffix == ".tar": - shutil.unpack_archive(path, state.root) + extract_tree(path, state.root) elif path.suffix == ".raw": run(["systemd-dissect", "--copy-from", path, "/", state.root]) else: @@ -546,7 +546,7 @@ def install_skeleton_trees(state: MkosiState) -> None: if source.is_dir() or target: copy_tree(state.config, source, t, preserve_owner=False) else: - shutil.unpack_archive(source, t) + extract_tree(source, t) def install_package_manager_trees(state: MkosiState) -> None: @@ -564,7 +564,7 @@ def install_package_manager_trees(state: MkosiState) -> None: if source.is_dir() or target: copy_tree(state.config, source, t, preserve_owner=False) else: - shutil.unpack_archive(source, t) + extract_tree(source, t) def install_extra_trees(state: MkosiState) -> None: @@ -582,7 +582,7 @@ def install_extra_trees(state: MkosiState) -> None: if source.is_dir() or target: copy_tree(state.config, source, t, preserve_owner=False) else: - shutil.unpack_archive(source, t) + extract_tree(source, t) def install_build_dest(state: MkosiState) -> None: @@ -597,32 +597,12 @@ def gzip_binary() -> str: return "pigz" if shutil.which("pigz") else "gzip" -def tar_binary() -> str: - # Some distros (Mandriva) install BSD tar as "tar", hence prefer - # "gtar" if it exists, which should be GNU tar wherever it exists. - # We are interested in exposing same behaviour everywhere hence - # it's preferable to use the same implementation of tar - # everywhere. In particular given the limited/different SELinux - # support in BSD tar and the different command line syntax - # compared to GNU tar. - return "gtar" if shutil.which("gtar") else "tar" - - def make_tar(state: MkosiState) -> None: if state.config.output_format != OutputFormat.tar: return - cmd: list[PathString] = [ - tar_binary(), - "-C", state.root, - "-c", "--xattrs", - "--xattrs-include=*", - "--file", state.staging / state.config.output_with_format, - ".", - ] - with complete_step("Creating archive…"): - run(cmd) + archive_tree(state.root, state.staging / state.config.output_with_format) def find_files(dir: Path, root: Path) -> Iterator[Path]: diff --git a/mkosi/distributions/debian.py b/mkosi/distributions/debian.py index be9d716ed..0fb09f990 100644 --- a/mkosi/distributions/debian.py +++ b/mkosi/distributions/debian.py @@ -11,6 +11,7 @@ from mkosi.installer.apt import invoke_apt, setup_apt from mkosi.log import die from mkosi.run import run from mkosi.state import MkosiState +from mkosi.tree import extract_tree class DebianInstaller(DistributionInstaller): @@ -112,7 +113,7 @@ class DebianInstaller(DistributionInstaller): for deb in essential: with tempfile.NamedTemporaryFile() as f: run(["dpkg-deb", "--fsys-tarfile", deb], stdout=f) - run(["tar", "-C", state.root, "--keep-directory-symlink", "--extract", "--file", f.name]) + extract_tree(Path(f.name), state.root) # Finally, run apt to properly install packages in the chroot without having to worry that maintainer # scripts won't find basic tools that they depend on. diff --git a/mkosi/distributions/gentoo.py b/mkosi/distributions/gentoo.py index 27d5722ba..52ee62055 100644 --- a/mkosi/distributions/gentoo.py +++ b/mkosi/distributions/gentoo.py @@ -12,7 +12,7 @@ from mkosi.distributions import DistributionInstaller, PackageType from mkosi.log import ARG_DEBUG, complete_step, die from mkosi.run import apivfs_cmd, bwrap, chroot_cmd, run from mkosi.state import MkosiState -from mkosi.tree import copy_tree, rmtree +from mkosi.tree import copy_tree, extract_tree, rmtree from mkosi.types import PathString from mkosi.util import flatten, sort_packages @@ -111,14 +111,7 @@ class GentooInstaller(DistributionInstaller): if not any(stage3.iterdir()): with complete_step(f"Extracting {stage3_tar.name} to {stage3}"): - run(["tar", - "--numeric-owner", - "-C", stage3, - "--extract", - "--file", stage3_tar, - "--exclude", "./dev/*", - "--exclude", "./proc/*", - "--exclude", "./sys/*"]) + extract_tree(stage3_tar, stage3) for d in ("binpkgs", "distfiles", "repos/gentoo"): (state.cache_dir / d).mkdir(parents=True, exist_ok=True) diff --git a/mkosi/run.py b/mkosi/run.py index 801bd550c..0fe242b9b 100644 --- a/mkosi/run.py +++ b/mkosi/run.py @@ -313,6 +313,24 @@ def bwrap( return result +def finalize_passwd_mounts(root: Path) -> list[PathString]: + """ + If passwd or a related file exists in the apivfs directory, bind mount it over the host files while we + run the command, to make sure that the command we run uses user/group information from the apivfs + directory instead of from the host. If the file doesn't exist yet, mount over /dev/null instead. + """ + options: list[PathString] = [] + + for f in ("passwd", "group", "shadow", "gshadow"): + p = root / "etc" / f + if p.exists(): + options += ["--bind", p, f"/etc/{f}"] + else: + options += ["--bind", "/dev/null", f"/etc/{f}"] + + return options + + def apivfs_cmd(root: Path) -> list[PathString]: cmdline: list[PathString] = [ "bwrap", @@ -330,16 +348,7 @@ def apivfs_cmd(root: Path) -> list[PathString]: # Make sure /etc/machine-id is not overwritten by any package manager post install scripts. cmdline += ["--ro-bind", root / "etc/machine-id", root / "etc/machine-id"] - # If passwd or a related file exists in the apivfs directory, bind mount it over the host files while - # we run the command, to make sure that the command we run uses user/group information from the - # apivfs directory instead of from the host. If the file doesn't exist yet, mount over /dev/null - # instead. - for f in ("passwd", "group", "shadow", "gshadow"): - p = root / "etc" / f - if p.exists(): - cmdline += ["--bind", p, f"/etc/{f}"] - else: - cmdline += ["--bind", "/dev/null", f"/etc/{f}"] + cmdline += finalize_passwd_mounts(root) chmod = f"chmod 1777 {root / 'tmp'} {root / 'var/tmp'} {root / 'dev/shm'}" # Make sure anything running in the root directory thinks it's in a container. $container can't always be diff --git a/mkosi/tree.py b/mkosi/tree.py index 640890d84..da826a166 100644 --- a/mkosi/tree.py +++ b/mkosi/tree.py @@ -8,8 +8,9 @@ from typing import Sequence, cast from mkosi.config import ConfigFeature, MkosiConfig from mkosi.log import die -from mkosi.run import run +from mkosi.run import bwrap, finalize_passwd_mounts, run from mkosi.types import PathString +from mkosi.util import tar_binary def statfs(path: Path) -> str: @@ -98,3 +99,57 @@ def move_tree(config: MkosiConfig, src: Path, dst: Path) -> None: copy_tree(config, src, dst) rmtree(src) + + +def tar_exclude_apivfs_tmp() -> list[str]: + return [ + "--exclude", "./dev/*", + "--exclude", "./proc/*", + "--exclude", "./sys/*", + "--exclude", "./tmp/*", + "--exclude", "./run/*", + "--exclude", "./var/tmp/*", + ] + + +def archive_tree(src: Path, dst: Path) -> None: + bwrap( + [ + tar_binary(), + "--create", + "--file", dst, + "--directory", src, + "--acls", + "--selinux", + "--xattrs", + "--sparse", + "--force-local", + *tar_exclude_apivfs_tmp(), + ".", + ], + # Make sure tar uses user/group information from the root directory instead of the host. + options=finalize_passwd_mounts(src) if (src / "etc/passwd").exists() else [], + ) + + +def extract_tree(src: Path, dst: Path) -> None: + bwrap( + [ + tar_binary(), + "--extract", + "--file", src, + "--directory", dst, + "--keep-directory-symlink", + "--no-overwrite-dir", + "--same-permissions", + "--same-owner" if (dst / "etc/passwd").exists() else "--numeric-owner", + "--same-order", + "--acls", + "--selinux", + "--xattrs", + "--force-local", + *tar_exclude_apivfs_tmp(), + ], + # Make sure tar uses user/group information from the root directory instead of the host. + options=finalize_passwd_mounts(dst) if (dst / "etc/passwd").exists() else [], + ) diff --git a/mkosi/util.py b/mkosi/util.py index 17843b3bb..1ac16af2b 100644 --- a/mkosi/util.py +++ b/mkosi/util.py @@ -14,6 +14,7 @@ import os import pwd import re import resource +import shutil import stat import sys import tempfile @@ -213,3 +214,14 @@ class StrEnum(enum.Enum): @classmethod def values(cls) -> list[str]: return list(map(str, cls)) + + +def tar_binary() -> str: + # Some distros (Mandriva) install BSD tar as "tar", hence prefer + # "gtar" if it exists, which should be GNU tar wherever it exists. + # We are interested in exposing same behaviour everywhere hence + # it's preferable to use the same implementation of tar + # everywhere. In particular given the limited/different SELinux + # support in BSD tar and the different command line syntax + # compared to GNU tar. + return "gtar" if shutil.which("gtar") else "tar"