git.ipfire.org Git - thirdparty/mkosi.git/commitdiff
Rework tar archive handling 1736/head
author Daan De Meyer <daan.j.demeyer@gmail.com>
Sat, 5 Aug 2023 14:12:43 +0000 (16:12 +0200)
committer Daan De Meyer <daan.j.demeyer@gmail.com>
Sat, 5 Aug 2023 14:34:38 +0000 (16:34 +0200)
- Instead of relying on shutil.unpack_archive(), let's always use tar
- Introduce archive_tree() and extract_tree() to abstract tar archives
- Make sure tar always uses the user/group information from the root dir
- Enable all relevant tar features (--acls, --selinux, --xattrs, --sparse)
- Make sure tar doesn't overwrite directory permissions

mkosi/__init__.py
mkosi/distributions/debian.py
mkosi/distributions/gentoo.py
mkosi/run.py
mkosi/tree.py
mkosi/util.py

diff --git a/mkosi/__init__.py b/mkosi/__init__.py
index f60a5d4d142dc797288d00e147ddf204120dd78d..028d90395a76c93336faf621beca22a654878459 100644 (file)
@@ -43,7 +43,7 @@ from mkosi.pager import page
 from mkosi.qemu import copy_ephemeral, machine_cid, run_qemu
 from mkosi.run import become_root, bwrap, chroot_cmd, init_mount_namespace, run
 from mkosi.state import MkosiState
-from mkosi.tree import copy_tree, move_tree, rmtree
+from mkosi.tree import archive_tree, copy_tree, extract_tree, move_tree, rmtree
 from mkosi.types import PathString
 from mkosi.util import (
     InvokingUser,
@@ -70,7 +70,7 @@ def mount_image(state: MkosiState) -> Iterator[None]:
                 if path.is_dir():
                     bases += [path]
                 elif path.suffix == ".tar":
-                    shutil.unpack_archive(path, d)
+                    extract_tree(path, d)
                     bases += [d]
                 elif path.suffix == ".raw":
                     run(["systemd-dissect", "-M", path, d])
@@ -524,7 +524,7 @@ def install_base_trees(state: MkosiState) -> None:
             if path.is_dir():
                 copy_tree(state.config, path, state.root)
             elif path.suffix == ".tar":
-                shutil.unpack_archive(path, state.root)
+                extract_tree(path, state.root)
             elif path.suffix == ".raw":
                 run(["systemd-dissect", "--copy-from", path, "/", state.root])
             else:
@@ -546,7 +546,7 @@ def install_skeleton_trees(state: MkosiState) -> None:
             if source.is_dir() or target:
                 copy_tree(state.config, source, t, preserve_owner=False)
             else:
-                shutil.unpack_archive(source, t)
+                extract_tree(source, t)
 
 
 def install_package_manager_trees(state: MkosiState) -> None:
@@ -564,7 +564,7 @@ def install_package_manager_trees(state: MkosiState) -> None:
             if source.is_dir() or target:
                 copy_tree(state.config, source, t, preserve_owner=False)
             else:
-                shutil.unpack_archive(source, t)
+                extract_tree(source, t)
 
 
 def install_extra_trees(state: MkosiState) -> None:
@@ -582,7 +582,7 @@ def install_extra_trees(state: MkosiState) -> None:
             if source.is_dir() or target:
                 copy_tree(state.config, source, t, preserve_owner=False)
             else:
-                shutil.unpack_archive(source, t)
+                extract_tree(source, t)
 
 
 def install_build_dest(state: MkosiState) -> None:
@@ -597,32 +597,12 @@ def gzip_binary() -> str:
     return "pigz" if shutil.which("pigz") else "gzip"
 
 
-def tar_binary() -> str:
-    # Some distros (Mandriva) install BSD tar as "tar", hence prefer
-    # "gtar" if it exists, which should be GNU tar wherever it exists.
-    # We are interested in exposing same behaviour everywhere hence
-    # it's preferable to use the same implementation of tar
-    # everywhere. In particular given the limited/different SELinux
-    # support in BSD tar and the different command line syntax
-    # compared to GNU tar.
-    return "gtar" if shutil.which("gtar") else "tar"
-
-
 def make_tar(state: MkosiState) -> None:
     if state.config.output_format != OutputFormat.tar:
         return
 
-    cmd: list[PathString] = [
-        tar_binary(),
-        "-C", state.root,
-        "-c", "--xattrs",
-        "--xattrs-include=*",
-        "--file", state.staging / state.config.output_with_format,
-        ".",
-    ]
-
     with complete_step("Creating archive…"):
-        run(cmd)
+        archive_tree(state.root, state.staging / state.config.output_with_format)
 
 
 def find_files(dir: Path, root: Path) -> Iterator[Path]:
diff --git a/mkosi/distributions/debian.py b/mkosi/distributions/debian.py
index be9d716ed98e10e65e58049f2a969d5516cae28d..0fb09f990eff1e5a253177de36af0d1cc9b8aced 100644 (file)
@@ -11,6 +11,7 @@ from mkosi.installer.apt import invoke_apt, setup_apt
 from mkosi.log import die
 from mkosi.run import run
 from mkosi.state import MkosiState
+from mkosi.tree import extract_tree
 
 
 class DebianInstaller(DistributionInstaller):
@@ -112,7 +113,7 @@ class DebianInstaller(DistributionInstaller):
         for deb in essential:
             with tempfile.NamedTemporaryFile() as f:
                 run(["dpkg-deb", "--fsys-tarfile", deb], stdout=f)
-                run(["tar", "-C", state.root, "--keep-directory-symlink", "--extract", "--file", f.name])
+                extract_tree(Path(f.name), state.root)
 
         # Finally, run apt to properly install packages in the chroot without having to worry that maintainer
         # scripts won't find basic tools that they depend on.
diff --git a/mkosi/distributions/gentoo.py b/mkosi/distributions/gentoo.py
index 27d5722ba6a9a9642b8769afa7c6ca5f88d09986..52ee620555dba93e03ba614f9532f2c938de5478 100644 (file)
@@ -12,7 +12,7 @@ from mkosi.distributions import DistributionInstaller, PackageType
 from mkosi.log import ARG_DEBUG, complete_step, die
 from mkosi.run import apivfs_cmd, bwrap, chroot_cmd, run
 from mkosi.state import MkosiState
-from mkosi.tree import copy_tree, rmtree
+from mkosi.tree import copy_tree, extract_tree, rmtree
 from mkosi.types import PathString
 from mkosi.util import flatten, sort_packages
 
@@ -111,14 +111,7 @@ class GentooInstaller(DistributionInstaller):
 
         if not any(stage3.iterdir()):
             with complete_step(f"Extracting {stage3_tar.name} to {stage3}"):
-                run(["tar",
-                     "--numeric-owner",
-                     "-C", stage3,
-                     "--extract",
-                     "--file", stage3_tar,
-                     "--exclude", "./dev/*",
-                     "--exclude", "./proc/*",
-                     "--exclude", "./sys/*"])
+                extract_tree(stage3_tar, stage3)
 
         for d in ("binpkgs", "distfiles", "repos/gentoo"):
             (state.cache_dir / d).mkdir(parents=True, exist_ok=True)
diff --git a/mkosi/run.py b/mkosi/run.py
index 801bd550cd0ce814a1adbf73233088d807981c65..0fe242b9b5d46dc31ab702c3129d1fe6031c1574 100644 (file)
@@ -313,6 +313,24 @@ def bwrap(
         return result
 
 
+def finalize_passwd_mounts(root: Path) -> list[PathString]:
+    """
+    If passwd or a related file exists in the apivfs directory, bind mount it over the host files while we
+    run the command, to make sure that the command we run uses user/group information from the apivfs
+    directory instead of from the host. If the file doesn't exist yet, mount over /dev/null instead.
+    """
+    options: list[PathString] = []
+
+    for f in ("passwd", "group", "shadow", "gshadow"):
+        p = root / "etc" / f
+        if p.exists():
+            options += ["--bind", p, f"/etc/{f}"]
+        else:
+            options += ["--bind", "/dev/null", f"/etc/{f}"]
+
+    return options
+
+
 def apivfs_cmd(root: Path) -> list[PathString]:
     cmdline: list[PathString] = [
         "bwrap",
@@ -330,16 +348,7 @@ def apivfs_cmd(root: Path) -> list[PathString]:
         # Make sure /etc/machine-id is not overwritten by any package manager post install scripts.
         cmdline += ["--ro-bind", root / "etc/machine-id", root / "etc/machine-id"]
 
-    # If passwd or a related file exists in the apivfs directory, bind mount it over the host files while
-    # we run the command, to make sure that the command we run uses user/group information from the
-    # apivfs directory instead of from the host. If the file doesn't exist yet, mount over /dev/null
-    # instead.
-    for f in ("passwd", "group", "shadow", "gshadow"):
-        p = root / "etc" / f
-        if p.exists():
-            cmdline += ["--bind", p, f"/etc/{f}"]
-        else:
-            cmdline += ["--bind", "/dev/null", f"/etc/{f}"]
+    cmdline += finalize_passwd_mounts(root)
 
     chmod = f"chmod 1777 {root / 'tmp'} {root / 'var/tmp'} {root / 'dev/shm'}"
     # Make sure anything running in the root directory thinks it's in a container. $container can't always be
diff --git a/mkosi/tree.py b/mkosi/tree.py
index 640890d84280614a1cedda7177bf42badea6a3d8..da826a166361c6f82671bb533de16f5bc1f7945b 100644 (file)
@@ -8,8 +8,9 @@ from typing import Sequence, cast
 
 from mkosi.config import ConfigFeature, MkosiConfig
 from mkosi.log import die
-from mkosi.run import run
+from mkosi.run import bwrap, finalize_passwd_mounts, run
 from mkosi.types import PathString
+from mkosi.util import tar_binary
 
 
 def statfs(path: Path) -> str:
@@ -98,3 +99,57 @@ def move_tree(config: MkosiConfig, src: Path, dst: Path) -> None:
 
         copy_tree(config, src, dst)
         rmtree(src)
+
+
+def tar_exclude_apivfs_tmp() -> list[str]:
+    return [
+        "--exclude", "./dev/*",
+        "--exclude", "./proc/*",
+        "--exclude", "./sys/*",
+        "--exclude", "./tmp/*",
+        "--exclude", "./run/*",
+        "--exclude", "./var/tmp/*",
+    ]
+
+
+def archive_tree(src: Path, dst: Path) -> None:
+    bwrap(
+        [
+            tar_binary(),
+            "--create",
+            "--file", dst,
+            "--directory", src,
+            "--acls",
+            "--selinux",
+            "--xattrs",
+            "--sparse",
+            "--force-local",
+            *tar_exclude_apivfs_tmp(),
+            ".",
+        ],
+        # Make sure tar uses user/group information from the root directory instead of the host.
+        options=finalize_passwd_mounts(src) if (src / "etc/passwd").exists() else [],
+    )
+
+
+def extract_tree(src: Path, dst: Path) -> None:
+    bwrap(
+        [
+            tar_binary(),
+            "--extract",
+            "--file", src,
+            "--directory", dst,
+            "--keep-directory-symlink",
+            "--no-overwrite-dir",
+            "--same-permissions",
+            "--same-owner" if (dst / "etc/passwd").exists() else "--numeric-owner",
+            "--same-order",
+            "--acls",
+            "--selinux",
+            "--xattrs",
+            "--force-local",
+            *tar_exclude_apivfs_tmp(),
+        ],
+        # Make sure tar uses user/group information from the root directory instead of the host.
+        options=finalize_passwd_mounts(dst) if (dst / "etc/passwd").exists() else [],
+    )
diff --git a/mkosi/util.py b/mkosi/util.py
index 17843b3bb75de47dfb1e0c34856aa687dbbf6e82..1ac16af2bbb5490b7dfb279118bb65bd6674adae 100644 (file)
@@ -14,6 +14,7 @@ import os
 import pwd
 import re
 import resource
+import shutil
 import stat
 import sys
 import tempfile
@@ -213,3 +214,14 @@ class StrEnum(enum.Enum):
     @classmethod
     def values(cls) -> list[str]:
         return list(map(str, cls))
+
+
+def tar_binary() -> str:
+    # Some distros (Mandriva) install BSD tar as "tar", hence prefer
+    # "gtar" if it exists, which should be GNU tar wherever it exists.
+    # We are interested in exposing same behaviour everywhere hence
+    # it's preferable to use the same implementation of tar
+    # everywhere. In particular given the limited/different SELinux
+    # support in BSD tar and the different command line syntax
+    # compared to GNU tar.
+    return "gtar" if shutil.which("gtar") else "tar"