]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
importd: port untarring logic over to libarchive 39143/head
authorLennart Poettering <lennart@poettering.net>
Thu, 17 Jul 2025 06:27:21 +0000 (08:27 +0200)
committerLennart Poettering <lennart@poettering.net>
Tue, 14 Oct 2025 09:56:45 +0000 (11:56 +0200)
This way we can expose identical behaviour everywhere, can make use
of our atomic replacement calls, and openat() logic, and later apply
additional tracks while unpacking, such as putting limits on UID ranges
and similar.

meson.build
src/import/import-common.c
src/import/import-common.h
src/import/import-tar.c
src/import/pull-tar.c
src/shared/libarchive-util.c
src/shared/libarchive-util.h
src/shared/meson.build
src/shared/tar-util.c [new file with mode: 0644]
src/shared/tar-util.h [new file with mode: 0644]

index c67e7b6c30de4676184de7f9e0fcc2e2c6d5ec6c..cda4285c9df4c59918eafc3e2559115e26d6e0fa 100644 (file)
@@ -1438,6 +1438,10 @@ libarchive = dependency('libarchive',
                         version : '>= 3.0',
                         required : get_option('libarchive'))
 conf.set10('HAVE_LIBARCHIVE', libarchive.found())
+# Optional libarchive accessors: only probe for them when libarchive itself was found.
+conf.set10('HAVE_LIBARCHIVE_UID_IS_SET',
+           libarchive.found() and cc.has_function('archive_entry_uid_is_set', dependencies : libarchive))
+conf.set10('HAVE_LIBARCHIVE_HARDLINK_IS_SET',
+           libarchive.found() and cc.has_function('archive_entry_hardlink_is_set', dependencies : libarchive))
 
 libxkbcommon = dependency('xkbcommon',
                           version : '>= 0.3.0',
index b6e47fee4d60c695a86c7a9029b5e26073056170..31bc7c95df57ba7a04195ba54c31ef1a70d9dbe7 100644 (file)
@@ -1,71 +1,60 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
 #include <sched.h>
-#include <sys/stat.h>
-#include <unistd.h>
+#include <sys/prctl.h>
 
 #include "sd-event.h"
 
-#include "alloc-util.h"
 #include "capability-util.h"
 #include "dirent-util.h"
 #include "fd-util.h"
-#include "fileio.h"
 #include "fs-util.h"
 #include "import-common.h"
+#include "libarchive-util.h"
 #include "log.h"
 #include "os-util.h"
 #include "pidref.h"
 #include "process-util.h"
 #include "selinux-util.h"
 #include "stat-util.h"
+#include "tar-util.h"
 #include "tmpfile-util.h"
 
-int import_fork_tar_x(const char *path, PidRef *ret) {
-        _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
-        _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR;
-        bool use_selinux;
+int import_fork_tar_x(int tree_fd, PidRef *ret_pid) {
         int r;
 
-        assert(path);
-        assert(ret);
+        assert(tree_fd >= 0);
+        assert(ret_pid);
+
+        r = dlopen_libarchive();
+        if (r < 0)
+                return r;
+
+        TarFlags flags = mac_selinux_use() ? TAR_SELINUX : 0;
 
+        _cleanup_close_pair_ int pipefd[2] = EBADF_PAIR;
         if (pipe2(pipefd, O_CLOEXEC) < 0)
                 return log_error_errno(errno, "Failed to create pipe for tar: %m");
 
         (void) fcntl(pipefd[0], F_SETPIPE_SZ, IMPORT_BUFFER_SIZE);
 
-        use_selinux = mac_selinux_use();
-
         r = pidref_safe_fork_full(
-                        "(tar)",
-                        (int[]) { pipefd[0], -EBADF, STDERR_FILENO },
-                        NULL, 0,
-                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_REARRANGE_STDIO|FORK_LOG,
-                        &pid);
+                        "tar-x",
+                        /* stdio_fds= */ NULL,
+                        (int[]) { tree_fd, pipefd[0] }, 2,
+                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_LOG|FORK_REOPEN_LOG,
+                        ret_pid);
         if (r < 0)
                 return r;
         if (r == 0) {
-                const char *cmdline[] = {
-                       "tar",
-                       "--ignore-zeros",
-                       "--numeric-owner",
-                       "-C", path,
-                       "-pxf",
-                       "-",
-                       "--xattrs",
-                       "--xattrs-include=*",
-                       use_selinux ? "--selinux" : "--no-selinux",
-                       NULL
-                };
-
-                uint64_t retain =
+                static const uint64_t retain =
                         (1ULL << CAP_CHOWN) |
                         (1ULL << CAP_FOWNER) |
                         (1ULL << CAP_FSETID) |
                         (1ULL << CAP_MKNOD) |
                         (1ULL << CAP_SETFCAP) |
-                        (1ULL << CAP_DAC_OVERRIDE);
+                        (1ULL << CAP_DAC_OVERRIDE) |
+                        (1ULL << CAP_DAC_READ_SEARCH);
 
                 /* Child */
 
@@ -76,20 +65,14 @@ int import_fork_tar_x(const char *path, PidRef *ret) {
                 if (r < 0)
                         log_warning_errno(r, "Failed to drop capabilities, ignoring: %m");
 
-                /* Try "gtar" before "tar". We only test things upstream with GNU tar. Some distros appear to
-                 * install a different implementation as "tar" (in particular some that do not support the
-                 * same command line switches), but then provide "gtar" as alias for the real thing, hence
-                 * let's prefer that. (Yes, it's a bad idea they do that, given they don't provide equivalent
-                 * command line support, but we are not here to argue, let's just expose the same
-                 * behaviour/implementation everywhere.) */
-                execvp("gtar", (char* const*) cmdline);
-                execvp("tar", (char* const*) cmdline);
+                if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
+                        log_warning_errno(errno, "Failed to enable PR_SET_NO_NEW_PRIVS, ignoring: %m");
 
-                log_error_errno(errno, "Failed to execute tar: %m");
-                _exit(EXIT_FAILURE);
-        }
+                if (tar_x(pipefd[0], tree_fd, flags) < 0)
+                        _exit(EXIT_FAILURE);
 
-        *ret = TAKE_PIDREF(pid);
+                _exit(EXIT_SUCCESS);
+        }
 
         return TAKE_FD(pipefd[1]);
 }
index bd6988277f3c0a5c81399aaa831cf87ea579cf9d..552e68f67cf5401af087be4476fd436152927bb2 100644 (file)
@@ -34,7 +34,7 @@ typedef enum ImportFlags {
 } ImportFlags;
 
 int import_fork_tar_c(const char *path, PidRef *ret);
-int import_fork_tar_x(const char *path, PidRef *ret);
+int import_fork_tar_x(int tree_fd, PidRef *ret_pid);
 
 int import_mangle_os_tree(const char *path);
 
index 09bea8f767d6e8d25c1b1d9c122c4fd69c18783e..7e5499b2b993f13b6ae843d0af62e3c199a31ff9 100644 (file)
@@ -45,6 +45,7 @@ typedef struct TarImport {
 
         int input_fd;
         int tar_fd;
+        int tree_fd;
 
         ImportCompress compress;
 
@@ -79,6 +80,7 @@ TarImport* tar_import_unref(TarImport *i) {
         sd_event_unref(i->event);
 
         safe_close(i->tar_fd);
+        safe_close(i->tree_fd);
 
         free(i->final_path);
         free(i->image_root);
@@ -111,6 +113,7 @@ int tar_import_new(
         *i = (TarImport) {
                 .input_fd = -EBADF,
                 .tar_fd = -EBADF,
+                .tree_fd = -EBADF,
                 .on_finished = on_finished,
                 .userdata = userdata,
                 .last_percent = UINT_MAX,
@@ -172,6 +175,7 @@ static int tar_import_finish(TarImport *i) {
 
         assert(i);
         assert(i->tar_fd >= 0);
+        assert(i->tree_fd >= 0);
 
         i->tar_fd = safe_close(i->tar_fd);
 
@@ -215,6 +219,7 @@ static int tar_import_fork_tar(TarImport *i) {
         assert(!i->final_path);
         assert(!i->temp_path);
         assert(i->tar_fd < 0);
+        assert(i->tree_fd < 0);
 
         if (i->flags & IMPORT_DIRECT) {
                 d = i->local;
@@ -254,7 +259,11 @@ static int tar_import_fork_tar(TarImport *i) {
                 (void) import_assign_pool_quota_and_warn(d);
         }
 
-        i->tar_fd = import_fork_tar_x(d, &i->tar_pid);
+        i->tree_fd = open(d, O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+        if (i->tree_fd < 0)
+                return log_error_errno(errno, "Failed to open '%s': %m", d);
+
+        i->tar_fd = import_fork_tar_x(i->tree_fd, &i->tar_pid);
         if (i->tar_fd < 0)
                 return i->tar_fd;
 
index 922449b980b3ac53e485bdfacd83f265b65c28b5..fa44216f8cf3d1b7935c1280aea9137a28e2b364 100644 (file)
@@ -10,6 +10,7 @@
 #include "copy.h"
 #include "curl-util.h"
 #include "errno-util.h"
+#include "fd-util.h"
 #include "fs-util.h"
 #include "import-common.h"
 #include "import-util.h"
@@ -61,6 +62,8 @@ typedef struct TarPull {
         char *settings_temp_path;
 
         char *checksum;
+
+        int tree_fd;
 } TarPull;
 
 TarPull* tar_pull_unref(TarPull *i) {
@@ -86,6 +89,8 @@ TarPull* tar_pull_unref(TarPull *i) {
         free(i->local);
         free(i->checksum);
 
+        safe_close(i->tree_fd);
+
         return mfree(i);
 }
 
@@ -132,6 +137,7 @@ int tar_pull_new(
                 .event = TAKE_PTR(e),
                 .glue = TAKE_PTR(g),
                 .tar_pid = PIDREF_NULL,
+                .tree_fd = -EBADF,
         };
 
         i->glue->on_finished = pull_job_curl_on_finished;
@@ -512,6 +518,7 @@ static int tar_pull_job_on_open_disk_tar(PullJob *j) {
         i = j->userdata;
         assert(i->tar_job == j);
         assert(!pidref_is_set(&i->tar_pid));
+        assert(i->tree_fd < 0);
 
         if (i->flags & IMPORT_DIRECT)
                 where = i->local;
@@ -545,7 +552,11 @@ static int tar_pull_job_on_open_disk_tar(PullJob *j) {
                 (void) import_assign_pool_quota_and_warn(where);
         }
 
-        j->disk_fd = import_fork_tar_x(where, &i->tar_pid);
+        i->tree_fd = open(where, O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+        if (i->tree_fd < 0)
+                return log_error_errno(errno, "Failed to open '%s': %m", where);
+
+        j->disk_fd = import_fork_tar_x(i->tree_fd, &i->tar_pid);
         if (j->disk_fd < 0)
                 return j->disk_fd;
 
index dcc9efa810dd6b14c64204bf250a3b3daf4cf9ac..961d9f6a03961dcb40f5df381873b9648bbf0a2f 100644 (file)
@@ -7,8 +7,24 @@
 #if HAVE_LIBARCHIVE
 static void *libarchive_dl = NULL;
 
+DLSYM_PROTOTYPE(archive_entry_filetype) = NULL;
 DLSYM_PROTOTYPE(archive_entry_free) = NULL;
+DLSYM_PROTOTYPE(archive_entry_gid) = NULL;
+#if HAVE_LIBARCHIVE_UID_IS_SET
+DLSYM_PROTOTYPE(archive_entry_gid_is_set) = NULL;
+#endif
+DLSYM_PROTOTYPE(archive_entry_hardlink) = NULL;
+#if HAVE_LIBARCHIVE_HARDLINK_IS_SET
+DLSYM_PROTOTYPE(archive_entry_hardlink_is_set) = NULL;
+#endif
+DLSYM_PROTOTYPE(archive_entry_mode) = NULL;
+DLSYM_PROTOTYPE(archive_entry_mtime) = NULL;
+DLSYM_PROTOTYPE(archive_entry_mtime_is_set) = NULL;
+DLSYM_PROTOTYPE(archive_entry_mtime_nsec) = NULL;
 DLSYM_PROTOTYPE(archive_entry_new) = NULL;
+DLSYM_PROTOTYPE(archive_entry_pathname) = NULL;
+DLSYM_PROTOTYPE(archive_entry_rdevmajor) = NULL;
+DLSYM_PROTOTYPE(archive_entry_rdevminor) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_ctime) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_filetype) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_gid) = NULL;
@@ -17,10 +33,24 @@ DLSYM_PROTOTYPE(archive_entry_set_pathname) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_perm) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_rdevmajor) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_rdevminor) = NULL;
-DLSYM_PROTOTYPE(archive_entry_set_symlink) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_size) = NULL;
+DLSYM_PROTOTYPE(archive_entry_set_symlink) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_uid) = NULL;
+DLSYM_PROTOTYPE(archive_entry_symlink) = NULL;
+DLSYM_PROTOTYPE(archive_entry_uid) = NULL;
+#if HAVE_LIBARCHIVE_UID_IS_SET
+DLSYM_PROTOTYPE(archive_entry_uid_is_set) = NULL;
+#endif
+DLSYM_PROTOTYPE(archive_entry_xattr_next) = NULL;
+DLSYM_PROTOTYPE(archive_entry_xattr_reset) = NULL;
 DLSYM_PROTOTYPE(archive_error_string) = NULL;
+DLSYM_PROTOTYPE(archive_read_data_into_fd) = NULL;
+DLSYM_PROTOTYPE(archive_read_free) = NULL;
+DLSYM_PROTOTYPE(archive_read_new) = NULL;
+DLSYM_PROTOTYPE(archive_read_next_header) = NULL;
+DLSYM_PROTOTYPE(archive_read_open_fd) = NULL;
+DLSYM_PROTOTYPE(archive_read_support_format_cpio) = NULL;
+DLSYM_PROTOTYPE(archive_read_support_format_tar) = NULL;
 DLSYM_PROTOTYPE(archive_write_close) = NULL;
 DLSYM_PROTOTYPE(archive_write_data) = NULL;
 DLSYM_PROTOTYPE(archive_write_free) = NULL;
@@ -41,8 +71,24 @@ int dlopen_libarchive(void) {
                         &libarchive_dl,
                         "libarchive.so.13",
                         LOG_DEBUG,
+                        DLSYM_ARG(archive_entry_filetype),
                         DLSYM_ARG(archive_entry_free),
+                        DLSYM_ARG(archive_entry_gid),
+#if HAVE_LIBARCHIVE_UID_IS_SET
+                        DLSYM_ARG(archive_entry_gid_is_set),
+#endif
+                        DLSYM_ARG(archive_entry_hardlink),
+#if HAVE_LIBARCHIVE_HARDLINK_IS_SET
+                        DLSYM_ARG(archive_entry_hardlink_is_set),
+#endif
+                        DLSYM_ARG(archive_entry_mode),
+                        DLSYM_ARG(archive_entry_mtime),
+                        DLSYM_ARG(archive_entry_mtime_is_set),
+                        DLSYM_ARG(archive_entry_mtime_nsec),
                         DLSYM_ARG(archive_entry_new),
+                        DLSYM_ARG(archive_entry_pathname),
+                        DLSYM_ARG(archive_entry_rdevmajor),
+                        DLSYM_ARG(archive_entry_rdevminor),
                         DLSYM_ARG(archive_entry_set_ctime),
                         DLSYM_ARG(archive_entry_set_filetype),
                         DLSYM_ARG(archive_entry_set_gid),
@@ -54,7 +100,21 @@ int dlopen_libarchive(void) {
                         DLSYM_ARG(archive_entry_set_size),
                         DLSYM_ARG(archive_entry_set_symlink),
                         DLSYM_ARG(archive_entry_set_uid),
+                        DLSYM_ARG(archive_entry_symlink),
+                        DLSYM_ARG(archive_entry_uid),
+#if HAVE_LIBARCHIVE_UID_IS_SET
+                        DLSYM_ARG(archive_entry_uid_is_set),
+#endif
+                        DLSYM_ARG(archive_entry_xattr_next),
+                        DLSYM_ARG(archive_entry_xattr_reset),
                         DLSYM_ARG(archive_error_string),
+                        DLSYM_ARG(archive_read_data_into_fd),
+                        DLSYM_ARG(archive_read_free),
+                        DLSYM_ARG(archive_read_new),
+                        DLSYM_ARG(archive_read_next_header),
+                        DLSYM_ARG(archive_read_open_fd),
+                        DLSYM_ARG(archive_read_support_format_cpio),
+                        DLSYM_ARG(archive_read_support_format_tar),
                         DLSYM_ARG(archive_write_close),
                         DLSYM_ARG(archive_write_data),
                         DLSYM_ARG(archive_write_free),
@@ -63,7 +123,18 @@ int dlopen_libarchive(void) {
                         DLSYM_ARG(archive_write_open_FILE),
                         DLSYM_ARG(archive_write_open_fd),
                         DLSYM_ARG(archive_write_set_format_filter_by_ext),
-                        DLSYM_ARG(archive_write_set_format_gnutar));
+                        DLSYM_ARG(archive_write_set_format_gnutar)
+        );
 }
 
+/* libarchive uses its own AE_IF* file type macros. They happen to be defined the same way as the Linux
+ * S_IF* ones, and we'd like to rely on that equivalence. Verify it at compile time first though, once
+ * per file type. */
+assert_cc(S_IFDIR == AE_IFDIR);
+assert_cc(S_IFREG == AE_IFREG);
+assert_cc(S_IFLNK == AE_IFLNK);
+assert_cc(S_IFBLK == AE_IFBLK);
+assert_cc(S_IFCHR == AE_IFCHR);
+assert_cc(S_IFIFO == AE_IFIFO);
+assert_cc(S_IFSOCK == AE_IFSOCK);
+
 #endif
index aeda1ecb4cbb0c0860009dd630098992ae03e6b1..5eed91db0b3fcd57cb466a3092b0e45bb88a3224 100644 (file)
@@ -9,8 +9,18 @@
 
 #include "dlfcn-util.h"
 
+extern DLSYM_PROTOTYPE(archive_entry_filetype);
 extern DLSYM_PROTOTYPE(archive_entry_free);
+extern DLSYM_PROTOTYPE(archive_entry_gid);
+extern DLSYM_PROTOTYPE(archive_entry_hardlink);
+extern DLSYM_PROTOTYPE(archive_entry_mode);
+extern DLSYM_PROTOTYPE(archive_entry_mtime);
+extern DLSYM_PROTOTYPE(archive_entry_mtime_is_set);
+extern DLSYM_PROTOTYPE(archive_entry_mtime_nsec);
 extern DLSYM_PROTOTYPE(archive_entry_new);
+extern DLSYM_PROTOTYPE(archive_entry_pathname);
+extern DLSYM_PROTOTYPE(archive_entry_rdevmajor);
+extern DLSYM_PROTOTYPE(archive_entry_rdevminor);
 extern DLSYM_PROTOTYPE(archive_entry_set_ctime);
 extern DLSYM_PROTOTYPE(archive_entry_set_filetype);
 extern DLSYM_PROTOTYPE(archive_entry_set_gid);
@@ -19,10 +29,21 @@ extern DLSYM_PROTOTYPE(archive_entry_set_pathname);
 extern DLSYM_PROTOTYPE(archive_entry_set_perm);
 extern DLSYM_PROTOTYPE(archive_entry_set_rdevmajor);
 extern DLSYM_PROTOTYPE(archive_entry_set_rdevminor);
-extern DLSYM_PROTOTYPE(archive_entry_set_symlink);
 extern DLSYM_PROTOTYPE(archive_entry_set_size);
+extern DLSYM_PROTOTYPE(archive_entry_set_symlink);
 extern DLSYM_PROTOTYPE(archive_entry_set_uid);
+extern DLSYM_PROTOTYPE(archive_entry_symlink);
+extern DLSYM_PROTOTYPE(archive_entry_uid);
+extern DLSYM_PROTOTYPE(archive_entry_xattr_next);
+extern DLSYM_PROTOTYPE(archive_entry_xattr_reset);
 extern DLSYM_PROTOTYPE(archive_error_string);
+extern DLSYM_PROTOTYPE(archive_read_data_into_fd);
+extern DLSYM_PROTOTYPE(archive_read_free);
+extern DLSYM_PROTOTYPE(archive_read_new);
+extern DLSYM_PROTOTYPE(archive_read_next_header);
+extern DLSYM_PROTOTYPE(archive_read_open_fd);
+extern DLSYM_PROTOTYPE(archive_read_support_format_cpio);
+extern DLSYM_PROTOTYPE(archive_read_support_format_tar);
 extern DLSYM_PROTOTYPE(archive_write_close);
 extern DLSYM_PROTOTYPE(archive_write_data);
 extern DLSYM_PROTOTYPE(archive_write_free);
@@ -33,10 +54,32 @@ extern DLSYM_PROTOTYPE(archive_write_open_fd);
 extern DLSYM_PROTOTYPE(archive_write_set_format_filter_by_ext);
 extern DLSYM_PROTOTYPE(archive_write_set_format_gnutar);
 
+#if HAVE_LIBARCHIVE_UID_IS_SET
+extern DLSYM_PROTOTYPE(archive_entry_gid_is_set);
+extern DLSYM_PROTOTYPE(archive_entry_uid_is_set);
+#else
+#include "user-util.h"
+/* Fallback used when the build did not detect archive_entry_uid_is_set()/_gid_is_set() in libarchive:
+ * treat the GID as "set" whenever the value stored in the entry is a valid GID. */
+static inline int sym_archive_entry_gid_is_set(struct archive_entry *e) {
+        gid_t entry_gid = sym_archive_entry_gid(e);
+
+        return gid_is_valid(entry_gid);
+}
+/* Fallback used when the build did not detect archive_entry_uid_is_set() in libarchive: treat the UID
+ * as "set" whenever the value stored in the entry is a valid UID. */
+static inline int sym_archive_entry_uid_is_set(struct archive_entry *e) {
+        uid_t entry_uid = sym_archive_entry_uid(e);
+
+        return uid_is_valid(entry_uid);
+}
+#endif
+
+#if HAVE_LIBARCHIVE_HARDLINK_IS_SET
+extern DLSYM_PROTOTYPE(archive_entry_hardlink_is_set);
+#else
+/* Fallback used when the build did not detect archive_entry_hardlink_is_set() in libarchive: a hardlink
+ * counts as "set" if the entry returns a non-NULL hardlink target. */
+static inline int sym_archive_entry_hardlink_is_set(struct archive_entry *e) {
+        return sym_archive_entry_hardlink(e) != NULL;
+}
+#endif
+
 int dlopen_libarchive(void);
 
 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct archive_entry*, sym_archive_entry_free, NULL);
 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct archive*, sym_archive_write_free, NULL);
+DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(struct archive*, sym_archive_read_free, NULL);
 
 #else
 
index 6e3e79eb2e6013fdfad904acf547f6ee7e5e8fc7..023ca48fffb9607e4b00248c84fdf4dc0e9d8f45 100644 (file)
@@ -177,6 +177,7 @@ shared_sources = files(
         'socket-netlink.c',
         'specifier.c',
         'switch-root.c',
+        'tar-util.c',
         'tmpfile-util-label.c',
         'tomoyo-util.c',
         'tpm2-util.c',
diff --git a/src/shared/tar-util.c b/src/shared/tar-util.c
new file mode 100644 (file)
index 0000000..6eb3a2b
--- /dev/null
@@ -0,0 +1,686 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "errno-util.h"
+#include "log.h"
+#include "tar-util.h"
+
+#if HAVE_LIBARCHIVE
+#include <sys/sysmacros.h>
+
+#include "alloc-util.h"
+#include "chase.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "iovec-util.h"
+#include "libarchive-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+#define DEPTH_MAX 128U
+
+/* One extended attribute queued for an inode. Both members are owned by the structure and are released
+ * by xattr_done(). */
+typedef struct XAttr {
+        char *name;             /* attribute name, released with free() */
+        struct iovec data;      /* attribute value, released with iovec_done() */
+} XAttr;
+
+/* Tracks an inode that is kept open while unpacking, together with the metadata that is only applied
+ * once we are done with it (see open_inode_finalize()). */
+typedef struct OpenInode {
+        int fd;                 /* fd of the inode; only closed by open_inode_done() if 'path' is set */
+        char *path;             /* path used in log messages; NULL for the root inode */
+
+        /* File properties to apply when we are done with the inode, i.e. right before closing it */
+        mode_t filetype;
+        mode_t mode;            /* MODE_INVALID means: do not touch the access mode */
+        struct timespec mtime;  /* tv_nsec == UTIME_OMIT means: do not touch the mtime */
+        uid_t uid;              /* only applied if valid */
+        gid_t gid;              /* only applied if valid */
+        XAttr *xattr;           /* array of n_xattr extended attributes to set last */
+        size_t n_xattr;
+} OpenInode;
+
+/* Releases the memory owned by a single XAttr (value and name). The structure itself is not freed. */
+static void xattr_done(XAttr *xa) {
+        assert(xa);
+
+        iovec_done(&xa->data);
+        free(xa->name);
+}
+
+/* Releases an array of n XAttr entries: first the contents of each element, then the array itself.
+ * A NULL array is accepted as long as n is zero. */
+static void xattr_done_many(XAttr *xa, size_t n) {
+        assert(xa || n == 0);
+
+        for (size_t idx = 0; idx < n; idx++)
+                xattr_done(&xa[idx]);
+
+        free(xa);
+}
+
+/* Releases the resources attached to an OpenInode: the queued xattrs always, the fd and the path only
+ * if a path is set. */
+static void open_inode_done(OpenInode *of) {
+        assert(of);
+
+        xattr_done_many(of->xattr, of->n_xattr);
+
+        /* The root inode is stored with a NULL path. Its fd is owned by the caller, hence must not be
+         * closed here. */
+        if (!of->path)
+                return;
+
+        of->path = mfree(of->path);
+        of->fd = safe_close(of->fd);
+}
+
+/* Releases an array of n OpenInode entries without applying any pending metadata, then frees the
+ * array itself. A NULL array is accepted as long as n is zero. */
+static void open_inode_done_many(OpenInode *array, size_t n) {
+        assert(array || n == 0);
+
+        for (size_t idx = 0; idx < n; idx++)
+                open_inode_done(&array[idx]);
+
+        free(array);
+}
+
+/* Applies the deferred metadata recorded in an OpenInode to the inode behind of->fd, in this order:
+ * ownership and access mode, mtime, extended attributes. Afterwards the entry is released via
+ * open_inode_done(), regardless of whether applying the metadata worked. Entries without a valid fd
+ * are only released.
+ *
+ * Every failing step is logged and does not stop the remaining steps. Returns 0 if everything worked,
+ * otherwise a negative errno-style value as collected by RET_GATHER(). */
+static int open_inode_finalize(OpenInode *of) {
+        int r = 0;
+
+        assert(of);
+
+        if (of->fd >= 0)  {
+                int k;
+
+                /* The root inode is stored with a NULL path (see open_inode_done()), hence never pass
+                 * of->path to a "%s" format directly, but go through strna() to get a printable
+                 * placeholder. */
+                const char *label = strna(of->path);
+
+                /* We adjust the UID/GID right before the mode, since doing this might affect the mode (drops
+                 * suid/sgid bits).
+                 *
+                 * We adjust the mode only when leaving a dir, because if we are unpriv we might lose the
+                 * ability to enter it once we do this. */
+
+                if (uid_is_valid(of->uid) || gid_is_valid(of->gid) || of->mode != MODE_INVALID) {
+                        k = fchmod_and_chown_with_fallback(of->fd, /* path= */ NULL, of->mode, of->uid, of->gid);
+                        if (k < 0)
+                                RET_GATHER(r, log_error_errno(k, "Failed to adjust ownership/mode of '%s': %m", label));
+                }
+
+                /* We also adjust the mtime only after leaving a dir, since it might otherwise change again
+                 * because we make modifications inside it */
+                if (of->mtime.tv_nsec != UTIME_OMIT) {
+                        k = futimens_opath(of->fd, (const struct timespec[2]) {
+                                        { .tv_nsec = UTIME_OMIT },
+                                        of->mtime,
+                                });
+                        if (k < 0)
+                                RET_GATHER(r, log_error_errno(k, "Failed to adjust mtime of '%s': %m", label));
+                }
+
+                /* Setting certain xattrs might cause us to lose access to the inode, hence set this last */
+                FOREACH_ARRAY(i, of->xattr, of->n_xattr) {
+                        k = xsetxattr_full(
+                                        of->fd,
+                                        /* path= */ NULL,
+                                        AT_EMPTY_PATH,
+                                        i->name,
+                                        i->data.iov_base,
+                                        i->data.iov_len,
+                                        /* xattr_flags= */ 0);
+                        if (k < 0)
+                                RET_GATHER(r, log_error_errno(k, "Failed to set xattr '%s' of '%s': %m", i->name, label));
+                }
+        }
+
+        open_inode_done(of); /* free this item even on failure */
+        return r;
+}
+
+/* Finalizes all entries of an OpenInode array via open_inode_finalize(), then frees the array and
+ * resets *array to NULL and *n to 0. Returns 0 on success, or a negative errno-style value as
+ * collected by RET_GATHER() if finalizing any entry failed. */
+static int open_inode_finalize_many(OpenInode **array, size_t *n) {
+        int r = 0;
+
+        assert(array);
+        assert(n);
+        assert(*array || *n == 0);
+
+        /* Walk from the end towards the start, so that the innermost inode is adjusted first */
+        size_t remaining = *n;
+        while (remaining > 0) {
+                remaining--;
+                RET_GATHER(r, open_inode_finalize(&(*array)[remaining]));
+        }
+
+        *n = 0;
+        *array = mfree(*array);
+        return r;
+}
+
+/* Unpacks the data of the current archive entry into a regular file 'filename' below parent_fd. The
+ * data is written into a temporary file first, which is then linked into place with
+ * LINK_TMPFILE_REPLACE. 'path' is only used in log messages.
+ *
+ * Returns the fd of the new file on success, a negative errno-style value on failure (in which case a
+ * named temporary file, if there was one, is removed again). */
+static int archive_unpack_regular(
+                struct archive *a,
+                struct archive_entry *entry,
+                int parent_fd,
+                const char *filename,
+                const char *path) {
+
+        off_t end;
+        int r;
+
+        assert(a);
+        assert(entry);
+        assert(parent_fd >= 0);
+        assert(filename);
+        assert(path);
+
+        _cleanup_free_ char *tmp_name = NULL;
+        _cleanup_close_ int file_fd = open_tmpfile_linkable_at(parent_fd, filename, O_CLOEXEC|O_WRONLY, &tmp_name);
+        if (file_fd < 0)
+                return log_error_errno(file_fd, "Failed to create regular file '%s': %m", path);
+
+        if (sym_archive_read_data_into_fd(a, file_fd) != ARCHIVE_OK) {
+                r = log_error_errno(
+                                SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                "Failed to unpack regular file '%s': %s", path, sym_archive_error_string(a));
+                goto undo;
+        }
+
+        /* For sparse files libarchive's archive_read_data_into_fd() does not materialize a trailing
+         * hole. Set the file size to the current file offset explicitly to account for that. */
+        end = lseek(file_fd, 0, SEEK_CUR);
+        if (end < 0) {
+                r = log_error_errno(errno, "Failed to determine current file position in '%s': %m", path);
+                goto undo;
+        }
+
+        if (ftruncate(file_fd, end) < 0) {
+                r = log_error_errno(errno, "Failed to truncate regular file '%s' to %" PRIu64 ": %m", path, (uint64_t) end);
+                goto undo;
+        }
+
+        r = link_tmpfile_at(file_fd, parent_fd, tmp_name, filename, LINK_TMPFILE_REPLACE);
+        if (r < 0) {
+                log_error_errno(r, "Failed to install regular file '%s': %m", path);
+                goto undo;
+        }
+
+        return TAKE_FD(file_fd);
+
+undo:
+        /* Only a named temporary file needs to be removed explicitly */
+        if (tmp_name)
+                (void) unlinkat(parent_fd, tmp_name, /* flags= */ 0);
+
+        return r;
+}
+
+/* Creates (or reuses) the directory 'filename' below parent_fd and opens it. 'path' is only used in
+ * log messages. Returns the directory fd on success, a negative errno-style value on failure. */
+static int archive_unpack_directory(
+                struct archive *a,
+                struct archive_entry *entry,
+                int parent_fd,
+                const char *filename,
+                const char *path) {
+
+        int dir_fd;
+
+        assert(a);
+        assert(entry);
+        assert(parent_fd >= 0);
+        assert(filename);
+        assert(path);
+
+        /* Unlike the other inode types, directories are not replaced atomically: they are more of a
+         * "shared" concept, and existing inodes are reused where possible. The inode is created with
+         * mode 0700, so that we have full access to it while others have none. The final mode is applied
+         * right before the inode is closed. */
+        dir_fd = open_mkdir_at(parent_fd, filename, O_CLOEXEC, 0700);
+        if (dir_fd < 0)
+                return log_error_errno(dir_fd, "Failed to create directory '%s': %m", path);
+
+        return dir_fd;
+}
+
+/* Creates the symlink 'filename' below parent_fd, pointing to the target stored in the archive entry,
+ * then opens the new inode with O_PATH|O_NOFOLLOW and verifies that it really is a symlink. 'path' is
+ * only used in log messages.
+ *
+ * Returns the O_PATH fd of the symlink on success, a negative errno-style value on failure. */
+static int archive_unpack_symlink(
+                struct archive *a,
+                struct archive_entry *entry,
+                int parent_fd,
+                const char *filename,
+                const char *path) {
+
+        int r;
+
+        assert(a);
+        assert(entry);
+        assert(parent_fd >= 0);
+        assert(filename);
+        assert(path);
+
+        const char *target = sym_archive_entry_symlink(entry);
+        if (!target)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "Failed to get symlink target for '%s': %m", path);
+
+        r = symlinkat_atomic_full(target, parent_fd, filename, /* flags= */ 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create symlink '%s' → '%s': %m", path, target);
+
+        /* Open what we just created without following it, so that the caller gets an fd referring to
+         * the symlink inode itself. */
+        _cleanup_close_ int fd = openat(parent_fd, filename, O_CLOEXEC|O_PATH|O_NOFOLLOW);
+        if (fd < 0)
+                return log_error_errno(errno, "Failed to open symlink '%s' we just created: %m", path);
+
+        /* Make sure the inode we opened is in fact a symlink */
+        r = fd_verify_symlink(fd);
+        if (r < 0)
+                return log_error_errno(r, "Symlink '%s' we just created is not a symlink: %m", path);
+
+        return TAKE_FD(fd);
+}
+
+/* Creates a special inode of type 'filetype' named 'filename' below parent_fd, then opens it with
+ * O_PATH|O_NOFOLLOW and verifies that the resulting inode has the requested type. For character and
+ * block devices the device numbers are taken from the archive entry, otherwise 0:0 is used. 'path' is
+ * only used in log messages.
+ *
+ * Returns the O_PATH fd of the new inode on success, a negative errno-style value on failure. */
+static int archive_unpack_special_inode(
+                struct archive *a,
+                struct archive_entry *entry,
+                int parent_fd,
+                const char *filename,
+                const char *path,
+                mode_t filetype) {
+
+        dev_t dev_major = 0, dev_minor = 0;
+        struct stat st;
+        int r;
+
+        assert(a);
+        assert(entry);
+        assert(parent_fd >= 0);
+        assert(filename);
+        assert(path);
+
+        if (IN_SET(filetype, S_IFCHR, S_IFBLK)) {
+                dev_major = sym_archive_entry_rdevmajor(entry);
+                dev_minor = sym_archive_entry_rdevminor(entry);
+        }
+
+        /* The node is created with access mode 0000 */
+        r = mknodat_atomic(parent_fd, filename, filetype | 0000, makedev(dev_major, dev_minor));
+        if (r < 0)
+                return log_error_errno(r, "Failed to create special node '%s': %m", path);
+
+        _cleanup_close_ int node_fd = openat(parent_fd, filename, O_CLOEXEC|O_PATH|O_NOFOLLOW);
+        if (node_fd < 0)
+                return log_error_errno(errno, "Failed to open special node '%s' we just created: %m", path);
+
+        if (fstat(node_fd, &st) < 0)
+                return log_error_errno(errno, "Failed to fstat() '%s': %m", path);
+
+        /* Refuse if the inode we opened is of a different type than the one requested */
+        if ((st.st_mode & S_IFMT) != (filetype & S_IFMT))
+                return log_error_errno(
+                                SYNTHETIC_ERRNO(ENODEV),
+                                "Special node '%s' we just created is of a wrong type: %m", path);
+
+        return TAKE_FD(node_fd);
+}
+
+/* Returns the path name of an archive entry in *ret, with one leading "./" removed if present. If
+ * nothing is left after that, *ret is set to NULL, which denotes the root inode. Returns 0 on success,
+ * and -EBADMSG if the entry has no path name or the path does not pass path_is_safe(). */
+static int archive_entry_pathname_safe(struct archive_entry *entry, const char **ret) {
+        assert(entry);
+        assert(ret);
+
+        const char *raw = sym_archive_entry_pathname(entry);
+        if (!raw)
+                return -EBADMSG;
+
+        const char *stripped = startswith(raw, "./");
+        if (!stripped)
+                stripped = raw;
+
+        if (isempty(stripped)) {
+                /* Nothing left: this is the root inode */
+                *ret = NULL;
+                return 0;
+        }
+
+        if (!path_is_safe(stripped))
+                return -EBADMSG;
+
+        *ret = stripped;
+        return 0;
+}
+
+/* Copies metadata of an archive entry into the caller's variables. All output parameters are optional
+ * and may be NULL:
+ *
+ *   filetype, mode  - always written when requested
+ *   mtime, uid, gid - only written if the entry has the respective field set, left untouched otherwise
+ *   xa, n_xa        - extended attributes are appended to the array *xa (currently *n_xa entries,
+ *                     grown with GREEDY_REALLOC()); both must be passed for xattrs to be collected
+ *
+ * ACL xattrs are always skipped; SELinux xattrs are skipped unless TAR_SELINUX is set in flags.
+ *
+ * Returns 0 on success, or the result of log_oom() if memory allocation fails. In the latter case the
+ * attributes appended so far remain in *xa. */
+static int archive_entry_read_stat(
+                struct archive_entry *entry,
+                mode_t *filetype,
+                mode_t *mode,
+                struct timespec *mtime,
+                uid_t *uid,
+                gid_t *gid,
+                XAttr **xa,
+                size_t *n_xa,
+                TarFlags flags) {
+
+        assert(entry);
+
+        if (filetype)
+                *filetype = sym_archive_entry_filetype(entry);
+
+        if (mode)
+                *mode = sym_archive_entry_mode(entry);
+
+        if (mtime && sym_archive_entry_mtime_is_set(entry))
+                *mtime = (struct timespec) {
+                        sym_archive_entry_mtime(entry),
+                        sym_archive_entry_mtime_nsec(entry),
+                };
+        if (uid && sym_archive_entry_uid_is_set(entry))
+                *uid = sym_archive_entry_uid(entry);
+        if (gid && sym_archive_entry_gid_is_set(entry))
+                *gid = sym_archive_entry_gid(entry);
+
+        /* Like all other outputs the xattr array is optional. Without both the array and its counter
+         * there is nowhere to store the attributes, hence do not iterate them at all. */
+        if (!xa || !n_xa)
+                return 0;
+
+        (void) sym_archive_entry_xattr_reset(entry);
+        for (;;) {
+                const char *name = NULL;
+                struct iovec data = {};
+                (void) sym_archive_entry_xattr_next(entry, &name, (const void**) &data.iov_base, &data.iov_len);
+                if (!name) /* no further attribute */
+                        break;
+
+                if (xattr_is_acl(name))
+                        continue;
+
+                if (!FLAGS_SET(flags, TAR_SELINUX) && xattr_is_selinux(name))
+                        continue;
+
+                /* Take private copies of name and value, then append them to the caller's array */
+                _cleanup_free_ char *n = strdup(name);
+                if (!n)
+                        return log_oom();
+
+                _cleanup_(iovec_done) struct iovec iovec_copy = {};
+                if (!iovec_memdup(&data, &iovec_copy))
+                        return log_oom();
+
+                if (!GREEDY_REALLOC(*xa, *n_xa+1))
+                        return log_oom();
+
+                (*xa)[(*n_xa)++] = (XAttr) {
+                        .name = TAKE_PTR(n),
+                        .data = TAKE_STRUCT(iovec_copy),
+                };
+        }
+
+        return 0;
+}
+
+/* Unpacks the archive that can be read from input_fd into the directory tree_fd refers to.
+ *
+ * The archive is parsed with libarchive (tar and cpio formats are enabled). While iterating over the
+ * entries a stack of currently open inodes is maintained: element 0 is the root directory (tree_fd, with
+ * .path left NULL), each further element is one path component below its predecessor. Whenever an entry
+ * leaves a directory that is on the stack, the inodes no longer needed are finalized innermost first.
+ * Ownership, access mode, mtime and xattrs read from the archive are only recorded on the stack element
+ * when the inode is created; they are meant to be applied when the inode is finalized.
+ *
+ * Path components not explicitly listed in the archive are created as directories with mode 0700 and no
+ * further metadata. Hardlink entries are resolved strictly below tree_fd, without following symlinks.
+ *
+ * flags is passed on to archive_entry_read_stat() (see TAR_SELINUX).
+ *
+ * Returns 0 on success, a negative error otherwise (after logging). */
+int tar_x(int input_fd, int tree_fd, TarFlags flags) {
+        int ar, r;
+
+        assert(input_fd >= 0);
+        assert(tree_fd >= 0);
+
+        _cleanup_(sym_archive_read_freep) struct archive *a = NULL;
+        a = sym_archive_read_new();
+        if (!a)
+                return log_oom();
+
+        ar = sym_archive_read_support_format_tar(a);
+        if (ar != ARCHIVE_OK)
+                return log_error_errno(
+                                SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                "Failed to enable tar unpacking: %s", sym_archive_error_string(a));
+
+        ar = sym_archive_read_support_format_cpio(a);
+        if (ar != ARCHIVE_OK)
+                return log_error_errno(
+                                SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                "Failed to enable cpio unpacking: %s", sym_archive_error_string(a));
+
+        ar = sym_archive_read_open_fd(a, input_fd, 64 * 1024);
+        if (ar != ARCHIVE_OK)
+                return log_error_errno(
+                                SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                "Failed to initialize archive context: %s", sym_archive_error_string(a));
+
+        OpenInode *open_inodes = NULL;
+        if (!GREEDY_REALLOC(open_inodes, 2)) /* the minimal case is a single file in an archive, which would
+                                              * mean two inodes, the root dir inode, and the regular file
+                                              * inode, hence start with 2 here */
+                return log_oom();
+
+        size_t n_open_inodes = 0;
+        CLEANUP_ARRAY(open_inodes, n_open_inodes, open_inode_done_many);
+
+        /* Fill in the root inode. (Note: we leave the .path field as NULL to mark it as root inode.) */
+        open_inodes[0] = (OpenInode) {
+                .fd = tree_fd,
+                .filetype = S_IFDIR,
+                .mode = MODE_INVALID,
+                .mtime = { .tv_nsec = UTIME_OMIT },
+                .uid = UID_INVALID,
+                .gid = GID_INVALID,
+        };
+        n_open_inodes = 1;
+
+        for (;;) {
+                struct archive_entry *entry = NULL;
+
+                ar = sym_archive_read_next_header(a, &entry);
+                if (ar == ARCHIVE_EOF)
+                        break;
+                if (ar != ARCHIVE_OK)
+                        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse archive: %s", sym_archive_error_string(a));
+
+                const char *p = NULL;
+                r = archive_entry_pathname_safe(entry, &p);
+                if (r < 0)
+                        return log_error_errno(r, "Invalid path name in entry, refusing.");
+
+                if (!p) {
+                        /* This is the root inode: only record its metadata on stack element 0, there is
+                         * nothing to create */
+                        r = archive_entry_read_stat(
+                                        entry,
+                                        &open_inodes[0].filetype,
+                                        &open_inodes[0].mode,
+                                        &open_inodes[0].mtime,
+                                        &open_inodes[0].uid,
+                                        &open_inodes[0].gid,
+                                        &open_inodes[0].xattr,
+                                        &open_inodes[0].n_xattr,
+                                        flags);
+                        if (r < 0)
+                                return r;
+                        if (open_inodes[0].filetype != S_IFDIR)
+                                return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Archives root inode is not a directory, refusing.");
+
+                        continue;
+                }
+
+                /* Find common prefix with path elements we were looking at so far. After the loop, i is
+                 * the number of stack elements to keep and rest is the part of p below the last of them. */
+                const char *rest = p;
+                size_t i;
+                for (i = 1; i < n_open_inodes; i++) {
+                        const char *e = path_startswith(p, open_inodes[i].path);
+                        if (isempty(e))
+                                break;
+
+                        rest = e;
+                }
+
+                /* Finalize all inodes we won't need anymore now (go backwards, i.e. close inner fds first) */
+                while (n_open_inodes > i) {
+                        r = open_inode_finalize(open_inodes + n_open_inodes - 1);
+                        if (r < 0)
+                                return r;
+
+                        n_open_inodes--;
+                }
+
+                /* And now create all remaining components */
+                for (;;) {
+                        const char *element;
+
+                        r = path_find_first_component(&rest, /* accept_dot_dot= */ false, &element);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to extract next element from path: %m");
+                        if (r == 0)
+                                break;
+
+                        /* Safety check, before we add another level to our stack */
+                        if (n_open_inodes >= DEPTH_MAX)
+                                return log_error_errno(
+                                                SYNTHETIC_ERRNO(E2BIG),
+                                                "Archive's directory tree nested too deeply, refusing to descend more than %u levels.", DEPTH_MAX);
+
+                        _cleanup_free_ char *e = strndup(element, r);
+                        if (!e)
+                                return log_oom();
+
+                        /* The new inode is created below whatever is currently on top of the stack */
+                        assert(n_open_inodes > 0);
+                        int parent_fd = open_inodes[n_open_inodes-1].fd;
+                        const char *parent_path = open_inodes[n_open_inodes-1].path;
+
+                        /* j is the full path (relative to the root) of the new inode, used for logging
+                         * and stored on the stack for the prefix matching above */
+                        _cleanup_free_ char *j = parent_path ? path_join(parent_path, e) : strdup(e);
+                        if (!j)
+                                return log_oom();
+
+                        if (!GREEDY_REALLOC(open_inodes, n_open_inodes+1))
+                                return log_oom();
+
+                        _cleanup_close_ int fd = -EBADF;
+                        mode_t filetype = MODE_INVALID;
+                        mode_t mode = MODE_INVALID;
+                        uid_t uid = UID_INVALID;
+                        gid_t gid = GID_INVALID;
+                        struct timespec mtime = { .tv_nsec = UTIME_OMIT };
+                        XAttr *xa = NULL;
+                        size_t n_xa = 0;
+                        CLEANUP_ARRAY(xa, n_xa, xattr_done_many);
+
+                        if (isempty(rest)) {
+                                /* This is the final node in the path, create it */
+
+                                if (sym_archive_entry_hardlink_is_set(entry)) {
+                                        /* If this is a hardlink, act on it */
+                                        const char *h = sym_archive_entry_hardlink(entry);
+                                        if (!h)
+                                                return log_error_errno(
+                                                                SYNTHETIC_ERRNO(EBADMSG),
+                                                                "No hardlink target in hardlink entry, refusing.");
+
+                                        /* libarchive prefixes all paths with "./", let's chop that off */
+                                        const char *target = startswith(h, "./") ?: h;
+                                        if (!path_is_safe(target))
+                                                return log_error_errno(
+                                                                SYNTHETIC_ERRNO(EBADMSG),
+                                                                "Invalid hardlink path name '%s' in entry, refusing.", target);
+
+                                        _cleanup_close_ int target_fd = -EBADF;
+                                        r = chaseat(tree_fd, target, CHASE_PROHIBIT_SYMLINKS|CHASE_AT_RESOLVE_IN_ROOT, /* ret_path= */ NULL, &target_fd);
+                                        if (r < 0)
+                                                return log_error_errno(
+                                                                r,
+                                                                "Failed to find inode '%s' which shall be hardlinked as '%s': %m", target, j);
+
+                                        struct stat verify_st;
+                                        if (fstat(target_fd, &verify_st) < 0)
+                                                return log_error_errno(errno, "Failed to stat inode '%s': %m", target);
+
+                                        /* Refuse hardlinking directories early. (No %m here: the error
+                                         * code is synthetic, its strerror() text would only mislead.) */
+                                        if (!inode_type_can_hardlink(verify_st.st_mode))
+                                                return log_error_errno(
+                                                                SYNTHETIC_ERRNO(EBADF),
+                                                                "Refusing to hardlink inode '%s' of type '%s'.", target, inode_type_to_string(verify_st.st_mode));
+
+                                        if (linkat(target_fd, "", parent_fd, e, AT_EMPTY_PATH) < 0) {
+                                                if (errno != ENOENT)
+                                                        return log_error_errno(
+                                                                        errno,
+                                                                        "Failed to hardlink inode '%s' as '%s': %m", target, j);
+
+                                                /* To be able to link by inode fd we might have needed
+                                                 * CAP_DAC_READ_SEARCH which we lacked. Let's retry with the
+                                                 * parent. Yes, glibc/kernel report this as ENOENT. Kinda
+                                                 * annoying. */
+
+                                                _cleanup_close_ int target_parent_fd = -EBADF;
+                                                _cleanup_free_ char *target_filename = NULL;
+                                                r = chaseat(tree_fd, target, CHASE_PROHIBIT_SYMLINKS|CHASE_AT_RESOLVE_IN_ROOT|CHASE_PARENT|CHASE_EXTRACT_FILENAME, &target_filename, &target_parent_fd);
+                                                if (r < 0)
+                                                        return log_error_errno(
+                                                                        r,
+                                                                        "Failed to find inode '%s' which shall be hardlinked as '%s': %m", target, j);
+
+                                                if (linkat(target_parent_fd, target_filename, parent_fd, e, /* flags= */ 0) < 0)
+                                                        return log_error_errno(
+                                                                        errno,
+                                                                        "Failed to hardlink inode '%s' as '%s': %m", target, j);
+                                        }
+
+                                        /* A hardlink is not pushed onto the stack and gets no metadata
+                                         * of its own applied. */
+                                        continue;
+                                }
+
+                                r = archive_entry_read_stat(
+                                                entry,
+                                                &filetype,
+                                                &mode,
+                                                &mtime,
+                                                &uid,
+                                                &gid,
+                                                &xa,
+                                                &n_xa,
+                                                flags);
+                                if (r < 0)
+                                        return r;
+
+                                switch (filetype) {
+
+                                case S_IFREG:
+                                        fd = archive_unpack_regular(a, entry, parent_fd, e, j);
+                                        break;
+
+                                case S_IFDIR:
+                                        fd = archive_unpack_directory(a, entry, parent_fd, e, j);
+                                        break;
+
+                                case S_IFLNK:
+                                        fd = archive_unpack_symlink(a, entry, parent_fd, e, j);
+                                        break;
+
+                                case S_IFCHR:
+                                case S_IFBLK:
+                                case S_IFIFO:
+                                case S_IFSOCK:
+                                        fd = archive_unpack_special_inode(a, entry, parent_fd, e, j, filetype);
+                                        break;
+
+                                default:
+                                        return log_error_errno(
+                                                        SYNTHETIC_ERRNO(ENOTRECOVERABLE),
+                                                        "Unexpected file type %i of '%s', refusing.", (int) filetype, j);
+                                }
+                                if (fd < 0)
+                                        return fd;
+
+                        } else {
+                                /* This is some intermediary node in the path that we haven't opened yet. Create it with default attributes */
+                                fd = open_mkdir_at(parent_fd, e, O_CLOEXEC, 0700);
+                                if (fd < 0)
+                                        return log_error_errno(fd, "Failed to create directory '%s': %m", j);
+
+                                filetype = S_IFDIR;
+                        }
+
+                        /* Now store a reference to the inode we just created in our stack array. Note that
+                         * we have not applied file ownership, access mode, mtime here, we'll do that only
+                         * when we are finished with the inode, since we have to apply them *after* we are
+                         * fully done with the inode (i.e. after creating further inodes inside of dir inodes
+                         * for example), due to permission problems this might create or that the mtime
+                         * changes we do might still be affected by our changes. */
+                        open_inodes[n_open_inodes++] = (OpenInode) {
+                                .fd = TAKE_FD(fd),
+                                .path = TAKE_PTR(j),
+                                .filetype = filetype,
+                                .mode = mode,
+                                .mtime = mtime,
+                                .uid = uid,
+                                .gid = gid,
+                                .xattr = TAKE_PTR(xa),
+                                .n_xattr = n_xa,
+                        };
+
+                        /* Ownership of the xattr array moved to the stack element, reset the counter to
+                         * match the now-NULL pointer */
+                        n_xa = 0;
+                }
+        }
+
+        /* End of archive: finalize everything that is still on the stack, including the root inode */
+        r = open_inode_finalize_many(&open_inodes, &n_open_inodes);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+#else
+
+/* Variant of tar_x() from the #else branch (presumably compiled when libarchive support is disabled —
+ * the matching #if is not visible here). Checks the file descriptor arguments, logs an error and
+ * returns what log_error_errno() yields for a synthetic EOPNOTSUPP. flags is ignored. */
+int tar_x(int input_fd, int tree_fd, TarFlags flags) {
+        int r;
+
+        assert(input_fd >= 0);
+        assert(tree_fd >= 0);
+
+        r = log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libarchive support not available.");
+        return r;
+}
+
+#endif
diff --git a/src/shared/tar-util.h b/src/shared/tar-util.h
new file mode 100644 (file)
index 0000000..4fb00d9
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* Bit flags accepted by tar_x(). */
+typedef enum TarFlags {
+        /* Keep SELinux extended attributes found on archive entries; without this flag they are skipped */
+        TAR_SELINUX = 1 << 0,
+} TarFlags;
+
+/* Unpacks the archive readable from input_fd into the directory referenced by tree_fd. Both file
+ * descriptors must be valid (>= 0). Returns 0 on success, a negative error value on failure. */
+int tar_x(int input_fd, int tree_fd, TarFlags flags);