]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/execute.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / core / execute.c
index e2d84feb0dda13f26fff1037d4a1306fd34fa3fa..80b48191bd8889be82e3ab564a979c90d6063ebd 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
 /***
   This file is part of systemd.
 
 #include <unistd.h>
 #include <utmpx.h>
 
-#ifdef HAVE_PAM
+#if HAVE_PAM
 #include <security/pam_appl.h>
 #endif
 
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
 #include <selinux/selinux.h>
 #endif
 
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
 #include <seccomp.h>
 #endif
 
-#ifdef HAVE_APPARMOR
+#if HAVE_APPARMOR
 #include <sys/apparmor.h>
 #endif
 
 
 #include "af-list.h"
 #include "alloc-util.h"
-#ifdef HAVE_APPARMOR
+#if HAVE_APPARMOR
 #include "apparmor-util.h"
 #endif
 #include "async.h"
 #include "barrier.h"
 #include "cap-list.h"
 #include "capability-util.h"
+#include "chown-recursive.h"
 #include "def.h"
 #include "env-util.h"
 #include "errno-list.h"
@@ -76,6 +78,7 @@
 #include "glob-util.h"
 #include "io-util.h"
 #include "ioprio.h"
+#include "label.h"
 #include "log.h"
 #include "macro.h"
 #include "missing.h"
@@ -86,7 +89,7 @@
 #include "process-util.h"
 #include "rlimit-util.h"
 #include "rm-rf.h"
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
 #include "seccomp-util.h"
 #endif
 #include "securebits.h"
@@ -588,7 +591,7 @@ static int setup_output(
         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
                 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
                 if (r < 0) {
-                        log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
+                        log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
                         r = open_null_as(O_WRONLY, fileno);
                 } else {
                         struct stat st;
@@ -893,7 +896,7 @@ static int get_supplementary_groups(const ExecContext *c, const char *user,
                 keep_groups = true;
         }
 
-        if (!c->supplementary_groups)
+        if (strv_isempty(c->supplementary_groups))
                 return 0;
 
         /*
@@ -960,14 +963,11 @@ static int get_supplementary_groups(const ExecContext *c, const char *user,
         return 0;
 }
 
-static int enforce_groups(const ExecContext *context, gid_t gid,
-                          gid_t *supplementary_gids, int ngids) {
+static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {
         int r;
 
-        assert(context);
-
-        /* Handle SupplementaryGroups= even if it is empty */
-        if (context->supplementary_groups) {
+        /* Handle SupplementaryGroups= if it is not empty */
+        if (ngids > 0) {
                 r = maybe_setgroups(ngids, supplementary_gids);
                 if (r < 0)
                         return r;
@@ -1017,7 +1017,7 @@ static int enforce_user(const ExecContext *context, uid_t uid) {
         return 0;
 }
 
-#ifdef HAVE_PAM
+#if HAVE_PAM
 
 static int null_conv(
                 int num_msg,
@@ -1041,7 +1041,7 @@ static int setup_pam(
                 char ***env,
                 int fds[], unsigned n_fds) {
 
-#ifdef HAVE_PAM
+#if HAVE_PAM
 
         static const struct pam_conv conv = {
                 .conv = null_conv,
@@ -1291,7 +1291,7 @@ static bool context_has_syscall_filters(const ExecContext *c) {
         assert(c);
 
         return c->syscall_whitelist ||
-                !set_isempty(c->syscall_filter);
+                !hashmap_isempty(c->syscall_filter);
 }
 
 static bool context_has_no_new_privileges(const ExecContext *c) {
@@ -1316,16 +1316,14 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
                 c->lock_personality;
 }
 
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
 
 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
 
         if (is_seccomp_available())
                 return false;
 
-        log_open();
         log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
-        log_close();
         return true;
 }
 
@@ -1708,7 +1706,12 @@ static bool exec_needs_mount_namespace(
             !strv_isempty(context->inaccessible_paths))
                 return true;
 
-        if (context->n_bind_mounts > 0)
+        if (context->n_bind_mounts > 0 ||
+            !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
+            !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
+            !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
+            !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
+            !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
                 return true;
 
         if (context->mount_flags != 0)
@@ -1898,7 +1901,7 @@ static int setup_exec_directory(
                 ExecDirectoryType type,
                 int *exit_status) {
 
-        static const int exit_status_table[_EXEC_DIRECTORY_MAX] = {
+        static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
                 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
                 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
                 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
@@ -1910,7 +1913,7 @@ static int setup_exec_directory(
 
         assert(context);
         assert(params);
-        assert(type >= 0 && type < _EXEC_DIRECTORY_MAX);
+        assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
         assert(exit_status);
 
         if (!params->prefix[type])
@@ -1924,7 +1927,8 @@ static int setup_exec_directory(
         }
 
         STRV_FOREACH(rt, context->directories[type].paths) {
-                _cleanup_free_ char *p;
+                _cleanup_free_ char *p = NULL, *pp = NULL;
+                const char *effective;
 
                 p = strjoin(params->prefix[type], "/", *rt);
                 if (!p) {
@@ -1936,16 +1940,97 @@ static int setup_exec_directory(
                 if (r < 0)
                         goto fail;
 
-                r = mkdir_p_label(p, context->directories[type].mode);
-                if (r < 0)
+                if (context->dynamic_user &&
+                    !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
+                        _cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
+
+                        /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
+                         * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
+                         * whose UID is later on reused. To lock this down we use the same trick used by container
+                         * managers to prohibit host users to get access to files of the same UID in containers: we
+                         * place everything inside a directory that has an access mode of 0700 and is owned root:root,
+                         * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
+                         * to make this directory permeable for the service itself.
+                         *
+                         * Specifically: for a service which wants a special directory "foo/" we first create a
+                         * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
+                         * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
+                         * privileged host users can access "foo/" as usual, but unprivileged host users can't look
+                         * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
+                         * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
+                         * disabling the access boundary for the service and making sure it only gets access to the
+                         * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
+                         *
+                         * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
+                         * owned by the service itself.
+                         * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
+                         * files or sockets with other services. */
+
+                        private_root = strjoin(params->prefix[type], "/private");
+                        if (!private_root) {
+                                r = -ENOMEM;
+                                goto fail;
+                        }
+
+                        /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
+                        r = mkdir_safe_label(private_root, 0700, 0, 0, false);
+                        if (r < 0)
+                                goto fail;
+
+                        pp = strjoin(private_root, "/", *rt);
+                        if (!pp) {
+                                r = -ENOMEM;
+                                goto fail;
+                        }
+
+                        /* Create all directories between the configured directory and this private root, and mark them 0755 */
+                        r = mkdir_parents_label(pp, 0755);
+                        if (r < 0)
+                                goto fail;
+
+                        /* Finally, create the actual directory for the service */
+                        r = mkdir_label(pp, context->directories[type].mode);
+                        if (r < 0 && r != -EEXIST)
+                                goto fail;
+
+                        parent = dirname_malloc(p);
+                        if (!parent) {
+                                r = -ENOMEM;
+                                goto fail;
+                        }
+
+                        r = path_make_relative(parent, pp, &relative);
+                        if (r < 0)
+                                goto fail;
+
+                        /* And link it up from the original place */
+                        r = symlink_idempotent(relative, p);
+                        if (r < 0)
+                                goto fail;
+
+                        effective = pp;
+
+                } else {
+                        r = mkdir_label(p, context->directories[type].mode);
+                        if (r < 0 && r != -EEXIST)
+                                goto fail;
+
+                        effective = p;
+                }
+
+                /* First lock down the access mode */
+                if (chmod(effective, context->directories[type].mode) < 0) {
+                        r = -errno;
                         goto fail;
+                }
 
                 /* Don't change the owner of the configuration directory, as in the common case it is not written to by
                  * a service, and shall not be writable. */
                 if (type == EXEC_DIRECTORY_CONFIGURATION)
                         continue;
 
-                r = chmod_and_chown(p, context->directories[type].mode, uid, gid);
+                /* Then, change the ownership of the whole tree, if necessary */
+                r = path_chown_recursive(effective, uid, gid);
                 if (r < 0)
                         goto fail;
         }
@@ -1954,7 +2039,6 @@ static int setup_exec_directory(
 
 fail:
         *exit_status = exit_status_table[type];
-
         return r;
 }
 
@@ -1977,7 +2061,7 @@ static int setup_smack(
                 _cleanup_free_ char *exec_label = NULL;
 
                 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
-                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+                if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
                         return r;
 
                 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
@@ -1989,53 +2073,143 @@ static int setup_smack(
         return 0;
 }
 
-static int compile_read_write_paths(
+static int compile_bind_mounts(
                 const ExecContext *context,
                 const ExecParameters *params,
-                char ***ret) {
+                BindMount **ret_bind_mounts,
+                unsigned *ret_n_bind_mounts,
+                char ***ret_empty_directories) {
+
+        _cleanup_strv_free_ char **empty_directories = NULL;
+        BindMount *bind_mounts;
+        unsigned n, h = 0, i;
+        ExecDirectoryType t;
+        int r;
 
-        _cleanup_strv_free_ char **l = NULL;
-        char **rt;
-        ExecDirectoryType i;
+        assert(context);
+        assert(params);
+        assert(ret_bind_mounts);
+        assert(ret_n_bind_mounts);
+        assert(ret_empty_directories);
 
-        /* Compile the list of writable paths. This is the combination of
-         * the explicitly configured paths, plus all runtime directories. */
+        n = context->n_bind_mounts;
+        for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+                if (!params->prefix[t])
+                        continue;
 
-        if (strv_isempty(context->read_write_paths)) {
-                for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
-                        if (!strv_isempty(context->directories[i].paths))
-                                break;
+                n += strv_length(context->directories[t].paths);
+        }
 
-                if (i == _EXEC_DIRECTORY_MAX) {
-                        *ret = NULL; /* NOP if neither is set */
-                        return 0;
-                }
+        if (n <= 0) {
+                *ret_bind_mounts = NULL;
+                *ret_n_bind_mounts = 0;
+                *ret_empty_directories = NULL;
+                return 0;
         }
 
-        l = strv_copy(context->read_write_paths);
-        if (!l)
+        bind_mounts = new(BindMount, n);
+        if (!bind_mounts)
                 return -ENOMEM;
 
-        for (i = 0; i < _EXEC_DIRECTORY_MAX; i++) {
-                if (!params->prefix[i])
+        for (i = 0; i < context->n_bind_mounts; i++) {
+                BindMount *item = context->bind_mounts + i;
+                char *s, *d;
+
+                s = strdup(item->source);
+                if (!s) {
+                        r = -ENOMEM;
+                        goto finish;
+                }
+
+                d = strdup(item->destination);
+                if (!d) {
+                        free(s);
+                        r = -ENOMEM;
+                        goto finish;
+                }
+
+                bind_mounts[h++] = (BindMount) {
+                        .source = s,
+                        .destination = d,
+                        .read_only = item->read_only,
+                        .recursive = item->recursive,
+                        .ignore_enoent = item->ignore_enoent,
+                };
+        }
+
+        for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+                char **suffix;
+
+                if (!params->prefix[t])
                         continue;
 
-                STRV_FOREACH(rt, context->directories[i].paths) {
-                        char *s;
+                if (strv_isempty(context->directories[t].paths))
+                        continue;
 
-                        s = strjoin(params->prefix[i], "/", *rt);
-                        if (!s)
-                                return -ENOMEM;
+                if (context->dynamic_user &&
+                    !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
+                        char *private_root;
 
-                        if (strv_consume(&l, s) < 0)
-                                return -ENOMEM;
+                        /* So this is for a dynamic user, and we need to make sure the process can access its own
+                         * directory. For that we overmount the usually inaccessible "private" subdirectory with a
+                         * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
+
+                        private_root = strjoin(params->prefix[t], "/private");
+                        if (!private_root) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        r = strv_consume(&empty_directories, private_root);
+                        if (r < 0) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+                }
+
+                STRV_FOREACH(suffix, context->directories[t].paths) {
+                        char *s, *d;
+
+                        if (context->dynamic_user &&
+                            !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
+                                s = strjoin(params->prefix[t], "/private/", *suffix);
+                        else
+                                s = strjoin(params->prefix[t], "/", *suffix);
+                        if (!s) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        d = strdup(s);
+                        if (!d) {
+                                free(s);
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        bind_mounts[h++] = (BindMount) {
+                                .source = s,
+                                .destination = d,
+                                .read_only = false,
+                                .recursive = true,
+                                .ignore_enoent = false,
+                        };
                 }
         }
 
-        *ret = l;
-        l = NULL;
+        assert(h == n);
 
-        return 0;
+        *ret_bind_mounts = bind_mounts;
+        *ret_n_bind_mounts = n;
+        *ret_empty_directories = empty_directories;
+
+        empty_directories = NULL;
+
+        return (int) n;
+
+finish:
+        bind_mount_free_many(bind_mounts, h);
+        return r;
 }
 
 static int apply_mount_namespace(
@@ -2045,10 +2219,10 @@ static int apply_mount_namespace(
                 const ExecParameters *params,
                 ExecRuntime *runtime) {
 
-        _cleanup_strv_free_ char **rw = NULL;
+        _cleanup_strv_free_ char **empty_directories = NULL;
         char *tmp = NULL, *var = NULL;
         const char *root_dir = NULL, *root_image = NULL;
-        NameSpaceInfo ns_info = {
+        NamespaceInfo ns_info = {
                 .ignore_protect_paths = false,
                 .private_dev = context->private_devices,
                 .protect_control_groups = context->protect_control_groups,
@@ -2057,6 +2231,8 @@ static int apply_mount_namespace(
                 .mount_apivfs = context->mount_apivfs,
         };
         bool needs_sandboxing;
+        BindMount *bind_mounts = NULL;
+        unsigned n_bind_mounts = 0;
         int r;
 
         assert(context);
@@ -2072,10 +2248,6 @@ static int apply_mount_namespace(
                         var = strjoina(runtime->var_tmp_dir, "/tmp");
         }
 
-        r = compile_read_write_paths(context, params, &rw);
-        if (r < 0)
-                return r;
-
         if (params->flags & EXEC_APPLY_CHROOT) {
                 root_image = context->root_image;
 
@@ -2083,6 +2255,10 @@ static int apply_mount_namespace(
                         root_dir = context->root_directory;
         }
 
+        r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
+        if (r < 0)
+                return r;
+
         /*
          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
          * sandbox info, otherwise enforce it, don't ignore protected paths and
@@ -2094,11 +2270,12 @@ static int apply_mount_namespace(
         needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
 
         r = setup_namespace(root_dir, root_image,
-                            &ns_info, rw,
+                            &ns_info, context->read_write_paths,
                             needs_sandboxing ? context->read_only_paths : NULL,
                             needs_sandboxing ? context->inaccessible_paths : NULL,
-                            context->bind_mounts,
-                            context->n_bind_mounts,
+                            empty_directories,
+                            bind_mounts,
+                            n_bind_mounts,
                             tmp,
                             var,
                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
@@ -2106,13 +2283,13 @@ static int apply_mount_namespace(
                             context->mount_flags,
                             DISSECT_IMAGE_DISCARD_ON_LOOP);
 
+        bind_mount_free_many(bind_mounts, n_bind_mounts);
+
         /* If we couldn't set up the namespace this is probably due to a
          * missing capability. In this case, silently proceeed. */
         if (IN_SET(r, -EPERM, -EACCES)) {
-                log_open();
                 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
-                log_close();
-                r = 0;
+                return 0;
         }
 
         return r;
@@ -2163,10 +2340,17 @@ static int apply_working_directory(
         return 0;
 }
 
-static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
+static int setup_keyring(
+                Unit *u,
+                const ExecContext *context,
+                const ExecParameters *p,
+                uid_t uid, gid_t gid) {
+
         key_serial_t keyring;
+        int r;
 
         assert(u);
+        assert(context);
         assert(p);
 
         /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
@@ -2179,16 +2363,19 @@ static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid)
         if (!(p->flags & EXEC_NEW_KEYRING))
                 return 0;
 
+        if (context->keyring_mode == EXEC_KEYRING_INHERIT)
+                return 0;
+
         keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
         if (keyring == -1) {
                 if (errno == ENOSYS)
-                        log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+                        log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
                 else if (IN_SET(errno, EACCES, EPERM))
-                        log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+                        log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
                 else if (errno == EDQUOT)
-                        log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
+                        log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
                 else
-                        return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+                        return log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
 
                 return 0;
         }
@@ -2199,19 +2386,68 @@ static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid)
 
                 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
                 if (key == -1)
-                        log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
+                        log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
                 else {
                         if (keyctl(KEYCTL_SETPERM, key,
                                    KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
                                    KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
-                                return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
+                                return log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
                 }
         }
 
         /* And now, make the keyring owned by the service's user */
         if (uid_is_valid(uid) || gid_is_valid(gid))
                 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
-                        return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
+                        return log_unit_error_errno(u, errno, "Failed to change ownership of session keyring: %m");
+
+        /* When requested link the user keyring into the session keyring. */
+        if (context->keyring_mode == EXEC_KEYRING_SHARED) {
+                uid_t saved_uid;
+                gid_t saved_gid;
+
+                /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things
+                 * set up properly by the kernel. If we don't do that then we can't create it atomically, and that
+                 * sucks for parallel execution. This mimics what pam_keyinit does, too.*/
+
+                saved_uid = getuid();
+                saved_gid = getgid();
+
+                if (gid_is_valid(gid) && gid != saved_gid) {
+                        if (setregid(gid, -1) < 0)
+                                return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
+                }
+
+                if (uid_is_valid(uid) && uid != saved_uid) {
+                        if (setreuid(uid, -1) < 0) {
+                                (void) setregid(saved_gid, -1);
+                                return log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
+                        }
+                }
+
+                if (keyctl(KEYCTL_LINK,
+                           KEY_SPEC_USER_KEYRING,
+                           KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
+
+                        r = -errno;
+
+                        (void) setreuid(saved_uid, -1);
+                        (void) setregid(saved_gid, -1);
+
+                        return log_unit_error_errno(u, r, "Failed to link user keyring into session keyring: %m");
+                }
+
+                if (uid_is_valid(uid) && uid != saved_uid) {
+                        if (setreuid(saved_uid, -1) < 0) {
+                                (void) setregid(saved_gid, -1);
+                                return log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
+                        }
+                }
+
+                if (gid_is_valid(gid) && gid != saved_gid) {
+                        if (setregid(saved_gid, -1) < 0)
+                                return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
+                }
+        }
 
         return 0;
 }
@@ -2292,9 +2528,9 @@ static int send_user_lookup(
 
         if (writev(user_lookup_fd,
                (struct iovec[]) {
-                           { .iov_base = &uid, .iov_len = sizeof(uid) },
-                           { .iov_base = &gid, .iov_len = sizeof(gid) },
-                           { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+                           IOVEC_INIT(&uid, sizeof(uid)),
+                           IOVEC_INIT(&gid, sizeof(gid)),
+                           IOVEC_INIT_STRING(unit->id) }, 3) < 0)
                 return -errno;
 
         return 0;
@@ -2329,6 +2565,52 @@ static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char
         return 1;
 }
 
+static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
+        _cleanup_strv_free_ char ** list = NULL;
+        ExecDirectoryType t;
+        int r;
+
+        assert(c);
+        assert(p);
+        assert(ret);
+
+        assert(c->dynamic_user);
+
+        /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
+         * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
+         * directories. */
+
+        for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+                char **i;
+
+                if (t == EXEC_DIRECTORY_CONFIGURATION)
+                        continue;
+
+                if (!p->prefix[t])
+                        continue;
+
+                STRV_FOREACH(i, c->directories[t].paths) {
+                        char *e;
+
+                        if (t == EXEC_DIRECTORY_RUNTIME)
+                                e = strjoin(p->prefix[t], "/", *i);
+                        else
+                                e = strjoin(p->prefix[t], "/private/", *i);
+                        if (!e)
+                                return -ENOMEM;
+
+                        r = strv_consume(&list, e);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        *ret = list;
+        list = NULL;
+
+        return 0;
+}
+
 static int exec_child(
                 Unit *unit,
                 ExecCommand *command,
@@ -2344,8 +2626,7 @@ static int exec_child(
                 unsigned n_socket_fds,
                 char **files_env,
                 int user_lookup_fd,
-                int *exit_status,
-                char **error_message) {
+                int *exit_status) {
 
         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
         _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
@@ -2358,13 +2639,13 @@ static int exec_child(
                 needs_setuid,           /* Do we need to do the actual setresuid()/setresgid() calls? */
                 needs_mount_namespace,  /* Do we need to set up a mount namespace for this kernel? */
                 needs_ambient_hack;     /* Do we need to apply the ambient capabilities hack? */
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
         bool use_selinux = false;
 #endif
-#ifdef HAVE_SMACK
+#if ENABLE_SMACK
         bool use_smack = false;
 #endif
-#ifdef HAVE_APPARMOR
+#if HAVE_APPARMOR
         bool use_apparmor = false;
 #endif
         uid_t uid = UID_INVALID;
@@ -2379,9 +2660,6 @@ static int exec_child(
         assert(context);
         assert(params);
         assert(exit_status);
-        assert(error_message);
-        /* We don't always set error_message, hence it must be initialized */
-        assert(*error_message == NULL);
 
         rename_process_from_path(command->path);
 
@@ -2399,32 +2677,34 @@ static int exec_child(
         r = reset_signal_mask();
         if (r < 0) {
                 *exit_status = EXIT_SIGNAL_MASK;
-                *error_message = strdup("Failed to reset signal mask");
-                /* If strdup fails, here and below, we will just print the generic error message. */
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
         }
 
         if (params->idle_pipe)
                 do_idle_pipe_dance(params->idle_pipe);
 
-        /* Close sockets very early to make sure we don't
-         * block init reexecution because it cannot bind its
-         * sockets */
+        /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
+         * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
+         * any fds open we don't really want open during the transition. In order to make logging work, we switch the
+         * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
 
         log_forget_fds();
+        log_set_open_when_needed(true);
+
+        /* In case anything used libc syslog(), close this here, too */
+        closelog();
 
         n_fds = n_storage_fds + n_socket_fds;
         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
-                *error_message = strdup("Failed to close remaining fds");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
         }
 
         if (!context->same_pgrp)
                 if (setsid() < 0) {
                         *exit_status = EXIT_SETSID;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
                 }
 
         exec_context_tty_reset(context, params);
@@ -2435,8 +2715,8 @@ static int exec_child(
 
                 cmdline = exec_command_line(argv);
                 if (!cmdline) {
-                        *exit_status = EXIT_CONFIRM;
-                        return -ENOMEM;
+                        *exit_status = EXIT_MEMORY;
+                        return log_oom();
                 }
 
                 r = ask_for_confirmation(vc, unit, cmdline);
@@ -2446,37 +2726,45 @@ static int exec_child(
                                 return 0;
                         }
                         *exit_status = EXIT_CONFIRM;
-                        *error_message = strdup("Execution cancelled");
+                        log_unit_error(unit, "Execution cancelled by the user");
                         return -ECANCELED;
                 }
         }
 
         if (context->dynamic_user && dcreds) {
+                _cleanup_strv_free_ char **suggested_paths = NULL;
 
                 /* Make sure we bypass our own NSS module for any NSS checks */
                 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
                         *exit_status = EXIT_USER;
-                        *error_message = strdup("Failed to update environment");
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+                }
+
+                r = compile_suggested_paths(context, params, &suggested_paths);
+                if (r < 0) {
+                        *exit_status = EXIT_MEMORY;
+                        return log_oom();
                 }
 
-                r = dynamic_creds_realize(dcreds, &uid, &gid);
+                r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
                 if (r < 0) {
                         *exit_status = EXIT_USER;
-                        *error_message = strdup("Failed to update dynamic user credentials");
-                        return r;
+                        if (r == -EILSEQ) {
+                                log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
+                                return -EOPNOTSUPP;
+                        }
+                        return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
                 }
 
                 if (!uid_is_valid(uid)) {
                         *exit_status = EXIT_USER;
-                        (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
-                        /* If asprintf fails, here and below, we will just print the generic error message. */
+                        log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
                         return -ESRCH;
                 }
 
                 if (!gid_is_valid(gid)) {
                         *exit_status = EXIT_USER;
-                        (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
+                        log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
                         return -ESRCH;
                 }
 
@@ -2487,15 +2775,13 @@ static int exec_child(
                 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
                 if (r < 0) {
                         *exit_status = EXIT_USER;
-                        *error_message = strdup("Failed to determine user credentials");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
                 }
 
                 r = get_fixed_group(context, &groupname, &gid);
                 if (r < 0) {
                         *exit_status = EXIT_GROUP;
-                        *error_message = strdup("Failed to determine group credentials");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
                 }
         }
 
@@ -2504,15 +2790,13 @@ static int exec_child(
                                      &supplementary_gids, &ngids);
         if (r < 0) {
                 *exit_status = EXIT_GROUP;
-                *error_message = strdup("Failed to determine supplementary groups");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
         }
 
         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
         if (r < 0) {
                 *exit_status = EXIT_USER;
-                *error_message = strdup("Failed to send user credentials to PID1");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
         }
 
         user_lookup_fd = safe_close(user_lookup_fd);
@@ -2520,8 +2804,7 @@ static int exec_child(
         r = acquire_home(context, uid, &home, &home_buffer);
         if (r < 0) {
                 *exit_status = EXIT_CHDIR;
-                *error_message = strdup("Failed to determine $HOME for user");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
         }
 
         /* If a socket is connected to STDIN/STDOUT/STDERR, we
@@ -2532,30 +2815,26 @@ static int exec_child(
         r = setup_input(context, params, socket_fd, named_iofds);
         if (r < 0) {
                 *exit_status = EXIT_STDIN;
-                *error_message = strdup("Failed to set up stdin");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
         }
 
         r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDOUT;
-                *error_message = strdup("Failed to set up stdout");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
         }
 
         r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDERR;
-                *error_message = strdup("Failed to set up stderr");
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
         }
 
         if (params->cgroup_path) {
                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
                 if (r < 0) {
                         *exit_status = EXIT_CGROUP;
-                        (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path);
                 }
         }
 
@@ -2569,21 +2848,18 @@ static int exec_child(
 
                 sprintf(t, "%i", context->oom_score_adjust);
                 r = write_string_file("/proc/self/oom_score_adj", t, 0);
-                if (r == -EPERM || r == -EACCES) {
-                        log_open();
+                if (IN_SET(r, -EPERM, -EACCES))
                         log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
-                        log_close();
-                } else if (r < 0) {
+                else if (r < 0) {
                         *exit_status = EXIT_OOM_ADJUST;
-                        *error_message = strdup("Failed to write /proc/self/oom_score_adj");
-                        return -errno;
+                        return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
                 }
         }
 
         if (context->nice_set)
                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
                         *exit_status = EXIT_NICE;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
                 }
 
         if (context->cpu_sched_set) {
@@ -2598,33 +2874,33 @@ static int exec_child(
                                        &param);
                 if (r < 0) {
                         *exit_status = EXIT_SETSCHEDULER;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
                 }
         }
 
         if (context->cpuset)
                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
                         *exit_status = EXIT_CPUAFFINITY;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
                 }
 
         if (context->ioprio_set)
                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
                         *exit_status = EXIT_IOPRIO;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
                 }
 
         if (context->timer_slack_nsec != NSEC_INFINITY)
                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
                         *exit_status = EXIT_TIMERSLACK;
-                        return -errno;
+                        return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
                 }
 
         if (context->personality != PERSONALITY_INVALID) {
                 r = safe_personality(context->personality);
                 if (r < 0) {
                         *exit_status = EXIT_PERSONALITY;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
                 }
         }
 
@@ -2640,7 +2916,7 @@ static int exec_child(
                 r = chown_terminal(STDIN_FILENO, uid);
                 if (r < 0) {
                         *exit_status = EXIT_STDIN;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
                 }
         }
 
@@ -2651,21 +2927,20 @@ static int exec_child(
                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
                 if (r < 0) {
                         *exit_status = EXIT_CGROUP;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
                 }
 
-
                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
                 if (r < 0) {
                         *exit_status = EXIT_CGROUP;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
                 }
         }
 
-        for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
+        for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
                 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
                 if (r < 0)
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
         }
 
         r = build_environment(
@@ -2681,13 +2956,13 @@ static int exec_child(
                         &our_env);
         if (r < 0) {
                 *exit_status = EXIT_MEMORY;
-                return r;
+                return log_oom();
         }
 
         r = build_pass_environment(context, &pass_env);
         if (r < 0) {
                 *exit_status = EXIT_MEMORY;
-                return r;
+                return log_oom();
         }
 
         accum_env = strv_env_merge(5,
@@ -2699,16 +2974,16 @@ static int exec_child(
                                    NULL);
         if (!accum_env) {
                 *exit_status = EXIT_MEMORY;
-                return -ENOMEM;
+                return log_oom();
         }
         accum_env = strv_env_clean(accum_env);
 
         (void) umask(context->umask);
 
-        r = setup_keyring(unit, params, uid, gid);
+        r = setup_keyring(unit, context, params, uid, gid);
         if (r < 0) {
                 *exit_status = EXIT_KEYRING;
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
         }
 
         /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
@@ -2728,13 +3003,13 @@ static int exec_child(
                  * present. The actual MAC context application will happen later, as late as possible, to avoid
                  * impacting our own code paths. */
 
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
                 use_selinux = mac_selinux_use();
 #endif
-#ifdef HAVE_SMACK
+#if ENABLE_SMACK
                 use_smack = mac_smack_use();
 #endif
-#ifdef HAVE_APPARMOR
+#if HAVE_APPARMOR
                 use_apparmor = mac_apparmor_use();
 #endif
         }
@@ -2744,17 +3019,20 @@ static int exec_child(
                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
                         if (r < 0) {
                                 *exit_status = EXIT_PAM;
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
                         }
                 }
         }
 
         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
-                r = setup_netns(runtime->netns_storage_socket);
-                if (r < 0) {
-                        *exit_status = EXIT_NETWORK;
-                        return r;
-                }
+                if (ns_type_supported(NAMESPACE_NET)) {
+                        r = setup_netns(runtime->netns_storage_socket);
+                        if (r < 0) {
+                                *exit_status = EXIT_NETWORK;
+                                return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+                        }
+                } else
+                        log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
         }
 
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
@@ -2762,31 +3040,31 @@ static int exec_child(
                 r = apply_mount_namespace(unit, command, context, params, runtime);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
                 }
         }
 
         /* Apply just after mount namespace setup */
         r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
         if (r < 0)
-                return r;
+                return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
 
         /* Drop groups as early as possbile */
         if (needs_setuid) {
-                r = enforce_groups(context, gid, supplementary_gids, ngids);
+                r = enforce_groups(gid, supplementary_gids, ngids);
                 if (r < 0) {
                         *exit_status = EXIT_GROUP;
-                        return r;
+                        return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
                 }
         }
 
         if (needs_sandboxing) {
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
                 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
                         r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
                         if (r < 0) {
                                 *exit_status = EXIT_SELINUX_CONTEXT;
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
                         }
                 }
 #endif
@@ -2795,7 +3073,7 @@ static int exec_child(
                         r = setup_private_users(uid, gid);
                         if (r < 0) {
                                 *exit_status = EXIT_USER;
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
                         }
                 }
         }
@@ -2810,7 +3088,7 @@ static int exec_child(
                 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
-                return r;
+                return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
         }
 
         secure_bits = context->secure_bits;
@@ -2826,7 +3104,7 @@ static int exec_child(
                         r = setrlimit_closest(i, context->rlimit[i]);
                         if (r < 0) {
                                 *exit_status = EXIT_LIMITS;
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to adjust resource limit %s: %m", rlimit_to_string(i));
                         }
                 }
 
@@ -2834,7 +3112,7 @@ static int exec_child(
                 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
                         if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
                                 *exit_status = EXIT_LIMITS;
-                                return -errno;
+                                return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
                         }
                 }
 
@@ -2851,8 +3129,7 @@ static int exec_child(
                         r = capability_bounding_set_drop(bset, false);
                         if (r < 0) {
                                 *exit_status = EXIT_CAPABILITIES;
-                                *error_message = strdup("Failed to drop capabilities");
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
                         }
                 }
 
@@ -2863,8 +3140,7 @@ static int exec_child(
                         r = capability_ambient_set_apply(context->capability_ambient_set, true);
                         if (r < 0) {
                                 *exit_status = EXIT_CAPABILITIES;
-                                *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
                         }
                 }
         }
@@ -2874,8 +3150,7 @@ static int exec_child(
                         r = enforce_user(context, uid);
                         if (r < 0) {
                                 *exit_status = EXIT_USER;
-                                (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
                         }
 
                         if (!needs_ambient_hack &&
@@ -2885,8 +3160,7 @@ static int exec_child(
                                 r = capability_ambient_set_apply(context->capability_ambient_set, false);
                                 if (r < 0) {
                                         *exit_status = EXIT_CAPABILITIES;
-                                        *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
-                                        return r;
+                                        return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
                                 }
 
                                 /* If we were asked to change user and ambient capabilities
@@ -2907,7 +3181,7 @@ static int exec_child(
                  * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
                  * are restricted. */
 
-#ifdef HAVE_SELINUX
+#if HAVE_SELINUX
                 if (use_selinux) {
                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
 
@@ -2915,33 +3189,28 @@ static int exec_child(
                                 r = setexeccon(exec_context);
                                 if (r < 0) {
                                         *exit_status = EXIT_SELINUX_CONTEXT;
-                                        (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
-                                        return r;
+                                        return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
                                 }
                         }
                 }
 #endif
 
-#ifdef HAVE_SMACK
+#if ENABLE_SMACK
                 if (use_smack) {
                         r = setup_smack(context, command);
                         if (r < 0) {
                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                *error_message = strdup("Failed to set SMACK process label");
-                                return r;
+                                return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
                         }
                 }
 #endif
 
-#ifdef HAVE_APPARMOR
+#if HAVE_APPARMOR
                 if (use_apparmor && context->apparmor_profile) {
                         r = aa_change_onexec(context->apparmor_profile);
                         if (r < 0 && !context->apparmor_profile_ignore) {
                                 *exit_status = EXIT_APPARMOR_PROFILE;
-                                (void) asprintf(error_message,
-                                                "Failed to prepare AppArmor profile change to %s",
-                                                context->apparmor_profile);
-                                return -errno;
+                                return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
                         }
                 }
 #endif
@@ -2951,79 +3220,68 @@ static int exec_child(
                 if (prctl(PR_GET_SECUREBITS) != secure_bits)
                         if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
                                 *exit_status = EXIT_SECUREBITS;
-                                *error_message = strdup("Failed to set secure bits");
-                                return -errno;
+                                return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
                         }
 
                 if (context_has_no_new_privileges(context))
                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
-                                *error_message = strdup("Failed to disable new privileges");
-                                return -errno;
+                                return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
                         }
 
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
                 r = apply_address_families(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_ADDRESS_FAMILIES;
-                        *error_message = strdup("Failed to restrict address families");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
                 }
 
                 r = apply_memory_deny_write_execute(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to disable writing to executable memory");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
                 }
 
                 r = apply_restrict_realtime(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply realtime restrictions");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
                 }
 
                 r = apply_restrict_namespaces(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply namespace restrictions");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
                 }
 
                 r = apply_protect_sysctl(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply sysctl restrictions");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
                 }
 
                 r = apply_protect_kernel_modules(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply module loading restrictions");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
                 }
 
                 r = apply_private_devices(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to set up private devices");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
                 }
 
                 r = apply_syscall_archs(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply syscall architecture restrictions");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
                 }
 
                 r = apply_lock_personality(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to lock personalities");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
                 }
 
                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
@@ -3031,8 +3289,7 @@ static int exec_child(
                 r = apply_syscall_filter(unit, context, needs_ambient_hack);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
-                        *error_message = strdup("Failed to apply syscall filters");
-                        return r;
+                        return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
                 }
 #endif
         }
@@ -3043,7 +3300,7 @@ static int exec_child(
                 ee = strv_env_delete(accum_env, 1, context->unset_environment);
                 if (!ee) {
                         *exit_status = EXIT_MEMORY;
-                        return -ENOMEM;
+                        return log_oom();
                 }
 
                 strv_free(accum_env);
@@ -3053,8 +3310,7 @@ static int exec_child(
         final_argv = replace_env_argv(argv, accum_env);
         if (!final_argv) {
                 *exit_status = EXIT_MEMORY;
-                *error_message = strdup("Failed to prepare process arguments");
-                return -ENOMEM;
+                return log_oom();
         }
 
         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
@@ -3062,19 +3318,33 @@ static int exec_child(
 
                 line = exec_command_line(final_argv);
                 if (line) {
-                        log_open();
                         log_struct(LOG_DEBUG,
                                    "EXECUTABLE=%s", command->path,
                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
                                    LOG_UNIT_ID(unit),
+                                   LOG_UNIT_INVOCATION_ID(unit),
                                    NULL);
-                        log_close();
                 }
         }
 
         execve(command->path, final_argv, accum_env);
+
+        if (errno == ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+
+                log_struct_errno(LOG_INFO, errno,
+                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+                                 LOG_UNIT_ID(unit),
+                                 LOG_UNIT_INVOCATION_ID(unit),
+                                 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
+                                                  command->path),
+                                 "EXECUTABLE=%s", command->path,
+                                 NULL);
+
+                return 0;
+        }
+
         *exit_status = EXIT_EXEC;
-        return -errno;
+        return log_unit_error_errno(unit, errno, "Failed to execute command: %m");
 }
 
 int exec_spawn(Unit *unit,
@@ -3140,14 +3410,15 @@ int exec_spawn(Unit *unit,
                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
                    "EXECUTABLE=%s", command->path,
                    LOG_UNIT_ID(unit),
+                   LOG_UNIT_INVOCATION_ID(unit),
                    NULL);
+
         pid = fork();
         if (pid < 0)
                 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
 
         if (pid == 0) {
-                int exit_status;
-                _cleanup_free_ char *error_message = NULL;
+                int exit_status = EXIT_SUCCESS;
 
                 r = exec_child(unit,
                                command,
@@ -3163,35 +3434,18 @@ int exec_spawn(Unit *unit,
                                n_socket_fds,
                                files_env,
                                unit->manager->user_lookup_fds[1],
-                               &exit_status,
-                               &error_message);
+                               &exit_status);
+
                 if (r < 0) {
-                        log_open();
-                        if (error_message)
-                                log_struct_errno(LOG_ERR, r,
-                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                                 LOG_UNIT_ID(unit),
-                                                 LOG_UNIT_MESSAGE(unit, "%s: %m",
-                                                                  error_message),
-                                                 "EXECUTABLE=%s", command->path,
-                                                 NULL);
-                        else if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE))
-                                log_struct_errno(LOG_INFO, r,
-                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                                 LOG_UNIT_ID(unit),
-                                                 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
-                                                                  command->path),
-                                                 "EXECUTABLE=%s", command->path,
-                                                 NULL);
-                        else
-                                log_struct_errno(LOG_ERR, r,
-                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                                 LOG_UNIT_ID(unit),
-                                                 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
-                                                                  exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
-                                                                  command->path),
-                                                 "EXECUTABLE=%s", command->path,
-                                                 NULL);
+                        log_struct_errno(LOG_ERR, r,
+                                         "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+                                         LOG_UNIT_ID(unit),
+                                         LOG_UNIT_INVOCATION_ID(unit),
+                                         LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
+                                                          exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
+                                                          command->path),
+                                         "EXECUTABLE=%s", command->path,
+                                         NULL);
                 }
 
                 _exit(exit_status);
@@ -3226,15 +3480,16 @@ void exec_context_init(ExecContext *c) {
         c->ignore_sigpipe = true;
         c->timer_slack_nsec = NSEC_INFINITY;
         c->personality = PERSONALITY_INVALID;
-        for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
+        for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
                 c->directories[i].mode = 0755;
         c->capability_bounding_set = CAP_ALL;
         c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+        c->log_level_max = -1;
 }
 
 void exec_context_done(ExecContext *c) {
-        unsigned l;
         ExecDirectoryType i;
+        size_t l;
 
         assert(c);
 
@@ -3275,12 +3530,16 @@ void exec_context_done(ExecContext *c) {
         c->apparmor_profile = mfree(c->apparmor_profile);
         c->smack_process_label = mfree(c->smack_process_label);
 
-        c->syscall_filter = set_free(c->syscall_filter);
+        c->syscall_filter = hashmap_free(c->syscall_filter);
         c->syscall_archs = set_free(c->syscall_archs);
         c->address_families = set_free(c->address_families);
 
-        for (i = 0; i < _EXEC_DIRECTORY_MAX; i++)
+        for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
                 c->directories[i].paths = strv_free(c->directories[i].paths);
+
+        c->log_level_max = -1;
+
+        exec_context_free_log_extra_fields(c);
 }
 
 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
@@ -3298,8 +3557,7 @@ int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_p
                 if (!p)
                         return -ENOMEM;
 
-                /* We execute this synchronously, since we need to be
-                 * sure this is gone when we start the service
+                /* We execute this synchronously, since we need to be sure this is gone when we start the service
                  * next. */
                 (void) rm_rf(p, REMOVE_ROOT);
         }
@@ -3544,9 +3802,9 @@ static void strv_fprintf(FILE *f, char **l) {
 }
 
 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
+        ExecDirectoryType dt;
         char **e, **d;
         unsigned i;
-        ExecDirectoryType dt;
         int r;
 
         assert(c);
@@ -3571,7 +3829,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sMountAPIVFS: %s\n"
                 "%sIgnoreSIGPIPE: %s\n"
                 "%sMemoryDenyWriteExecute: %s\n"
-                "%sRestrictRealtime: %s\n",
+                "%sRestrictRealtime: %s\n"
+                "%sKeyringMode: %s\n",
                 prefix, c->umask,
                 prefix, c->working_directory ? c->working_directory : "/",
                 prefix, c->root_directory ? c->root_directory : "/",
@@ -3588,7 +3847,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->mount_apivfs),
                 prefix, yes_no(c->ignore_sigpipe),
                 prefix, yes_no(c->memory_deny_write_execute),
-                prefix, yes_no(c->restrict_realtime));
+                prefix, yes_no(c->restrict_realtime),
+                prefix, exec_keyring_mode_to_string(c->keyring_mode));
 
         if (c->root_image)
                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
@@ -3607,7 +3867,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
 
         fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
 
-        for (dt = 0; dt < _EXEC_DIRECTORY_MAX; dt++) {
+        for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
                 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
 
                 STRV_FOREACH(d, c->directories[dt].paths)
@@ -3712,6 +3972,26 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                         fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
         }
 
+        if (c->log_level_max >= 0) {
+                _cleanup_free_ char *t = NULL;
+
+                (void) log_level_to_string_alloc(c->log_level_max, &t);
+
+                fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
+        }
+
+        if (c->n_log_extra_fields > 0) {
+                size_t j;
+
+                for (j = 0; j < c->n_log_extra_fields; j++) {
+                        fprintf(f, "%sLogExtraFields: ", prefix);
+                        fwrite(c->log_extra_fields[j].iov_base,
+                               1, c->log_extra_fields[j].iov_len,
+                               f);
+                        fputc('\n', f);
+                }
+        }
+
         if (c->secure_bits) {
                 _cleanup_free_ char *str = NULL;
 
@@ -3743,7 +4023,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
 
         fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
 
-        if (strv_length(c->supplementary_groups) > 0) {
+        if (!strv_isempty(c->supplementary_groups)) {
                 fprintf(f, "%sSupplementaryGroups:", prefix);
                 strv_fprintf(f, c->supplementary_groups);
                 fputs("\n", f);
@@ -3809,9 +4089,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->lock_personality));
 
         if (c->syscall_filter) {
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
                 Iterator j;
-                void *id;
+                void *id, *val;
                 bool first = true;
 #endif
 
@@ -3822,9 +4102,11 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 if (!c->syscall_whitelist)
                         fputc('~', f);
 
-#ifdef HAVE_SECCOMP
-                SET_FOREACH(id, c->syscall_filter, j) {
+#if HAVE_SECCOMP
+                HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
                         _cleanup_free_ char *name = NULL;
+                        const char *errno_name = NULL;
+                        int num = PTR_TO_INT(val);
 
                         if (first)
                                 first = false;
@@ -3833,6 +4115,14 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
 
                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
                         fputs(strna(name), f);
+
+                        if (num >= 0) {
+                                errno_name = errno_to_name(num);
+                                if (errno_name)
+                                        fprintf(f, ":%s", errno_name);
+                                else
+                                        fprintf(f, ":%d", num);
+                        }
                 }
 #endif
 
@@ -3840,7 +4130,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
         }
 
         if (c->syscall_archs) {
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
                 Iterator j;
                 void *id;
 #endif
@@ -3849,7 +4139,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                         "%sSystemCallArchitectures:",
                         prefix);
 
-#ifdef HAVE_SECCOMP
+#if HAVE_SECCOMP
                 SET_FOREACH(id, c->syscall_archs, j)
                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
 #endif
@@ -3865,10 +4155,17 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                                 prefix, s);
         }
 
-        if (c->syscall_errno > 0)
-                fprintf(f,
-                        "%sSystemCallErrorNumber: %s\n",
-                        prefix, strna(errno_to_name(c->syscall_errno)));
+        if (c->syscall_errno > 0) {
+                const char *errno_name;
+
+                fprintf(f, "%sSystemCallErrorNumber: ", prefix);
+
+                errno_name = errno_to_name(c->syscall_errno);
+                if (errno_name)
+                        fprintf(f, "%s\n", errno_name);
+                else
+                        fprintf(f, "%d\n", c->syscall_errno);
+        }
 
         if (c->apparmor_profile)
                 fprintf(f,
@@ -3906,6 +4203,17 @@ int exec_context_get_effective_ioprio(ExecContext *c) {
         return p;
 }
 
+void exec_context_free_log_extra_fields(ExecContext *c) {
+        size_t l;
+
+        assert(c);
+
+        for (l = 0; l < c->n_log_extra_fields; l++)
+                free(c->log_extra_fields[l].iov_base);
+        c->log_extra_fields = mfree(c->log_extra_fields);
+        c->n_log_extra_fields = 0;
+}
+
 void exec_status_start(ExecStatus *s, pid_t pid) {
         assert(s);
 
@@ -4359,7 +4667,7 @@ static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
 
 DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
 
-static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
+static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
         [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
         [EXEC_DIRECTORY_STATE] = "StateDirectory",
         [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
@@ -4368,3 +4676,11 @@ static const char* const exec_directory_type_table[_EXEC_DIRECTORY_MAX] = {
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
+
+static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
+        [EXEC_KEYRING_INHERIT] = "inherit",
+        [EXEC_KEYRING_PRIVATE] = "private",
+        [EXEC_KEYRING_SHARED] = "shared",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);