]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/execute.c
core: add RootHashSignature service parameter
[thirdparty/systemd.git] / src / core / execute.c
index 3911363c54e027cd6745b667fbef9add72afabdd..4bee1b19665edcdb8f9597f8d7b393dd335b8a34 100644 (file)
@@ -54,6 +54,7 @@
 #include "format-util.h"
 #include "fs-util.h"
 #include "glob-util.h"
+#include "hexdecoct.h"
 #include "io-util.h"
 #include "ioprio.h"
 #include "label.h"
@@ -219,17 +220,10 @@ static bool is_terminal_input(ExecInput i) {
 static bool is_terminal_output(ExecOutput o) {
         return IN_SET(o,
                       EXEC_OUTPUT_TTY,
-                      EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                       EXEC_OUTPUT_KMSG_AND_CONSOLE,
                       EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
 }
 
-static bool is_syslog_output(ExecOutput o) {
-        return IN_SET(o,
-                      EXEC_OUTPUT_SYSLOG,
-                      EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
-}
-
 static bool is_kmsg_output(ExecOutput o) {
         return IN_SET(o,
                       EXEC_OUTPUT_KMSG,
@@ -271,9 +265,8 @@ static int connect_journal_socket(
                 uid_t uid,
                 gid_t gid) {
 
-        union sockaddr_union sa = {
-                .un.sun_family = AF_UNIX,
-        };
+        union sockaddr_union sa;
+        socklen_t sa_len;
         uid_t olduid = UID_INVALID;
         gid_t oldgid = GID_INVALID;
         const char *j;
@@ -285,6 +278,7 @@ static int connect_journal_socket(
         r = sockaddr_un_set_path(&sa.un, j);
         if (r < 0)
                 return r;
+        sa_len = r;
 
         if (gid_is_valid(gid)) {
                 oldgid = getgid();
@@ -302,7 +296,7 @@ static int connect_journal_socket(
                 }
         }
 
-        r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
+        r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
 
         /* If we fail to restore the uid or gid, things will likely
            fail later on. This should only happen if an LSM interferes. */
@@ -361,7 +355,7 @@ static int connect_logger_as(
                 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
                 context->syslog_priority,
                 !!context->syslog_level_prefix,
-                is_syslog_output(output),
+                false,
                 is_kmsg_output(output),
                 is_terminal_output(output)) < 0)
                 return -errno;
@@ -383,9 +377,10 @@ static int open_terminal_as(const char *path, int flags, int nfd) {
 }
 
 static int acquire_path(const char *path, int flags, mode_t mode) {
-        union sockaddr_union sa = {};
+        union sockaddr_union sa;
+        socklen_t sa_len;
         _cleanup_close_ int fd = -1;
-        int r, salen;
+        int r;
 
         assert(path);
 
@@ -398,20 +393,19 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
 
         if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
                 return -errno;
-        if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
-                return -ENXIO;
 
         /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
 
+        r = sockaddr_un_set_path(&sa.un, path);
+        if (r < 0)
+                return r == -EINVAL ? -ENXIO : r;
+        sa_len = r;
+
         fd = socket(AF_UNIX, SOCK_STREAM, 0);
         if (fd < 0)
                 return -errno;
 
-        salen = sockaddr_un_set_path(&sa.un, path);
-        if (salen < 0)
-                return salen;
-
-        if (connect(fd, &sa.sa, salen) < 0)
+        if (connect(fd, &sa.sa, sa_len) < 0)
                 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
                                                            * indication that his wasn't an AF_UNIX socket after all */
 
@@ -420,7 +414,7 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
         else if ((flags & O_ACCMODE) == O_WRONLY)
                 r = shutdown(fd, SHUT_RD);
         else
-                return TAKE_FD(fd);
+                r = 0;
         if (r < 0)
                 return -errno;
 
@@ -664,8 +658,6 @@ static int setup_output(
                 /* We don't reset the terminal if this is just about output */
                 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
 
-        case EXEC_OUTPUT_SYSLOG:
-        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
         case EXEC_OUTPUT_KMSG:
         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
         case EXEC_OUTPUT_JOURNAL:
@@ -1388,14 +1380,14 @@ static void rename_process_from_path(const char *path) {
 static bool context_has_address_families(const ExecContext *c) {
         assert(c);
 
-        return c->address_families_whitelist ||
+        return c->address_families_allow_list ||
                 !set_isempty(c->address_families);
 }
 
 static bool context_has_syscall_filters(const ExecContext *c) {
         assert(c);
 
-        return c->syscall_whitelist ||
+        return c->syscall_allow_list ||
                 !hashmap_isempty(c->syscall_filter);
 }
 
@@ -1451,7 +1443,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
 
         negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
 
-        if (c->syscall_whitelist) {
+        if (c->syscall_allow_list) {
                 default_action = negative_action;
                 action = SCMP_ACT_ALLOW;
         } else {
@@ -1460,7 +1452,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
         }
 
         if (needs_ambient_hack) {
-                r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
+                r = seccomp_filter_set_add(c->syscall_filter, c->syscall_allow_list, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
                 if (r < 0)
                         return r;
         }
@@ -1491,7 +1483,7 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
                 return 0;
 
-        return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
+        return seccomp_restrict_address_families(c->address_families, c->address_families_allow_list);
 }
 
 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
@@ -1577,7 +1569,7 @@ static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
         return seccomp_protect_syslog();
 }
 
-static int apply_protect_clock(const Unit *u, const ExecContext *c)  {
+static int apply_protect_clock(const Unit *u, const ExecContext *c) {
         assert(u);
         assert(c);
 
@@ -1646,6 +1638,41 @@ static int apply_lock_personality(const Unit* u, const ExecContext *c) {
 
 #endif
 
+static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
+        assert(u);
+        assert(c);
+
+        if (!c->protect_hostname)
+                return 0;
+
+        if (ns_type_supported(NAMESPACE_UTS)) {
+                if (unshare(CLONE_NEWUTS) < 0) {
+                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
+                                *ret_exit_status = EXIT_NAMESPACE;
+                                return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
+                        }
+
+                        log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
+                }
+        } else
+                log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
+
+#if HAVE_SECCOMP
+        int r;
+
+        if (skip_seccomp_unavailable(u, "ProtectHostname="))
+                return 0;
+
+        r = seccomp_protect_hostname();
+        if (r < 0) {
+                *ret_exit_status = EXIT_SECCOMP;
+                return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
+        }
+#endif
+
+        return 0;
+}
+
 static void do_idle_pipe_dance(int idle_pipe[static 4]) {
         assert(idle_pipe);
 
@@ -1785,12 +1812,13 @@ static int build_environment(
 
                 tty_path = exec_context_tty_path(c);
 
-                /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
-                 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
-                 * passes to PID 1 ends up all the way in the console login shown. */
+                /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
+                 * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
+                 * container manager passes to PID 1 ends up all the way in the console login shown. */
 
-                if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+                if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
                         term = getenv("TERM");
+
                 if (!term)
                         term = default_term_for_tty(tty_path);
 
@@ -2247,7 +2275,7 @@ static int setup_exec_directory(
 
                         if (type != EXEC_DIRECTORY_CONFIGURATION &&
                             readlink_and_make_absolute(p, &target) >= 0) {
-                                _cleanup_free_ char *q = NULL;
+                                _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
 
                                 /* This already exists and is a symlink? Interesting. Maybe it's one created
                                  * by DynamicUser=1 (see above)?
@@ -2256,13 +2284,22 @@ static int setup_exec_directory(
                                  * since they all support the private/ symlink logic at least in some
                                  * configurations, see above. */
 
+                                r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
+                                if (r < 0)
+                                        goto fail;
+
                                 q = path_join(params->prefix[type], "private", *rt);
                                 if (!q) {
                                         r = -ENOMEM;
                                         goto fail;
                                 }
 
-                                if (path_equal(q, target)) {
+                                /* /var/lib or friends may be symlinks. So, let's chase them also. */
+                                r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
+                                if (r < 0)
+                                        goto fail;
+
+                                if (path_equal(q_resolved, target_resolved)) {
 
                                         /* Hmm, apparently DynamicUser= was once turned on for this service,
                                          * but is no longer. Let's move the directory back up. */
@@ -2522,7 +2559,7 @@ static bool insist_on_sandboxing(
         assert(n_bind_mounts == 0 || bind_mounts);
 
         /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
-         * would alter the view on the file system beyond making things read-only or invisble, i.e. would
+         * would alter the view on the file system beyond making things read-only or invisible, i.e. would
          * rearrange stuff in a way we cannot ignore gracefully. */
 
         if (context->n_temporary_filesystems > 0)
@@ -2630,6 +2667,9 @@ static int apply_mount_namespace(
                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
                             needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
                             context->mount_flags,
+                            context->root_hash, context->root_hash_size, context->root_hash_path,
+                            context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
+                            context->root_verity,
                             DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
                             error_path);
 
@@ -3012,6 +3052,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
         return using_subcgroup;
 }
 
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
+        _cleanup_(cpu_set_reset) CPUSet s = {};
+        int r;
+
+        assert(c);
+        assert(ret);
+
+        if (!c->numa_policy.nodes.set) {
+                log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+                return 0;
+        }
+
+        r = numa_to_cpu_set(&c->numa_policy, &s);
+        if (r < 0)
+                return r;
+
+        cpu_set_reset(ret);
+
+        return cpu_set_add_all(ret, &s);
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
+        assert(c);
+
+        return c->cpu_affinity_from_numa;
+}
+
 static int exec_child(
                 Unit *unit,
                 const ExecCommand *command,
@@ -3287,6 +3354,14 @@ static int exec_child(
                 }
         }
 
+        if (context->coredump_filter_set) {
+                r = set_coredump_filter(context->coredump_filter);
+                if (ERRNO_IS_PRIVILEGE(r))
+                        log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
+                else if (r < 0)
+                        return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
+        }
+
         if (context->nice_set) {
                 r = setpriority_closest(context->nice);
                 if (r < 0)
@@ -3309,11 +3384,26 @@ static int exec_child(
                 }
         }
 
-        if (context->cpu_set.set)
-                if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
+        if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+                _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+                const CPUSet *cpu_set;
+
+                if (context->cpu_affinity_from_numa) {
+                        r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+                        if (r < 0) {
+                                *exit_status = EXIT_CPUAFFINITY;
+                                return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+                        }
+
+                        cpu_set = &converted_cpu_set;
+                } else
+                        cpu_set = &context->cpu_set;
+
+                if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
                         *exit_status = EXIT_CPUAFFINITY;
                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
                 }
+        }
 
         if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
                 r = apply_numa_policy(&context->numa_policy);
@@ -3406,8 +3496,7 @@ static int exec_child(
                                    our_env,
                                    pass_env,
                                    context->environment,
-                                   files_env,
-                                   NULL);
+                                   files_env);
         if (!accum_env) {
                 *exit_status = EXIT_MEMORY;
                 return log_oom();
@@ -3538,25 +3627,10 @@ static int exec_child(
                 }
         }
 
-        if (context->protect_hostname) {
-                if (ns_type_supported(NAMESPACE_UTS)) {
-                        if (unshare(CLONE_NEWUTS) < 0) {
-                                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
-                                        *exit_status = EXIT_NAMESPACE;
-                                        return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
-                                }
-
-                                log_unit_warning(unit, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
-                        }
-                } else
-                        log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
-#if HAVE_SECCOMP
-                r = seccomp_protect_hostname();
-                if (r < 0) {
-                        *exit_status = EXIT_SECCOMP;
-                        return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
-                }
-#endif
+        if (needs_sandboxing) {
+                r = apply_protect_hostname(unit, context, exit_status);
+                if (r < 0)
+                        return r;
         }
 
         /* Drop groups as early as possible.
@@ -4125,6 +4199,13 @@ void exec_context_done(ExecContext *c) {
         c->working_directory = mfree(c->working_directory);
         c->root_directory = mfree(c->root_directory);
         c->root_image = mfree(c->root_image);
+        c->root_hash = mfree(c->root_hash);
+        c->root_hash_size = 0;
+        c->root_hash_path = mfree(c->root_hash_path);
+        c->root_hash_sig = mfree(c->root_hash_sig);
+        c->root_hash_sig_size = 0;
+        c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+        c->root_verity = mfree(c->root_verity);
         c->tty_path = mfree(c->tty_path);
         c->syslog_identifier = mfree(c->syslog_identifier);
         c->user = mfree(c->user);
@@ -4529,6 +4610,30 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
         if (c->root_image)
                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
 
+        if (c->root_hash) {
+                _cleanup_free_ char *encoded = NULL;
+                encoded = hexmem(c->root_hash, c->root_hash_size);
+                if (encoded)
+                        fprintf(f, "%sRootHash: %s\n", prefix, encoded);
+        }
+
+        if (c->root_hash_path)
+                fprintf(f, "%sRootHash: %s\n", prefix, c->root_hash_path);
+
+        if (c->root_hash_sig) {
+                _cleanup_free_ char *encoded = NULL;
+                ssize_t len;
+                len = base64mem(c->root_hash_sig, c->root_hash_sig_size, &encoded);
+                if (len)
+                        fprintf(f, "%sRootHashSignature: base64:%s\n", prefix, encoded);
+        }
+
+        if (c->root_hash_sig_path)
+                fprintf(f, "%sRootHashSignature: %s\n", prefix, c->root_hash_sig_path);
+
+        if (c->root_verity)
+                fprintf(f, "%sRootVerity: %s\n", prefix, c->root_verity);
+
         STRV_FOREACH(e, c->environment)
                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
 
@@ -4564,6 +4669,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                         "%sOOMScoreAdjust: %i\n",
                         prefix, c->oom_score_adjust);
 
+        if (c->coredump_filter_set)
+                fprintf(f,
+                        "%sCoredumpFilter: 0x%"PRIx64"\n",
+                        prefix, c->coredump_filter);
+
         for (i = 0; i < RLIM_NLIMITS; i++)
                 if (c->rlimit[i]) {
                         fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
@@ -4652,17 +4762,13 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                         prefix, yes_no(c->tty_vt_disallocate));
 
         if (IN_SET(c->std_output,
-                   EXEC_OUTPUT_SYSLOG,
                    EXEC_OUTPUT_KMSG,
                    EXEC_OUTPUT_JOURNAL,
-                   EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
             IN_SET(c->std_error,
-                   EXEC_OUTPUT_SYSLOG,
                    EXEC_OUTPUT_KMSG,
                    EXEC_OUTPUT_JOURNAL,
-                   EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                    EXEC_OUTPUT_KMSG_AND_CONSOLE,
                    EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
 
@@ -4828,7 +4934,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                         "%sSystemCallFilter: ",
                         prefix);
 
-                if (!c->syscall_whitelist)
+                if (!c->syscall_allow_list)
                         fputc('~', f);
 
 #if HAVE_SECCOMP
@@ -4881,7 +4987,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 r = namespace_flags_to_string(c->restrict_namespaces, &s);
                 if (r >= 0)
                         fprintf(f, "%sRestrictNamespaces: %s\n",
-                                prefix, s);
+                                prefix, strna(s));
         }
 
         if (c->network_namespace_path)
@@ -5363,7 +5469,10 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
         if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
                 return 0;
 
-        if (c->private_tmp) {
+        if (c->private_tmp &&
+            !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
+              (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
+               prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
                 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
                 if (r < 0)
                         return r;
@@ -5677,8 +5786,6 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
         [EXEC_OUTPUT_INHERIT] = "inherit",
         [EXEC_OUTPUT_NULL] = "null",
         [EXEC_OUTPUT_TTY] = "tty",
-        [EXEC_OUTPUT_SYSLOG] = "syslog",
-        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
         [EXEC_OUTPUT_KMSG] = "kmsg",
         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
         [EXEC_OUTPUT_JOURNAL] = "journal",