core: add RootHashSignature service parameter

[thirdparty/systemd.git] / src / core / execute.c
diff --git a/src/core/execute.c b/src/core/execute.c

index 3911363c54e027cd6745b667fbef9add72afabdd..4bee1b19665edcdb8f9597f8d7b393dd335b8a34 100644 (file)
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -54,6 +54,7 @@
  #include "format-util.h"
  #include "fs-util.h"
  #include "glob-util.h"
+#include "hexdecoct.h"
  #include "io-util.h"
  #include "ioprio.h"
  #include "label.h"
@@ -219,17 +220,10 @@ static bool is_terminal_input(ExecInput i) {
  static bool is_terminal_output(ExecOutput o) {
          return IN_SET(o,
                        EXEC_OUTPUT_TTY,
-                      EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                        EXEC_OUTPUT_KMSG_AND_CONSOLE,
                        EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
  }
  
-static bool is_syslog_output(ExecOutput o) {
-        return IN_SET(o,
-                      EXEC_OUTPUT_SYSLOG,
-                      EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
-}
-
  static bool is_kmsg_output(ExecOutput o) {
          return IN_SET(o,
                        EXEC_OUTPUT_KMSG,
@@ -271,9 +265,8 @@ static int connect_journal_socket(
                  uid_t uid,
                  gid_t gid) {
  
-        union sockaddr_union sa = {
-                .un.sun_family = AF_UNIX,
-        };
+        union sockaddr_union sa;
+        socklen_t sa_len;
          uid_t olduid = UID_INVALID;
          gid_t oldgid = GID_INVALID;
          const char *j;
@@ -285,6 +278,7 @@ static int connect_journal_socket(
          r = sockaddr_un_set_path(&sa.un, j);
          if (r < 0)
                  return r;
+        sa_len = r;
  
          if (gid_is_valid(gid)) {
                  oldgid = getgid();
@@ -302,7 +296,7 @@ static int connect_journal_socket(
                  }
          }
  
-        r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
+        r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
  
          /* If we fail to restore the uid or gid, things will likely
             fail later on. This should only happen if an LSM interferes. */
@@ -361,7 +355,7 @@ static int connect_logger_as(
                  params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
                  context->syslog_priority,
                  !!context->syslog_level_prefix,
-                is_syslog_output(output),
+                false,
                  is_kmsg_output(output),
                  is_terminal_output(output)) < 0)
                  return -errno;
@@ -383,9 +377,10 @@ static int open_terminal_as(const char *path, int flags, int nfd) {
  }
  
  static int acquire_path(const char *path, int flags, mode_t mode) {
-        union sockaddr_union sa = {};
+        union sockaddr_union sa;
+        socklen_t sa_len;
          _cleanup_close_ int fd = -1;
-        int r, salen;
+        int r;
  
          assert(path);
  
@@ -398,20 +393,19 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
  
          if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
                  return -errno;
-        if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
-                return -ENXIO;
  
          /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
  
+        r = sockaddr_un_set_path(&sa.un, path);
+        if (r < 0)
+                return r == -EINVAL ? -ENXIO : r;
+        sa_len = r;
+
          fd = socket(AF_UNIX, SOCK_STREAM, 0);
          if (fd < 0)
                  return -errno;
  
-        salen = sockaddr_un_set_path(&sa.un, path);
-        if (salen < 0)
-                return salen;
-
-        if (connect(fd, &sa.sa, salen) < 0)
+        if (connect(fd, &sa.sa, sa_len) < 0)
                  return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
                                                             * indication that his wasn't an AF_UNIX socket after all */
  
@@ -420,7 +414,7 @@ static int acquire_path(const char *path, int flags, mode_t mode) {
          else if ((flags & O_ACCMODE) == O_WRONLY)
                  r = shutdown(fd, SHUT_RD);
          else
-                return TAKE_FD(fd);
+                r = 0;
          if (r < 0)
                  return -errno;
  
@@ -664,8 +658,6 @@ static int setup_output(
                  /* We don't reset the terminal if this is just about output */
                  return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
  
-        case EXEC_OUTPUT_SYSLOG:
-        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
          case EXEC_OUTPUT_KMSG:
          case EXEC_OUTPUT_KMSG_AND_CONSOLE:
          case EXEC_OUTPUT_JOURNAL:
@@ -1388,14 +1380,14 @@ static void rename_process_from_path(const char *path) {
  static bool context_has_address_families(const ExecContext *c) {
          assert(c);
  
-        return c->address_families_whitelist ||
+        return c->address_families_allow_list ||
                  !set_isempty(c->address_families);
  }
  
  static bool context_has_syscall_filters(const ExecContext *c) {
          assert(c);
  
-        return c->syscall_whitelist ||
+        return c->syscall_allow_list ||
                  !hashmap_isempty(c->syscall_filter);
  }
  
@@ -1451,7 +1443,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
  
          negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
  
-        if (c->syscall_whitelist) {
+        if (c->syscall_allow_list) {
                  default_action = negative_action;
                  action = SCMP_ACT_ALLOW;
          } else {
@@ -1460,7 +1452,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
          }
  
          if (needs_ambient_hack) {
-                r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
+                r = seccomp_filter_set_add(c->syscall_filter, c->syscall_allow_list, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
                  if (r < 0)
                          return r;
          }
@@ -1491,7 +1483,7 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
          if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
                  return 0;
  
-        return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
+        return seccomp_restrict_address_families(c->address_families, c->address_families_allow_list);
  }
  
  static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
@@ -1577,7 +1569,7 @@ static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
          return seccomp_protect_syslog();
  }
  
-static int apply_protect_clock(const Unit *u, const ExecContext *c)  {
+static int apply_protect_clock(const Unit *u, const ExecContext *c) {
          assert(u);
          assert(c);
  
@@ -1646,6 +1638,41 @@ static int apply_lock_personality(const Unit* u, const ExecContext *c) {
  
  #endif
  
+static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) { /* Applies ProtectHostname=; returns 0 or a negative error, writing *ret_exit_status only on failure. */
+        assert(u);
+        assert(c);
+
+        if (!c->protect_hostname) /* Setting is off: nothing to do. */
+                return 0;
+
+        if (ns_type_supported(NAMESPACE_UTS)) {
+                if (unshare(CLONE_NEWUTS) < 0) {
+                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) { /* Any other errno is fatal. */
+                                *ret_exit_status = EXIT_NAMESPACE;
+                                return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
+                        }
+
+                        log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup."); /* Non-fatal: continue below without a UTS namespace. */
+                }
+        } else
+                log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup."); /* Non-fatal as well. */
+
+#if HAVE_SECCOMP /* The seccomp part is only compiled in when HAVE_SECCOMP is set. */
+        int r;
+
+        if (skip_seccomp_unavailable(u, "ProtectHostname=")) /* Helper asks us to skip the filter: report success. */
+                return 0;
+
+        r = seccomp_protect_hostname();
+        if (r < 0) {
+                *ret_exit_status = EXIT_SECCOMP; /* Distinct exit status from the namespace failure above. */
+                return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
+        }
+#endif /* HAVE_SECCOMP */
+
+        return 0;
+}
+
  static void do_idle_pipe_dance(int idle_pipe[static 4]) {
          assert(idle_pipe);
  
@@ -1785,12 +1812,13 @@ static int build_environment(
  
                  tty_path = exec_context_tty_path(c);
  
-                /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
-                 * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
-                 * passes to PID 1 ends up all the way in the console login shown. */
+                /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
+                 * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
+                 * container manager passes to PID 1 ends up all the way in the console login shown. */
  
-                if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+                if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
                          term = getenv("TERM");
+
                  if (!term)
                          term = default_term_for_tty(tty_path);
  
@@ -2247,7 +2275,7 @@ static int setup_exec_directory(
  
                          if (type != EXEC_DIRECTORY_CONFIGURATION &&
                              readlink_and_make_absolute(p, &target) >= 0) {
-                                _cleanup_free_ char *q = NULL;
+                                _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
  
                                  /* This already exists and is a symlink? Interesting. Maybe it's one created
                                   * by DynamicUser=1 (see above)?
@@ -2256,13 +2284,22 @@ static int setup_exec_directory(
                                   * since they all support the private/ symlink logic at least in some
                                   * configurations, see above. */
  
+                                r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
+                                if (r < 0)
+                                        goto fail;
+
                                  q = path_join(params->prefix[type], "private", *rt);
                                  if (!q) {
                                          r = -ENOMEM;
                                          goto fail;
                                  }
  
-                                if (path_equal(q, target)) {
+                                /* /var/lib or friends may be symlinks. So, let's chase them also. */
+                                r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
+                                if (r < 0)
+                                        goto fail;
+
+                                if (path_equal(q_resolved, target_resolved)) {
  
                                          /* Hmm, apparently DynamicUser= was once turned on for this service,
                                           * but is no longer. Let's move the directory back up. */
@@ -2522,7 +2559,7 @@ static bool insist_on_sandboxing(
          assert(n_bind_mounts == 0 || bind_mounts);
  
          /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
-         * would alter the view on the file system beyond making things read-only or invisble, i.e. would
+         * would alter the view on the file system beyond making things read-only or invisible, i.e. would
           * rearrange stuff in a way we cannot ignore gracefully. */
  
          if (context->n_temporary_filesystems > 0)
@@ -2630,6 +2667,9 @@ static int apply_mount_namespace(
                              needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
                              needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
                              context->mount_flags,
+                            context->root_hash, context->root_hash_size, context->root_hash_path,
+                            context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
+                            context->root_verity,
                              DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
                              error_path);
  
@@ -3012,6 +3052,33 @@ static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **
          return using_subcgroup;
  }
  
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) { /* Fills *ret with a CPU set derived from c->numa_policy; returns a negative error on failure. */
+        _cleanup_(cpu_set_reset) CPUSet s = {}; /* Scratch set; cpu_set_reset() runs on it when it goes out of scope. */
+        int r;
+
+        assert(c);
+        assert(ret);
+
+        if (!c->numa_policy.nodes.set) { /* No NUMA mask: return 0 and leave *ret exactly as the caller passed it. */
+                log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+                return 0;
+        }
+
+        r = numa_to_cpu_set(&c->numa_policy, &s);
+        if (r < 0) /* Conversion failed: propagate the error, *ret is still untouched. */
+                return r;
+
+        cpu_set_reset(ret); /* Reset the output set before adding the converted CPUs to it. */
+
+        return cpu_set_add_all(ret, &s); /* Result of the add is returned to the caller as-is. */
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) { /* Read-only accessor for the cpu_affinity_from_numa flag. */
+        assert(c);
+
+        return c->cpu_affinity_from_numa; /* The flag exec_child() tests to choose the NUMA-derived CPU set. */
+}
+
  static int exec_child(
                  Unit *unit,
                  const ExecCommand *command,
@@ -3287,6 +3354,14 @@ static int exec_child(
                  }
          }
  
+        if (context->coredump_filter_set) {
+                r = set_coredump_filter(context->coredump_filter);
+                if (ERRNO_IS_PRIVILEGE(r))
+                        log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
+                else if (r < 0)
+                        return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
+        }
+
          if (context->nice_set) {
                  r = setpriority_closest(context->nice);
                  if (r < 0)
@@ -3309,11 +3384,26 @@ static int exec_child(
                  }
          }
  
-        if (context->cpu_set.set)
-                if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
+        if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+                _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+                const CPUSet *cpu_set;
+
+                if (context->cpu_affinity_from_numa) {
+                        r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+                        if (r < 0) {
+                                *exit_status = EXIT_CPUAFFINITY;
+                                return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+                        }
+
+                        cpu_set = &converted_cpu_set;
+                } else
+                        cpu_set = &context->cpu_set;
+
+                if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
                          *exit_status = EXIT_CPUAFFINITY;
                          return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
                  }
+        }
  
          if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
                  r = apply_numa_policy(&context->numa_policy);
@@ -3406,8 +3496,7 @@ static int exec_child(
                                     our_env,
                                     pass_env,
                                     context->environment,
-                                   files_env,
-                                   NULL);
+                                   files_env);
          if (!accum_env) {
                  *exit_status = EXIT_MEMORY;
                  return log_oom();
@@ -3538,25 +3627,10 @@ static int exec_child(
                  }
          }
  
-        if (context->protect_hostname) {
-                if (ns_type_supported(NAMESPACE_UTS)) {
-                        if (unshare(CLONE_NEWUTS) < 0) {
-                                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
-                                        *exit_status = EXIT_NAMESPACE;
-                                        return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
-                                }
-
-                                log_unit_warning(unit, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
-                        }
-                } else
-                        log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
-#if HAVE_SECCOMP
-                r = seccomp_protect_hostname();
-                if (r < 0) {
-                        *exit_status = EXIT_SECCOMP;
-                        return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
-                }
-#endif
+        if (needs_sandboxing) {
+                r = apply_protect_hostname(unit, context, exit_status);
+                if (r < 0)
+                        return r;
          }
  
          /* Drop groups as early as possible.
@@ -4125,6 +4199,13 @@ void exec_context_done(ExecContext *c) {
          c->working_directory = mfree(c->working_directory);
          c->root_directory = mfree(c->root_directory);
          c->root_image = mfree(c->root_image);
+        c->root_hash = mfree(c->root_hash);
+        c->root_hash_size = 0;
+        c->root_hash_path = mfree(c->root_hash_path);
+        c->root_hash_sig = mfree(c->root_hash_sig);
+        c->root_hash_sig_size = 0;
+        c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+        c->root_verity = mfree(c->root_verity);
          c->tty_path = mfree(c->tty_path);
          c->syslog_identifier = mfree(c->syslog_identifier);
          c->user = mfree(c->user);
@@ -4529,6 +4610,30 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
          if (c->root_image)
                  fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
  
+        if (c->root_hash) {
+                _cleanup_free_ char *encoded = NULL;
+                encoded = hexmem(c->root_hash, c->root_hash_size);
+                if (encoded)
+                        fprintf(f, "%sRootHash: %s\n", prefix, encoded);
+        }
+
+        if (c->root_hash_path)
+                fprintf(f, "%sRootHash: %s\n", prefix, c->root_hash_path);
+
+        if (c->root_hash_sig) { /* Dump the in-memory root hash signature, base64-encoded. */
+                _cleanup_free_ char *encoded = NULL; /* Stays NULL if encoding fails. */
+                ssize_t len;
+                len = base64mem(c->root_hash_sig, c->root_hash_sig_size, &encoded);
+                if (len > 0) /* A negative return is an error; plain "if (len)" would print a NULL string. */
+                        fprintf(f, "%sRootHashSignature: base64:%s\n", prefix, encoded);
+        }
+
+        if (c->root_hash_sig_path)
+                fprintf(f, "%sRootHashSignature: %s\n", prefix, c->root_hash_sig_path);
+
+        if (c->root_verity)
+                fprintf(f, "%sRootVerity: %s\n", prefix, c->root_verity);
+
          STRV_FOREACH(e, c->environment)
                  fprintf(f, "%sEnvironment: %s\n", prefix, *e);
  
@@ -4564,6 +4669,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                          "%sOOMScoreAdjust: %i\n",
                          prefix, c->oom_score_adjust);
  
+        if (c->coredump_filter_set)
+                fprintf(f,
+                        "%sCoredumpFilter: 0x%"PRIx64"\n",
+                        prefix, c->coredump_filter);
+
          for (i = 0; i < RLIM_NLIMITS; i++)
                  if (c->rlimit[i]) {
                          fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
@@ -4652,17 +4762,13 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                          prefix, yes_no(c->tty_vt_disallocate));
  
          if (IN_SET(c->std_output,
-                   EXEC_OUTPUT_SYSLOG,
                     EXEC_OUTPUT_KMSG,
                     EXEC_OUTPUT_JOURNAL,
-                   EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                     EXEC_OUTPUT_KMSG_AND_CONSOLE,
                     EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
              IN_SET(c->std_error,
-                   EXEC_OUTPUT_SYSLOG,
                     EXEC_OUTPUT_KMSG,
                     EXEC_OUTPUT_JOURNAL,
-                   EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
                     EXEC_OUTPUT_KMSG_AND_CONSOLE,
                     EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
  
@@ -4828,7 +4934,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                          "%sSystemCallFilter: ",
                          prefix);
  
-                if (!c->syscall_whitelist)
+                if (!c->syscall_allow_list)
                          fputc('~', f);
  
  #if HAVE_SECCOMP
@@ -4881,7 +4987,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                  r = namespace_flags_to_string(c->restrict_namespaces, &s);
                  if (r >= 0)
                          fprintf(f, "%sRestrictNamespaces: %s\n",
-                                prefix, s);
+                                prefix, strna(s));
          }
  
          if (c->network_namespace_path)
@@ -5363,7 +5469,10 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
          if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
                  return 0;
  
-        if (c->private_tmp) {
+        if (c->private_tmp &&
+            !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
+              (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
+               prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
                  r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
                  if (r < 0)
                          return r;
@@ -5677,8 +5786,6 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
          [EXEC_OUTPUT_INHERIT] = "inherit",
          [EXEC_OUTPUT_NULL] = "null",
          [EXEC_OUTPUT_TTY] = "tty",
-        [EXEC_OUTPUT_SYSLOG] = "syslog",
-        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
          [EXEC_OUTPUT_KMSG] = "kmsg",
          [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
          [EXEC_OUTPUT_JOURNAL] = "journal",