]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/execute.c
tree-wide: port various places over to use chmod_and_chown()
[thirdparty/systemd.git] / src / core / execute.c
index 39d9f07518be500a829ad43997ee81c1cfb40eae..ab2a4de37a515ae2e70f317674f674ea2ecc05e9 100644 (file)
@@ -65,6 +65,7 @@
 #include "log.h"
 #include "macro.h"
 #include "manager.h"
+#include "memory-util.h"
 #include "missing.h"
 #include "mkdir.h"
 #include "namespace.h"
@@ -91,7 +92,6 @@
 #include "umask-util.h"
 #include "unit.h"
 #include "user-util.h"
-#include "util.h"
 #include "utmp-wtmp.h"
 
 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
@@ -728,25 +728,24 @@ static int setup_output(
 }
 
 static int chown_terminal(int fd, uid_t uid) {
-        struct stat st;
+        int r;
 
         assert(fd >= 0);
 
         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
-        if (isatty(fd) < 1)
-                return 0;
-
-        /* This might fail. What matters are the results. */
-        (void) fchown(fd, uid, -1);
-        (void) fchmod(fd, TTY_MODE);
+        if (isatty(fd) < 1) {
+                if (IN_SET(errno, EINVAL, ENOTTY))
+                        return 0; /* not a tty */
 
-        if (fstat(fd, &st) < 0)
                 return -errno;
+        }
 
-        if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
-                return -EPERM;
+        /* This might fail. What matters are the results. */
+        r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
+        if (r < 0)
+                return r;
 
-        return 0;
+        return 1;
 }
 
 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
@@ -1404,6 +1403,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
         return context_has_address_families(c) ||
                 c->memory_deny_write_execute ||
                 c->restrict_realtime ||
+                c->restrict_suid_sgid ||
                 exec_context_restrict_namespaces_set(c) ||
                 c->protect_kernel_tunables ||
                 c->protect_kernel_modules ||
@@ -1438,7 +1438,7 @@ static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_
         if (skip_seccomp_unavailable(u, "SystemCallFilter="))
                 return 0;
 
-        negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
+        negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
 
         if (c->syscall_whitelist) {
                 default_action = negative_action;
@@ -1509,6 +1509,19 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         return seccomp_restrict_realtime();
 }
 
+static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
+        assert(u);
+        assert(c);
+
+        if (!c->restrict_suid_sgid)
+                return 0;
+
+        if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
+                return 0;
+
+        return seccomp_restrict_suid_sgid();
+}
+
 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
         assert(u);
         assert(c);
@@ -1697,6 +1710,8 @@ static int build_environment(
                 x = strappend("HOME=", home);
                 if (!x)
                         return -ENOMEM;
+
+                path_simplify(x + 5, true);
                 our_env[n_env++] = x;
         }
 
@@ -1716,6 +1731,8 @@ static int build_environment(
                 x = strappend("SHELL=", shell);
                 if (!x)
                         return -ENOMEM;
+
+                path_simplify(x + 6, true);
                 our_env[n_env++] = x;
         }
 
@@ -2070,7 +2087,7 @@ static int setup_exec_directory(
         STRV_FOREACH(rt, context->directories[type].paths) {
                 _cleanup_free_ char *p = NULL, *pp = NULL;
 
-                p = strjoin(params->prefix[type], "/", *rt);
+                p = path_join(params->prefix[type], *rt);
                 if (!p) {
                         r = -ENOMEM;
                         goto fail;
@@ -2081,7 +2098,8 @@ static int setup_exec_directory(
                         goto fail;
 
                 if (context->dynamic_user &&
-                    !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
+                    (!IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) ||
+                     (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode != EXEC_PRESERVE_NO))) {
                         _cleanup_free_ char *private_root = NULL;
 
                         /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
@@ -2106,7 +2124,7 @@ static int setup_exec_directory(
                          * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
                          * files or sockets with other services. */
 
-                        private_root = strjoin(params->prefix[type], "/private");
+                        private_root = path_join(params->prefix[type], "private");
                         if (!private_root) {
                                 r = -ENOMEM;
                                 goto fail;
@@ -2117,7 +2135,7 @@ static int setup_exec_directory(
                         if (r < 0)
                                 goto fail;
 
-                        pp = strjoin(private_root, "/", *rt);
+                        pp = path_join(private_root, *rt);
                         if (!pp) {
                                 r = -ENOMEM;
                                 goto fail;
@@ -2152,39 +2170,47 @@ static int setup_exec_directory(
                         if (r < 0)
                                 goto fail;
 
-                        /* Lock down the access mode */
-                        if (chmod(pp, context->directories[type].mode) < 0) {
-                                r = -errno;
-                                goto fail;
-                        }
                 } else {
                         r = mkdir_label(p, context->directories[type].mode);
-                        if (r < 0 && r != -EEXIST)
-                                goto fail;
-                        if (r == -EEXIST) {
-                                struct stat st;
-
-                                if (stat(p, &st) < 0) {
-                                        r = -errno;
+                        if (r < 0) {
+                                if (r != -EEXIST)
                                         goto fail;
-                                }
-                                if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
-                                        log_warning("%s \'%s\' already exists but the mode is different. "
-                                                    "(filesystem: %o %sMode: %o)",
-                                                    exec_directory_type_to_string(type), *rt,
-                                                    st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
-                                if (!context->dynamic_user)
+
+                                if (type == EXEC_DIRECTORY_CONFIGURATION) {
+                                        struct stat st;
+
+                                        /* Don't change the owner/access mode of the configuration directory,
+                                         * as in the common case it is not written to by a service, and shall
+                                         * not be writable. */
+
+                                        if (stat(p, &st) < 0) {
+                                                r = -errno;
+                                                goto fail;
+                                        }
+
+                                        /* Still complain if the access mode doesn't match */
+                                        if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
+                                                log_warning("%s \'%s\' already exists but the mode is different. "
+                                                            "(File system: %o %sMode: %o)",
+                                                            exec_directory_type_to_string(type), *rt,
+                                                            st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
+
                                         continue;
+                                }
                         }
                 }
 
-                /* Don't change the owner of the configuration directory, as in the common case it is not written to by
-                 * a service, and shall not be writable. */
-                if (type == EXEC_DIRECTORY_CONFIGURATION)
-                        continue;
+                /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
+                 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
+                 * current UID/GID ownership.) */
+                r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
+                if (r < 0)
+                        goto fail;
 
-                /* Then, change the ownership of the whole tree, if necessary */
-                r = path_chown_recursive(pp ?: p, uid, gid);
+                /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
+                 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
+                 * assignments to exist.*/
+                r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
                 if (r < 0)
                         goto fail;
         }
@@ -2354,6 +2380,7 @@ static int compile_bind_mounts(
                                 .source = s,
                                 .destination = d,
                                 .read_only = false,
+                                .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
                                 .recursive = true,
                                 .ignore_enoent = false,
                         };
@@ -2378,7 +2405,8 @@ static int apply_mount_namespace(
                 const ExecCommand *command,
                 const ExecContext *context,
                 const ExecParameters *params,
-                const ExecRuntime *runtime) {
+                const ExecRuntime *runtime,
+                char **error_path) {
 
         _cleanup_strv_free_ char **empty_directories = NULL;
         char *tmp = NULL, *var = NULL;
@@ -2454,12 +2482,13 @@ static int apply_mount_namespace(
                             needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
                             needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
                             context->mount_flags,
-                            DISSECT_IMAGE_DISCARD_ON_LOOP);
+                            DISSECT_IMAGE_DISCARD_ON_LOOP,
+                            error_path);
 
         bind_mount_free_many(bind_mounts, n_bind_mounts);
 
         /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
-         * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively
+         * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
          * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
          * completely different execution environment. */
         if (r == -ENOANO) {
@@ -2738,12 +2767,6 @@ static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char
         if (!c->working_directory_home)
                 return 0;
 
-        if (uid == 0) {
-                /* Hardcode /root as home directory for UID 0 */
-                *home = "/root";
-                return 1;
-        }
-
         r = get_home_dir(buf);
         if (r < 0)
                 return r;
@@ -3062,6 +3085,14 @@ static int exec_child(
                 }
         }
 
+        if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
+                r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
+                if (r < 0) {
+                        *exit_status = EXIT_NETWORK;
+                        return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
+                }
+        }
+
         r = setup_input(context, params, socket_fd, named_iofds);
         if (r < 0) {
                 *exit_status = EXIT_STDIN;
@@ -3148,7 +3179,7 @@ static int exec_child(
                                       USER_PROCESS,
                                       username);
 
-        if (context->user) {
+        if (uid_is_valid(uid)) {
                 r = chown_terminal(STDIN_FILENO, uid);
                 if (r < 0) {
                         *exit_status = EXIT_STDIN;
@@ -3272,23 +3303,30 @@ static int exec_child(
                 }
         }
 
-        if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
+        if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
+
                 if (ns_type_supported(NAMESPACE_NET)) {
                         r = setup_netns(runtime->netns_storage_socket);
                         if (r < 0) {
                                 *exit_status = EXIT_NETWORK;
                                 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
                         }
+                } else if (context->network_namespace_path) {
+                        *exit_status = EXIT_NETWORK;
+                        return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
                 } else
                         log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
         }
 
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
         if (needs_mount_namespace) {
-                r = apply_mount_namespace(unit, command, context, params, runtime);
+                _cleanup_free_ char *error_path = NULL;
+
+                r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
-                        return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
+                        return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
+                                                    error_path ? ": " : "", strempty(error_path));
                 }
         }
 
@@ -3449,7 +3487,7 @@ static int exec_child(
         }
 
         if (needs_setuid) {
-                if (context->user) {
+                if (uid_is_valid(uid)) {
                         r = enforce_user(context, uid);
                         if (r < 0) {
                                 *exit_status = EXIT_USER;
@@ -3547,6 +3585,12 @@ static int exec_child(
                         return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
                 }
 
+                r = apply_restrict_suid_sgid(unit, context);
+                if (r < 0) {
+                        *exit_status = EXIT_SECCOMP;
+                        return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
+                }
+
                 r = apply_restrict_namespaces(unit, context);
                 if (r < 0) {
                         *exit_status = EXIT_SECCOMP;
@@ -3879,6 +3923,8 @@ void exec_context_done(ExecContext *c) {
 
         c->stdin_data = mfree(c->stdin_data);
         c->stdin_data_size = 0;
+
+        c->network_namespace_path = mfree(c->network_namespace_path);
 }
 
 int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
@@ -3892,12 +3938,12 @@ int exec_context_destroy_runtime_directory(const ExecContext *c, const char *run
         STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
                 _cleanup_free_ char *p;
 
-                p = strjoin(runtime_prefix, "/", *i);
+                p = path_join(runtime_prefix, *i);
                 if (!p)
                         return -ENOMEM;
 
-                /* We execute this synchronously, since we need to be sure this is gone when we start the service
-                 * next. */
+                /* We execute this synchronously, since we need to be sure this is gone when we start the
+                 * service next. */
                 (void) rm_rf(p, REMOVE_ROOT);
         }
 
@@ -4138,17 +4184,23 @@ static bool tty_may_match_dev_console(const char *tty) {
                 return true; /* if we could not resolve, assume it may */
 
         /* "tty0" means the active VC, so it may be the same sometimes */
-        return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
+        return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
 }
 
-bool exec_context_may_touch_console(const ExecContext *ec) {
+static bool exec_context_may_touch_tty(const ExecContext *ec) {
+        assert(ec);
 
-        return (ec->tty_reset ||
+        return ec->tty_reset ||
                 ec->tty_vhangup ||
                 ec->tty_vt_disallocate ||
                 is_terminal_input(ec->std_input) ||
                 is_terminal_output(ec->std_output) ||
-                is_terminal_output(ec->std_error)) &&
+                is_terminal_output(ec->std_error);
+}
+
+bool exec_context_may_touch_console(const ExecContext *ec) {
+
+        return exec_context_may_touch_tty(ec) &&
                tty_may_match_dev_console(exec_context_tty_path(ec));
 }
 
@@ -4190,6 +4242,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 "%sIgnoreSIGPIPE: %s\n"
                 "%sMemoryDenyWriteExecute: %s\n"
                 "%sRestrictRealtime: %s\n"
+                "%sRestrictSUIDSGID: %s\n"
                 "%sKeyringMode: %s\n"
                 "%sProtectHostname: %s\n",
                 prefix, c->umask,
@@ -4209,6 +4262,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->ignore_sigpipe),
                 prefix, yes_no(c->memory_deny_write_execute),
                 prefix, yes_no(c->restrict_realtime),
+                prefix, yes_no(c->restrict_suid_sgid),
                 prefix, exec_keyring_mode_to_string(c->keyring_mode),
                 prefix, yes_no(c->protect_hostname));
 
@@ -4556,6 +4610,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                                 prefix, s);
         }
 
+        if (c->network_namespace_path)
+                fprintf(f,
+                        "%sNetworkNamespacePath: %s\n",
+                        prefix, c->network_namespace_path);
+
         if (c->syscall_errno > 0) {
                 const char *errno_name;
 
@@ -4567,11 +4626,6 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 else
                         fprintf(f, "%d\n", c->syscall_errno);
         }
-
-        if (c->apparmor_profile)
-                fprintf(f,
-                        "%sAppArmorProfile: %s%s\n",
-                        prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
 }
 
 bool exec_context_maintains_privileges(const ExecContext *c) {
@@ -4615,6 +4669,30 @@ void exec_context_free_log_extra_fields(ExecContext *c) {
         c->n_log_extra_fields = 0;
 }
 
+void exec_context_revert_tty(ExecContext *c) {
+        int r;
+
+        assert(c);
+
+        /* First, reset the TTY (possibly kicking everybody else from the TTY) */
+        exec_context_tty_reset(c, NULL);
+
+        /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
+         * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
+         * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
+
+        if (exec_context_may_touch_tty(c)) {
+                const char *path;
+
+                path = exec_context_tty_path(c);
+                if (path) {
+                        r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
+                        if (r < 0 && r != -ENOENT)
+                                log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
+                }
+        }
+}
+
 void exec_status_start(ExecStatus *s, pid_t pid) {
         assert(s);
 
@@ -4639,12 +4717,8 @@ void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int
         s->code = code;
         s->status = status;
 
-        if (context) {
-                if (context->utmp_id)
-                        (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
-
-                exec_context_tty_reset(context, NULL);
-        }
+        if (context && context->utmp_id)
+                (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
 }
 
 void exec_status_reset(ExecStatus *s) {
@@ -4860,18 +4934,23 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
 }
 
 static void exec_runtime_freep(ExecRuntime **rt) {
-        if (*rt)
-                (void) exec_runtime_free(*rt, false);
+        (void) exec_runtime_free(*rt, false);
 }
 
-static int exec_runtime_allocate(ExecRuntime **rt) {
-        assert(rt);
+static int exec_runtime_allocate(ExecRuntime **ret) {
+        ExecRuntime *n;
 
-        *rt = new0(ExecRuntime, 1);
-        if (!*rt)
+        assert(ret);
+
+        n = new(ExecRuntime, 1);
+        if (!n)
                 return -ENOMEM;
 
-        (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
+        *n = (ExecRuntime) {
+                .netns_storage_socket = { -1, -1 },
+        };
+
+        *ret = n;
         return 0;
 }
 
@@ -4934,7 +5013,7 @@ static int exec_runtime_add(
 
 static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
         _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
-        _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
+        _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
         int r;
 
         assert(m);
@@ -4942,7 +5021,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
         assert(id);
 
         /* It is not necessary to create ExecRuntime object. */
-        if (!c->private_network && !c->private_tmp)
+        if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
                 return 0;
 
         if (c->private_tmp) {
@@ -4951,7 +5030,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
                         return r;
         }
 
-        if (c->private_network) {
+        if (c->private_network || c->network_namespace_path) {
                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
                         return -errno;
         }
@@ -4961,8 +5040,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
                 return r;
 
         /* Avoid cleanup */
-        netns_storage_socket[0] = -1;
-        netns_storage_socket[1] = -1;
+        netns_storage_socket[0] = netns_storage_socket[1] = -1;
         return 1;
 }