#if HAVE_APPARMOR
#include "apparmor-util.h"
#endif
+#include "argv-util.h"
#include "async.h"
#include "barrier.h"
#include "bpf-lsm.h"
#include "cgroup-setup.h"
#include "chase-symlinks.h"
#include "chown-recursive.h"
+#include "constants.h"
#include "cpu-set-util.h"
#include "creds-util.h"
#include "data-fd-util.h"
-#include "def.h"
#include "env-file.h"
#include "env-util.h"
#include "errno-list.h"
#include "signal-util.h"
#include "smack-util.h"
#include "socket-util.h"
+#include "sort-util.h"
#include "special.h"
#include "stat-util.h"
#include "string-table.h"
return 0;
}
-static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
- size_t n_fds;
+static int flags_fds(
+ const int fds[],
+ size_t n_socket_fds,
+ size_t n_fds,
+ bool nonblock) {
+
int r;
- n_fds = n_socket_fds + n_storage_fds;
if (n_fds <= 0)
return 0;
uid_t uid,
gid_t gid) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
int r;
assert(context);
}
static int acquire_path(const char *path, int flags, mode_t mode) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
int r;
assert(path);
int *ret_saved_stdin,
int *ret_saved_stdout) {
- _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
+ _cleanup_close_ int fd = -EBADF, saved_stdin = -EBADF, saved_stdout = -EBADF;
int r;
assert(ret_saved_stdin);
}
static void write_confirm_error(int err, const char *vc, const Unit *u) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
assert(vc);
u->id, u->description, cmdline);
continue; /* ask again */
case 'j':
- manager_dump_jobs(u->manager, stdout, " ");
+ manager_dump_jobs(u->manager, stdout, /* patterns= */ NULL, " ");
continue; /* ask again */
case 'n':
/* 'n' was removed in favor of 'f'. */
}
static void rename_process_from_path(const char *path) {
- char process_name[11];
+ _cleanup_free_ char *buf = NULL;
const char *p;
- size_t l;
- /* This resulting string must fit in 10 chars (i.e. the length
- * of "/sbin/init") to look pretty in /bin/ps */
+ assert(path);
+
+ /* This resulting string must fit in 10 chars (i.e. the length of "/sbin/init") to look pretty in
+ * /bin/ps */
- p = basename(path);
- if (isempty(p)) {
+ if (path_extract_filename(path, &buf) < 0) {
rename_process("(...)");
return;
}
- l = strlen(p);
+ size_t l = strlen(buf);
if (l > 8) {
- /* The end of the process name is usually more
- * interesting, since the first bit might just be
+ /* The end of the process name is usually more interesting, since the first bit might just be
* "systemd-" */
- p = p + l - 8;
+ p = buf + l - 8;
l = 8;
- }
+ } else
+ p = buf;
+ char process_name[11];
process_name[0] = '(';
memcpy(process_name+1, p, l);
process_name[1+l] = ')';
if (c->no_new_privileges)
return true;
- if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+ if (have_effective_cap(CAP_SYS_ADMIN) > 0) /* if we are privileged, we don't need NNP */
return false;
/* We need NNP if we have any form of seccomp and are unprivileged */
const ExecContext *c,
const ExecParameters *p,
size_t n_fds,
+ char **fdnames,
const char *home,
const char *username,
const char *shell,
return -ENOMEM;
our_env[n_env++] = x;
- joined = strv_join(p->fd_names, ":");
+ joined = strv_join(fdnames, ":");
if (!joined)
return -ENOMEM;
static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
- _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
- _cleanup_close_ int unshare_ready_fd = -1;
+ _cleanup_close_pair_ int errno_pipe[2] = PIPE_EBADF;
+ _cleanup_close_ int unshare_ready_fd = -EBADF;
_cleanup_(sigkill_waitp) pid_t pid = 0;
uint64_t c = 1;
ssize_t n;
* does not need CAP_SETUID to write the single line mapping to itself. */
/* Can only set up multiple mappings with CAP_SETUID. */
- if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
+ if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid))
r = asprintf(&uid_map,
UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
return -ENOMEM;
/* Can only set up multiple mappings with CAP_SETGID. */
- if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
+ if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid))
r = asprintf(&gid_map,
GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
if (r < 0)
return r;
if (r == 0) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
const char *a;
pid_t ppid;
goto fail;
}
- /* And link it up from the original place. Note that if a mount namespace is going to be
- * used, then this symlink remains on the host, and a new one for the child namespace will
- * be created later. */
- r = symlink_idempotent(pp, p, true);
- if (r < 0)
- goto fail;
+ if (!context->directories[type].items[i].only_create) {
+ /* And link it up from the original place.
+ * Notes
+ * 1) If a mount namespace is going to be used, then this symlink remains on
+ * the host, and a new one for the child namespace will be created later.
+ * 2) It is not necessary to create this symlink when one of its parent
+ * directories is specified and already created. E.g.
+ * StateDirectory=foo foo/bar
+ * In that case, the inodes that pp and p point to for "foo/bar" are the same:
+ * pp = "/var/lib/private/foo/bar"
+ * p = "/var/lib/foo/bar"
+ * and /var/lib/foo is a symlink to /var/lib/private/foo. So not only do
+ * we not need to create the symlink, we also cannot create it.
+ * See issue #24783. */
+ r = symlink_idempotent(pp, p, true);
+ if (r < 0)
+ goto fail;
+ }
} else {
_cleanup_free_ char *target = NULL;
bool ownership_ok) {
_cleanup_(unlink_and_freep) char *tmp = NULL;
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
int r;
r = tempfn_random_child("", "cred", &tmp);
assert(id);
assert(path);
assert(unit);
+ assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
assert(write_dfd >= 0);
assert(left);
bool ownership_ok) {
uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
- _cleanup_close_ int dfd = -1;
+ _cleanup_close_ int dfd = -EBADF;
ExecLoadCredential *lc;
ExecSetCredential *sc;
int r;
/* First, load credentials off disk (or acquire via AF_UNIX socket) */
HASHMAP_FOREACH(lc, context->load_credentials) {
- _cleanup_close_ int sub_fd = -1;
+ _cleanup_close_ int sub_fd = -EBADF;
/* If this is an absolute path, then try to open it as a directory. If that works, then we'll
* recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
lc->path,
lc->encrypted,
unit,
- -1,
+ AT_FDCWD,
dfd,
uid,
ownership_ok,
if (!params->prefix[t])
continue;
- n += context->directories[t].n_items;
+ for (size_t i = 0; i < context->directories[t].n_items; i++)
+ n += !context->directories[t].items[i].only_create;
}
if (n <= 0) {
for (size_t i = 0; i < context->directories[t].n_items; i++) {
char *s, *d;
+ /* When one of the parent directories is in the list, we cannot create the symlink
+ * for the child directory. See also the comments in setup_exec_directory(). */
+ if (context->directories[t].items[i].only_create)
+ continue;
+
if (exec_directory_is_private(context, t))
s = path_join(params->prefix[t], "private", context->directories[t].items[i].path);
else
return r;
}
- if (!exec_directory_is_private(context, dt) || exec_context_with_rootfs(context))
+ if (!exec_directory_is_private(context, dt) ||
+ exec_context_with_rootfs(context) ||
+ context->directories[dt].items[i].only_create)
continue;
private_path = path_join(params->prefix[dt], "private", context->directories[dt].items[i].path);
assert(ret_fd);
if (fd < 0) {
- *ret_fd = -1;
+ *ret_fd = -EBADF;
return 0;
}
return 1;
}
+/* Connects to the AF_UNIX socket behind the already opened descriptor 'ofd', addressing it through
+ * the path produced by FORMAT_PROC_FD_PATH(ofd). The socket types SOCK_DGRAM, SOCK_STREAM and
+ * SOCK_SEQPACKET are tried in that order; a type that fails with EPROTOTYPE is skipped, any other
+ * failure is logged against the unit and returned right away.
+ *
+ * Returns the connected socket fd on success, or a negative value if no type could be connected. */
+static int connect_unix_harder(Unit *u, const OpenFile *of, int ofd) {
+        static const int candidate_types[] = { SOCK_DGRAM, SOCK_STREAM, SOCK_SEQPACKET };
+        union sockaddr_union sa = {
+                .un.sun_family = AF_UNIX,
+        };
+        socklen_t salen;
+        int r;
+
+        assert(u);
+        assert(of);
+        assert(ofd >= 0);
+
+        r = sockaddr_un_set_path(&sa.un, FORMAT_PROC_FD_PATH(ofd));
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to set sockaddr for %s: %m", of->path);
+
+        /* On success the helper's return value is used as the address length for connect(). */
+        salen = r;
+
+        for (size_t k = 0; k < ELEMENTSOF(candidate_types); k++) {
+                _cleanup_close_ int sock = -EBADF;
+
+                sock = socket(AF_UNIX, candidate_types[k] | SOCK_CLOEXEC, 0);
+                if (sock < 0)
+                        return log_unit_error_errno(u, errno, "Failed to create socket for %s: %m", of->path);
+
+                r = RET_NERRNO(connect(sock, &sa.sa, salen));
+                if (r >= 0)
+                        return TAKE_FD(sock);
+
+                /* Only EPROTOTYPE moves us on to the next socket type; everything else is fatal. */
+                if (r != -EPROTOTYPE)
+                        return log_unit_error_errno(u, r, "Failed to connect socket for %s: %m", of->path);
+        }
+
+        return log_unit_error_errno(u, SYNTHETIC_ERRNO(EPROTOTYPE), "Failed to connect socket for \"%s\".", of->path);
+}
+
+/* Opens the object named by of->path and returns a usable file descriptor for it.
+ *
+ * The path is first opened with O_PATH and inspected with fstat(); everything after that goes through
+ * this descriptor rather than through the path again:
+ *   - for a socket inode, a connection is made via connect_unix_harder(), and the sending side is
+ *     shut down when OPENFILE_READ_ONLY is set;
+ *   - for anything else, the descriptor is reopened with O_RDONLY or O_RDWR (depending on
+ *     OPENFILE_READ_ONLY), plus O_APPEND or, if append is not requested, O_TRUNC when the
+ *     corresponding flag is set.
+ *
+ * Returns the new fd (>= 0) on success, a negative value on failure. All failures are logged
+ * against the unit 'u'. */
+static int get_open_file_fd(Unit *u, const OpenFile *of) {
+        struct stat st;
+        _cleanup_close_ int fd = -EBADF, ofd = -EBADF;
+
+        assert(u);
+        assert(of);
+
+        ofd = open(of->path, O_PATH | O_CLOEXEC);
+        if (ofd < 0)
+                return log_unit_error_errno(u, errno, "Could not open \"%s\": %m", of->path);
+
+        if (fstat(ofd, &st) < 0)
+                return log_unit_error_errno(u, errno, "Failed to stat %s: %m", of->path);
+
+        if (S_ISSOCK(st.st_mode)) {
+                fd = connect_unix_harder(u, of, ofd);
+                if (fd < 0)
+                        return fd; /* already logged by connect_unix_harder() */
+
+                if (FLAGS_SET(of->flags, OPENFILE_READ_ONLY) && shutdown(fd, SHUT_WR) < 0)
+                        return log_unit_error_errno(u, errno, "Failed to shutdown send for socket %s: %m", of->path);
+
+                log_unit_debug(u, "socket %s opened (fd=%d)", of->path, fd);
+        } else {
+                int flags = FLAGS_SET(of->flags, OPENFILE_READ_ONLY) ? O_RDONLY : O_RDWR;
+
+                /* Append takes precedence: truncation is only considered when append is not set. */
+                if (FLAGS_SET(of->flags, OPENFILE_APPEND))
+                        flags |= O_APPEND;
+                else if (FLAGS_SET(of->flags, OPENFILE_TRUNCATE))
+                        flags |= O_TRUNC;
+
+                fd = fd_reopen(ofd, flags | O_CLOEXEC);
+                if (fd < 0)
+                        return log_unit_error_errno(u, fd, "Failed to open file %s: %m", of->path);
+
+                log_unit_debug(u, "file %s opened (fd=%d)", of->path, fd);
+        }
+
+        return TAKE_FD(fd);
+}
+
+/* Walks the 'open_files' list, obtains a file descriptor for each entry via get_open_file_fd() and
+ * appends it to *fds, with the entry's fdname appended to *fdnames. *n_fds is incremented once per
+ * descriptor added.
+ *
+ * An entry whose descriptor cannot be obtained is skipped with a debug message if it carries
+ * OPENFILE_GRACEFUL; otherwise the error is returned. Returns 0 on success, negative on failure. */
+static int collect_open_file_fds(
+                Unit *u,
+                OpenFile* open_files,
+                int **fds,
+                char ***fdnames,
+                size_t *n_fds) {
+
+        assert(u);
+        assert(fds);
+        assert(fdnames);
+        assert(n_fds);
+
+        LIST_FOREACH(open_files, of, open_files) {
+                _cleanup_close_ int opened = -EBADF;
+                int r;
+
+                opened = get_open_file_fd(u, of);
+                if (opened < 0 && FLAGS_SET(of->flags, OPENFILE_GRACEFUL)) {
+                        log_unit_debug_errno(u, opened, "Failed to get OpenFile= file descriptor for %s, ignoring: %m", of->path);
+                        continue;
+                }
+                if (opened < 0)
+                        return opened;
+
+                /* Grow the array first; the fd is only handed over to it once the name is stored too. */
+                if (!GREEDY_REALLOC(*fds, *n_fds + 1))
+                        return -ENOMEM;
+
+                r = strv_extend(fdnames, of->fdname);
+                if (r < 0)
+                        return r;
+
+                (*fds)[(*n_fds)++] = TAKE_FD(opened);
+        }
+
+        return 0;
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
DynamicCreds *dcreds,
int socket_fd,
const int named_iofds[static 3],
- int *fds,
+ int *params_fds,
size_t n_socket_fds,
size_t n_storage_fds,
char **files_env,
int secure_bits;
_cleanup_free_ gid_t *gids_after_pam = NULL;
int ngids_after_pam = 0;
+ _cleanup_free_ int *fds = NULL;
+ _cleanup_strv_free_ char **fdnames = NULL;
assert(unit);
assert(command);
/* In case anything used libc syslog(), close this here, too */
closelog();
+ fds = newdup(int, params_fds, n_fds);
+ if (!fds) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ fdnames = strv_copy((char**) params->fd_names);
+ if (!fdnames) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = collect_open_file_fds(unit, params->open_files, &fds, &fdnames, &n_fds);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to get OpenFile= file descriptors: %m");
+ }
+
int keep_fds[n_fds + 3];
memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
n_keep_fds = n_fds;
context,
params,
n_fds,
+ fdnames,
home,
username,
shell,
}
}
- if (needs_sandboxing && context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
+ if (needs_sandboxing && context->private_users && have_effective_cap(CAP_SYS_ADMIN) <= 0) {
/* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
* Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
* set up the all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
/* If the user namespace was not set up above, try to do it now.
* It's preferred to set up the user namespace later (after all other namespaces) so as not to be
- * restricted by rules pertaining to combining user namspaces with other namespaces (e.g. in the
+ * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
* case of mount namespaces being less privileged when the mount point list is copied from a
* different user namespace). */
* shall execute. */
_cleanup_free_ char *executable = NULL;
- _cleanup_close_ int executable_fd = -1;
+ _cleanup_close_ int executable_fd = -EBADF;
r = find_executable_full(command->path, /* root= */ NULL, context->exec_search_path, false, &executable, &executable_fd);
if (r < 0) {
if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
#if HAVE_SELINUX
if (needs_sandboxing && use_selinux && params->selinux_context_net) {
- int fd = -1;
+ int fd = -EBADF;
if (socket_fd >= 0)
fd = socket_fd;
if (r >= 0)
r = shift_fds(fds, n_fds);
if (r >= 0)
- r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
+ r = flags_fds(fds, n_socket_fds, n_fds, context->non_blocking);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
socket_fd = params->fds[0];
} else {
- socket_fd = -1;
+ socket_fd = -EBADF;
fds = params->fds;
n_socket_fds = params->n_socket_fds;
n_storage_fds = params->n_storage_fds;
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
- /* Normally we would not propagate the oomd xattrs to children but since we created this
+ /* Normally we would not propagate the xattrs to children but since we created this
* sub-cgroup internally we should do it. */
cgroup_oomd_xattr_apply(unit, subcgroup_path);
+ cgroup_log_xattr_apply(unit, subcgroup_path);
}
}
c->log_level_max = -1;
exec_context_free_log_extra_fields(c);
+ c->log_filter_allowed_patterns = set_free(c->log_filter_allowed_patterns);
+ c->log_filter_denied_patterns = set_free(c->log_filter_denied_patterns);
c->log_ratelimit_interval_usec = 0;
c->log_ratelimit_burst = 0;
return 0;
}
+/* Removes the directory /run/systemd/propagate/<unit id> belonging to 'u'. Nothing is done when 'u'
+ * is NULL or when MANAGER_IS_SYSTEM() is false for its manager. A failing rmdir() is only logged at
+ * debug level (and not at all for ENOENT), so the sole error returned is -ENOMEM. */
+int exec_context_destroy_mount_ns_dir(Unit *u) {
+        _cleanup_free_ char *propagate_dir = NULL;
+
+        if (!u)
+                return 0;
+        if (!MANAGER_IS_SYSTEM(u->manager))
+                return 0;
+
+        propagate_dir = path_join("/run/systemd/propagate/", u->id);
+        if (!propagate_dir)
+                return -ENOMEM;
+
+        /* This is only filled transiently (see mount_in_namespace()), should be empty or even non-existent */
+        if (rmdir(propagate_dir) >= 0)
+                return 0;
+
+        if (errno != ENOENT)
+                log_unit_debug_errno(u, errno, "Unable to remove propagation dir '%s', ignoring: %m", propagate_dir);
+
+        return 0;
+}
+
static void exec_command_done(ExecCommand *c) {
assert(c);
if (c->log_ratelimit_burst > 0)
fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
+ if (!set_isempty(c->log_filter_allowed_patterns) || !set_isempty(c->log_filter_denied_patterns)) {
+ fprintf(f, "%sLogFilterPatterns:", prefix);
+
+ char *pattern;
+ SET_FOREACH(pattern, c->log_filter_allowed_patterns)
+ fprintf(f, " %s", pattern);
+ SET_FOREACH(pattern, c->log_filter_denied_patterns)
+ fprintf(f, " ~%s", pattern);
+ fputc('\n', f);
+ }
+
for (size_t j = 0; j < c->n_log_extra_fields; j++) {
fprintf(f, "%sLogExtraFields: ", prefix);
fwrite(c->log_extra_fields[j].iov_base,
}
void exec_context_revert_tty(ExecContext *c) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
const char *path;
struct stat st;
int r;
if (*l) {
/* It's kind of important, that we keep the order here */
- LIST_FIND_TAIL(command, *l, end);
+ end = LIST_FIND_TAIL(command, *l);
LIST_INSERT_AFTER(command, *l, end, e);
} else
*l = e;
*n = (ExecRuntime) {
.id = TAKE_PTR(id_copy),
- .netns_storage_socket = { -1, -1 },
- .ipcns_storage_socket = { -1, -1 },
+ .netns_storage_socket = PIPE_EBADF,
+ .ipcns_storage_socket = PIPE_EBADF,
};
*ret = n;
ExecRuntime **ret) {
_cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL;
- _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 }, ipcns_storage_socket[2] = { -1, -1 };
+ _cleanup_close_pair_ int netns_storage_socket[2] = PIPE_EBADF, ipcns_storage_socket[2] = PIPE_EBADF;
int r;
assert(m);
d->mode = 0755;
}
-int exec_directory_add(ExecDirectoryItem **d, size_t *n, const char *path, char **symlinks) {
+/* Returns the first item of 'd' whose path compares equal to 'path' according to path_equal(), or
+ * NULL if there is none. */
+static ExecDirectoryItem *exec_directory_find(ExecDirectory *d, const char *path) {
+        size_t idx = 0;
+
+        assert(d);
+        assert(path);
+
+        while (idx < d->n_items) {
+                ExecDirectoryItem *candidate = &d->items[idx];
+
+                if (path_equal(candidate->path, path))
+                        return candidate;
+
+                idx++;
+        }
+
+        return NULL;
+}
+
+int exec_directory_add(ExecDirectory *d, const char *path, const char *symlink) { /* returns 1 if a new item was appended, 0 if an existing item was updated, negative on failure */
_cleanup_strv_free_ char **s = NULL;
_cleanup_free_ char *p = NULL;
+ ExecDirectoryItem *existing;
+ int r;
assert(d);
- assert(n);
assert(path);
+ existing = exec_directory_find(d, path); /* an item for this path may already be in the list */
+ if (existing) {
+ r = strv_extend(&existing->symlinks, symlink); /* only the existing item's symlink list is extended; no second item is created */
+ if (r < 0)
+ return r;
+
+ return 0; /* existing item is updated */
+ }
+
p = strdup(path);
if (!p)
return -ENOMEM;
- if (symlinks) {
- s = strv_copy(symlinks);
+ if (symlink) { /* without a symlink, s stays NULL and the new item gets no symlink list */
+ s = strv_new(symlink);
if (!s)
return -ENOMEM;
}
- if (!GREEDY_REALLOC(*d, *n + 1))
+ if (!GREEDY_REALLOC(d->items, d->n_items + 1)) /* make room for one more item before storing it */
return -ENOMEM;
- (*d)[(*n) ++] = (ExecDirectoryItem) {
+ d->items[d->n_items++] = (ExecDirectoryItem) { /* NOTE(review): TAKE_PTR() presumably hands p and s over to the new item, confirm */
.path = TAKE_PTR(p),
.symlinks = TAKE_PTR(s),
};
- return 0;
+ return 1; /* new item is added */
+}
+
+/* Ordering callback handed to typesafe_qsort() by exec_directory_sort(): two items are compared
+ * solely by their paths, using path_compare(). */
+static int exec_directory_item_compare_func(const ExecDirectoryItem *a, const ExecDirectoryItem *b) {
+        const char *lhs, *rhs;
+
+        assert(a);
+        assert(b);
+
+        lhs = a->path;
+        rhs = b->path;
+
+        return path_compare(lhs, rhs);
+}
+
+void exec_directory_sort(ExecDirectory *d) {
+        assert(d);
+
+        /* Order the items by path so that setup_exec_directory() always processes a parent directory
+         * before its children: even with StateDirectory=foo/bar foo, "foo" has to be created first and
+         * "foo/bar" afterwards. In addition, the .only_create flag is set on every item for which an
+         * earlier item in the sorted list is a path prefix. See also the comments in
+         * setup_exec_directory() and issue #24783. */
+
+        if (d->n_items < 2)
+                return;
+
+        typesafe_qsort(d->items, d->n_items, exec_directory_item_compare_func);
+
+        for (size_t child = 1; child < d->n_items; child++) {
+                bool covered = false;
+
+                /* Stop at the first earlier item that is a prefix of this one. */
+                for (size_t parent = 0; parent < child && !covered; parent++)
+                        covered = !!path_startswith(d->items[child].path, d->items[parent].path);
+
+                if (covered)
+                        d->items[child].only_create = true;
+        }
}
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(exec_set_credential_hash_ops, char, string_hash_func, string_compare_func, ExecSetCredential, exec_set_credential_free);