/* SPDX-License-Identifier: LGPL-2.1+ */
-/***
- This file is part of systemd.
-
- Copyright 2010 Lennart Poettering
-***/
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
+#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include "missing.h"
#include "process-util.h"
#include "raw-clone.h"
+#include "rlimit-util.h"
#include "signal-util.h"
#include "stat-util.h"
#include "string-table.h"
return (unsigned char) state;
}
-int get_process_comm(pid_t pid, char **name) {
+int get_process_comm(pid_t pid, char **ret) {
+ _cleanup_free_ char *escaped = NULL, *comm = NULL;
const char *p;
int r;
- assert(name);
+ assert(ret);
assert(pid >= 0);
+ escaped = new(char, TASK_COMM_LEN);
+ if (!escaped)
+ return -ENOMEM;
+
p = procfs_file_alloca(pid, "comm");
- r = read_one_line_file(p, name);
+ r = read_one_line_file(p, &comm);
if (r == -ENOENT)
return -ESRCH;
+ if (r < 0)
+ return r;
- return r;
+ /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
+ cellescape(escaped, TASK_COMM_LEN, comm);
+
+ *ret = TAKE_PTR(escaped);
+ return 0;
}
int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
memcpy(ans, "[...]", max_length-1);
ans[max_length-1] = 0;
} else {
- char *e;
-
t[max_length - 6] = 0;
/* Chop off final spaces */
- e = strchr(t, 0);
- while (e > t && isspace(e[-1]))
- e--;
- *e = 0;
+ delete_trailing_chars(t, WHITESPACE);
ans = strjoin("[", t, "...]");
}
* can use PR_SET_NAME, which sets the thread name for the calling thread. */
if (prctl(PR_SET_NAME, name) < 0)
log_debug_errno(errno, "PR_SET_NAME failed: %m");
- if (l > 15) /* Linux process names can be 15 chars at max */
+ if (l >= TASK_COMM_LEN) /* Linux process names can be 15 chars at max */
truncated = true;
/* Second step, change glibc's ID of the process name. */
/* Now, let's tell the kernel about this new memory */
if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
- log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
- (void) munmap(nn, nn_size);
- goto use_saved_argv;
- }
+ /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be
+ * below the desired start address, in which case the kernel may have kicked this back due
+ * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
+ * action). The proper solution would be to have a prctl() API that could set both start+end
+ * simultaneously, or at least let us query the existing address to anticipate this condition
+ * and respond accordingly. For now, we can only guess at the cause of this failure and try
+ * a workaround--which will briefly expand the arg space to something potentially huge before
+ * resizing it to what we want. */
+ log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
+ (void) munmap(nn, nn_size);
+ goto use_saved_argv;
+ }
- /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
- * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
- if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
- log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
+ goto use_saved_argv;
+ }
+ } else {
+ /* And update the end pointer to the new end, too. If this fails, we don't really know what
+ * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
+ * and continue. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
if (mm)
(void) munmap(mm, mm_size);
q += l;
}
- /* Skip preceeding whitespace */
+ /* Skip preceding whitespace */
l = strspn(q, WHITESPACE);
if (l < 1)
return -EINVAL;
static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
_cleanup_fclose_ FILE *f = NULL;
- char line[LINE_MAX];
const char *p;
+ int r;
assert(field);
assert(uid);
(void) __fsetlocking(f, FSETLOCKING_BYCALLER);
- FOREACH_LINE(line, f, return -errno) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
char *l;
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
l = strstrip(line);
if (startswith(l, field)) {
/*
* Return values:
- * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
- * process, the process timed out, the process was terminated by a
- * signal, or failed for an unknown reason.
+ *
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
+ * was terminated by a signal, or failed for an unknown reason.
+ *
* >=0 : The process terminated normally with no failures.
*
- * Success is indicated by a return value of zero, a timeout is indicated
- * by ETIMEDOUT, and all other child failure states are indicated by error
- * is indicated by a non-zero value.
+ * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
+ * states are indicated by error is indicated by a non-zero value.
+ *
+ * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
+ * to remain entirely race-free.
*/
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
sigset_t mask;
void sigkill_wait(pid_t pid) {
assert(pid > 1);
- if (kill(pid, SIGKILL) > 0)
+ if (kill(pid, SIGKILL) >= 0)
(void) wait_for_terminate(pid, NULL);
}
void sigterm_wait(pid_t pid) {
assert(pid > 1);
- if (kill_and_sigcont(pid, SIGTERM) > 0)
+ if (kill_and_sigcont(pid, SIGTERM) >= 0)
(void) wait_for_terminate(pid, NULL);
}
do {
char line[LINE_MAX];
- unsigned i;
+ size_t i;
for (i = 0; i < sizeof(line)-1; i++) {
int c;
#endif
}
-int pid_compare_func(const void *a, const void *b) {
- const pid_t *p = a, *q = b;
-
+int pid_compare_func(const pid_t *a, const pid_t *b) {
/* Suitable for usage in qsort() */
-
- if (*p < *q)
- return -1;
- if (*p > *q)
- return 1;
- return 0;
+ return CMP(*a, *b);
}
int ioprio_parse_priority(const char *s, int *ret) {
/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
* headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
* libpthread, as it is part of glibc anyway. */
-extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
+extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
extern void* __dso_handle __attribute__ ((__weak__));
pid_t getpid_cached(void) {
if (geteuid() == 0)
return 0;
- log_error("Need to be root.");
- return -EPERM;
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");
}
int safe_fork_full(
}
}
+ if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
+
+ /* Optionally, make sure we never propagate mounts to the host. */
+
+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
+ log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
if (flags & FORK_CLOSE_ALL_FDS) {
/* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
log_close();
}
}
+ if (flags & FORK_RLIMIT_NOFILE_SAFE) {
+ r = rlimit_nofile_safe();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
if (ret_pid)
*ret_pid = getpid_cached();
return 0;
}
-int fork_agent(const char *name, const int except[], unsigned n_except, pid_t *ret_pid, const char *path, ...) {
+int namespace_fork(
+ const char *outer_name,
+ const char *inner_name,
+ const int except_fds[],
+ size_t n_except_fds,
+ ForkFlags flags,
+ int pidns_fd,
+ int mntns_fd,
+ int netns_fd,
+ int userns_fd,
+ int root_fd,
+ pid_t *ret_pid) {
+
+ int r;
+
+ /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
+ * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
+ * /proc/self/fd works correctly. */
+
+ r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pid_t pid;
+
+ /* Child */
+
+ r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
+ if (r < 0) {
+ log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
+ r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ if (r == 0) {
+ /* Child */
+ if (ret_pid)
+ *ret_pid = pid;
+ return 0;
+ }
+
+ r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(r);
+ }
+
+ return 1;
+}
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
bool stdout_is_tty, stderr_is_tty;
- unsigned n, i;
+ size_t n, i;
va_list ap;
char **l;
int r;
safe_close_above_stdio(fd);
}
+ (void) rlimit_nofile_safe();
+
/* Count arguments */
va_start(ap, path);
for (n = 0; va_arg(ap, char*); n++)
va_end(ap);
/* Allocate strv */
- l = alloca(sizeof(char *) * (n + 1));
+ l = newa(char*, n + 1);
/* Fill in arguments */
va_start(ap, path);
_exit(EXIT_FAILURE);
}
+int set_oom_score_adjust(int value) {
+ char t[DECIMAL_STR_MAX(int)];
+
+ sprintf(t, "%i", value);
+
+ return write_string_file("/proc/self/oom_score_adj", t,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
static const char *const ioprio_class_table[] = {
[IOPRIO_CLASS_NONE] = "none",
[IOPRIO_CLASS_RT] = "realtime",