git.ipfire.org Git - thirdparty/systemd.git/commitdiff
coredump: rework coredumping logic
author: Lennart Poettering <lennart@poettering.net>
Mon, 8 Feb 2016 20:16:08 +0000 (21:16 +0100)
committer: Lennart Poettering <lennart@poettering.net>
Wed, 10 Feb 2016 15:08:32 +0000 (16:08 +0100)
This reworks the coredumping logic so that the coredump handler invoked from the kernel only collects runtime data
about the crashed process, and then submits it for processing to a socket-activated coredump service, which extracts a
stacktrace and writes the coredump to disk.

This has a number of benefits: the disk IO and stack trace generation may take a substantial amount of resources, and
hence are better managed by PID 1, so that resource management applies. This patch uses RuntimeMaxSec=, Nice=, OOMScoreAdjust=
and various sandboxing settings to ensure that the coredump handler doesn't take away unbounded resources from normally
prioritized processes.

This logic is also nice since it makes sure that coredump processing and storage are delayed correctly until
/var/lib/systemd/coredump is mounted and writable.

Fixes: #2286
Makefile.am
src/basic/socket-util.c
src/coredump/coredump.c
units/.gitignore
units/systemd-coredump.socket [new file with mode: 0644]
units/systemd-coredump@.service.in [new file with mode: 0644]

index 1e3eb4deed0b0e4d03b2a5edf745fcdd3eac6763..87744d784ca4a0e6956bff159fc4b039764ecb5d 100644 (file)
@@ -4413,6 +4413,15 @@ systemd_coredump_LDADD += \
        $(ELFUTILS_LIBS)
 endif
 
+nodist_systemunit_DATA += \
+       units/systemd-coredump@.service
+
+dist_systemunit_DATA += \
+       units/systemd-coredump.socket
+
+SOCKETS_TARGET_WANTS += \
+       systemd-coredump.socket
+
 rootlibexec_PROGRAMS += \
        systemd-coredump
 
@@ -4453,7 +4462,8 @@ CLEANFILES += \
 endif
 
 EXTRA_DIST += \
-       sysctl.d/50-coredump.conf.in
+       sysctl.d/50-coredump.conf.in \
+       units/systemd-coredump@.service.in
 
 # ------------------------------------------------------------------------------
 if ENABLE_BINFMT
index 7a866f2e2336c93366ae96e5b8eceee4cf97216a..49e5f5b125a62bbbee92c92821da634ffe9f10f4 100644 (file)
@@ -871,14 +871,13 @@ int send_one_fd_sa(
                 struct cmsghdr cmsghdr;
                 uint8_t buf[CMSG_SPACE(sizeof(int))];
         } control = {};
-        struct cmsghdr *cmsg;
-
         struct msghdr mh = {
                 .msg_name = (struct sockaddr*) sa,
                 .msg_namelen = len,
                 .msg_control = &control,
                 .msg_controllen = sizeof(control),
         };
+        struct cmsghdr *cmsg;
 
         assert(transport_fd >= 0);
         assert(fd >= 0);
index 8b1c670cc65a29fd960a2fb5115fd8c19a6778a7..9dec6521f10dfc2999098fc00e8d9738741d8918 100644 (file)
 #include <unistd.h>
 
 #ifdef HAVE_ELFUTILS
-#  include <dwarf.h>
-#  include <elfutils/libdwfl.h>
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
 #endif
 
 #include "sd-journal.h"
 #include "sd-login.h"
+#include "sd-daemon.h"
 
 #include "acl-util.h"
 #include "alloc-util.h"
@@ -51,6 +52,7 @@
 #include "mkdir.h"
 #include "parse-util.h"
 #include "process-util.h"
+#include "socket-util.h"
 #include "special.h"
 #include "stacktrace.h"
 #include "string-table.h"
 assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
 
 enum {
-        INFO_PID,
-        INFO_UID,
-        INFO_GID,
-        INFO_SIGNAL,
-        INFO_TIMESTAMP,
-        INFO_COMM,
-        INFO_EXE,
-        _INFO_LEN
+        /* We use this as array indexes for a couple of special fields we use for naming coredumping files, and
+         * attaching xattrs */
+        CONTEXT_PID,
+        CONTEXT_UID,
+        CONTEXT_GID,
+        CONTEXT_SIGNAL,
+        CONTEXT_TIMESTAMP,
+        CONTEXT_COMM,
+        CONTEXT_EXE,
+        _CONTEXT_MAX
 };
 
 typedef enum CoredumpStorage {
@@ -173,16 +177,16 @@ static int fix_acl(int fd, uid_t uid) {
         return 0;
 }
 
-static int fix_xattr(int fd, const char *info[_INFO_LEN]) {
+static int fix_xattr(int fd, const char *context[_CONTEXT_MAX]) {
 
-        static const char * const xattrs[_INFO_LEN] = {
-                [INFO_PID] = "user.coredump.pid",
-                [INFO_UID] = "user.coredump.uid",
-                [INFO_GID] = "user.coredump.gid",
-                [INFO_SIGNAL] = "user.coredump.signal",
-                [INFO_TIMESTAMP] = "user.coredump.timestamp",
-                [INFO_COMM] = "user.coredump.comm",
-                [INFO_EXE] = "user.coredump.exe",
+        static const char * const xattrs[_CONTEXT_MAX] = {
+                [CONTEXT_PID] = "user.coredump.pid",
+                [CONTEXT_UID] = "user.coredump.uid",
+                [CONTEXT_GID] = "user.coredump.gid",
+                [CONTEXT_SIGNAL] = "user.coredump.signal",
+                [CONTEXT_TIMESTAMP] = "user.coredump.timestamp",
+                [CONTEXT_COMM] = "user.coredump.comm",
+                [CONTEXT_EXE] = "user.coredump.exe",
         };
 
         int r = 0;
@@ -193,13 +197,13 @@ static int fix_xattr(int fd, const char *info[_INFO_LEN]) {
         /* Attach some metadata to coredumps via extended
          * attributes. Just because we can. */
 
-        for (i = 0; i < _INFO_LEN; i++) {
+        for (i = 0; i < _CONTEXT_MAX; i++) {
                 int k;
 
-                if (isempty(info[i]) || !xattrs[i])
+                if (isempty(context[i]) || !xattrs[i])
                         continue;
 
-                k = fsetxattr(fd, xattrs[i], info[i], strlen(info[i]), XATTR_CREATE);
+                k = fsetxattr(fd, xattrs[i], context[i], strlen(context[i]), XATTR_CREATE);
                 if (k < 0 && r == 0)
                         r = -errno;
         }
@@ -213,18 +217,18 @@ static int fix_permissions(
                 int fd,
                 const char *filename,
                 const char *target,
-                const char *info[_INFO_LEN],
+                const char *context[_CONTEXT_MAX],
                 uid_t uid) {
 
         assert(fd >= 0);
         assert(filename);
         assert(target);
-        assert(info);
+        assert(context);
 
         /* Ignore errors on these */
-        fchmod(fd, 0640);
-        fix_acl(fd, uid);
-        fix_xattr(fd, info);
+        (void) fchmod(fd, 0640);
+        (void) fix_acl(fd, uid);
+        (void) fix_xattr(fd, context);
 
         if (fsync(fd) < 0)
                 return log_error_errno(errno, "Failed to sync coredump %s: %m", filename);
@@ -252,18 +256,18 @@ static int maybe_remove_external_coredump(const char *filename, uint64_t size) {
         return 1;
 }
 
-static int make_filename(const char *info[_INFO_LEN], char **ret) {
+static int make_filename(const char *context[_CONTEXT_MAX], char **ret) {
         _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL;
         sd_id128_t boot = {};
         int r;
 
-        assert(info);
+        assert(context);
 
-        c = filename_escape(info[INFO_COMM]);
+        c = filename_escape(context[CONTEXT_COMM]);
         if (!c)
                 return -ENOMEM;
 
-        u = filename_escape(info[INFO_UID]);
+        u = filename_escape(context[CONTEXT_UID]);
         if (!u)
                 return -ENOMEM;
 
@@ -271,11 +275,11 @@ static int make_filename(const char *info[_INFO_LEN], char **ret) {
         if (r < 0)
                 return r;
 
-        p = filename_escape(info[INFO_PID]);
+        p = filename_escape(context[CONTEXT_PID]);
         if (!p)
                 return -ENOMEM;
 
-        t = filename_escape(info[INFO_TIMESTAMP]);
+        t = filename_escape(context[CONTEXT_TIMESTAMP]);
         if (!t)
                 return -ENOMEM;
 
@@ -292,8 +296,8 @@ static int make_filename(const char *info[_INFO_LEN], char **ret) {
 }
 
 static int save_external_coredump(
-                const char *info[_INFO_LEN],
-                uid_t uid,
+                const char *context[_CONTEXT_MAX],
+                int input_fd,
                 char **ret_filename,
                 int *ret_node_fd,
                 int *ret_data_fd,
@@ -302,15 +306,20 @@ static int save_external_coredump(
         _cleanup_free_ char *fn = NULL, *tmp = NULL;
         _cleanup_close_ int fd = -1;
         struct stat st;
+        uid_t uid;
         int r;
 
-        assert(info);
+        assert(context);
         assert(ret_filename);
         assert(ret_node_fd);
         assert(ret_data_fd);
         assert(ret_size);
 
-        r = make_filename(info, &fn);
+        r = parse_uid(context[CONTEXT_UID], &uid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse UID: %m");
+
+        r = make_filename(context, &fn);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine coredump file name: %m");
 
@@ -324,12 +333,12 @@ static int save_external_coredump(
         if (fd < 0)
                 return log_error_errno(errno, "Failed to create coredump file %s: %m", tmp);
 
-        r = copy_bytes(STDIN_FILENO, fd, arg_process_size_max, false);
+        r = copy_bytes(input_fd, fd, arg_process_size_max, false);
         if (r == -EFBIG) {
-                log_error("Coredump of %s (%s) is larger than configured processing limit, refusing.", info[INFO_PID], info[INFO_COMM]);
+                log_error("Coredump of %s (%s) is larger than configured processing limit, refusing.", context[CONTEXT_PID], context[CONTEXT_COMM]);
                 goto fail;
         } else if (IN_SET(r, -EDQUOT, -ENOSPC)) {
-                log_error("Not enough disk space for coredump of %s (%s), refusing.", info[INFO_PID], info[INFO_COMM]);
+                log_error("Not enough disk space for coredump of %s (%s), refusing.", context[CONTEXT_PID], context[CONTEXT_COMM]);
                 goto fail;
         } else if (r < 0) {
                 log_error_errno(r, "Failed to dump coredump to file: %m");
@@ -378,7 +387,7 @@ static int save_external_coredump(
                         goto fail_compressed;
                 }
 
-                r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, info, uid);
+                r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
                 if (r < 0)
                         goto fail_compressed;
 
@@ -396,13 +405,13 @@ static int save_external_coredump(
                 return 0;
 
         fail_compressed:
-                unlink_noerrno(tmp_compressed);
+                (void) unlink(tmp_compressed);
         }
 
 uncompressed:
 #endif
 
-        r = fix_permissions(fd, tmp, fn, info, uid);
+        r = fix_permissions(fd, tmp, fn, context, uid);
         if (r < 0)
                 goto fail;
 
@@ -417,7 +426,7 @@ uncompressed:
         return 0;
 
 fail:
-        unlink_noerrno(tmp);
+        (void) unlink(tmp);
         return r;
 }
 
@@ -539,186 +548,457 @@ static int compose_open_fds(pid_t pid, char **open_fds) {
         return 0;
 }
 
-int main(int argc, char* argv[]) {
+static int change_uid_gid(const char *context[]) {
+        uid_t uid;
+        gid_t gid;
+        int r;
 
-        /* The small core field we allocate on the stack, to keep things simple */
-        char
-                *core_pid = NULL, *core_uid = NULL, *core_gid = NULL, *core_signal = NULL,
-                *core_session = NULL, *core_exe = NULL, *core_comm = NULL, *core_cmdline = NULL,
-                *core_cgroup = NULL, *core_cwd = NULL, *core_root = NULL, *core_unit = NULL,
-                *core_slice = NULL;
+        r = parse_uid(context[CONTEXT_UID], &uid);
+        if (r < 0)
+                return r;
 
-        /* The larger ones we allocate on the heap */
-        _cleanup_free_ char
-                *core_timestamp = NULL,  *core_message = NULL, *coredump_data = NULL, *core_owner_uid = NULL,
-                *core_open_fds = NULL, *core_proc_status = NULL, *core_proc_maps = NULL, *core_proc_limits = NULL,
-                *core_proc_cgroup = NULL, *core_environ = NULL;
+        r = parse_gid(context[CONTEXT_GID], &gid);
+        if (r < 0)
+                return r;
+
+        return drop_privileges(uid, gid, 0);
+}
 
-        _cleanup_free_ char *exe = NULL, *comm = NULL, *filename = NULL;
-        const char *info[_INFO_LEN];
+static int submit_coredump(
+                const char *context[_CONTEXT_MAX],
+                struct iovec *iovec,
+                size_t n_iovec_allocated,
+                size_t n_iovec,
+                int input_fd) {
 
         _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
-
-        struct iovec iovec[26];
+        _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL;
         uint64_t coredump_size;
-        int r, j = 0;
-        uid_t uid, owner_uid;
-        gid_t gid;
-        pid_t pid;
-        char *t;
-        const char *p;
+        int r;
 
-        /* Make sure we never enter a loop */
-        prctl(PR_SET_DUMPABLE, 0);
+        assert(context);
+        assert(iovec);
+        assert(n_iovec_allocated >= n_iovec + 3);
+        assert(input_fd >= 0);
 
-        /* First, log to a safe place, since we don't know what
-         * crashed and it might be journald which we'd rather not log
-         * to then. */
-        log_set_target(LOG_TARGET_KMSG);
-        log_open();
+        /* Vacuum before we write anything again */
+        (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);
 
-        if (argc < INFO_COMM + 1) {
-                log_error("Not enough arguments passed from kernel (%d, expected %d).",
-                          argc - 1, INFO_COMM + 1 - 1);
-                r = -EINVAL;
-                goto finish;
+        /* Always stream the coredump to disk, if that's possible */
+        r = save_external_coredump(context, input_fd, &filename, &coredump_node_fd, &coredump_fd, &coredump_size);
+        if (r < 0)
+                /* Skip whole core dumping part */
+                goto log;
+
+        /* If we don't want to keep the coredump on disk, remove it now, as later on we will lack the privileges for
+         * it. However, we keep the fd to it, so that we can still process it and log it. */
+        r = maybe_remove_external_coredump(filename, coredump_size);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                const char *coredump_filename;
+
+                coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
+                IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
         }
 
-        /* Ignore all parse errors */
-        parse_config();
+        /* Vacuum again, but exclude the coredump we just created */
+        (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
 
-        log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage));
-        log_debug("Selected compression %s.", yes_no(arg_compress));
+        /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the coredump
+         * memory under the user's uid. This also ensures that the credentials journald will see are the ones of the
+         * coredumping user, thus making sure the user gets access to the core dump. Let's also get rid of all
+         * capabilities, if we run as root, we won't need them anymore. */
+        r = change_uid_gid(context);
+        if (r < 0)
+                return log_error_errno(r, "Failed to drop privileges: %m");
 
-        r = parse_uid(argv[INFO_UID + 1], &uid);
-        if (r < 0) {
-                log_error("Failed to parse UID.");
-                goto finish;
+#ifdef HAVE_ELFUTILS
+        /* Try to get a strack trace if we can */
+        if (coredump_size <= arg_process_size_max) {
+                _cleanup_free_ char *stacktrace = NULL;
+
+                r = coredump_make_stack_trace(coredump_fd, context[CONTEXT_EXE], &stacktrace);
+                if (r >= 0)
+                        core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], " (", context[CONTEXT_COMM], ") of user ", context[CONTEXT_UID], " dumped core.\n\n", stacktrace, NULL);
+                else if (r == -EINVAL)
+                        log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
+                else
+                        log_warning_errno(r, "Failed to generate stack trace: %m");
         }
 
-        r = parse_pid(argv[INFO_PID + 1], &pid);
-        if (r < 0) {
-                log_error("Failed to parse PID.");
-                goto finish;
+        if (!core_message)
+#endif
+log:
+        core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], " (", context[CONTEXT_COMM], ") of user ", context[CONTEXT_UID], " dumped core.", NULL);
+        if (core_message)
+                IOVEC_SET_STRING(iovec[n_iovec++], core_message);
+
+        /* Optionally store the entire coredump in the journal */
+        if (IN_SET(arg_storage, COREDUMP_STORAGE_JOURNAL, COREDUMP_STORAGE_BOTH) &&
+            coredump_size <= arg_journal_size_max) {
+                size_t sz = 0;
+
+                /* Store the coredump itself in the journal */
+
+                r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
+                if (r >= 0) {
+                        iovec[n_iovec].iov_base = coredump_data;
+                        iovec[n_iovec].iov_len = sz;
+                        n_iovec++;
+                }
         }
 
-        r = parse_gid(argv[INFO_GID + 1], &gid);
-        if (r < 0) {
-                log_error("Failed to parse GID.");
+        assert(n_iovec <= n_iovec_allocated);
+
+        r = sd_journal_sendv(iovec, n_iovec);
+        if (r < 0)
+                return log_error_errno(r, "Failed to log coredump: %m");
+
+        return 0;
+}
+
+static void map_context_fields(const struct iovec *iovec, const char *context[]) {
+
+        static const char * const context_field_names[_CONTEXT_MAX] = {
+                [CONTEXT_PID] = "COREDUMP_PID=",
+                [CONTEXT_UID] = "COREDUMP_UID=",
+                [CONTEXT_GID] = "COREDUMP_GID=",
+                [CONTEXT_SIGNAL] = "COREDUMP_SIGNAL=",
+                [CONTEXT_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
+                [CONTEXT_COMM] = "COREDUMP_COMM=",
+                [CONTEXT_EXE] = "COREDUMP_EXE=",
+        };
+
+        unsigned i;
+
+        assert(iovec);
+        assert(context);
+
+        for (i = 0; i < _CONTEXT_MAX; i++) {
+                size_t l;
+
+                l = strlen(context_field_names[i]);
+                if (iovec->iov_len < l)
+                        continue;
+
+                if (memcmp(iovec->iov_base, context_field_names[i], l) != 0)
+                        continue;
+
+                /* Note that these strings are NUL terminated, because we made sure that a trailing NUL byte is in the
+                 * buffer, though not included in the iov_len count. (see below) */
+                context[i] = (char*) iovec->iov_base + l;
+                break;
+        }
+}
+
+static int process_socket(int fd) {
+        _cleanup_close_ int coredump_fd = -1;
+        struct iovec *iovec = NULL;
+        size_t n_iovec = 0, n_iovec_allocated = 0, i;
+        const char *context[_CONTEXT_MAX] = {};
+        int r;
+
+        assert(fd >= 0);
+
+        log_set_target(LOG_TARGET_AUTO);
+        log_parse_environment();
+        log_open();
+
+        for (;;) {
+                union {
+                        struct cmsghdr cmsghdr;
+                        uint8_t buf[CMSG_SPACE(sizeof(int))];
+                } control = {};
+                struct msghdr mh = {
+                        .msg_control = &control,
+                        .msg_controllen = sizeof(control),
+                        .msg_iovlen = 1,
+                };
+                ssize_t n;
+                int l;
+
+                if (!GREEDY_REALLOC(iovec, n_iovec_allocated, n_iovec + 3)) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                if (ioctl(fd, FIONREAD, &l) < 0) {
+                        r = log_error_errno(errno, "FIONREAD failed: %m");
+                        goto finish;
+                }
+
+                assert(l >= 0);
+
+                iovec[n_iovec].iov_len = l;
+                iovec[n_iovec].iov_base = malloc(l + 1);
+
+                if (!iovec[n_iovec].iov_base) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                mh.msg_iov = iovec + n_iovec;
+
+                n = recvmsg(fd, &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC);
+                if (n < 0)  {
+                        free(iovec[n_iovec].iov_base);
+                        r = log_error_errno(errno, "Failed to receive datagram: %m");
+                        goto finish;
+                }
+
+                if (n == 0) {
+                        struct cmsghdr *cmsg, *found = NULL;
+                        /* The final zero-length datagram carries the file descriptor and tells us that we're done. */
+
+                        free(iovec[n_iovec].iov_base);
+
+                        CMSG_FOREACH(cmsg, &mh) {
+                                if (cmsg->cmsg_level == SOL_SOCKET &&
+                                    cmsg->cmsg_type == SCM_RIGHTS &&
+                                    cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+                                        assert(!found);
+                                        found = cmsg;
+                                }
+                        }
+
+                        if (!found) {
+                                log_error("Coredump file descriptor missing.");
+                                r = -EBADMSG;
+                                goto finish;
+                        }
+
+                        assert(coredump_fd < 0);
+                        coredump_fd = *(int*) CMSG_DATA(found);
+                        break;
+                }
+
+                /* Add trailing NUL byte, in case these are strings */
+                ((char*) iovec[n_iovec].iov_base)[n] = 0;
+                iovec[n_iovec].iov_len = (size_t) n;
+
+                cmsg_close_all(&mh);
+                map_context_fields(iovec + n_iovec, context);
+                n_iovec++;
+        }
+
+        if (!GREEDY_REALLOC(iovec, n_iovec_allocated, n_iovec + 3)) {
+                r = log_oom();
                 goto finish;
         }
 
-        if (get_process_comm(pid, &comm) < 0) {
-                log_warning("Failed to get COMM, falling back to the command line.");
-                comm = strv_join(argv + INFO_COMM + 1, " ");
+        /* Make sure we we got all data we really need */
+        assert(context[CONTEXT_PID]);
+        assert(context[CONTEXT_UID]);
+        assert(context[CONTEXT_GID]);
+        assert(context[CONTEXT_SIGNAL]);
+        assert(context[CONTEXT_TIMESTAMP]);
+        assert(context[CONTEXT_COMM]);
+        assert(coredump_fd >= 0);
+
+        r = submit_coredump(context, iovec, n_iovec_allocated, n_iovec, coredump_fd);
+
+finish:
+        for (i = 0; i < n_iovec; i++)
+                free(iovec[i].iov_base);
+        free(iovec);
+
+        return r;
+}
+
+static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) {
+
+        static const union sockaddr_union sa = {
+                .un.sun_family = AF_UNIX,
+                .un.sun_path = "/run/systemd/coredump",
+        };
+        _cleanup_close_ int fd = -1;
+        size_t i;
+        int r;
+
+        assert(iovec || n_iovec <= 0);
+        assert(input_fd >= 0);
+
+        fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+        if (fd < 0)
+                return log_error_errno(errno, "Failed to create coredump socket: %m");
+
+        if (connect(fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)) < 0)
+                return log_error_errno(errno, "Failed to connect to coredump service: %m");
+
+        for (i = 0; i < n_iovec; i++) {
+                ssize_t n;
+                assert(iovec[i].iov_len > 0);
+
+                n = send(fd, iovec[i].iov_base, iovec[i].iov_len, MSG_NOSIGNAL);
+                if (n < 0)
+                        return log_error_errno(errno, "Failed to send coredump datagram: %m");
         }
 
-        if (get_process_exe(pid, &exe) < 0)
-                log_warning("Failed to get EXE.");
+        r = send_one_fd(fd, input_fd, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to send coredump fd: %m");
 
-        info[INFO_PID] = argv[INFO_PID + 1];
-        info[INFO_UID] = argv[INFO_UID + 1];
-        info[INFO_GID] = argv[INFO_GID + 1];
-        info[INFO_SIGNAL] = argv[INFO_SIGNAL + 1];
-        info[INFO_TIMESTAMP] = argv[INFO_TIMESTAMP + 1];
-        info[INFO_COMM] = comm;
-        info[INFO_EXE] = exe;
+        return 0;
+}
 
-        if (cg_pid_get_unit(pid, &t) >= 0) {
+static int process_journald_crash(const char *context[], int input_fd) {
+        _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
+        _cleanup_free_ char *filename = NULL;
+        uint64_t coredump_size;
+        int r;
 
-                if (streq(t, SPECIAL_JOURNALD_SERVICE)) {
-                        free(t);
+        assert(context);
+        assert(input_fd >= 0);
 
-                        /* If we are journald, we cut things short,
-                         * don't write to the journal, but still
-                         * create a coredump. */
+        /* If we are journald, we cut things short, don't write to the journal, but still create a coredump. */
 
-                        if (arg_storage != COREDUMP_STORAGE_NONE)
-                                arg_storage = COREDUMP_STORAGE_EXTERNAL;
+        if (arg_storage != COREDUMP_STORAGE_NONE)
+                arg_storage = COREDUMP_STORAGE_EXTERNAL;
 
-                        r = save_external_coredump(info, uid, &filename, &coredump_node_fd, &coredump_fd, &coredump_size);
-                        if (r < 0)
-                                goto finish;
+        r = save_external_coredump(context, input_fd, &filename, &coredump_node_fd, &coredump_fd, &coredump_size);
+        if (r < 0)
+                return r;
 
-                        r = maybe_remove_external_coredump(filename, coredump_size);
-                        if (r < 0)
-                                goto finish;
+        r = maybe_remove_external_coredump(filename, coredump_size);
+        if (r < 0)
+                return r;
 
-                        log_info("Detected coredump of the journal daemon itself, diverted to %s.", filename);
-                        goto finish;
+        log_info("Detected coredump of the journal daemon itself, diverted to %s.", filename);
+        return 0;
+}
+
+static int process_kernel(int argc, char* argv[]) {
+
+        /* The small core field we allocate on the stack, to keep things simple */
+        char
+                *core_pid = NULL, *core_uid = NULL, *core_gid = NULL, *core_signal = NULL,
+                *core_session = NULL, *core_exe = NULL, *core_comm = NULL, *core_cmdline = NULL,
+                *core_cgroup = NULL, *core_cwd = NULL, *core_root = NULL, *core_unit = NULL,
+                *core_user_unit = NULL, *core_slice = NULL, *core_timestamp = NULL;
+
+        /* The larger ones we allocate on the heap */
+        _cleanup_free_ char
+                *core_owner_uid = NULL, *core_open_fds = NULL, *core_proc_status = NULL,
+                *core_proc_maps = NULL, *core_proc_limits = NULL, *core_proc_cgroup = NULL, *core_environ = NULL;
+
+        _cleanup_free_ char *exe = NULL, *comm = NULL;
+        const char *context[_CONTEXT_MAX];
+        struct iovec iovec[24];
+        size_t n_iovec = 0;
+        uid_t owner_uid;
+        const char *p;
+        pid_t pid;
+        char *t;
+        int r;
+
+        if (argc < CONTEXT_COMM + 1) {
+                log_error("Not enough arguments passed from kernel (%i, expected %i).", argc - 1, CONTEXT_COMM + 1 - 1);
+                return -EINVAL;
+        }
+
+        r = parse_pid(argv[CONTEXT_PID + 1], &pid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse PID.");
+
+        r = get_process_comm(pid, &comm);
+        if (r < 0) {
+                log_warning_errno(r, "Failed to get COMM, falling back to the command line: %m");
+                comm = strv_join(argv + CONTEXT_COMM + 1, " ");
+                if (!comm)
+                        return log_oom();
+        }
+
+        r = get_process_exe(pid, &exe);
+        if (r < 0)
+                log_warning_errno(r, "Failed to get EXE, ignoring: %m");
+
+        context[CONTEXT_PID] = argv[CONTEXT_PID + 1];
+        context[CONTEXT_UID] = argv[CONTEXT_UID + 1];
+        context[CONTEXT_GID] = argv[CONTEXT_GID + 1];
+        context[CONTEXT_SIGNAL] = argv[CONTEXT_SIGNAL + 1];
+        context[CONTEXT_TIMESTAMP] = argv[CONTEXT_TIMESTAMP + 1];
+        context[CONTEXT_COMM] = comm;
+        context[CONTEXT_EXE] = exe;
+
+        if (cg_pid_get_unit(pid, &t) >= 0) {
+
+                if (streq(t, SPECIAL_JOURNALD_SERVICE)) {
+                        free(t);
+                        return process_journald_crash(context, STDIN_FILENO);
                 }
 
                 core_unit = strjoina("COREDUMP_UNIT=", t);
                 free(t);
 
-        } else if (cg_pid_get_user_unit(pid, &t) >= 0) {
-                core_unit = strjoina("COREDUMP_USER_UNIT=", t);
-                free(t);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_unit);
         }
 
-        if (core_unit)
-                IOVEC_SET_STRING(iovec[j++], core_unit);
-
-        /* OK, now we know it's not the journal, hence we can make use
-         * of it now. */
+        /* OK, now we know it's not the journal, hence we can make use of it now. */
         log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
         log_open();
 
-        core_pid = strjoina("COREDUMP_PID=", info[INFO_PID]);
-        IOVEC_SET_STRING(iovec[j++], core_pid);
+        if (cg_pid_get_user_unit(pid, &t) >= 0) {
+                core_user_unit = strjoina("COREDUMP_USER_UNIT=", t);
+                free(t);
+
+                IOVEC_SET_STRING(iovec[n_iovec++], core_user_unit);
+        }
+
+        core_pid = strjoina("COREDUMP_PID=", context[CONTEXT_PID]);
+        IOVEC_SET_STRING(iovec[n_iovec++], core_pid);
 
-        core_uid = strjoina("COREDUMP_UID=", info[INFO_UID]);
-        IOVEC_SET_STRING(iovec[j++], core_uid);
+        core_uid = strjoina("COREDUMP_UID=", context[CONTEXT_UID]);
+        IOVEC_SET_STRING(iovec[n_iovec++], core_uid);
 
-        core_gid = strjoina("COREDUMP_GID=", info[INFO_GID]);
-        IOVEC_SET_STRING(iovec[j++], core_gid);
+        core_gid = strjoina("COREDUMP_GID=", context[CONTEXT_GID]);
+        IOVEC_SET_STRING(iovec[n_iovec++], core_gid);
 
-        core_signal = strjoina("COREDUMP_SIGNAL=", info[INFO_SIGNAL]);
-        IOVEC_SET_STRING(iovec[j++], core_signal);
+        core_signal = strjoina("COREDUMP_SIGNAL=", context[CONTEXT_SIGNAL]);
+        IOVEC_SET_STRING(iovec[n_iovec++], core_signal);
 
         if (sd_pid_get_session(pid, &t) >= 0) {
                 core_session = strjoina("COREDUMP_SESSION=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_session);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_session);
         }
 
         if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
-                r = asprintf(&core_owner_uid,
-                             "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
+                r = asprintf(&core_owner_uid, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
                 if (r > 0)
-                        IOVEC_SET_STRING(iovec[j++], core_owner_uid);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_owner_uid);
         }
 
         if (sd_pid_get_slice(pid, &t) >= 0) {
                 core_slice = strjoina("COREDUMP_SLICE=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_slice);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_slice);
         }
 
         if (comm) {
                 core_comm = strjoina("COREDUMP_COMM=", comm);
-                IOVEC_SET_STRING(iovec[j++], core_comm);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_comm);
         }
 
         if (exe) {
                 core_exe = strjoina("COREDUMP_EXE=", exe);
-                IOVEC_SET_STRING(iovec[j++], core_exe);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_exe);
         }
 
         if (get_process_cmdline(pid, 0, false, &t) >= 0) {
                 core_cmdline = strjoina("COREDUMP_CMDLINE=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_cmdline);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_cmdline);
         }
 
         if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) {
                 core_cgroup = strjoina("COREDUMP_CGROUP=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_cgroup);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_cgroup);
         }
 
         if (compose_open_fds(pid, &t) >= 0) {
@@ -726,7 +1006,7 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_open_fds)
-                        IOVEC_SET_STRING(iovec[j++], core_open_fds);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_open_fds);
         }
 
         p = procfs_file_alloca(pid, "status");
@@ -735,7 +1015,7 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_proc_status)
-                        IOVEC_SET_STRING(iovec[j++], core_proc_status);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_proc_status);
         }
 
         p = procfs_file_alloca(pid, "maps");
@@ -744,7 +1024,7 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_proc_maps)
-                        IOVEC_SET_STRING(iovec[j++], core_proc_maps);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_proc_maps);
         }
 
         p = procfs_file_alloca(pid, "limits");
@@ -753,7 +1033,7 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_proc_limits)
-                        IOVEC_SET_STRING(iovec[j++], core_proc_limits);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_proc_limits);
         }
 
         p = procfs_file_alloca(pid, "cgroup");
@@ -762,21 +1042,21 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_proc_cgroup)
-                        IOVEC_SET_STRING(iovec[j++], core_proc_cgroup);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_proc_cgroup);
         }
 
         if (get_process_cwd(pid, &t) >= 0) {
                 core_cwd = strjoina("COREDUMP_CWD=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_cwd);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_cwd);
         }
 
         if (get_process_root(pid, &t) >= 0) {
                 core_root = strjoina("COREDUMP_ROOT=", t);
                 free(t);
 
-                IOVEC_SET_STRING(iovec[j++], core_root);
+                IOVEC_SET_STRING(iovec[n_iovec++], core_root);
         }
 
         if (get_process_environ(pid, &t) >= 0) {
@@ -784,96 +1064,56 @@ int main(int argc, char* argv[]) {
                 free(t);
 
                 if (core_environ)
-                        IOVEC_SET_STRING(iovec[j++], core_environ);
+                        IOVEC_SET_STRING(iovec[n_iovec++], core_environ);
         }
 
-        core_timestamp = strjoin("COREDUMP_TIMESTAMP=", info[INFO_TIMESTAMP], "000000", NULL);
-        if (core_timestamp)
-                IOVEC_SET_STRING(iovec[j++], core_timestamp);
+        core_timestamp = strjoina("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
+        IOVEC_SET_STRING(iovec[n_iovec++], core_timestamp);
 
-        IOVEC_SET_STRING(iovec[j++], "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1");
+        IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1");
 
         assert_cc(2 == LOG_CRIT);
-        IOVEC_SET_STRING(iovec[j++], "PRIORITY=2");
-
-        /* Vacuum before we write anything again */
-        coredump_vacuum(-1, arg_keep_free, arg_max_use);
+        IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
 
-        /* Always stream the coredump to disk, if that's possible */
-        r = save_external_coredump(info, uid, &filename, &coredump_node_fd, &coredump_fd, &coredump_size);
-        if (r < 0)
-                /* skip whole core dumping part */
-                goto log;
+        assert(n_iovec <= ELEMENTSOF(iovec));
 
-        /* If we don't want to keep the coredump on disk, remove it
-         * now, as later on we will lack the privileges for
-         * it. However, we keep the fd to it, so that we can still
-         * process it and log it. */
-        r = maybe_remove_external_coredump(filename, coredump_size);
-        if (r < 0)
-                goto finish;
-        if (r == 0) {
-                const char *coredump_filename;
-
-                coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
-                IOVEC_SET_STRING(iovec[j++], coredump_filename);
-        }
+        return send_iovec(iovec, n_iovec, STDIN_FILENO);
+}
 
-        /* Vacuum again, but exclude the coredump we just created */
-        coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
-
-        /* Now, let's drop privileges to become the user who owns the
-         * segfaulted process and allocate the coredump memory under
-         * the user's uid. This also ensures that the credentials
-         * journald will see are the ones of the coredumping user,
-         * thus making sure the user gets access to the core
-         * dump. Let's also get rid of all capabilities, if we run as
-         * root, we won't need them anymore. */
-        r = drop_privileges(uid, gid, 0);
-        if (r < 0) {
-                log_error_errno(r, "Failed to drop privileges: %m");
-                goto finish;
-        }
+int main(int argc, char *argv[]) {
+        int r;
 
-#ifdef HAVE_ELFUTILS
-        /* Try to get a strack trace if we can */
-        if (coredump_size <= arg_process_size_max) {
-                _cleanup_free_ char *stacktrace = NULL;
+        /* First, log to a safe place: we don't know what crashed, and if it was journald itself we'd rather not
+         * log to it. */
 
-                r = coredump_make_stack_trace(coredump_fd, exe, &stacktrace);
-                if (r >= 0)
-                        core_message = strjoin("MESSAGE=Process ", info[INFO_PID], " (", comm, ") of user ", info[INFO_UID], " dumped core.\n\n", stacktrace, NULL);
-                else if (r == -EINVAL)
-                        log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
-                else
-                        log_warning_errno(r, "Failed to generate stack trace: %m");
-        }
+        log_set_target(LOG_TARGET_KMSG);
+        log_open();
 
-        if (!core_message)
-#endif
-log:
-        core_message = strjoin("MESSAGE=Process ", info[INFO_PID], " (", comm, ") of user ", info[INFO_UID], " dumped core.", NULL);
-        if (core_message)
-                IOVEC_SET_STRING(iovec[j++], core_message);
+        /* Make sure we never enter a loop */
+        (void) prctl(PR_SET_DUMPABLE, 0);
 
-        /* Optionally store the entire coredump in the journal */
-        if (IN_SET(arg_storage, COREDUMP_STORAGE_JOURNAL, COREDUMP_STORAGE_BOTH) &&
-            coredump_size <= arg_journal_size_max) {
-                size_t sz = 0;
+        /* Ignore all parse errors */
+        (void) parse_config();
 
-                /* Store the coredump itself in the journal */
+        log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage));
+        log_debug("Selected compression %s.", yes_no(arg_compress));
 
-                r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
-                if (r >= 0) {
-                        iovec[j].iov_base = coredump_data;
-                        iovec[j].iov_len = sz;
-                        j++;
-                }
+        r = sd_listen_fds(false);
+        if (r < 0) {
+                log_error_errno(r, "Failed to determine number of file descriptor: %m");
+                goto finish;
         }
 
-        r = sd_journal_sendv(iovec, j);
-        if (r < 0)
-                log_error_errno(r, "Failed to log coredump: %m");
+        /* If we got an fd passed, we are running in coredumpd mode. Otherwise we are invoked from the kernel
+         * as the coredump handler. */
+        if (r == 0)
+                r = process_kernel(argc, argv);
+        else if (r == 1)
+                r = process_socket(SD_LISTEN_FDS_START);
+        else {
+                log_error("Received unexpected number of file descriptors.");
+                r = -EINVAL;
+        }
 
 finish:
         return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
index c89740df05ca12240b05007c998a858f5d6325c6..2fff20a052cda92fdeadc15f2e4988f5adc87e3e 100644 (file)
@@ -25,6 +25,7 @@
 /systemd-binfmt.service
 /systemd-bootchart.service
 /systemd-bus-proxyd.service
+/systemd-coredump@.service
 /systemd-firstboot.service
 /systemd-fsck-root.service
 /systemd-fsck@.service
diff --git a/units/systemd-coredump.socket b/units/systemd-coredump.socket
new file mode 100644 (file)
index 0000000..4cb2460
--- /dev/null
@@ -0,0 +1,17 @@
+#  This file is part of systemd.
+#
+#  systemd is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU Lesser General Public License as published by
+#  the Free Software Foundation; either version 2.1 of the License, or
+#  (at your option) any later version.
+
+[Unit]
+Description=Process Core Dump Socket
+Documentation=man:systemd-coredump(8)
+DefaultDependencies=no
+
+[Socket]
+ListenSequentialPacket=/run/systemd/coredump
+SocketMode=0600
+Accept=yes
+MaxConnections=16
diff --git a/units/systemd-coredump@.service.in b/units/systemd-coredump@.service.in
new file mode 100644 (file)
index 0000000..588c8d6
--- /dev/null
@@ -0,0 +1,24 @@
+#  This file is part of systemd.
+#
+#  systemd is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU Lesser General Public License as published by
+#  the Free Software Foundation; either version 2.1 of the License, or
+#  (at your option) any later version.
+
+[Unit]
+Description=Process Core Dump
+Documentation=man:systemd-coredump(8)
+DefaultDependencies=no
+RequiresMountsFor=/var/lib/systemd/coredump
+Conflicts=shutdown.target
+After=systemd-remount-fs.service systemd-journald.socket
+Requires=systemd-journald.socket
+Before=shutdown.target
+
+[Service]
+ExecStart=-@rootlibexecdir@/systemd-coredump
+Nice=9
+OOMScoreAdjust=500
+PrivateNetwork=yes
+ProtectSystem=full
+RuntimeMaxSec=5min