]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
coredump: process special crashes in an (almost) normal way
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Sun, 26 Feb 2017 21:46:23 +0000 (16:46 -0500)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Wed, 1 Mar 2017 02:33:52 +0000 (21:33 -0500)
We would only log a terse message when pid1 or systemd-journald crashed.
It seems better to reuse the normal code paths as much as possible,
with the following differences:
- if pid1 crashes, we cannot launch the helper, so we don't analyze the
 coredump, just write it to file directly from the helper invoked by the
 kernel;
- if journald crashes, we can produce the backtrace, but we don't log full
  structured messages.

With comparison to previous code, advantages are:
- we go through most of the steps, so for example vacuuming is performed,
- we gather and log more data. In particular for journald and pid1 crashes we
  generate a backtrace, and for pid1 crashes we record the metadata (fdinfo,
  maps, etc.),
- coredumpctl shows pid1 crashes.

A disavantage (inefficiency) is that we gather metadata for journald crashes
which is then ignored because _TRANSPORT=kernel does not support structued
messages.

Messages for the systemd-journald "crash" have _TRANSPORT=kernel, and
_TRANSPORT=journal for the pid1 "crash".

Feb 26 16:27:55 systemd[1]: systemd-journald.service: Main process exited, code=dumped, status=11/SEGV
Feb 26 16:27:55 systemd[1]: systemd-journald.service: Unit entered failed state.
Feb 26 16:37:54 systemd-coredump[18801]: Process 18729 (systemd-journal) of user 0 dumped core.
Feb 26 16:37:54 systemd-coredump[18801]: Coredump diverted to /var/lib/systemd/coredump/core.systemd-journal.0.36c14bf3c6ce4c38914f441038990979.18729.1488145074000000.lz4
Feb 26 16:37:54 systemd-coredump[18801]: Stack trace of thread 18729:
Feb 26 16:37:54 systemd-coredump[18801]: #0  0x00007f46d6a06b8d fsync (libpthread.so.0)
Feb 26 16:37:54 systemd-coredump[18801]: #1  0x00007f46d71bfc47 journal_file_set_online (libsystemd-shared-233.so)
Feb 26 16:37:54 systemd-coredump[18801]: #2  0x00007f46d71c1c31 journal_file_append_object (libsystemd-shared-233.so)
Feb 26 16:37:54 systemd-coredump[18801]: #3  0x00007f46d71c3405 journal_file_append_data (libsystemd-shared-233.so)
Feb 26 16:37:54 systemd-coredump[18801]: #4  0x00007f46d71c4b7c journal_file_append_entry (libsystemd-shared-233.so)
Feb 26 16:37:54 systemd-coredump[18801]: #5  0x00005577688cf056 write_to_journal (systemd-journald)
Feb 26 16:37:54 systemd-coredump[18801]: #6  0x00005577688d2e98 dispatch_message_real (systemd-journald)
Feb 26 16:37:54 kernel: systemd-coredum: 9 output lines suppressed due to ratelimiting
Feb 26 16:37:54 systemd-journald[18810]: Journal started

Feb 26 16:50:59 systemd-coredump[19229]: Due to PID 1 having crashed coredump collection will now be turned off.
Feb 26 16:51:00 systemd[1]: Caught <SEGV>, dumped core as pid 19228.
Feb 26 16:51:00 systemd[1]: Freezing execution.
Feb 26 16:51:00 systemd-coredump[19229]: Process 19228 (systemd) of user 0 dumped core.

                                         Stack trace of thread 19228:
                                         #0  0x00007fab82075c47 kill (libc.so.6)
                                         #1  0x000055fdf7c38b6b crash (systemd)
                                         #2  0x00007fab824175c0 __restore_rt (libpthread.so.0)
                                         #3  0x00007fab82148573 epoll_wait (libc.so.6)
                                         #4  0x00007fab8366f84a sd_event_wait (libsystemd-shared-233.so)
                                         #5  0x00007fab836701de sd_event_run (libsystemd-shared-233.so)
                                         #6  0x000055fdf7c4a380 manager_loop (systemd)
                                         #7  0x000055fdf7c402c2 main (systemd)
                                         #8  0x00007fab82060401 __libc_start_main (libc.so.6)
                                         #9  0x000055fdf7c3818a _start (systemd)

Poor machine ;)

src/coredump/coredump.c

index 9e7206cc6ac0b1d64185c0cfba5d21da00539fe2..270af630cf9053f488364df54cc9e36dbdc467f5 100644 (file)
@@ -102,6 +102,7 @@ enum {
         CONTEXT_RLIMIT,
         CONTEXT_COMM,
         CONTEXT_EXE,
+        CONTEXT_UNIT,
         _CONTEXT_MAX
 };
 
@@ -706,7 +707,7 @@ static int submit_coredump(
         _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
         _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL;
         uint64_t coredump_size = UINT64_MAX;
-        bool truncated = false;
+        bool truncated = false, journald_crash;
         int r;
 
         assert(context);
@@ -714,6 +715,8 @@ static int submit_coredump(
         assert(n_iovec_allocated >= n_iovec + SUBMIT_COREDUMP_FIELDS);
         assert(input_fd >= 0);
 
+        journald_crash = streq_ptr(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE);
+
         /* Vacuum before we write anything again */
         (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);
 
@@ -758,8 +761,10 @@ static int submit_coredump(
                 if (r >= 0)
                         core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID],
                                                " (", context[CONTEXT_COMM], ") of user ",
-                                               context[CONTEXT_UID], " dumped core.\n\n",
-                                               stacktrace);
+                                               context[CONTEXT_UID], " dumped core.",
+                                               journald_crash ? "\nCoredump diverted to " : "",
+                                               journald_crash ? filename : "",
+                                               "\n\n", stacktrace);
                 else if (r == -EINVAL)
                         log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
                 else
@@ -771,9 +776,21 @@ static int submit_coredump(
         if (!core_message)
 #endif
 log:
-        core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], " (",
-                               context[CONTEXT_COMM], ") of user ",
-                               context[CONTEXT_UID], " dumped core.");
+        core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID],
+                               " (", context[CONTEXT_COMM], ") of user ",
+                               context[CONTEXT_UID], " dumped core.",
+                               journald_crash ? "\nCoredump diverted to " : NULL,
+                               journald_crash ? filename : NULL);
+        if (!core_message)
+                return log_oom();
+
+        if (journald_crash) {
+                /* We cannot log to the journal, so just print the MESSAGE.
+                 * The target was set previously to something safe. */
+                log_struct(LOG_ERR, core_message, NULL);
+                return 0;
+        }
+
         if (core_message)
                 IOVEC_SET_STRING(iovec[n_iovec++], core_message);
 
@@ -810,15 +827,15 @@ log:
 
 static void map_context_fields(const struct iovec *iovec, const char* context[]) {
 
-        static const char * const context_field_names[_CONTEXT_MAX] = {
+        static const char * const context_field_names[] = {
                 [CONTEXT_PID] = "COREDUMP_PID=",
                 [CONTEXT_UID] = "COREDUMP_UID=",
                 [CONTEXT_GID] = "COREDUMP_GID=",
                 [CONTEXT_SIGNAL] = "COREDUMP_SIGNAL=",
                 [CONTEXT_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
+                [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=",
                 [CONTEXT_COMM] = "COREDUMP_COMM=",
                 [CONTEXT_EXE] = "COREDUMP_EXE=",
-                [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=",
         };
 
         unsigned i;
@@ -826,9 +843,12 @@ static void map_context_fields(const struct iovec *iovec, const char* context[])
         assert(iovec);
         assert(context);
 
-        for (i = 0; i < _CONTEXT_MAX; i++) {
+        for (i = 0; i < ELEMENTSOF(context_field_names); i++) {
                 size_t l;
 
+                if (!context_field_names[i])
+                        continue;
+
                 l = strlen(context_field_names[i]);
                 if (iovec->iov_len < l)
                         continue;
@@ -1033,35 +1053,6 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd)
         return 0;
 }
 
-static int process_special_crash(const char *context[], int input_fd) {
-        _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
-        _cleanup_free_ char *filename = NULL;
-        uint64_t coredump_size;
-        bool truncated;
-        int r;
-
-        assert(context);
-        assert(input_fd >= 0);
-
-        /* If we are pid1 or journald, we cut things short, don't write to the journal, but still create a coredump. */
-
-        if (arg_storage != COREDUMP_STORAGE_NONE)
-                arg_storage = COREDUMP_STORAGE_EXTERNAL;
-
-        r = save_external_coredump(context, input_fd,
-                                   &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated);
-        if (r < 0)
-                return r;
-
-        r = maybe_remove_external_coredump(filename, coredump_size);
-        if (r < 0)
-                return r;
-
-        log_notice("Detected coredump of the journal daemon or PID 1, diverted to %s.", filename);
-
-        return 0;
-}
-
 static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char *field, const char *value) {
         char *x;
 
@@ -1079,7 +1070,7 @@ static char* set_iovec_field_free(struct iovec iovec[27], size_t *n_iovec, const
         return x;
 }
 
-static int gather_pid_metadata_and_process_special_crash(
+static int gather_pid_metadata(
                 char* context[_CONTEXT_MAX],
                 char **comm_fallback,
                 struct iovec *iovec, size_t *n_iovec) {
@@ -1111,32 +1102,22 @@ static int gather_pid_metadata_and_process_special_crash(
         if (r < 0)
                 log_warning_errno(r, "Failed to get EXE, ignoring: %m");
 
-        if (cg_pid_get_unit(pid, &t) >= 0) {
+        if (cg_pid_get_unit(pid, &context[CONTEXT_UNIT]) >= 0) {
+                if (!streq(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE)) {
+                        /* OK, now we know it's not the journal, hence we can make use of it now. */
+                        log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+                        log_open();
+                }
 
                 /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */
-                if (streq(t, SPECIAL_INIT_SCOPE)) {
+                if (streq(context[CONTEXT_UNIT], SPECIAL_INIT_SCOPE)) {
                         log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
                         (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
                 }
 
-                /* Let's avoid dead-locks when processing journald and init crashes, as socket activation and logging
-                 * are unlikely to work then. */
-                if (STR_IN_SET(t, SPECIAL_JOURNALD_SERVICE, SPECIAL_INIT_SCOPE)) {
-                        free(t);
-                        r = process_special_crash((const char**) context, STDIN_FILENO);
-                        if (r < 0)
-                                return r;
-
-                        return 1; /* > 0 means: we have already processed it, because it's a special crash */
-                }
-
-                set_iovec_field_free(iovec, n_iovec, "COREDUMP_UNIT=", t);
+                set_iovec_field(iovec, n_iovec, "COREDUMP_UNIT=", context[CONTEXT_UNIT]);
         }
 
-        /* OK, now we know it's not the journal, hence we can make use of it now. */
-        log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
-        log_open();
-
         if (cg_pid_get_user_unit(pid, &t) >= 0)
                 set_iovec_field_free(iovec, n_iovec, "COREDUMP_USER_UNIT=", t);
 
@@ -1230,13 +1211,13 @@ static int gather_pid_metadata_and_process_special_crash(
         if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
                 set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
 
-        return 0; /* == 0 means: we successfully acquired all metadata */
+        return 0; /* we successfully acquired all metadata */
 }
 
 static int process_kernel(int argc, char* argv[]) {
 
         char* context[_CONTEXT_MAX] = {};
-        struct iovec iovec[28];
+        struct iovec iovec[28 + SUBMIT_COREDUMP_FIELDS];
         size_t i, n_iovec, n_to_free = 0;
         int r;
 
@@ -1254,9 +1235,8 @@ static int process_kernel(int argc, char* argv[]) {
         context[CONTEXT_TIMESTAMP] = argv[1 + CONTEXT_TIMESTAMP];
         context[CONTEXT_RLIMIT]    = argv[1 + CONTEXT_RLIMIT];
 
-        r = gather_pid_metadata_and_process_special_crash(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free);
-        if (r != 0)
-                /* Error, or a a special crash, which has already been processed. */
+        r = gather_pid_metadata(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free);
+        if (r < 0)
                 goto finish;
 
         n_iovec = n_to_free;
@@ -1268,15 +1248,23 @@ static int process_kernel(int argc, char* argv[]) {
 
         assert(n_iovec <= ELEMENTSOF(iovec));
 
-        r = send_iovec(iovec, n_iovec, STDIN_FILENO);
+        if (STRPTR_IN_SET(context[CONTEXT_UNIT],
+                          SPECIAL_JOURNALD_SERVICE,
+                          SPECIAL_INIT_SCOPE))
+                r = submit_coredump((const char**) context,
+                                    iovec, ELEMENTSOF(iovec), n_iovec,
+                                    STDIN_FILENO);
+        else
+                r = send_iovec(iovec, n_iovec, STDIN_FILENO);
 
  finish:
         for (i = 0; i < n_to_free; i++)
                 free(iovec[i].iov_base);
 
-        /* Those fields are allocated by gather_pid_metadata_and_process_special_crash */
+        /* Those fields are allocated by gather_pid_metadata */
         free(context[CONTEXT_COMM]);
         free(context[CONTEXT_EXE]);
+        free(context[CONTEXT_UNIT]);
 
         return r;
 }
@@ -1305,12 +1293,13 @@ static int process_backtrace(int argc, char *argv[]) {
         context[CONTEXT_TIMESTAMP] = argv[2 + CONTEXT_TIMESTAMP];
         context[CONTEXT_RLIMIT]    = argv[2 + CONTEXT_RLIMIT];
 
-        n_allocated = 33; /* 25 metadata, 2 static, +unknown input, rounded up */
+        n_allocated = 33 + COREDUMP_STORAGE_EXTERNAL;
+        /* 25 metadata, 2 static, +unknown input, 4 storage, rounded up */
         iovec = new(struct iovec, n_allocated);
         if (!iovec)
                 return log_oom();
 
-        r = gather_pid_metadata_and_process_special_crash(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free);
+        r = gather_pid_metadata(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free);
         if (r < 0)
                 goto finish;
         if (r > 0) {
@@ -1364,9 +1353,10 @@ static int process_backtrace(int argc, char *argv[]) {
         for (i = 0; i < n_to_free; i++)
                 free(iovec[i].iov_base);
 
-        /* Those fields are allocated by gather_pid_metadata_and_process_special_crash */
+        /* Those fields are allocated by gather_pid_metadata */
         free(context[CONTEXT_COMM]);
         free(context[CONTEXT_EXE]);
+        free(context[CONTEXT_UNIT]);
 
         return r;
 }