1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2012 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/prctl.h>
24 #include <sys/xattr.h>
29 #include <elfutils/libdwfl.h>
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
35 #include "sd-messages.h"
38 #include "alloc-util.h"
39 #include "capability-util.h"
40 #include "cgroup-util.h"
42 #include "conf-parser.h"
44 #include "coredump-vacuum.h"
45 #include "dirent-util.h"
51 #include "journal-importer.h"
56 #include "parse-util.h"
57 #include "process-util.h"
58 #include "signal-util.h"
59 #include "socket-util.h"
61 #include "stacktrace.h"
62 #include "string-table.h"
63 #include "string-util.h"
65 #include "user-util.h"
68 /* The maximum size up to which we process coredumps */
69 #define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU))
71 /* The maximum size up to which we leave the coredump around on disk */
72 #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
74 /* The maximum size up to which we store the coredump in the journal */
75 #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
77 /* Make sure to not make this larger than the maximum journal entry
78 * size. See DATA_SIZE_MAX in journald-native.c. */
79 assert_cc(JOURNAL_SIZE_MAX
<= DATA_SIZE_MAX
);
82 /* We use this as array indexes for a couple of special fields we use for
83 * naming coredump files, and attaching xattrs, and for indexing argv[].
85 * Our pattern for man:sysctl(8) kernel.core_pattern is such that the
86 * kernel passes fields until CONTEXT_RLIMIT as arguments in argv[]. After
87 * that it gets complicated: the kernel passes "comm" as one or more fields
88 * starting at index CONTEXT_COMM (in other words, full "comm" is under index
89 * CONTEXT_COMM when it does not contain spaces, which is the common
90 * case). This mapping is not reversible, so we prefer to retrieve "comm"
91 * from /proc. We only fall back to argv[CONTEXT_COMM...] when that fails.
93 * In the internal context[] array, fields before CONTEXT_COMM are the
94 * strings from argv[], so they should not be freed. The strings at indices
95 * CONTEXT_COMM and higher are allocated by us and should be freed at the
110 typedef enum CoredumpStorage
{
111 COREDUMP_STORAGE_NONE
,
112 COREDUMP_STORAGE_EXTERNAL
,
113 COREDUMP_STORAGE_JOURNAL
,
114 _COREDUMP_STORAGE_MAX
,
115 _COREDUMP_STORAGE_INVALID
= -1
118 static const char* const coredump_storage_table
[_COREDUMP_STORAGE_MAX
] = {
119 [COREDUMP_STORAGE_NONE
] = "none",
120 [COREDUMP_STORAGE_EXTERNAL
] = "external",
121 [COREDUMP_STORAGE_JOURNAL
] = "journal",
124 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage
, CoredumpStorage
);
125 static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage
, coredump_storage
, CoredumpStorage
, "Failed to parse storage setting");
127 static CoredumpStorage arg_storage
= COREDUMP_STORAGE_EXTERNAL
;
128 static bool arg_compress
= true;
129 static uint64_t arg_process_size_max
= PROCESS_SIZE_MAX
;
130 static uint64_t arg_external_size_max
= EXTERNAL_SIZE_MAX
;
131 static uint64_t arg_journal_size_max
= JOURNAL_SIZE_MAX
;
132 static uint64_t arg_keep_free
= (uint64_t) -1;
133 static uint64_t arg_max_use
= (uint64_t) -1;
135 static int parse_config(void) {
136 static const ConfigTableItem items
[] = {
137 { "Coredump", "Storage", config_parse_coredump_storage
, 0, &arg_storage
},
138 { "Coredump", "Compress", config_parse_bool
, 0, &arg_compress
},
139 { "Coredump", "ProcessSizeMax", config_parse_iec_uint64
, 0, &arg_process_size_max
},
140 { "Coredump", "ExternalSizeMax", config_parse_iec_uint64
, 0, &arg_external_size_max
},
141 { "Coredump", "JournalSizeMax", config_parse_iec_size
, 0, &arg_journal_size_max
},
142 { "Coredump", "KeepFree", config_parse_iec_uint64
, 0, &arg_keep_free
},
143 { "Coredump", "MaxUse", config_parse_iec_uint64
, 0, &arg_max_use
},
147 return config_parse_many_nulstr(PKGSYSCONFDIR
"/coredump.conf",
148 CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
150 config_item_table_lookup
, items
,
151 CONFIG_PARSE_WARN
, NULL
);
154 static inline uint64_t storage_size_max(void) {
155 return arg_storage
== COREDUMP_STORAGE_EXTERNAL
? arg_external_size_max
: arg_journal_size_max
;
158 static int fix_acl(int fd
, uid_t uid
) {
161 _cleanup_(acl_freep
) acl_t acl
= NULL
;
163 acl_permset_t permset
;
168 if (uid
<= SYSTEM_UID_MAX
)
171 /* Make sure normal users can read (but not write or delete)
172 * their own coredumps */
174 acl
= acl_get_fd(fd
);
176 return log_error_errno(errno
, "Failed to get ACL: %m");
178 if (acl_create_entry(&acl
, &entry
) < 0 ||
179 acl_set_tag_type(entry
, ACL_USER
) < 0 ||
180 acl_set_qualifier(entry
, &uid
) < 0)
181 return log_error_errno(errno
, "Failed to patch ACL: %m");
183 if (acl_get_permset(entry
, &permset
) < 0 ||
184 acl_add_perm(permset
, ACL_READ
) < 0)
185 return log_warning_errno(errno
, "Failed to patch ACL: %m");
187 r
= calc_acl_mask_if_needed(&acl
);
189 return log_warning_errno(r
, "Failed to patch ACL: %m");
191 if (acl_set_fd(fd
, acl
) < 0)
192 return log_error_errno(errno
, "Failed to apply ACL: %m");
198 static int fix_xattr(int fd
, const char *context
[_CONTEXT_MAX
]) {
200 static const char * const xattrs
[_CONTEXT_MAX
] = {
201 [CONTEXT_PID
] = "user.coredump.pid",
202 [CONTEXT_UID
] = "user.coredump.uid",
203 [CONTEXT_GID
] = "user.coredump.gid",
204 [CONTEXT_SIGNAL
] = "user.coredump.signal",
205 [CONTEXT_TIMESTAMP
] = "user.coredump.timestamp",
206 [CONTEXT_RLIMIT
] = "user.coredump.rlimit",
207 [CONTEXT_COMM
] = "user.coredump.comm",
208 [CONTEXT_EXE
] = "user.coredump.exe",
216 /* Attach some metadata to coredumps via extended
217 * attributes. Just because we can. */
219 for (i
= 0; i
< _CONTEXT_MAX
; i
++) {
222 if (isempty(context
[i
]) || !xattrs
[i
])
225 k
= fsetxattr(fd
, xattrs
[i
], context
[i
], strlen(context
[i
]), XATTR_CREATE
);
/* Passes s through xescape() with the character set "./ "; used by make_filename() on every
 * context-derived component of the coredump file name. */
#define filename_escape(s) xescape((s), "./ ")

/* Returns s itself, or a fixed placeholder string when s is NULL, so that the result can always be
 * handed to a "%s" conversion in a log message. The returned pointer is never NULL and must not be
 * freed. */
static inline const char *coredump_tmpfile_name(const char *s) {
        return s ? s : "(unnamed temporary file)";
}
239 static int fix_permissions(
241 const char *filename
,
243 const char *context
[_CONTEXT_MAX
],
252 /* Ignore errors on these */
253 (void) fchmod(fd
, 0640);
254 (void) fix_acl(fd
, uid
);
255 (void) fix_xattr(fd
, context
);
258 return log_error_errno(errno
, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename
));
260 r
= link_tmpfile(fd
, filename
, target
);
262 return log_error_errno(r
, "Failed to move coredump %s into place: %m", target
);
267 static int maybe_remove_external_coredump(const char *filename
, uint64_t size
) {
269 /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */
271 if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
&&
272 size
<= arg_external_size_max
)
278 if (unlink(filename
) < 0 && errno
!= ENOENT
)
279 return log_error_errno(errno
, "Failed to unlink %s: %m", filename
);
284 static int make_filename(const char *context
[_CONTEXT_MAX
], char **ret
) {
285 _cleanup_free_
char *c
= NULL
, *u
= NULL
, *p
= NULL
, *t
= NULL
;
286 sd_id128_t boot
= {};
291 c
= filename_escape(context
[CONTEXT_COMM
]);
295 u
= filename_escape(context
[CONTEXT_UID
]);
299 r
= sd_id128_get_boot(&boot
);
303 p
= filename_escape(context
[CONTEXT_PID
]);
307 t
= filename_escape(context
[CONTEXT_TIMESTAMP
]);
312 "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR
".%s.%s000000",
315 SD_ID128_FORMAT_VAL(boot
),
323 static int save_external_coredump(
324 const char *context
[_CONTEXT_MAX
],
330 bool *ret_truncated
) {
332 _cleanup_free_
char *fn
= NULL
, *tmp
= NULL
;
333 _cleanup_close_
int fd
= -1;
334 uint64_t rlimit
, max_size
;
340 assert(ret_filename
);
345 r
= parse_uid(context
[CONTEXT_UID
], &uid
);
347 return log_error_errno(r
, "Failed to parse UID: %m");
349 r
= safe_atou64(context
[CONTEXT_RLIMIT
], &rlimit
);
351 return log_error_errno(r
, "Failed to parse resource limit: %s", context
[CONTEXT_RLIMIT
]);
352 if (rlimit
< page_size()) {
353 /* Is coredumping disabled? Then don't bother saving/processing the coredump.
354 * Anything below PAGE_SIZE cannot give a readable coredump (the kernel uses
355 * ELF_EXEC_PAGESIZE which is not easily accessible, but is usually the same as PAGE_SIZE). */
356 log_info("Resource limits disable core dumping for process %s (%s).",
357 context
[CONTEXT_PID
], context
[CONTEXT_COMM
]);
361 /* Never store more than the process configured, or than we actually shall keep or process */
362 max_size
= MIN(rlimit
, MAX(arg_process_size_max
, storage_size_max()));
364 r
= make_filename(context
, &fn
);
366 return log_error_errno(r
, "Failed to determine coredump file name: %m");
368 mkdir_p_label("/var/lib/systemd/coredump", 0755);
370 fd
= open_tmpfile_linkable(fn
, O_RDWR
|O_CLOEXEC
, &tmp
);
372 return log_error_errno(fd
, "Failed to create temporary file for coredump %s: %m", fn
);
374 r
= copy_bytes(input_fd
, fd
, max_size
, 0);
376 log_error_errno(r
, "Cannot store coredump of %s (%s): %m", context
[CONTEXT_PID
], context
[CONTEXT_COMM
]);
379 *ret_truncated
= r
== 1;
382 LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size
),
383 "SIZE_LIMIT=%zu", max_size
,
384 "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR
,
387 if (fstat(fd
, &st
) < 0) {
388 log_error_errno(errno
, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp
));
392 if (lseek(fd
, 0, SEEK_SET
) == (off_t
) -1) {
393 log_error_errno(errno
, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp
));
397 #if HAVE_XZ || HAVE_LZ4
398 /* If we will remove the coredump anyway, do not compress. */
399 if (arg_compress
&& !maybe_remove_external_coredump(NULL
, st
.st_size
)) {
401 _cleanup_free_
char *fn_compressed
= NULL
, *tmp_compressed
= NULL
;
402 _cleanup_close_
int fd_compressed
= -1;
404 fn_compressed
= strappend(fn
, COMPRESSED_EXT
);
405 if (!fn_compressed
) {
410 fd_compressed
= open_tmpfile_linkable(fn_compressed
, O_RDWR
|O_CLOEXEC
, &tmp_compressed
);
411 if (fd_compressed
< 0) {
412 log_error_errno(fd_compressed
, "Failed to create temporary file for coredump %s: %m", fn_compressed
);
416 r
= compress_stream(fd
, fd_compressed
, -1);
418 log_error_errno(r
, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed
));
419 goto fail_compressed
;
422 r
= fix_permissions(fd_compressed
, tmp_compressed
, fn_compressed
, context
, uid
);
424 goto fail_compressed
;
426 /* OK, this worked, we can get rid of the uncompressed version now */
430 *ret_filename
= fn_compressed
; /* compressed */
431 *ret_node_fd
= fd_compressed
; /* compressed */
432 *ret_data_fd
= fd
; /* uncompressed */
433 *ret_size
= (uint64_t) st
.st_size
; /* uncompressed */
435 fn_compressed
= NULL
;
436 fd
= fd_compressed
= -1;
442 (void) unlink(tmp_compressed
);
448 r
= fix_permissions(fd
, tmp
, fn
, context
, uid
);
455 *ret_size
= (uint64_t) st
.st_size
;
468 static int allocate_journal_field(int fd
, size_t size
, char **ret
, size_t *ret_size
) {
469 _cleanup_free_
char *field
= NULL
;
476 if (lseek(fd
, 0, SEEK_SET
) == (off_t
) -1)
477 return log_warning_errno(errno
, "Failed to seek: %m");
479 field
= malloc(9 + size
);
481 log_warning("Failed to allocate memory for coredump, coredump will not be stored.");
485 memcpy(field
, "COREDUMP=", 9);
487 n
= read(fd
, field
+ 9, size
);
489 return log_error_errno((int) n
, "Failed to read core data: %m");
490 if ((size_t) n
< size
) {
491 log_error("Core data too short.");
496 *ret_size
= size
+ 9;
503 /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
517 static int compose_open_fds(pid_t pid
, char **open_fds
) {
518 _cleanup_closedir_
DIR *proc_fd_dir
= NULL
;
519 _cleanup_close_
int proc_fdinfo_fd
= -1;
520 _cleanup_free_
char *buffer
= NULL
;
521 _cleanup_fclose_
FILE *stream
= NULL
;
522 const char *fddelim
= "", *path
;
523 struct dirent
*dent
= NULL
;
528 assert(open_fds
!= NULL
);
530 path
= procfs_file_alloca(pid
, "fd");
531 proc_fd_dir
= opendir(path
);
535 proc_fdinfo_fd
= openat(dirfd(proc_fd_dir
), "../fdinfo", O_DIRECTORY
|O_NOFOLLOW
|O_CLOEXEC
|O_PATH
);
536 if (proc_fdinfo_fd
< 0)
539 stream
= open_memstream(&buffer
, &size
);
543 FOREACH_DIRENT(dent
, proc_fd_dir
, return -errno
) {
544 _cleanup_fclose_
FILE *fdinfo
= NULL
;
545 _cleanup_free_
char *fdname
= NULL
;
549 r
= readlinkat_malloc(dirfd(proc_fd_dir
), dent
->d_name
, &fdname
);
553 fprintf(stream
, "%s%s:%s\n", fddelim
, dent
->d_name
, fdname
);
556 /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
557 fd
= openat(proc_fdinfo_fd
, dent
->d_name
, O_NOFOLLOW
|O_CLOEXEC
|O_RDONLY
);
561 fdinfo
= fdopen(fd
, "re");
562 if (fdinfo
== NULL
) {
567 FOREACH_LINE(line
, fdinfo
, break) {
568 fputs_unlocked(line
, stream
);
569 if (!endswith(line
, "\n"))
575 stream
= safe_fclose(stream
);
586 static int get_process_ns(pid_t pid
, const char *namespace, ino_t
*ns
) {
589 _cleanup_close_
int proc_ns_dir_fd
;
591 p
= procfs_file_alloca(pid
, "ns");
593 proc_ns_dir_fd
= open(p
, O_DIRECTORY
| O_CLOEXEC
| O_RDONLY
);
594 if (proc_ns_dir_fd
< 0)
597 if (fstatat(proc_ns_dir_fd
, namespace, &stbuf
, /* flags */0) < 0)
604 static int get_mount_namespace_leader(pid_t pid
, pid_t
*container_pid
) {
605 pid_t cpid
= pid
, ppid
= 0;
609 r
= get_process_ns(pid
, "mnt", &proc_mntns
);
616 r
= get_process_ppid(cpid
, &ppid
);
620 r
= get_process_ns(ppid
, "mnt", &parent_mntns
);
624 if (proc_mntns
!= parent_mntns
)
633 *container_pid
= ppid
;
637 /* Returns 1 if the parent was found.
638 * Returns 0 if there is not a process we can call the pid's
639 * container parent (the pid's process isn't 'containerized').
640 * Returns a negative number on errors.
642 static int get_process_container_parent_cmdline(pid_t pid
, char** cmdline
) {
645 const char *proc_root_path
;
646 struct stat root_stat
, proc_root_stat
;
648 /* To compare inodes of / and /proc/[pid]/root */
649 if (stat("/", &root_stat
) < 0)
652 proc_root_path
= procfs_file_alloca(pid
, "root");
653 if (stat(proc_root_path
, &proc_root_stat
) < 0)
656 /* The process uses system root. */
657 if (proc_root_stat
.st_ino
== root_stat
.st_ino
) {
662 r
= get_mount_namespace_leader(pid
, &container_pid
);
666 r
= get_process_cmdline(container_pid
, 0, false, cmdline
);
673 static int change_uid_gid(const char *context
[]) {
678 r
= parse_uid(context
[CONTEXT_UID
], &uid
);
682 if (uid
<= SYSTEM_UID_MAX
) {
683 const char *user
= "systemd-coredump";
685 r
= get_user_creds(&user
, &uid
, &gid
, NULL
, NULL
);
687 log_warning_errno(r
, "Cannot resolve %s user. Proceeding to dump core as root: %m", user
);
691 r
= parse_gid(context
[CONTEXT_GID
], &gid
);
696 return drop_privileges(uid
, gid
, 0);
699 static bool is_journald_crash(const char *context
[_CONTEXT_MAX
]) {
702 return streq_ptr(context
[CONTEXT_UNIT
], SPECIAL_JOURNALD_SERVICE
);
705 static bool is_pid1_crash(const char *context
[_CONTEXT_MAX
]) {
708 return streq_ptr(context
[CONTEXT_UNIT
], SPECIAL_INIT_SCOPE
) ||
709 streq_ptr(context
[CONTEXT_PID
], "1");
712 #define SUBMIT_COREDUMP_FIELDS 4
714 static int submit_coredump(
715 const char *context
[_CONTEXT_MAX
],
717 size_t n_iovec_allocated
,
721 _cleanup_close_
int coredump_fd
= -1, coredump_node_fd
= -1;
722 _cleanup_free_
char *core_message
= NULL
, *filename
= NULL
, *coredump_data
= NULL
;
723 uint64_t coredump_size
= UINT64_MAX
;
724 bool truncated
= false, journald_crash
;
729 assert(n_iovec_allocated
>= n_iovec
+ SUBMIT_COREDUMP_FIELDS
);
730 assert(input_fd
>= 0);
732 journald_crash
= is_journald_crash(context
);
734 /* Vacuum before we write anything again */
735 (void) coredump_vacuum(-1, arg_keep_free
, arg_max_use
);
737 /* Always stream the coredump to disk, if that's possible */
738 r
= save_external_coredump(context
, input_fd
,
739 &filename
, &coredump_node_fd
, &coredump_fd
, &coredump_size
, &truncated
);
741 /* Skip whole core dumping part */
744 /* If we don't want to keep the coredump on disk, remove it now, as later on we will lack the privileges for
745 * it. However, we keep the fd to it, so that we can still process it and log it. */
746 r
= maybe_remove_external_coredump(filename
, coredump_size
);
750 const char *coredump_filename
;
752 coredump_filename
= strjoina("COREDUMP_FILENAME=", filename
);
753 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(coredump_filename
);
754 } else if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
)
755 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
756 coredump_size
, arg_external_size_max
);
758 /* Vacuum again, but exclude the coredump we just created */
759 (void) coredump_vacuum(coredump_node_fd
>= 0 ? coredump_node_fd
: coredump_fd
, arg_keep_free
, arg_max_use
);
761 /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the coredump
762 * memory under the user's uid. This also ensures that the credentials journald will see are the ones of the
763 * coredumping user, thus making sure the user gets access to the core dump. Let's also get rid of all
764 * capabilities, if we run as root, we won't need them anymore. */
765 r
= change_uid_gid(context
);
767 return log_error_errno(r
, "Failed to drop privileges: %m");
770 /* Try to get a stack trace if we can */
771 if (coredump_size
<= arg_process_size_max
) {
772 _cleanup_free_
char *stacktrace
= NULL
;
774 r
= coredump_make_stack_trace(coredump_fd
, context
[CONTEXT_EXE
], &stacktrace
);
776 core_message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
777 " (", context
[CONTEXT_COMM
], ") of user ",
778 context
[CONTEXT_UID
], " dumped core.",
779 journald_crash
? "\nCoredump diverted to " : "",
780 journald_crash
? filename
: "",
782 else if (r
== -EINVAL
)
783 log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
785 log_warning_errno(r
, "Failed to generate stack trace: %m");
787 log_debug("Not generating stack trace: core size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
788 coredump_size
, arg_process_size_max
);
793 core_message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
794 " (", context
[CONTEXT_COMM
], ") of user ",
795 context
[CONTEXT_UID
], " dumped core.",
796 journald_crash
? "\nCoredump diverted to " : NULL
,
797 journald_crash
? filename
: NULL
);
801 if (journald_crash
) {
802 /* We cannot log to the journal, so just print the MESSAGE.
803 * The target was set previously to something safe. */
804 log_dispatch(LOG_ERR
, 0, core_message
);
808 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(core_message
);
811 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
813 /* Optionally store the entire coredump in the journal */
814 if (arg_storage
== COREDUMP_STORAGE_JOURNAL
) {
815 if (coredump_size
<= arg_journal_size_max
) {
818 /* Store the coredump itself in the journal */
820 r
= allocate_journal_field(coredump_fd
, (size_t) coredump_size
, &coredump_data
, &sz
);
822 iovec
[n_iovec
++] = IOVEC_MAKE(coredump_data
, sz
);
824 log_warning_errno(r
, "Failed to attach the core to the journal entry: %m");
826 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
827 coredump_size
, arg_journal_size_max
);
830 assert(n_iovec
<= n_iovec_allocated
);
832 r
= sd_journal_sendv(iovec
, n_iovec
);
834 return log_error_errno(r
, "Failed to log coredump: %m");
839 static void map_context_fields(const struct iovec
*iovec
, const char* context
[]) {
841 static const char * const context_field_names
[] = {
842 [CONTEXT_PID
] = "COREDUMP_PID=",
843 [CONTEXT_UID
] = "COREDUMP_UID=",
844 [CONTEXT_GID
] = "COREDUMP_GID=",
845 [CONTEXT_SIGNAL
] = "COREDUMP_SIGNAL=",
846 [CONTEXT_TIMESTAMP
] = "COREDUMP_TIMESTAMP=",
847 [CONTEXT_RLIMIT
] = "COREDUMP_RLIMIT=",
848 [CONTEXT_COMM
] = "COREDUMP_COMM=",
849 [CONTEXT_EXE
] = "COREDUMP_EXE=",
857 for (i
= 0; i
< ELEMENTSOF(context_field_names
); i
++) {
860 if (!context_field_names
[i
])
863 l
= strlen(context_field_names
[i
]);
864 if (iovec
->iov_len
< l
)
867 if (memcmp(iovec
->iov_base
, context_field_names
[i
], l
) != 0)
870 /* Note that these strings are NUL terminated, because we made sure that a trailing NUL byte is in the
871 * buffer, though not included in the iov_len count. (see below) */
872 context
[i
] = (char*) iovec
->iov_base
+ l
;
877 static int process_socket(int fd
) {
878 _cleanup_close_
int coredump_fd
= -1;
879 struct iovec
*iovec
= NULL
;
880 size_t n_iovec
= 0, n_allocated
= 0, i
, k
;
881 const char *context
[_CONTEXT_MAX
] = {};
886 log_set_target(LOG_TARGET_AUTO
);
887 log_parse_environment();
890 log_debug("Processing coredump received on stdin...");
894 struct cmsghdr cmsghdr
;
895 uint8_t buf
[CMSG_SPACE(sizeof(int))];
898 .msg_control
= &control
,
899 .msg_controllen
= sizeof(control
),
905 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ SUBMIT_COREDUMP_FIELDS
)) {
910 l
= next_datagram_size_fd(fd
);
912 r
= log_error_errno(l
, "Failed to determine datagram size to read: %m");
918 iovec
[n_iovec
].iov_len
= l
;
919 iovec
[n_iovec
].iov_base
= malloc(l
+ 1);
920 if (!iovec
[n_iovec
].iov_base
) {
925 mh
.msg_iov
= iovec
+ n_iovec
;
927 n
= recvmsg(fd
, &mh
, MSG_NOSIGNAL
|MSG_CMSG_CLOEXEC
);
929 free(iovec
[n_iovec
].iov_base
);
930 r
= log_error_errno(errno
, "Failed to receive datagram: %m");
935 struct cmsghdr
*cmsg
, *found
= NULL
;
936 /* The final zero-length datagram carries the file descriptor and tells us that we're done. */
938 free(iovec
[n_iovec
].iov_base
);
940 CMSG_FOREACH(cmsg
, &mh
) {
941 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
942 cmsg
->cmsg_type
== SCM_RIGHTS
&&
943 cmsg
->cmsg_len
== CMSG_LEN(sizeof(int))) {
950 log_error("Coredump file descriptor missing.");
955 assert(coredump_fd
< 0);
956 coredump_fd
= *(int*) CMSG_DATA(found
);
960 /* Add trailing NUL byte, in case these are strings */
961 ((char*) iovec
[n_iovec
].iov_base
)[n
] = 0;
962 iovec
[n_iovec
].iov_len
= (size_t) n
;
965 map_context_fields(iovec
+ n_iovec
, context
);
969 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ SUBMIT_COREDUMP_FIELDS
)) {
974 /* Make sure we got all data we really need */
975 assert(context
[CONTEXT_PID
]);
976 assert(context
[CONTEXT_UID
]);
977 assert(context
[CONTEXT_GID
]);
978 assert(context
[CONTEXT_SIGNAL
]);
979 assert(context
[CONTEXT_TIMESTAMP
]);
980 assert(context
[CONTEXT_RLIMIT
]);
981 assert(context
[CONTEXT_COMM
]);
982 assert(coredump_fd
>= 0);
984 /* Small quirk: the journal fields contain the timestamp padded with six zeroes, so that the kernel-supplied 1s
985 * granularity timestamps become 1µs granularity, i.e. the granularity systemd usually operates in. Since we
986 * are reconstructing the original kernel context, we chop this off again, here. */
987 k
= strlen(context
[CONTEXT_TIMESTAMP
]);
989 context
[CONTEXT_TIMESTAMP
] = strndupa(context
[CONTEXT_TIMESTAMP
], k
- 6);
991 r
= submit_coredump(context
, iovec
, n_allocated
, n_iovec
, coredump_fd
);
994 for (i
= 0; i
< n_iovec
; i
++)
995 free(iovec
[i
].iov_base
);
1001 static int send_iovec(const struct iovec iovec
[], size_t n_iovec
, int input_fd
) {
1003 static const union sockaddr_union sa
= {
1004 .un
.sun_family
= AF_UNIX
,
1005 .un
.sun_path
= "/run/systemd/coredump",
1007 _cleanup_close_
int fd
= -1;
1011 assert(iovec
|| n_iovec
<= 0);
1012 assert(input_fd
>= 0);
1014 fd
= socket(AF_UNIX
, SOCK_SEQPACKET
|SOCK_CLOEXEC
, 0);
1016 return log_error_errno(errno
, "Failed to create coredump socket: %m");
1018 if (connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0)
1019 return log_error_errno(errno
, "Failed to connect to coredump service: %m");
1021 for (i
= 0; i
< n_iovec
; i
++) {
1022 struct msghdr mh
= {
1023 .msg_iov
= (struct iovec
*) iovec
+ i
,
1026 struct iovec copy
[2];
1029 if (sendmsg(fd
, &mh
, MSG_NOSIGNAL
) >= 0)
1032 if (errno
== EMSGSIZE
&& mh
.msg_iov
[0].iov_len
> 0) {
1033 /* This field didn't fit? That's a pity. Given that this is just metadata,
1034 * let's truncate the field at half, and try again. We append three dots, in
1035 * order to show that this is truncated. */
1037 if (mh
.msg_iov
!= copy
) {
1038 /* We don't want to modify the caller's iovec, hence let's create our
1039 * own array, consisting of two new iovecs, where the first is a
1040 * (truncated) copy of what we want to send, and the second one
1041 * contains the trailing dots. */
1043 copy
[1] = (struct iovec
) {
1044 .iov_base
= (char[]) { '.', '.', '.' },
1052 copy
[0].iov_len
/= 2; /* halve it, and try again */
1056 return log_error_errno(errno
, "Failed to send coredump datagram: %m");
1060 r
= send_one_fd(fd
, input_fd
, 0);
1062 return log_error_errno(r
, "Failed to send coredump fd: %m");
1067 static char* set_iovec_field(struct iovec iovec
[27], size_t *n_iovec
, const char *field
, const char *value
) {
1070 x
= strappend(field
, value
);
1072 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(x
);
/* Same as set_iovec_field(), but takes ownership of value: it is freed before returning, whether or
 * not the field could be added. Returns what set_iovec_field() returned. */
static char* set_iovec_field_free(struct iovec iovec[27], size_t *n_iovec, const char *field, char *value) {
        char *x;

        x = set_iovec_field(iovec, n_iovec, field, value);
        free(value);

        return x;
}
1084 static int gather_pid_metadata(
1085 char* context
[_CONTEXT_MAX
],
1086 char **comm_fallback
,
1087 struct iovec
*iovec
, size_t *n_iovec
) {
1089 /* We need 26 empty slots in iovec!
1091 * Note that if we fail on oom later on, we do not roll-back changes to the iovec structure. (It remains valid,
1092 * with the first n_iovec fields initialized.) */
1100 r
= parse_pid(context
[CONTEXT_PID
], &pid
);
1102 return log_error_errno(r
, "Failed to parse PID \"%s\": %m", context
[CONTEXT_PID
]);
1104 r
= get_process_comm(pid
, &context
[CONTEXT_COMM
]);
1106 log_warning_errno(r
, "Failed to get COMM, falling back to the command line: %m");
1107 context
[CONTEXT_COMM
] = strv_join(comm_fallback
, " ");
1108 if (!context
[CONTEXT_COMM
])
1112 r
= get_process_exe(pid
, &context
[CONTEXT_EXE
]);
1114 log_warning_errno(r
, "Failed to get EXE, ignoring: %m");
1116 if (cg_pid_get_unit(pid
, &context
[CONTEXT_UNIT
]) >= 0) {
1117 if (!is_journald_crash((const char**) context
)) {
1118 /* OK, now we know it's not the journal, hence we can make use of it now. */
1119 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
1123 /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */
1124 if (is_pid1_crash((const char**) context
)) {
1125 log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
1126 (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
1129 set_iovec_field(iovec
, n_iovec
, "COREDUMP_UNIT=", context
[CONTEXT_UNIT
]);
1132 if (cg_pid_get_user_unit(pid
, &t
) >= 0)
1133 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_USER_UNIT=", t
);
1135 /* The next few are mandatory */
1136 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_PID=", context
[CONTEXT_PID
]))
1139 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_UID=", context
[CONTEXT_UID
]))
1142 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_GID=", context
[CONTEXT_GID
]))
1145 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_SIGNAL=", context
[CONTEXT_SIGNAL
]))
1148 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_RLIMIT=", context
[CONTEXT_RLIMIT
]))
1151 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_COMM=", context
[CONTEXT_COMM
]))
1154 if (context
[CONTEXT_EXE
] &&
1155 !set_iovec_field(iovec
, n_iovec
, "COREDUMP_EXE=", context
[CONTEXT_EXE
]))
1158 if (sd_pid_get_session(pid
, &t
) >= 0)
1159 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_SESSION=", t
);
1161 if (sd_pid_get_owner_uid(pid
, &owner_uid
) >= 0) {
1162 r
= asprintf(&t
, "COREDUMP_OWNER_UID=" UID_FMT
, owner_uid
);
1164 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(t
);
1167 if (sd_pid_get_slice(pid
, &t
) >= 0)
1168 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_SLICE=", t
);
1170 if (get_process_cmdline(pid
, 0, false, &t
) >= 0)
1171 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CMDLINE=", t
);
1173 if (cg_pid_get_path_shifted(pid
, NULL
, &t
) >= 0)
1174 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CGROUP=", t
);
1176 if (compose_open_fds(pid
, &t
) >= 0)
1177 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_OPEN_FDS=", t
);
1179 p
= procfs_file_alloca(pid
, "status");
1180 if (read_full_file(p
, &t
, NULL
) >= 0)
1181 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_STATUS=", t
);
1183 p
= procfs_file_alloca(pid
, "maps");
1184 if (read_full_file(p
, &t
, NULL
) >= 0)
1185 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_MAPS=", t
);
1187 p
= procfs_file_alloca(pid
, "limits");
1188 if (read_full_file(p
, &t
, NULL
) >= 0)
1189 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_LIMITS=", t
);
1191 p
= procfs_file_alloca(pid
, "cgroup");
1192 if (read_full_file(p
, &t
, NULL
) >=0)
1193 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_CGROUP=", t
);
1195 p
= procfs_file_alloca(pid
, "mountinfo");
1196 if (read_full_file(p
, &t
, NULL
) >=0)
1197 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_MOUNTINFO=", t
);
1199 if (get_process_cwd(pid
, &t
) >= 0)
1200 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CWD=", t
);
1202 if (get_process_root(pid
, &t
) >= 0) {
1203 bool proc_self_root_is_slash
;
1205 proc_self_root_is_slash
= strcmp(t
, "/") == 0;
1207 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_ROOT=", t
);
1209 /* If the process' root is "/", then there is a chance it has
1210 * mounted own root and hence being containerized. */
1211 if (proc_self_root_is_slash
&& get_process_container_parent_cmdline(pid
, &t
) > 0)
1212 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CONTAINER_CMDLINE=", t
);
1215 if (get_process_environ(pid
, &t
) >= 0)
1216 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_ENVIRON=", t
);
1218 t
= strjoin("COREDUMP_TIMESTAMP=", context
[CONTEXT_TIMESTAMP
], "000000");
1220 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(t
);
1222 if (safe_atoi(context
[CONTEXT_SIGNAL
], &signo
) >= 0 && SIGNAL_VALID(signo
))
1223 set_iovec_field(iovec
, n_iovec
, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo
));
1225 return 0; /* we successfully acquired all metadata */
1228 static int process_kernel(int argc
, char* argv
[]) {
1230 char* context
[_CONTEXT_MAX
] = {};
1231 struct iovec iovec
[28 + SUBMIT_COREDUMP_FIELDS
];
1232 size_t i
, n_iovec
, n_to_free
= 0;
1235 log_debug("Processing coredump received from the kernel...");
1237 if (argc
< CONTEXT_COMM
+ 1) {
1238 log_error("Not enough arguments passed by the kernel (%i, expected %i).", argc
- 1, CONTEXT_COMM
+ 1 - 1);
1242 context
[CONTEXT_PID
] = argv
[1 + CONTEXT_PID
];
1243 context
[CONTEXT_UID
] = argv
[1 + CONTEXT_UID
];
1244 context
[CONTEXT_GID
] = argv
[1 + CONTEXT_GID
];
1245 context
[CONTEXT_SIGNAL
] = argv
[1 + CONTEXT_SIGNAL
];
1246 context
[CONTEXT_TIMESTAMP
] = argv
[1 + CONTEXT_TIMESTAMP
];
1247 context
[CONTEXT_RLIMIT
] = argv
[1 + CONTEXT_RLIMIT
];
1249 r
= gather_pid_metadata(context
, argv
+ 1 + CONTEXT_COMM
, iovec
, &n_to_free
);
1253 n_iovec
= n_to_free
;
1255 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR
);
1257 assert_cc(2 == LOG_CRIT
);
1258 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("PRIORITY=2");
1260 assert(n_iovec
<= ELEMENTSOF(iovec
));
1262 if (is_journald_crash((const char**) context
) || is_pid1_crash((const char**) context
))
1263 r
= submit_coredump((const char**) context
,
1264 iovec
, ELEMENTSOF(iovec
), n_iovec
,
1267 r
= send_iovec(iovec
, n_iovec
, STDIN_FILENO
);
1270 for (i
= 0; i
< n_to_free
; i
++)
1271 free(iovec
[i
].iov_base
);
1273 /* Those fields are allocated by gather_pid_metadata */
1274 free(context
[CONTEXT_COMM
]);
1275 free(context
[CONTEXT_EXE
]);
1276 free(context
[CONTEXT_UNIT
]);
1281 static int process_backtrace(int argc
, char *argv
[]) {
1282 char *context
[_CONTEXT_MAX
] = {};
1283 _cleanup_free_
char *message
= NULL
;
1284 _cleanup_free_
struct iovec
*iovec
= NULL
;
1285 size_t n_iovec
, n_allocated
, n_to_free
= 0, i
;
1287 JournalImporter importer
= {
1291 log_debug("Processing backtrace on stdin...");
1293 if (argc
< CONTEXT_COMM
+ 1) {
1294 log_error("Not enough arguments passed (%i, expected %i).", argc
- 1, CONTEXT_COMM
+ 1 - 1);
1298 context
[CONTEXT_PID
] = argv
[2 + CONTEXT_PID
];
1299 context
[CONTEXT_UID
] = argv
[2 + CONTEXT_UID
];
1300 context
[CONTEXT_GID
] = argv
[2 + CONTEXT_GID
];
1301 context
[CONTEXT_SIGNAL
] = argv
[2 + CONTEXT_SIGNAL
];
1302 context
[CONTEXT_TIMESTAMP
] = argv
[2 + CONTEXT_TIMESTAMP
];
1303 context
[CONTEXT_RLIMIT
] = argv
[2 + CONTEXT_RLIMIT
];
1305 n_allocated
= 33 + COREDUMP_STORAGE_EXTERNAL
;
1306 /* 25 metadata, 2 static, +unknown input, 4 storage, rounded up */
1307 iovec
= new(struct iovec
, n_allocated
);
1311 r
= gather_pid_metadata(context
, argv
+ 2 + CONTEXT_COMM
, iovec
, &n_to_free
);
1315 /* This was a special crash, and has already been processed. */
1319 n_iovec
= n_to_free
;
1322 r
= journal_importer_process_data(&importer
);
1324 log_error_errno(r
, "Failed to parse journal entry on stdin: %m");
1327 if (r
== 1 || /* complete entry */
1328 journal_importer_eof(&importer
)) /* end of data */
1332 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ importer
.iovw
.count
+ 2))
1335 if (journal_importer_eof(&importer
)) {
1336 log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
1338 message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
1339 " (", context
[CONTEXT_COMM
], ")"
1340 " of user ", context
[CONTEXT_UID
],
1341 " failed with ", context
[CONTEXT_SIGNAL
]);
1346 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(message
);
1348 for (i
= 0; i
< importer
.iovw
.count
; i
++)
1349 iovec
[n_iovec
++] = importer
.iovw
.iovec
[i
];
1352 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR
);
1353 assert_cc(2 == LOG_CRIT
);
1354 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("PRIORITY=2");
1356 assert(n_iovec
<= n_allocated
);
1358 r
= sd_journal_sendv(iovec
, n_iovec
);
1360 log_error_errno(r
, "Failed to log backtrace: %m");
1363 for (i
= 0; i
< n_to_free
; i
++)
1364 free(iovec
[i
].iov_base
);
1366 /* Those fields are allocated by gather_pid_metadata */
1367 free(context
[CONTEXT_COMM
]);
1368 free(context
[CONTEXT_EXE
]);
1369 free(context
[CONTEXT_UNIT
]);
1374 int main(int argc
, char *argv
[]) {
1377 /* First, log to a safe place, since we don't know what crashed and it might
1378 * be journald which we'd rather not log to then. */
1380 log_set_target(LOG_TARGET_KMSG
);
1383 /* Make sure we never enter a loop */
1384 (void) prctl(PR_SET_DUMPABLE
, 0);
1386 /* Ignore all parse errors */
1387 (void) parse_config();
1389 log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage
));
1390 log_debug("Selected compression %s.", yes_no(arg_compress
));
1392 r
= sd_listen_fds(false);
1394 log_error_errno(r
, "Failed to determine number of file descriptor: %m");
1398 /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
1399 * are invoked from the kernel as coredump handler. */
1401 if (streq_ptr(argv
[1], "--backtrace"))
1402 r
= process_backtrace(argc
, argv
);
1404 r
= process_kernel(argc
, argv
);
1406 r
= process_socket(SD_LISTEN_FDS_START
);
1408 log_error("Received unexpected number of file descriptors.");
1413 return r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;