1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2012 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <stdio_ext.h>
24 #include <sys/prctl.h>
25 #include <sys/xattr.h>
30 #include <elfutils/libdwfl.h>
33 #include "sd-daemon.h"
34 #include "sd-journal.h"
36 #include "sd-messages.h"
39 #include "alloc-util.h"
40 #include "capability-util.h"
41 #include "cgroup-util.h"
43 #include "conf-parser.h"
45 #include "coredump-vacuum.h"
46 #include "dirent-util.h"
52 #include "journal-importer.h"
57 #include "parse-util.h"
58 #include "process-util.h"
59 #include "signal-util.h"
60 #include "socket-util.h"
62 #include "stacktrace.h"
63 #include "string-table.h"
64 #include "string-util.h"
66 #include "user-util.h"
69 /* The maximum size up to which we process coredumps */
70 #define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU))
72 /* The maximum size up to which we leave the coredump around on disk */
73 #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
75 /* The maximum size up to which we store the coredump in the journal */
76 #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
78 /* Make sure to not make this larger than the maximum journal entry
79 * size. See DATA_SIZE_MAX in journald-native.c. */
80 assert_cc(JOURNAL_SIZE_MAX
<= DATA_SIZE_MAX
);
83 /* We use this as array indexes for a couple of special fields we use for
84 * naming coredump files, and attaching xattrs, and for indexing argv[].
86 * Our pattern for man:sysctl(8) kernel.core_pattern is such that the
87 * kernel passes fields until CONTEXT_RLIMIT as arguments in argv[]. After
88 * that it gets complicated: the kernel passes "comm" as one or more fields
89 * starting at index CONTEXT_COMM (in other words, full "comm" is under index
90 * CONTEXT_COMM when it does not contain spaces, which is the common
91 * case). This mapping is not reversible, so we prefer to retrieve "comm"
92 * from /proc. We only fall back to argv[CONTEXT_COMM...] when that fails.
94 * In the internal context[] array, fields before CONTEXT_COMM are the
95 * strings from argv[], so they should not be freed. The strings at indices
96 * CONTEXT_COMM and higher are allocated by us and should be freed at the
/* Where a collected coredump is stored. _COREDUMP_STORAGE_MAX is the number of
 * valid values (and the size of the name table); _COREDUMP_STORAGE_INVALID is
 * the "no such value" marker. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,
        COREDUMP_STORAGE_EXTERNAL,
        COREDUMP_STORAGE_JOURNAL,
        _COREDUMP_STORAGE_MAX,
        _COREDUMP_STORAGE_INVALID = -1
} CoredumpStorage;
120 static const char* const coredump_storage_table
[_COREDUMP_STORAGE_MAX
] = {
121 [COREDUMP_STORAGE_NONE
] = "none",
122 [COREDUMP_STORAGE_EXTERNAL
] = "external",
123 [COREDUMP_STORAGE_JOURNAL
] = "journal",
126 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage
, CoredumpStorage
);
127 static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage
, coredump_storage
, CoredumpStorage
, "Failed to parse storage setting");
129 static CoredumpStorage arg_storage
= COREDUMP_STORAGE_EXTERNAL
;
130 static bool arg_compress
= true;
131 static uint64_t arg_process_size_max
= PROCESS_SIZE_MAX
;
132 static uint64_t arg_external_size_max
= EXTERNAL_SIZE_MAX
;
133 static uint64_t arg_journal_size_max
= JOURNAL_SIZE_MAX
;
134 static uint64_t arg_keep_free
= (uint64_t) -1;
135 static uint64_t arg_max_use
= (uint64_t) -1;
137 static int parse_config(void) {
138 static const ConfigTableItem items
[] = {
139 { "Coredump", "Storage", config_parse_coredump_storage
, 0, &arg_storage
},
140 { "Coredump", "Compress", config_parse_bool
, 0, &arg_compress
},
141 { "Coredump", "ProcessSizeMax", config_parse_iec_uint64
, 0, &arg_process_size_max
},
142 { "Coredump", "ExternalSizeMax", config_parse_iec_uint64
, 0, &arg_external_size_max
},
143 { "Coredump", "JournalSizeMax", config_parse_iec_size
, 0, &arg_journal_size_max
},
144 { "Coredump", "KeepFree", config_parse_iec_uint64
, 0, &arg_keep_free
},
145 { "Coredump", "MaxUse", config_parse_iec_uint64
, 0, &arg_max_use
},
149 return config_parse_many_nulstr(PKGSYSCONFDIR
"/coredump.conf",
150 CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
152 config_item_table_lookup
, items
,
153 CONFIG_PARSE_WARN
, NULL
);
156 static inline uint64_t storage_size_max(void) {
157 return arg_storage
== COREDUMP_STORAGE_EXTERNAL
? arg_external_size_max
: arg_journal_size_max
;
/* Adds an ACL entry to fd granting read access to uid, so that regular users can
 * read (but not modify) their own coredumps. Nothing is done for system users,
 * dynamic users and "nobody". Returns 0 on success or when nothing was to be done,
 * a negative errno-style value on failure. */
static int fix_acl(int fd, uid_t uid) {

#if HAVE_ACL
        _cleanup_(acl_freep) acl_t acl = NULL;
        acl_entry_t entry;
        acl_permset_t permset;
        int r;

        assert(fd >= 0);

        if (uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
                return 0;

        /* Make sure normal users can read (but not write or delete)
         * their own coredumps */

        acl = acl_get_fd(fd);
        if (!acl)
                return log_error_errno(errno, "Failed to get ACL: %m");

        if (acl_create_entry(&acl, &entry) < 0 ||
            acl_set_tag_type(entry, ACL_USER) < 0 ||
            acl_set_qualifier(entry, &uid) < 0)
                return log_error_errno(errno, "Failed to patch ACL: %m");

        if (acl_get_permset(entry, &permset) < 0 ||
            acl_add_perm(permset, ACL_READ) < 0)
                return log_warning_errno(errno, "Failed to patch ACL: %m");

        r = calc_acl_mask_if_needed(&acl);
        if (r < 0)
                return log_warning_errno(r, "Failed to patch ACL: %m");

        if (acl_set_fd(fd, acl) < 0)
                return log_error_errno(errno, "Failed to apply ACL: %m");
#endif

        return 0;
}
200 static int fix_xattr(int fd
, const char *context
[_CONTEXT_MAX
]) {
202 static const char * const xattrs
[_CONTEXT_MAX
] = {
203 [CONTEXT_PID
] = "user.coredump.pid",
204 [CONTEXT_UID
] = "user.coredump.uid",
205 [CONTEXT_GID
] = "user.coredump.gid",
206 [CONTEXT_SIGNAL
] = "user.coredump.signal",
207 [CONTEXT_TIMESTAMP
] = "user.coredump.timestamp",
208 [CONTEXT_RLIMIT
] = "user.coredump.rlimit",
209 [CONTEXT_HOSTNAME
] = "user.coredump.hostname",
210 [CONTEXT_COMM
] = "user.coredump.comm",
211 [CONTEXT_EXE
] = "user.coredump.exe",
219 /* Attach some metadata to coredumps via extended
220 * attributes. Just because we can. */
222 for (i
= 0; i
< _CONTEXT_MAX
; i
++) {
225 if (isempty(context
[i
]) || !xattrs
[i
])
228 k
= fsetxattr(fd
, xattrs
[i
], context
[i
], strlen(context
[i
]), XATTR_CREATE
);
/* Escapes '.', '/' and ' ' in s via xescape(), for use as a file name component. */
#define filename_escape(s) xescape((s), "./ ")
/* Returns s, or a placeholder suitable for log messages if s is NULL. */
static inline const char *coredump_tmpfile_name(const char *s) {
        return s ? s : "(unnamed temporary file)";
}
242 static int fix_permissions(
244 const char *filename
,
246 const char *context
[_CONTEXT_MAX
],
255 /* Ignore errors on these */
256 (void) fchmod(fd
, 0640);
257 (void) fix_acl(fd
, uid
);
258 (void) fix_xattr(fd
, context
);
261 return log_error_errno(errno
, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename
));
263 (void) fsync_directory_of_file(fd
);
265 r
= link_tmpfile(fd
, filename
, target
);
267 return log_error_errno(r
, "Failed to move coredump %s into place: %m", target
);
272 static int maybe_remove_external_coredump(const char *filename
, uint64_t size
) {
274 /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */
276 if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
&&
277 size
<= arg_external_size_max
)
283 if (unlink(filename
) < 0 && errno
!= ENOENT
)
284 return log_error_errno(errno
, "Failed to unlink %s: %m", filename
);
289 static int make_filename(const char *context
[_CONTEXT_MAX
], char **ret
) {
290 _cleanup_free_
char *c
= NULL
, *u
= NULL
, *p
= NULL
, *t
= NULL
;
291 sd_id128_t boot
= {};
296 c
= filename_escape(context
[CONTEXT_COMM
]);
300 u
= filename_escape(context
[CONTEXT_UID
]);
304 r
= sd_id128_get_boot(&boot
);
308 p
= filename_escape(context
[CONTEXT_PID
]);
312 t
= filename_escape(context
[CONTEXT_TIMESTAMP
]);
317 "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR
".%s.%s000000",
320 SD_ID128_FORMAT_VAL(boot
),
328 static int save_external_coredump(
329 const char *context
[_CONTEXT_MAX
],
335 bool *ret_truncated
) {
337 _cleanup_free_
char *fn
= NULL
, *tmp
= NULL
;
338 _cleanup_close_
int fd
= -1;
339 uint64_t rlimit
, max_size
;
345 assert(ret_filename
);
350 r
= parse_uid(context
[CONTEXT_UID
], &uid
);
352 return log_error_errno(r
, "Failed to parse UID: %m");
354 r
= safe_atou64(context
[CONTEXT_RLIMIT
], &rlimit
);
356 return log_error_errno(r
, "Failed to parse resource limit: %s", context
[CONTEXT_RLIMIT
]);
357 if (rlimit
< page_size()) {
358 /* Is coredumping disabled? Then don't bother saving/processing the coredump.
359 * Anything below PAGE_SIZE cannot give a readable coredump (the kernel uses
360 * ELF_EXEC_PAGESIZE which is not easily accessible, but is usually the same as PAGE_SIZE. */
361 log_info("Resource limits disable core dumping for process %s (%s).",
362 context
[CONTEXT_PID
], context
[CONTEXT_COMM
]);
366 /* Never store more than the process configured, or than we actually shall keep or process */
367 max_size
= MIN(rlimit
, MAX(arg_process_size_max
, storage_size_max()));
369 r
= make_filename(context
, &fn
);
371 return log_error_errno(r
, "Failed to determine coredump file name: %m");
373 mkdir_p_label("/var/lib/systemd/coredump", 0755);
375 fd
= open_tmpfile_linkable(fn
, O_RDWR
|O_CLOEXEC
, &tmp
);
377 return log_error_errno(fd
, "Failed to create temporary file for coredump %s: %m", fn
);
379 r
= copy_bytes(input_fd
, fd
, max_size
, 0);
381 log_error_errno(r
, "Cannot store coredump of %s (%s): %m", context
[CONTEXT_PID
], context
[CONTEXT_COMM
]);
384 *ret_truncated
= r
== 1;
387 LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size
),
388 "SIZE_LIMIT=%zu", max_size
,
389 "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR
,
392 if (fstat(fd
, &st
) < 0) {
393 log_error_errno(errno
, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp
));
397 if (lseek(fd
, 0, SEEK_SET
) == (off_t
) -1) {
398 log_error_errno(errno
, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp
));
402 #if HAVE_XZ || HAVE_LZ4
403 /* If we will remove the coredump anyway, do not compress. */
404 if (arg_compress
&& !maybe_remove_external_coredump(NULL
, st
.st_size
)) {
406 _cleanup_free_
char *fn_compressed
= NULL
, *tmp_compressed
= NULL
;
407 _cleanup_close_
int fd_compressed
= -1;
409 fn_compressed
= strappend(fn
, COMPRESSED_EXT
);
410 if (!fn_compressed
) {
415 fd_compressed
= open_tmpfile_linkable(fn_compressed
, O_RDWR
|O_CLOEXEC
, &tmp_compressed
);
416 if (fd_compressed
< 0) {
417 log_error_errno(fd_compressed
, "Failed to create temporary file for coredump %s: %m", fn_compressed
);
421 r
= compress_stream(fd
, fd_compressed
, -1);
423 log_error_errno(r
, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed
));
424 goto fail_compressed
;
427 r
= fix_permissions(fd_compressed
, tmp_compressed
, fn_compressed
, context
, uid
);
429 goto fail_compressed
;
431 /* OK, this worked, we can get rid of the uncompressed version now */
435 *ret_filename
= fn_compressed
; /* compressed */
436 *ret_node_fd
= fd_compressed
; /* compressed */
437 *ret_data_fd
= fd
; /* uncompressed */
438 *ret_size
= (uint64_t) st
.st_size
; /* uncompressed */
440 fn_compressed
= NULL
;
441 fd
= fd_compressed
= -1;
447 (void) unlink(tmp_compressed
);
453 r
= fix_permissions(fd
, tmp
, fn
, context
, uid
);
460 *ret_size
= (uint64_t) st
.st_size
;
473 static int allocate_journal_field(int fd
, size_t size
, char **ret
, size_t *ret_size
) {
474 _cleanup_free_
char *field
= NULL
;
481 if (lseek(fd
, 0, SEEK_SET
) == (off_t
) -1)
482 return log_warning_errno(errno
, "Failed to seek: %m");
484 field
= malloc(9 + size
);
486 log_warning("Failed to allocate memory for coredump, coredump will not be stored.");
490 memcpy(field
, "COREDUMP=", 9);
492 n
= read(fd
, field
+ 9, size
);
494 return log_error_errno((int) n
, "Failed to read core data: %m");
495 if ((size_t) n
< size
) {
496 log_error("Core data too short.");
501 *ret_size
= size
+ 9;
508 /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
522 static int compose_open_fds(pid_t pid
, char **open_fds
) {
523 _cleanup_closedir_
DIR *proc_fd_dir
= NULL
;
524 _cleanup_close_
int proc_fdinfo_fd
= -1;
525 _cleanup_free_
char *buffer
= NULL
;
526 _cleanup_fclose_
FILE *stream
= NULL
;
527 const char *fddelim
= "", *path
;
528 struct dirent
*dent
= NULL
;
533 assert(open_fds
!= NULL
);
535 path
= procfs_file_alloca(pid
, "fd");
536 proc_fd_dir
= opendir(path
);
540 proc_fdinfo_fd
= openat(dirfd(proc_fd_dir
), "../fdinfo", O_DIRECTORY
|O_NOFOLLOW
|O_CLOEXEC
|O_PATH
);
541 if (proc_fdinfo_fd
< 0)
544 stream
= open_memstream(&buffer
, &size
);
548 (void) __fsetlocking(stream
, FSETLOCKING_BYCALLER
);
550 FOREACH_DIRENT(dent
, proc_fd_dir
, return -errno
) {
551 _cleanup_fclose_
FILE *fdinfo
= NULL
;
552 _cleanup_free_
char *fdname
= NULL
;
556 r
= readlinkat_malloc(dirfd(proc_fd_dir
), dent
->d_name
, &fdname
);
560 fprintf(stream
, "%s%s:%s\n", fddelim
, dent
->d_name
, fdname
);
563 /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
564 fd
= openat(proc_fdinfo_fd
, dent
->d_name
, O_NOFOLLOW
|O_CLOEXEC
|O_RDONLY
);
568 fdinfo
= fdopen(fd
, "re");
574 FOREACH_LINE(line
, fdinfo
, break) {
576 if (!endswith(line
, "\n"))
582 stream
= safe_fclose(stream
);
587 *open_fds
= TAKE_PTR(buffer
);
592 static int get_process_ns(pid_t pid
, const char *namespace, ino_t
*ns
) {
595 _cleanup_close_
int proc_ns_dir_fd
;
597 p
= procfs_file_alloca(pid
, "ns");
599 proc_ns_dir_fd
= open(p
, O_DIRECTORY
| O_CLOEXEC
| O_RDONLY
);
600 if (proc_ns_dir_fd
< 0)
603 if (fstatat(proc_ns_dir_fd
, namespace, &stbuf
, /* flags */0) < 0)
/* Walks up the parent chain of pid until a process in a different mount namespace
 * is found, and stores that process' PID in *container_pid. Returns 0 on success,
 * -ENOENT if PID 1 is reached while still in the same mount namespace, or another
 * negative errno-style value on failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) {
        pid_t cpid = pid, ppid = 0;
        ino_t proc_mntns;
        int r = 0;

        r = get_process_ns(pid, "mnt", &proc_mntns);
        if (r < 0)
                return r;

        for (;;) {
                ino_t parent_mntns;

                r = get_process_ppid(cpid, &ppid);
                if (r < 0)
                        return r;

                r = get_process_ns(ppid, "mnt", &parent_mntns);
                if (r < 0)
                        return r;

                if (proc_mntns != parent_mntns)
                        break;

                if (ppid == 1)
                        return -ENOENT;

                cpid = ppid;
        }

        *container_pid = ppid;
        return 0;
}
/* Retrieves the command line of the process returned by get_mount_namespace_leader()
 * for pid, but only if the root directory of pid differs from ours.
 *
 * Returns 1 if the parent was found.
 * Returns 0 if there is not a process we can call the pid's
 * container parent (the pid's process isn't 'containerized');
 * *cmdline is set to NULL in that case.
 * Returns a negative number on errors.
 */
static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
        int r = 0;
        pid_t container_pid;
        const char *proc_root_path;
        struct stat root_stat, proc_root_stat;

        /* To compare inodes of / and /proc/[pid]/root */
        if (stat("/", &root_stat) < 0)
                return -errno;

        proc_root_path = procfs_file_alloca(pid, "root");
        if (stat(proc_root_path, &proc_root_stat) < 0)
                return -errno;

        /* The process uses system root. */
        if (proc_root_stat.st_ino == root_stat.st_ino) {
                *cmdline = NULL;
                return 0;
        }

        r = get_mount_namespace_leader(pid, &container_pid);
        if (r < 0)
                return r;

        r = get_process_cmdline(container_pid, 0, false, cmdline);
        if (r < 0)
                return r;

        return 1;
}
679 static int change_uid_gid(const char *context
[]) {
684 r
= parse_uid(context
[CONTEXT_UID
], &uid
);
688 if (uid
<= SYSTEM_UID_MAX
) {
689 const char *user
= "systemd-coredump";
691 r
= get_user_creds(&user
, &uid
, &gid
, NULL
, NULL
);
693 log_warning_errno(r
, "Cannot resolve %s user. Proceeding to dump core as root: %m", user
);
697 r
= parse_gid(context
[CONTEXT_GID
], &gid
);
702 return drop_privileges(uid
, gid
, 0);
705 static bool is_journald_crash(const char *context
[_CONTEXT_MAX
]) {
708 return streq_ptr(context
[CONTEXT_UNIT
], SPECIAL_JOURNALD_SERVICE
);
711 static bool is_pid1_crash(const char *context
[_CONTEXT_MAX
]) {
714 return streq_ptr(context
[CONTEXT_UNIT
], SPECIAL_INIT_SCOPE
) ||
715 streq_ptr(context
[CONTEXT_PID
], "1");
718 #define SUBMIT_COREDUMP_FIELDS 4
720 static int submit_coredump(
721 const char *context
[_CONTEXT_MAX
],
723 size_t n_iovec_allocated
,
727 _cleanup_close_
int coredump_fd
= -1, coredump_node_fd
= -1;
728 _cleanup_free_
char *core_message
= NULL
, *filename
= NULL
, *coredump_data
= NULL
;
729 uint64_t coredump_size
= UINT64_MAX
;
730 bool truncated
= false, journald_crash
;
735 assert(n_iovec_allocated
>= n_iovec
+ SUBMIT_COREDUMP_FIELDS
);
736 assert(input_fd
>= 0);
738 journald_crash
= is_journald_crash(context
);
740 /* Vacuum before we write anything again */
741 (void) coredump_vacuum(-1, arg_keep_free
, arg_max_use
);
743 /* Always stream the coredump to disk, if that's possible */
744 r
= save_external_coredump(context
, input_fd
,
745 &filename
, &coredump_node_fd
, &coredump_fd
, &coredump_size
, &truncated
);
747 /* Skip whole core dumping part */
750 /* If we don't want to keep the coredump on disk, remove it now, as later on we will lack the privileges for
751 * it. However, we keep the fd to it, so that we can still process it and log it. */
752 r
= maybe_remove_external_coredump(filename
, coredump_size
);
756 const char *coredump_filename
;
758 coredump_filename
= strjoina("COREDUMP_FILENAME=", filename
);
759 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(coredump_filename
);
760 } else if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
)
761 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
762 coredump_size
, arg_external_size_max
);
764 /* Vacuum again, but exclude the coredump we just created */
765 (void) coredump_vacuum(coredump_node_fd
>= 0 ? coredump_node_fd
: coredump_fd
, arg_keep_free
, arg_max_use
);
767 /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the coredump
768 * memory under the user's uid. This also ensures that the credentials journald will see are the ones of the
769 * coredumping user, thus making sure the user gets access to the core dump. Let's also get rid of all
770 * capabilities, if we run as root, we won't need them anymore. */
771 r
= change_uid_gid(context
);
773 return log_error_errno(r
, "Failed to drop privileges: %m");
776 /* Try to get a strack trace if we can */
777 if (coredump_size
<= arg_process_size_max
) {
778 _cleanup_free_
char *stacktrace
= NULL
;
780 r
= coredump_make_stack_trace(coredump_fd
, context
[CONTEXT_EXE
], &stacktrace
);
782 core_message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
783 " (", context
[CONTEXT_COMM
], ") of user ",
784 context
[CONTEXT_UID
], " dumped core.",
785 journald_crash
? "\nCoredump diverted to " : "",
786 journald_crash
? filename
: "",
788 else if (r
== -EINVAL
)
789 log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
791 log_warning_errno(r
, "Failed to generate stack trace: %m");
793 log_debug("Not generating stack trace: core size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
794 coredump_size
, arg_process_size_max
);
799 core_message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
800 " (", context
[CONTEXT_COMM
], ") of user ",
801 context
[CONTEXT_UID
], " dumped core.",
802 journald_crash
? "\nCoredump diverted to " : NULL
,
803 journald_crash
? filename
: NULL
);
807 if (journald_crash
) {
808 /* We cannot log to the journal, so just print the MESSAGE.
809 * The target was set previously to something safe. */
810 log_dispatch(LOG_ERR
, 0, core_message
);
814 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(core_message
);
817 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
819 /* Optionally store the entire coredump in the journal */
820 if (arg_storage
== COREDUMP_STORAGE_JOURNAL
) {
821 if (coredump_size
<= arg_journal_size_max
) {
824 /* Store the coredump itself in the journal */
826 r
= allocate_journal_field(coredump_fd
, (size_t) coredump_size
, &coredump_data
, &sz
);
828 iovec
[n_iovec
++] = IOVEC_MAKE(coredump_data
, sz
);
830 log_warning_errno(r
, "Failed to attach the core to the journal entry: %m");
832 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
833 coredump_size
, arg_journal_size_max
);
836 assert(n_iovec
<= n_iovec_allocated
);
838 r
= sd_journal_sendv(iovec
, n_iovec
);
840 return log_error_errno(r
, "Failed to log coredump: %m");
845 static void map_context_fields(const struct iovec
*iovec
, const char* context
[]) {
847 static const char * const context_field_names
[] = {
848 [CONTEXT_PID
] = "COREDUMP_PID=",
849 [CONTEXT_UID
] = "COREDUMP_UID=",
850 [CONTEXT_GID
] = "COREDUMP_GID=",
851 [CONTEXT_SIGNAL
] = "COREDUMP_SIGNAL=",
852 [CONTEXT_TIMESTAMP
] = "COREDUMP_TIMESTAMP=",
853 [CONTEXT_RLIMIT
] = "COREDUMP_RLIMIT=",
854 [CONTEXT_HOSTNAME
] = "COREDUMP_HOSTNAME=",
855 [CONTEXT_COMM
] = "COREDUMP_COMM=",
856 [CONTEXT_EXE
] = "COREDUMP_EXE=",
864 for (i
= 0; i
< ELEMENTSOF(context_field_names
); i
++) {
867 if (!context_field_names
[i
])
870 l
= strlen(context_field_names
[i
]);
871 if (iovec
->iov_len
< l
)
874 if (memcmp(iovec
->iov_base
, context_field_names
[i
], l
) != 0)
877 /* Note that these strings are NUL terminated, because we made sure that a trailing NUL byte is in the
878 * buffer, though not included in the iov_len count. (see below) */
879 context
[i
] = (char*) iovec
->iov_base
+ l
;
884 static int process_socket(int fd
) {
885 _cleanup_close_
int coredump_fd
= -1;
886 struct iovec
*iovec
= NULL
;
887 size_t n_iovec
= 0, n_allocated
= 0, i
, k
;
888 const char *context
[_CONTEXT_MAX
] = {};
893 log_set_target(LOG_TARGET_AUTO
);
894 log_parse_environment();
897 log_debug("Processing coredump received on stdin...");
901 struct cmsghdr cmsghdr
;
902 uint8_t buf
[CMSG_SPACE(sizeof(int))];
905 .msg_control
= &control
,
906 .msg_controllen
= sizeof(control
),
912 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ SUBMIT_COREDUMP_FIELDS
)) {
917 l
= next_datagram_size_fd(fd
);
919 r
= log_error_errno(l
, "Failed to determine datagram size to read: %m");
925 iovec
[n_iovec
].iov_len
= l
;
926 iovec
[n_iovec
].iov_base
= malloc(l
+ 1);
927 if (!iovec
[n_iovec
].iov_base
) {
932 mh
.msg_iov
= iovec
+ n_iovec
;
934 n
= recvmsg(fd
, &mh
, MSG_NOSIGNAL
|MSG_CMSG_CLOEXEC
);
936 free(iovec
[n_iovec
].iov_base
);
937 r
= log_error_errno(errno
, "Failed to receive datagram: %m");
942 struct cmsghdr
*cmsg
, *found
= NULL
;
943 /* The final zero-length datagram carries the file descriptor and tells us that we're done. */
945 free(iovec
[n_iovec
].iov_base
);
947 CMSG_FOREACH(cmsg
, &mh
) {
948 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
949 cmsg
->cmsg_type
== SCM_RIGHTS
&&
950 cmsg
->cmsg_len
== CMSG_LEN(sizeof(int))) {
957 log_error("Coredump file descriptor missing.");
962 assert(coredump_fd
< 0);
963 coredump_fd
= *(int*) CMSG_DATA(found
);
967 /* Add trailing NUL byte, in case these are strings */
968 ((char*) iovec
[n_iovec
].iov_base
)[n
] = 0;
969 iovec
[n_iovec
].iov_len
= (size_t) n
;
972 map_context_fields(iovec
+ n_iovec
, context
);
976 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ SUBMIT_COREDUMP_FIELDS
)) {
981 /* Make sure we got all data we really need */
982 assert(context
[CONTEXT_PID
]);
983 assert(context
[CONTEXT_UID
]);
984 assert(context
[CONTEXT_GID
]);
985 assert(context
[CONTEXT_SIGNAL
]);
986 assert(context
[CONTEXT_TIMESTAMP
]);
987 assert(context
[CONTEXT_RLIMIT
]);
988 assert(context
[CONTEXT_HOSTNAME
]);
989 assert(context
[CONTEXT_COMM
]);
990 assert(coredump_fd
>= 0);
992 /* Small quirk: the journal fields contain the timestamp padded with six zeroes, so that the kernel-supplied 1s
993 * granularity timestamps becomes 1µs granularity, i.e. the granularity systemd usually operates in. Since we
994 * are reconstructing the original kernel context, we chop this off again, here. */
995 k
= strlen(context
[CONTEXT_TIMESTAMP
]);
997 context
[CONTEXT_TIMESTAMP
] = strndupa(context
[CONTEXT_TIMESTAMP
], k
- 6);
999 r
= submit_coredump(context
, iovec
, n_allocated
, n_iovec
, coredump_fd
);
1002 for (i
= 0; i
< n_iovec
; i
++)
1003 free(iovec
[i
].iov_base
);
1009 static int send_iovec(const struct iovec iovec
[], size_t n_iovec
, int input_fd
) {
1011 static const union sockaddr_union sa
= {
1012 .un
.sun_family
= AF_UNIX
,
1013 .un
.sun_path
= "/run/systemd/coredump",
1015 _cleanup_close_
int fd
= -1;
1019 assert(iovec
|| n_iovec
<= 0);
1020 assert(input_fd
>= 0);
1022 fd
= socket(AF_UNIX
, SOCK_SEQPACKET
|SOCK_CLOEXEC
, 0);
1024 return log_error_errno(errno
, "Failed to create coredump socket: %m");
1026 if (connect(fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
)) < 0)
1027 return log_error_errno(errno
, "Failed to connect to coredump service: %m");
1029 for (i
= 0; i
< n_iovec
; i
++) {
1030 struct msghdr mh
= {
1031 .msg_iov
= (struct iovec
*) iovec
+ i
,
1034 struct iovec copy
[2];
1037 if (sendmsg(fd
, &mh
, MSG_NOSIGNAL
) >= 0)
1040 if (errno
== EMSGSIZE
&& mh
.msg_iov
[0].iov_len
> 0) {
1041 /* This field didn't fit? That's a pity. Given that this is just metadata,
1042 * let's truncate the field at half, and try again. We append three dots, in
1043 * order to show that this is truncated. */
1045 if (mh
.msg_iov
!= copy
) {
1046 /* We don't want to modify the caller's iovec, hence let's create our
1047 * own array, consisting of two new iovecs, where the first is a
1048 * (truncated) copy of what we want to send, and the second one
1049 * contains the trailing dots. */
1051 copy
[1] = (struct iovec
) {
1052 .iov_base
= (char[]) { '.', '.', '.' },
1060 copy
[0].iov_len
/= 2; /* halve it, and try again */
1064 return log_error_errno(errno
, "Failed to send coredump datagram: %m");
1068 r
= send_one_fd(fd
, input_fd
, 0);
1070 return log_error_errno(r
, "Failed to send coredump fd: %m");
1075 static char* set_iovec_field(struct iovec
*iovec
, size_t *n_iovec
, const char *field
, const char *value
) {
1078 x
= strappend(field
, value
);
1080 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(x
);
/* Like set_iovec_field(), but takes ownership of value and frees it, regardless of
 * whether the field could be added. */
static char* set_iovec_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value) {
        char *x;

        x = set_iovec_field(iovec, n_iovec, field, value);
        free(value);
        return x;
}
1092 static int gather_pid_metadata(
1093 char* context
[_CONTEXT_MAX
],
1094 char **comm_fallback
,
1095 struct iovec
*iovec
, size_t *n_iovec
) {
1097 /* We need 27 empty slots in iovec!
1099 * Note that if we fail on oom later on, we do not roll-back changes to the iovec structure. (It remains valid,
1100 * with the first n_iovec fields initialized.) */
1108 r
= parse_pid(context
[CONTEXT_PID
], &pid
);
1110 return log_error_errno(r
, "Failed to parse PID \"%s\": %m", context
[CONTEXT_PID
]);
1112 r
= get_process_comm(pid
, &context
[CONTEXT_COMM
]);
1114 log_warning_errno(r
, "Failed to get COMM, falling back to the command line: %m");
1115 context
[CONTEXT_COMM
] = strv_join(comm_fallback
, " ");
1116 if (!context
[CONTEXT_COMM
])
1120 r
= get_process_exe(pid
, &context
[CONTEXT_EXE
]);
1122 log_warning_errno(r
, "Failed to get EXE, ignoring: %m");
1124 if (cg_pid_get_unit(pid
, &context
[CONTEXT_UNIT
]) >= 0) {
1125 if (!is_journald_crash((const char**) context
)) {
1126 /* OK, now we know it's not the journal, hence we can make use of it now. */
1127 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG
);
1131 /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */
1132 if (is_pid1_crash((const char**) context
)) {
1133 log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
1134 disable_coredumps();
1137 set_iovec_field(iovec
, n_iovec
, "COREDUMP_UNIT=", context
[CONTEXT_UNIT
]);
1140 if (cg_pid_get_user_unit(pid
, &t
) >= 0)
1141 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_USER_UNIT=", t
);
1143 /* The next few are mandatory */
1144 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_PID=", context
[CONTEXT_PID
]))
1147 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_UID=", context
[CONTEXT_UID
]))
1150 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_GID=", context
[CONTEXT_GID
]))
1153 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_SIGNAL=", context
[CONTEXT_SIGNAL
]))
1156 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_RLIMIT=", context
[CONTEXT_RLIMIT
]))
1159 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_HOSTNAME=", context
[CONTEXT_HOSTNAME
]))
1162 if (!set_iovec_field(iovec
, n_iovec
, "COREDUMP_COMM=", context
[CONTEXT_COMM
]))
1165 if (context
[CONTEXT_EXE
] &&
1166 !set_iovec_field(iovec
, n_iovec
, "COREDUMP_EXE=", context
[CONTEXT_EXE
]))
1169 if (sd_pid_get_session(pid
, &t
) >= 0)
1170 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_SESSION=", t
);
1172 if (sd_pid_get_owner_uid(pid
, &owner_uid
) >= 0) {
1173 r
= asprintf(&t
, "COREDUMP_OWNER_UID=" UID_FMT
, owner_uid
);
1175 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(t
);
1178 if (sd_pid_get_slice(pid
, &t
) >= 0)
1179 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_SLICE=", t
);
1181 if (get_process_cmdline(pid
, 0, false, &t
) >= 0)
1182 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CMDLINE=", t
);
1184 if (cg_pid_get_path_shifted(pid
, NULL
, &t
) >= 0)
1185 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CGROUP=", t
);
1187 if (compose_open_fds(pid
, &t
) >= 0)
1188 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_OPEN_FDS=", t
);
1190 p
= procfs_file_alloca(pid
, "status");
1191 if (read_full_file(p
, &t
, NULL
) >= 0)
1192 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_STATUS=", t
);
1194 p
= procfs_file_alloca(pid
, "maps");
1195 if (read_full_file(p
, &t
, NULL
) >= 0)
1196 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_MAPS=", t
);
1198 p
= procfs_file_alloca(pid
, "limits");
1199 if (read_full_file(p
, &t
, NULL
) >= 0)
1200 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_LIMITS=", t
);
1202 p
= procfs_file_alloca(pid
, "cgroup");
1203 if (read_full_file(p
, &t
, NULL
) >=0)
1204 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_CGROUP=", t
);
1206 p
= procfs_file_alloca(pid
, "mountinfo");
1207 if (read_full_file(p
, &t
, NULL
) >=0)
1208 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_PROC_MOUNTINFO=", t
);
1210 if (get_process_cwd(pid
, &t
) >= 0)
1211 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CWD=", t
);
1213 if (get_process_root(pid
, &t
) >= 0) {
1214 bool proc_self_root_is_slash
;
1216 proc_self_root_is_slash
= strcmp(t
, "/") == 0;
1218 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_ROOT=", t
);
1220 /* If the process' root is "/", then there is a chance it has
1221 * mounted own root and hence being containerized. */
1222 if (proc_self_root_is_slash
&& get_process_container_parent_cmdline(pid
, &t
) > 0)
1223 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_CONTAINER_CMDLINE=", t
);
1226 if (get_process_environ(pid
, &t
) >= 0)
1227 set_iovec_field_free(iovec
, n_iovec
, "COREDUMP_ENVIRON=", t
);
1229 t
= strjoin("COREDUMP_TIMESTAMP=", context
[CONTEXT_TIMESTAMP
], "000000");
1231 iovec
[(*n_iovec
)++] = IOVEC_MAKE_STRING(t
);
1233 if (safe_atoi(context
[CONTEXT_SIGNAL
], &signo
) >= 0 && SIGNAL_VALID(signo
))
1234 set_iovec_field(iovec
, n_iovec
, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo
));
1236 return 0; /* we successfully acquired all metadata */
1239 static int process_kernel(int argc
, char* argv
[]) {
1241 char* context
[_CONTEXT_MAX
] = {};
1242 struct iovec iovec
[29 + SUBMIT_COREDUMP_FIELDS
];
1243 size_t i
, n_iovec
, n_to_free
= 0;
1246 log_debug("Processing coredump received from the kernel...");
1248 if (argc
< CONTEXT_COMM
+ 1) {
1249 log_error("Not enough arguments passed by the kernel (%i, expected %i).", argc
- 1, CONTEXT_COMM
+ 1 - 1);
1253 context
[CONTEXT_PID
] = argv
[1 + CONTEXT_PID
];
1254 context
[CONTEXT_UID
] = argv
[1 + CONTEXT_UID
];
1255 context
[CONTEXT_GID
] = argv
[1 + CONTEXT_GID
];
1256 context
[CONTEXT_SIGNAL
] = argv
[1 + CONTEXT_SIGNAL
];
1257 context
[CONTEXT_TIMESTAMP
] = argv
[1 + CONTEXT_TIMESTAMP
];
1258 context
[CONTEXT_RLIMIT
] = argv
[1 + CONTEXT_RLIMIT
];
1259 context
[CONTEXT_HOSTNAME
] = argv
[1 + CONTEXT_HOSTNAME
];
1261 r
= gather_pid_metadata(context
, argv
+ 1 + CONTEXT_COMM
, iovec
, &n_to_free
);
1265 n_iovec
= n_to_free
;
1267 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR
);
1269 assert_cc(2 == LOG_CRIT
);
1270 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("PRIORITY=2");
1272 assert(n_iovec
<= ELEMENTSOF(iovec
));
1274 if (is_journald_crash((const char**) context
) || is_pid1_crash((const char**) context
))
1275 r
= submit_coredump((const char**) context
,
1276 iovec
, ELEMENTSOF(iovec
), n_iovec
,
1279 r
= send_iovec(iovec
, n_iovec
, STDIN_FILENO
);
1282 for (i
= 0; i
< n_to_free
; i
++)
1283 free(iovec
[i
].iov_base
);
1285 /* Those fields are allocated by gather_pid_metadata */
1286 free(context
[CONTEXT_COMM
]);
1287 free(context
[CONTEXT_EXE
]);
1288 free(context
[CONTEXT_UNIT
]);
1293 static int process_backtrace(int argc
, char *argv
[]) {
1294 char *context
[_CONTEXT_MAX
] = {};
1295 _cleanup_free_
char *message
= NULL
;
1296 _cleanup_free_
struct iovec
*iovec
= NULL
;
1297 size_t n_iovec
, n_allocated
, n_to_free
= 0, i
;
1299 JournalImporter importer
= {
1303 log_debug("Processing backtrace on stdin...");
1305 if (argc
< CONTEXT_COMM
+ 1) {
1306 log_error("Not enough arguments passed (%i, expected %i).", argc
- 1, CONTEXT_COMM
+ 1 - 1);
1310 context
[CONTEXT_PID
] = argv
[2 + CONTEXT_PID
];
1311 context
[CONTEXT_UID
] = argv
[2 + CONTEXT_UID
];
1312 context
[CONTEXT_GID
] = argv
[2 + CONTEXT_GID
];
1313 context
[CONTEXT_SIGNAL
] = argv
[2 + CONTEXT_SIGNAL
];
1314 context
[CONTEXT_TIMESTAMP
] = argv
[2 + CONTEXT_TIMESTAMP
];
1315 context
[CONTEXT_RLIMIT
] = argv
[2 + CONTEXT_RLIMIT
];
1316 context
[CONTEXT_HOSTNAME
] = argv
[2 + CONTEXT_HOSTNAME
];
1318 n_allocated
= 34 + COREDUMP_STORAGE_EXTERNAL
;
1319 /* 26 metadata, 2 static, +unknown input, 4 storage, rounded up */
1320 iovec
= new(struct iovec
, n_allocated
);
1324 r
= gather_pid_metadata(context
, argv
+ 2 + CONTEXT_COMM
, iovec
, &n_to_free
);
1328 /* This was a special crash, and has already been processed. */
1332 n_iovec
= n_to_free
;
1335 r
= journal_importer_process_data(&importer
);
1337 log_error_errno(r
, "Failed to parse journal entry on stdin: %m");
1340 if (r
== 1 || /* complete entry */
1341 journal_importer_eof(&importer
)) /* end of data */
1345 if (!GREEDY_REALLOC(iovec
, n_allocated
, n_iovec
+ importer
.iovw
.count
+ 2))
1348 if (journal_importer_eof(&importer
)) {
1349 log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
1351 message
= strjoin("MESSAGE=Process ", context
[CONTEXT_PID
],
1352 " (", context
[CONTEXT_COMM
], ")"
1353 " of user ", context
[CONTEXT_UID
],
1354 " failed with ", context
[CONTEXT_SIGNAL
]);
1359 iovec
[n_iovec
++] = IOVEC_MAKE_STRING(message
);
1361 for (i
= 0; i
< importer
.iovw
.count
; i
++)
1362 iovec
[n_iovec
++] = importer
.iovw
.iovec
[i
];
1365 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR
);
1366 assert_cc(2 == LOG_CRIT
);
1367 iovec
[n_iovec
++] = IOVEC_MAKE_STRING("PRIORITY=2");
1369 assert(n_iovec
<= n_allocated
);
1371 r
= sd_journal_sendv(iovec
, n_iovec
);
1373 log_error_errno(r
, "Failed to log backtrace: %m");
1376 for (i
= 0; i
< n_to_free
; i
++)
1377 free(iovec
[i
].iov_base
);
1379 /* Those fields are allocated by gather_pid_metadata */
1380 free(context
[CONTEXT_COMM
]);
1381 free(context
[CONTEXT_EXE
]);
1382 free(context
[CONTEXT_UNIT
]);
1387 int main(int argc
, char *argv
[]) {
1390 /* First, log to a safe place, since we don't know what crashed and it might
1391 * be journald which we'd rather not log to then. */
1393 log_set_target(LOG_TARGET_KMSG
);
1396 /* Make sure we never enter a loop */
1397 (void) prctl(PR_SET_DUMPABLE
, 0);
1399 /* Ignore all parse errors */
1400 (void) parse_config();
1402 log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage
));
1403 log_debug("Selected compression %s.", yes_no(arg_compress
));
1405 r
= sd_listen_fds(false);
1407 log_error_errno(r
, "Failed to determine number of file descriptor: %m");
1411 /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
1412 * are invoked from the kernel as coredump handler. */
1414 if (streq_ptr(argv
[1], "--backtrace"))
1415 r
= process_backtrace(argc
, argv
);
1417 r
= process_kernel(argc
, argv
);
1419 r
= process_socket(SD_LISTEN_FDS_START
);
1421 log_error("Received unexpected number of file descriptors.");
1426 return r
< 0 ? EXIT_FAILURE
: EXIT_SUCCESS
;