1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include <sys/statvfs.h>
11 #include "sd-daemon.h"
12 #include "sd-journal.h"
15 #include "sd-messages.h"
18 #include "alloc-util.h"
19 #include "bus-error.h"
20 #include "capability-util.h"
21 #include "cgroup-util.h"
23 #include "conf-parser.h"
25 #include "coredump-util.h"
26 #include "coredump-vacuum.h"
27 #include "dirent-util.h"
29 #include "errno-util.h"
35 #include "iovec-util.h"
36 #include "journal-importer.h"
37 #include "journal-send.h"
38 #include "json-util.h"
40 #include "main-func.h"
41 #include "memory-util.h"
42 #include "memstream-util.h"
43 #include "mkdir-label.h"
44 #include "namespace-util.h"
45 #include "parse-util.h"
46 #include "path-util.h"
48 #include "process-util.h"
49 #include "signal-util.h"
50 #include "socket-util.h"
52 #include "stat-util.h"
53 #include "string-table.h"
54 #include "string-util.h"
55 #include "tmpfile-util.h"
56 #include "uid-classification.h"
57 #include "user-util.h"
59 /* The maximum size up to which we process coredumps. We use 1G on 32-bit systems, and 32G on 64-bit systems */
60 #if __SIZEOF_POINTER__ == 4
61 #define PROCESS_SIZE_MAX ((uint64_t) (1LLU*1024LLU*1024LLU*1024LLU))
62 #elif __SIZEOF_POINTER__ == 8
63 #define PROCESS_SIZE_MAX ((uint64_t) (32LLU*1024LLU*1024LLU*1024LLU))
65 #error "Unexpected pointer size"
68 /* The maximum size up to which we leave the coredump around on disk */
69 #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
71 /* The maximum size up to which we store the coredump in the journal */
72 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
73 #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
75 /* oss-fuzz limits memory usage. */
76 #define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
79 /* When checking for available memory and setting lower limits, don't
80 * go below 4MB for writing core files to storage. */
81 #define PROCESS_SIZE_MIN (4U*1024U*1024U)
83 /* Make sure to not make this larger than the maximum journal entry
84 * size. See DATA_SIZE_MAX in journal-importer.h. */
85 assert_cc(JOURNAL_SIZE_MAX
<= DATA_SIZE_MAX
);
87 #define MOUNT_TREE_ROOT "/run/systemd/mount-rootfs"
90 /* We use these as array indexes for our process metadata cache.
92 * The first indices of the cache stores the same metadata as the ones passed by the kernel via
93 * argv[], i.e. the strings specified in our pattern defined in /proc/sys/kernel/core_pattern,
96 META_ARGV_PID
, /* %P: as seen in the initial pid namespace */
97 META_ARGV_UID
, /* %u: as seen in the initial user namespace */
98 META_ARGV_GID
, /* %g: as seen in the initial user namespace */
99 META_ARGV_SIGNAL
, /* %s: number of signal causing dump */
100 META_ARGV_TIMESTAMP
, /* %t: time of dump, expressed as seconds since the Epoch (we expand this to μs granularity) */
101 META_ARGV_RLIMIT
, /* %c: core file size soft resource limit */
103 /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */
104 META_ARGV_HOSTNAME
= _META_ARGV_REQUIRED
, /* %h: hostname */
105 META_ARGV_DUMPABLE
, /* %d: as set by the kernel */
106 META_ARGV_PIDFD
, /* %F: pidfd of the process, since v6.16 */
107 /* If new fields are added, they should be added here, to maintain compatibility
108 * with callers which don't know about the new fields. */
111 /* The following indexes are cached for a couple of special fields we use (and
112 * thereby need to be retrieved quickly) for naming coredump files, and attaching
113 * xattrs. Unlike the previous ones they are retrieved from the runtime
116 META_COMM
= _META_ARGV_MAX
,
118 /* The rest are similar to the previous ones except that we won't fail if one of
119 * them is missing in a message sent over the socket. */
127 static const char * const meta_field_names
[_META_MAX
] = {
128 [META_ARGV_PID
] = "COREDUMP_PID=",
129 [META_ARGV_UID
] = "COREDUMP_UID=",
130 [META_ARGV_GID
] = "COREDUMP_GID=",
131 [META_ARGV_SIGNAL
] = "COREDUMP_SIGNAL=",
132 [META_ARGV_TIMESTAMP
] = "COREDUMP_TIMESTAMP=",
133 [META_ARGV_RLIMIT
] = "COREDUMP_RLIMIT=",
134 [META_ARGV_HOSTNAME
] = "COREDUMP_HOSTNAME=",
135 [META_ARGV_DUMPABLE
] = "COREDUMP_DUMPABLE=",
136 [META_ARGV_PIDFD
] = "COREDUMP_BY_PIDFD=",
137 [META_COMM
] = "COREDUMP_COMM=",
138 [META_EXE
] = "COREDUMP_EXE=",
139 [META_UNIT
] = "COREDUMP_UNIT=",
140 [META_PROC_AUXV
] = "COREDUMP_PROC_AUXV=",
143 typedef struct Context
{
155 /* These point into external memory, are not owned by this object */
156 const char *meta
[_META_MAX
];
157 size_t meta_size
[_META_MAX
];
160 #define CONTEXT_NULL \
162 .pidref = PIDREF_NULL, \
163 .uid = UID_INVALID, \
164 .gid = GID_INVALID, \
165 .mount_tree_fd = -EBADF, \
168 typedef enum CoredumpStorage
{
169 COREDUMP_STORAGE_NONE
,
170 COREDUMP_STORAGE_EXTERNAL
,
171 COREDUMP_STORAGE_JOURNAL
,
172 _COREDUMP_STORAGE_MAX
,
173 _COREDUMP_STORAGE_INVALID
= -EINVAL
,
176 static const char* const coredump_storage_table
[_COREDUMP_STORAGE_MAX
] = {
177 [COREDUMP_STORAGE_NONE
] = "none",
178 [COREDUMP_STORAGE_EXTERNAL
] = "external",
179 [COREDUMP_STORAGE_JOURNAL
] = "journal",
182 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage
, CoredumpStorage
);
183 static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage
, coredump_storage
, CoredumpStorage
);
185 static CoredumpStorage arg_storage
= COREDUMP_STORAGE_EXTERNAL
;
186 static bool arg_compress
= true;
187 static uint64_t arg_process_size_max
= PROCESS_SIZE_MAX
;
188 static uint64_t arg_external_size_max
= EXTERNAL_SIZE_MAX
;
189 static uint64_t arg_journal_size_max
= JOURNAL_SIZE_MAX
;
190 static uint64_t arg_keep_free
= UINT64_MAX
;
191 static uint64_t arg_max_use
= UINT64_MAX
;
192 #if HAVE_DWFL_SET_SYSROOT
193 static bool arg_enter_namespace
= false;
196 static void context_done(Context
*c
) {
199 pidref_done(&c
->pidref
);
200 c
->mount_tree_fd
= safe_close(c
->mount_tree_fd
);
203 static int parse_config(void) {
204 static const ConfigTableItem items
[] = {
205 { "Coredump", "Storage", config_parse_coredump_storage
, 0, &arg_storage
},
206 { "Coredump", "Compress", config_parse_bool
, 0, &arg_compress
},
207 { "Coredump", "ProcessSizeMax", config_parse_iec_uint64
, 0, &arg_process_size_max
},
208 { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity
, 0, &arg_external_size_max
},
209 { "Coredump", "JournalSizeMax", config_parse_iec_size
, 0, &arg_journal_size_max
},
210 { "Coredump", "KeepFree", config_parse_iec_uint64
, 0, &arg_keep_free
},
211 { "Coredump", "MaxUse", config_parse_iec_uint64
, 0, &arg_max_use
},
212 #if HAVE_DWFL_SET_SYSROOT
213 { "Coredump", "EnterNamespace", config_parse_bool
, 0, &arg_enter_namespace
},
215 { "Coredump", "EnterNamespace", config_parse_warn_compat
, DISABLED_CONFIGURATION
, NULL
},
222 r
= config_parse_standard_file_with_dropins(
223 "systemd/coredump.conf",
225 config_item_table_lookup
,
228 /* userdata= */ NULL
);
232 /* Let's make sure we fix up the maximum size we send to the journal here on the client side, for
233 * efficiency reasons. journald wouldn't accept anything larger anyway. */
234 if (arg_journal_size_max
> JOURNAL_SIZE_MAX
) {
235 log_warning("JournalSizeMax= set to larger value (%s) than journald would accept (%s), lowering automatically.",
236 FORMAT_BYTES(arg_journal_size_max
), FORMAT_BYTES(JOURNAL_SIZE_MAX
));
237 arg_journal_size_max
= JOURNAL_SIZE_MAX
;
243 static uint64_t storage_size_max(void) {
244 if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
)
245 return arg_external_size_max
;
246 if (arg_storage
== COREDUMP_STORAGE_JOURNAL
)
247 return arg_journal_size_max
;
248 assert(arg_storage
== COREDUMP_STORAGE_NONE
);
252 static int fix_acl(int fd
, uid_t uid
, bool allow_user
) {
254 assert(uid_is_valid(uid
));
259 /* We don't allow users to read coredumps if the uid or capabilities were changed. */
263 if (uid_is_system(uid
) || uid_is_dynamic(uid
) || uid_is_greeter(uid
) || uid
== UID_NOBODY
)
266 /* Make sure normal users can read (but not write or delete) their own coredumps */
267 r
= fd_add_uid_acl_permission(fd
, uid
, ACL_READ
);
269 return log_error_errno(r
, "Failed to adjust ACL of the coredump: %m");
275 static int fix_xattr(int fd
, const Context
*context
) {
276 static const char * const xattrs
[_META_MAX
] = {
277 [META_ARGV_PID
] = "user.coredump.pid",
278 [META_ARGV_UID
] = "user.coredump.uid",
279 [META_ARGV_GID
] = "user.coredump.gid",
280 [META_ARGV_SIGNAL
] = "user.coredump.signal",
281 [META_ARGV_TIMESTAMP
] = "user.coredump.timestamp",
282 [META_ARGV_RLIMIT
] = "user.coredump.rlimit",
283 [META_ARGV_HOSTNAME
] = "user.coredump.hostname",
284 [META_COMM
] = "user.coredump.comm",
285 [META_EXE
] = "user.coredump.exe",
292 /* Attach some metadata to coredumps via extended attributes. Just because we can. */
294 for (unsigned i
= 0; i
< _META_MAX
; i
++) {
297 if (isempty(context
->meta
[i
]) || !xattrs
[i
])
300 k
= RET_NERRNO(fsetxattr(fd
, xattrs
[i
], context
->meta
[i
], strlen(context
->meta
[i
]), XATTR_CREATE
));
307 #define filename_escape(s) xescape((s), "./ ")
/* Returns s itself if it is non-NULL, otherwise a fixed placeholder string, so that the result can
 * always be passed to a "%s" format in log messages. */
static const char *coredump_tmpfile_name(const char *s) {
        if (s)
                return s;

        return "(unnamed temporary file)";
}
313 static int fix_permissions_and_link(
315 const char *filename
,
317 const Context
*context
,
326 /* Ignore errors on these */
327 (void) fchmod(fd
, 0640);
328 (void) fix_acl(fd
, context
->uid
, allow_user
);
329 (void) fix_xattr(fd
, context
);
331 r
= link_tmpfile(fd
, filename
, target
, LINK_TMPFILE_SYNC
);
333 return log_error_errno(r
, "Failed to move coredump %s into place: %m", target
);
338 static int maybe_remove_external_coredump(
340 const char *filename
,
345 /* Returns true if might remove, false if will not remove, < 0 on error. */
347 if (arg_storage
!= COREDUMP_STORAGE_NONE
&&
348 (c
->is_pid1
|| c
->is_journald
)) /* Always keep around in case of journald/pid1, since we cannot rely on the journal to accept them */
351 if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
&&
352 size
<= arg_external_size_max
)
358 if (unlink(filename
) < 0 && errno
!= ENOENT
)
359 return log_error_errno(errno
, "Failed to unlink %s: %m", filename
);
364 static int make_filename(const Context
*context
, char **ret
) {
365 _cleanup_free_
char *c
= NULL
, *u
= NULL
, *p
= NULL
, *t
= NULL
;
366 sd_id128_t boot
= {};
371 c
= filename_escape(context
->meta
[META_COMM
]);
375 u
= filename_escape(context
->meta
[META_ARGV_UID
]);
379 r
= sd_id128_get_boot(&boot
);
383 p
= filename_escape(context
->meta
[META_ARGV_PID
]);
387 t
= filename_escape(context
->meta
[META_ARGV_TIMESTAMP
]);
392 "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR
".%s.%s",
395 SD_ID128_FORMAT_VAL(boot
),
403 static int grant_user_access(int core_fd
, const Context
*context
) {
405 uid_t uid
= UID_INVALID
, euid
= UID_INVALID
;
406 uid_t gid
= GID_INVALID
, egid
= GID_INVALID
;
409 assert(core_fd
>= 0);
412 if (!context
->meta
[META_PROC_AUXV
])
413 return log_warning_errno(SYNTHETIC_ERRNO(ENODATA
), "No auxv data, not adjusting permissions.");
415 uint8_t elf
[EI_NIDENT
];
417 if (pread(core_fd
, &elf
, sizeof(elf
), 0) != sizeof(elf
))
418 return log_warning_errno(errno_or_else(EIO
),
419 "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno
));
421 if (elf
[EI_MAG0
] != ELFMAG0
||
422 elf
[EI_MAG1
] != ELFMAG1
||
423 elf
[EI_MAG2
] != ELFMAG2
||
424 elf
[EI_MAG3
] != ELFMAG3
||
425 elf
[EI_VERSION
] != EV_CURRENT
)
426 return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN
),
427 "Core file does not have ELF header, not adjusting permissions.");
428 if (!IN_SET(elf
[EI_CLASS
], ELFCLASS32
, ELFCLASS64
) ||
429 !IN_SET(elf
[EI_DATA
], ELFDATA2LSB
, ELFDATA2MSB
))
430 return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN
),
431 "Core file has strange ELF class, not adjusting permissions.");
433 if ((elf
[EI_DATA
] == ELFDATA2LSB
) != (__BYTE_ORDER
== __LITTLE_ENDIAN
))
434 return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN
),
435 "Core file has non-native endianness, not adjusting permissions.");
437 r
= parse_auxv(LOG_WARNING
,
438 /* elf_class= */ elf
[EI_CLASS
],
439 context
->meta
[META_PROC_AUXV
],
440 context
->meta_size
[META_PROC_AUXV
],
441 &at_secure
, &uid
, &euid
, &gid
, &egid
);
445 /* We allow access if %d/dumpable on the command line was exactly 1, we got all the data,
446 * at_secure is not set, and the uid/gid match euid/egid. */
448 context
->dumpable
== SUID_DUMP_USER
&&
450 uid
!= UID_INVALID
&& euid
!= UID_INVALID
&& uid
== euid
&&
451 gid
!= GID_INVALID
&& egid
!= GID_INVALID
&& gid
== egid
;
452 log_debug("Will %s access (dumpable=%u uid="UID_FMT
" euid="UID_FMT
" gid="GID_FMT
" egid="GID_FMT
" at_secure=%s)",
453 ret
? "permit" : "restrict",
455 uid
, euid
, gid
, egid
, yes_no(at_secure
));
459 static int save_external_coredump(
460 const Context
*context
,
466 uint64_t *ret_compressed_size
,
467 bool *ret_truncated
) {
469 _cleanup_(unlink_and_freep
) char *tmp
= NULL
;
470 _cleanup_free_
char *fn
= NULL
;
471 _cleanup_close_
int fd
= -EBADF
;
472 uint64_t process_limit
, max_size
;
473 bool truncated
, storage_on_tmpfs
;
478 assert(ret_filename
);
482 assert(ret_compressed_size
);
483 assert(ret_truncated
);
485 if (context
->rlimit
< page_size())
486 /* Is coredumping disabled? Then don't bother saving/processing the
487 * coredump. Anything below PAGE_SIZE cannot give a readable coredump
488 * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but
489 * is usually the same as PAGE_SIZE). */
490 return log_info_errno(SYNTHETIC_ERRNO(EBADSLT
),
491 "Resource limits disable core dumping for process %s (%s).",
492 context
->meta
[META_ARGV_PID
], context
->meta
[META_COMM
]);
494 process_limit
= MAX(arg_process_size_max
, storage_size_max());
495 if (process_limit
== 0)
496 return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT
),
497 "Limits for coredump processing and storage are both 0, not dumping core.");
499 /* Never store more than the process configured, or than we actually shall keep or process */
500 max_size
= MIN(context
->rlimit
, process_limit
);
502 r
= make_filename(context
, &fn
);
504 return log_error_errno(r
, "Failed to determine coredump file name: %m");
506 (void) mkdir_parents_label(fn
, 0755);
508 fd
= open_tmpfile_linkable(fn
, O_RDWR
|O_CLOEXEC
, &tmp
);
510 return log_error_errno(fd
, "Failed to create temporary file for coredump %s: %m", fn
);
512 /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on
513 * the service or the slice it belongs to. This is common on low-resources systems,
514 * to avoid crashing processes to take away too many system resources.
515 * Check the cgroup settings, and set max_size to a bit less than half of the
516 * available memory left to the process.
517 * Then, attempt to write the core file uncompressed first - if the write gets
518 * interrupted, we know we won't be able to write it all, so instead compress what
519 * was written so far, delete the uncompressed truncated core, and then continue
520 * compressing from STDIN. Given the compressed core cannot be larger than the
521 * uncompressed one, and 1KB for metadata is accounted for in the calculation, we
522 * should be able to at least store the full compressed core file. */
524 storage_on_tmpfs
= fd_is_temporary_fs(fd
) > 0;
525 if (storage_on_tmpfs
&& arg_compress
) {
526 _cleanup_(sd_bus_flush_close_unrefp
) sd_bus
*bus
= NULL
;
527 uint64_t cgroup_limit
= UINT64_MAX
;
530 /* If we can't get the cgroup limit, just ignore it, but don't fail,
531 * try anyway with the config settings. */
532 r
= sd_bus_default_system(&bus
);
534 log_info_errno(r
, "Failed to connect to system bus, skipping MemoryAvailable check: %m");
536 _cleanup_(sd_bus_error_free
) sd_bus_error error
= SD_BUS_ERROR_NULL
;
538 r
= sd_bus_get_property_trivial(
540 "org.freedesktop.systemd1",
541 "/org/freedesktop/systemd1/unit/self",
542 "org.freedesktop.systemd1.Service",
548 "Failed to query MemoryAvailable for current unit, "
549 "falling back to static config settings: %s",
550 bus_error_message(&error
, r
));
553 /* First, ensure we are not going to go over the cgroup limit */
554 max_size
= MIN(cgroup_limit
, max_size
);
555 /* tmpfs might get full quickly, so check the available space too. But don't worry about
556 * errors here, failing to access the storage location will be better logged when writing to
558 if (fstatvfs(fd
, &sv
) >= 0)
559 max_size
= MIN((uint64_t)sv
.f_frsize
* (uint64_t)sv
.f_bfree
, max_size
);
560 /* Impose a lower minimum, otherwise we will miss the basic headers. */
561 max_size
= MAX(PROCESS_SIZE_MIN
, max_size
);
562 /* Ensure we can always switch to compressing on the fly in case we are running out of space
563 * by keeping half of the space/memory available, plus 1KB metadata overhead from the
564 * compression algorithm. */
565 max_size
= LESS_BY(max_size
, 1024U) / 2;
567 log_debug("Limiting core file size to %" PRIu64
" bytes due to cgroup and/or filesystem limits.", max_size
);
570 r
= copy_bytes(input_fd
, fd
, max_size
, 0);
572 return log_error_errno(r
, "Cannot store coredump of %s (%s): %m",
573 context
->meta
[META_ARGV_PID
], context
->meta
[META_COMM
]);
576 bool allow_user
= grant_user_access(fd
, context
) > 0;
580 _cleanup_(unlink_and_freep
) char *tmp_compressed
= NULL
;
581 _cleanup_free_
char *fn_compressed
= NULL
;
582 _cleanup_close_
int fd_compressed
= -EBADF
;
583 uint64_t uncompressed_size
= 0;
585 if (lseek(fd
, 0, SEEK_SET
) < 0)
586 return log_error_errno(errno
, "Failed to seek on coredump %s: %m", fn
);
588 fn_compressed
= strjoin(fn
, default_compression_extension());
592 fd_compressed
= open_tmpfile_linkable(fn_compressed
, O_RDWR
|O_CLOEXEC
, &tmp_compressed
);
593 if (fd_compressed
< 0)
594 return log_error_errno(fd_compressed
, "Failed to create temporary file for coredump %s: %m", fn_compressed
);
596 r
= compress_stream(fd
, fd_compressed
, max_size
, &uncompressed_size
);
598 return log_error_errno(r
, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed
));
600 if (truncated
&& storage_on_tmpfs
) {
601 uint64_t partial_uncompressed_size
= 0;
603 /* Uncompressed write was truncated and we are writing to tmpfs: delete
604 * the uncompressed core, and compress the remaining part from STDIN. */
606 tmp
= unlink_and_free(tmp
);
609 r
= compress_stream(input_fd
, fd_compressed
, max_size
, &partial_uncompressed_size
);
611 return log_error_errno(r
, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed
));
612 uncompressed_size
+= partial_uncompressed_size
;
615 r
= fix_permissions_and_link(fd_compressed
, tmp_compressed
, fn_compressed
, context
, allow_user
);
619 if (fstat(fd_compressed
, &st
) < 0)
620 return log_error_errno(errno
,
621 "Failed to fstat core file %s: %m",
622 coredump_tmpfile_name(tmp_compressed
));
624 *ret_filename
= TAKE_PTR(fn_compressed
); /* compressed */
625 *ret_node_fd
= TAKE_FD(fd_compressed
); /* compressed */
626 *ret_data_fd
= TAKE_FD(fd
);
627 *ret_size
= uncompressed_size
;
628 *ret_compressed_size
= (uint64_t) st
.st_size
; /* compressed */
629 *ret_truncated
= truncated
;
637 LOG_MESSAGE("Core file was truncated to %"PRIu64
" bytes.", max_size
),
638 LOG_ITEM("SIZE_LIMIT=%"PRIu64
, max_size
),
639 LOG_MESSAGE_ID(SD_MESSAGE_TRUNCATED_CORE_STR
));
641 r
= fix_permissions_and_link(fd
, tmp
, fn
, context
, allow_user
);
643 return log_error_errno(r
, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp
), fn
);
645 if (fstat(fd
, &st
) < 0)
646 return log_error_errno(errno
, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp
));
648 if (lseek(fd
, 0, SEEK_SET
) < 0)
649 return log_error_errno(errno
, "Failed to seek on coredump %s: %m", fn
);
651 *ret_filename
= TAKE_PTR(fn
);
652 *ret_node_fd
= -EBADF
;
653 *ret_data_fd
= TAKE_FD(fd
);
654 *ret_size
= (uint64_t) st
.st_size
;
655 *ret_compressed_size
= UINT64_MAX
;
656 *ret_truncated
= truncated
;
661 static int allocate_journal_field(int fd
, size_t size
, char **ret
, size_t *ret_size
) {
662 _cleanup_free_
char *field
= NULL
;
669 if (lseek(fd
, 0, SEEK_SET
) < 0)
670 return log_warning_errno(errno
, "Failed to seek: %m");
672 field
= malloc(9 + size
);
674 return log_warning_errno(SYNTHETIC_ERRNO(ENOMEM
),
675 "Failed to allocate memory for coredump, coredump will not be stored.");
677 memcpy(field
, "COREDUMP=", 9);
679 /* NB: simple read() would fail for overly large coredumps, since read() on Linux can only deal with
680 * 0x7ffff000 bytes max. Hence call things in a loop. */
681 n
= loop_read(fd
, field
+ 9, size
, /* do_poll= */ false);
683 return log_error_errno((int) n
, "Failed to read core data: %m");
684 if ((size_t) n
< size
)
685 return log_error_errno(SYNTHETIC_ERRNO(EIO
), "Core data too short.");
687 *ret
= TAKE_PTR(field
);
688 *ret_size
= size
+ 9;
693 /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
707 static int compose_open_fds(pid_t pid
, char **ret
) {
708 _cleanup_(memstream_done
) MemStream m
= {};
709 _cleanup_closedir_
DIR *proc_fd_dir
= NULL
;
710 _cleanup_close_
int proc_fdinfo_fd
= -EBADF
;
711 const char *fddelim
= "", *path
;
718 path
= procfs_file_alloca(pid
, "fd");
719 proc_fd_dir
= opendir(path
);
723 proc_fdinfo_fd
= openat(dirfd(proc_fd_dir
), "../fdinfo", O_DIRECTORY
|O_NOFOLLOW
|O_CLOEXEC
|O_PATH
);
724 if (proc_fdinfo_fd
< 0)
727 stream
= memstream_init(&m
);
731 FOREACH_DIRENT(de
, proc_fd_dir
, return -errno
) {
732 _cleanup_fclose_
FILE *fdinfo
= NULL
;
733 _cleanup_free_
char *fdname
= NULL
;
734 _cleanup_close_
int fd
= -EBADF
;
736 r
= readlinkat_malloc(dirfd(proc_fd_dir
), de
->d_name
, &fdname
);
740 fprintf(stream
, "%s%s:%s\n", fddelim
, de
->d_name
, fdname
);
743 /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
744 fd
= openat(proc_fdinfo_fd
, de
->d_name
, O_NOFOLLOW
|O_CLOEXEC
|O_RDONLY
);
748 fdinfo
= take_fdopen(&fd
, "r");
753 _cleanup_free_
char *line
= NULL
;
755 r
= read_line(fdinfo
, LONG_LINE_MAX
, &line
);
766 return memstream_finalize(&m
, ret
, NULL
);
769 /* Returns 1 if the parent was found.
770 * Returns 0 if there is not a process we can call the pid's
771 * container parent (the pid's process isn't 'containerized').
772 * Returns a negative number on errors.
774 static int get_process_container_parent_cmdline(PidRef
*pid
, char** ret_cmdline
) {
777 assert(pidref_is_set(pid
));
778 assert(!pidref_is_remote(pid
));
780 r
= pidref_from_same_root_fs(pid
, &PIDREF_MAKE_FROM_PID(1));
784 /* The process uses system root. */
789 _cleanup_(pidref_done
) PidRef container_pid
= PIDREF_NULL
;
790 r
= namespace_get_leader(pid
, NAMESPACE_MOUNT
, &container_pid
);
794 r
= pidref_get_cmdline(&container_pid
, SIZE_MAX
, PROCESS_CMDLINE_QUOTE_POSIX
, ret_cmdline
);
801 static int change_uid_gid(const Context
*context
) {
806 uid_t uid
= context
->uid
;
807 gid_t gid
= context
->gid
;
809 if (uid_is_system(uid
)) {
810 const char *user
= "systemd-coredump";
812 r
= get_user_creds(&user
, &uid
, &gid
, NULL
, NULL
, 0);
814 log_warning_errno(r
, "Cannot resolve %s user. Proceeding to dump core as root: %m", user
);
819 return drop_privileges(uid
, gid
, 0);
822 static int attach_mount_tree(int mount_tree_fd
) {
825 assert(mount_tree_fd
>= 0);
827 r
= detach_mount_namespace();
829 return log_warning_errno(r
, "Failed to detach mount namespace: %m");
831 r
= mkdir_p_label(MOUNT_TREE_ROOT
, 0555);
833 return log_warning_errno(r
, "Failed to create directory: %m");
835 r
= mount_setattr(mount_tree_fd
, "", AT_EMPTY_PATH
,
836 &(struct mount_attr
) {
837 /* MOUNT_ATTR_NOSYMFOLLOW is left out on purpose to allow libdwfl to resolve symlinks.
838 * libdwfl will use openat2() with RESOLVE_IN_ROOT so there is no risk of symlink escape.
839 * https://sourceware.org/git/?p=elfutils.git;a=patch;h=06f0520f9a78b07c11c343181d552791dd630346 */
840 .attr_set
= MOUNT_ATTR_RDONLY
|MOUNT_ATTR_NOSUID
|MOUNT_ATTR_NODEV
|MOUNT_ATTR_NOEXEC
,
841 .propagation
= MS_SLAVE
,
842 }, sizeof(struct mount_attr
));
844 return log_warning_errno(errno
, "Failed to change properties of mount tree: %m");
846 r
= move_mount(mount_tree_fd
, "", -EBADF
, MOUNT_TREE_ROOT
, MOVE_MOUNT_F_EMPTY_PATH
);
848 return log_warning_errno(errno
, "Failed to attach mount tree: %m");
853 static int submit_coredump(
854 const Context
*context
,
855 struct iovec_wrapper
*iovw
,
858 _cleanup_(sd_json_variant_unrefp
) sd_json_variant
*json_metadata
= NULL
;
859 _cleanup_close_
int coredump_fd
= -EBADF
, coredump_node_fd
= -EBADF
;
860 _cleanup_free_
char *filename
= NULL
, *coredump_data
= NULL
, *stacktrace
= NULL
;
861 const char *module_name
, *root
= NULL
;
862 uint64_t coredump_size
= UINT64_MAX
, coredump_compressed_size
= UINT64_MAX
;
863 bool truncated
= false, written
= false;
864 sd_json_variant
*module_json
;
869 assert(input_fd
>= 0);
871 /* Vacuum before we write anything again */
872 (void) coredump_vacuum(-1, arg_keep_free
, arg_max_use
);
874 /* Always stream the coredump to disk, if that's possible */
875 written
= save_external_coredump(
877 &filename
, &coredump_node_fd
, &coredump_fd
,
878 &coredump_size
, &coredump_compressed_size
, &truncated
) >= 0;
880 /* If we could write it to disk we can now process it. */
881 /* If we don't want to keep the coredump on disk, remove it now, as later on we
882 * will lack the privileges for it. However, we keep the fd to it, so that we can
883 * still process it and log it. */
884 r
= maybe_remove_external_coredump(
887 coredump_node_fd
>= 0 ? coredump_compressed_size
: coredump_size
);
891 (void) iovw_put_string_field(iovw
, "COREDUMP_FILENAME=", filename
);
892 else if (arg_storage
== COREDUMP_STORAGE_EXTERNAL
)
893 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
894 coredump_node_fd
>= 0 ? coredump_compressed_size
: coredump_size
, arg_external_size_max
);
896 /* Vacuum again, but exclude the coredump we just created */
897 (void) coredump_vacuum(coredump_node_fd
>= 0 ? coredump_node_fd
: coredump_fd
, arg_keep_free
, arg_max_use
);
900 if (context
->mount_tree_fd
>= 0 && attach_mount_tree(context
->mount_tree_fd
) >= 0)
901 root
= MOUNT_TREE_ROOT
;
903 /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the
904 * coredump memory under the user's uid. This also ensures that the credentials journald will see are
905 * the ones of the coredumping user, thus making sure the user gets access to the core dump. Let's
906 * also get rid of all capabilities, if we run as root, we won't need them anymore. */
907 r
= change_uid_gid(context
);
909 return log_error_errno(r
, "Failed to drop privileges: %m");
912 /* Try to get a stack trace if we can */
913 if (coredump_size
> arg_process_size_max
)
914 log_debug("Not generating stack trace: core size %"PRIu64
" is greater "
915 "than %"PRIu64
" (the configured maximum)",
916 coredump_size
, arg_process_size_max
);
917 else if (coredump_fd
>= 0) {
918 bool skip
= startswith(context
->meta
[META_COMM
], "systemd-coredum"); /* COMM is 16 bytes usually */
920 (void) parse_elf_object(coredump_fd
,
921 context
->meta
[META_EXE
],
923 /* fork_disable_dump= */ skip
, /* avoid loops */
929 _cleanup_free_
char *core_message
= NULL
;
930 core_message
= strjoin(
931 "Process ", context
->meta
[META_ARGV_PID
],
932 " (", context
->meta
[META_COMM
],
933 ") of user ", context
->meta
[META_ARGV_UID
],
934 written
? " dumped core." : " terminated abnormally without generating a coredump.");
938 if (context
->is_journald
&& filename
)
939 if (!strextend(&core_message
, "\nCoredump diverted to ", filename
))
943 if (!strextend(&core_message
, "\n\n", stacktrace
))
946 if (context
->is_journald
)
947 /* We might not be able to log to the journal, so let's always print the message to another
948 * log target. The target was set previously to something safe. */
949 log_dispatch(LOG_ERR
, 0, core_message
);
951 (void) iovw_put_string_field(iovw
, "MESSAGE=", core_message
);
954 (void) iovw_put_string_field(iovw
, "COREDUMP_TRUNCATED=", "1");
956 /* If we managed to parse any ELF metadata (build-id, ELF package meta),
957 * attach it as journal metadata. */
959 _cleanup_free_
char *formatted_json
= NULL
;
961 r
= sd_json_variant_format(json_metadata
, 0, &formatted_json
);
963 return log_error_errno(r
, "Failed to format JSON package metadata: %m");
965 (void) iovw_put_string_field(iovw
, "COREDUMP_PACKAGE_JSON=", formatted_json
);
968 /* In the unlikely scenario that context->meta[META_EXE] is not available,
969 * let's avoid guessing the module name and skip the loop. */
970 if (context
->meta
[META_EXE
])
971 JSON_VARIANT_OBJECT_FOREACH(module_name
, module_json
, json_metadata
) {
974 /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */
975 if (!path_equal_filename(module_name
, context
->meta
[META_EXE
]))
978 t
= sd_json_variant_by_key(module_json
, "name");
980 (void) iovw_put_string_field(iovw
, "COREDUMP_PACKAGE_NAME=", sd_json_variant_string(t
));
982 t
= sd_json_variant_by_key(module_json
, "version");
984 (void) iovw_put_string_field(iovw
, "COREDUMP_PACKAGE_VERSION=", sd_json_variant_string(t
));
987 /* Optionally store the entire coredump in the journal */
988 if (arg_storage
== COREDUMP_STORAGE_JOURNAL
&& coredump_fd
>= 0) {
989 if (coredump_size
<= arg_journal_size_max
) {
992 /* Store the coredump itself in the journal */
994 r
= allocate_journal_field(coredump_fd
, (size_t) coredump_size
, &coredump_data
, &sz
);
996 if (iovw_put(iovw
, coredump_data
, sz
) >= 0)
997 TAKE_PTR(coredump_data
);
999 log_warning_errno(r
, "Failed to attach the core to the journal entry: %m");
1001 log_info("The core will not be stored: size %"PRIu64
" is greater than %"PRIu64
" (the configured maximum)",
1002 coredump_size
, arg_journal_size_max
);
1005 /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the
1006 * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write
1007 * the coredump to the socket. */
1009 if (context
->is_journald
) {
1010 r
= journal_fd_nonblock(true);
1012 return log_error_errno(r
, "Failed to make journal socket non-blocking: %m");
1015 r
= sd_journal_sendv(iovw
->iovec
, iovw
->count
);
1017 if (context
->is_journald
) {
1020 k
= journal_fd_nonblock(false);
1022 return log_error_errno(k
, "Failed to make journal socket blocking: %m");
1025 if (r
== -EAGAIN
&& context
->is_journald
)
1026 log_warning_errno(r
, "Failed to log journal coredump, ignoring: %m");
1028 return log_error_errno(r
, "Failed to log coredump: %m");
1033 static int context_parse_iovw(Context
*context
, struct iovec_wrapper
*iovw
) {
1040 /* Converts the data in the iovec array iovw into separate fields. Fills in context->meta[] (for
1041 * which no memory is allocated, it just contains direct pointers into the iovec array memory). */
1043 bool have_signal_name
= false;
1044 FOREACH_ARRAY(iovec
, iovw
->iovec
, iovw
->count
) {
1045 for (size_t i
= 0; i
< ELEMENTSOF(meta_field_names
); i
++) {
1046 /* Note that these strings are NUL-terminated, because we made sure that a
1047 * trailing NUL byte is in the buffer, though not included in the iov_len
1048 * count (see process_socket() and gather_pid_metadata_*()). */
1049 assert(((char*) iovec
->iov_base
)[iovec
->iov_len
] == 0);
1051 const char *p
= memory_startswith(iovec
->iov_base
, iovec
->iov_len
, meta_field_names
[i
]);
1053 context
->meta
[i
] = p
;
1054 context
->meta_size
[i
] = iovec
->iov_len
- strlen(meta_field_names
[i
]);
1059 have_signal_name
= have_signal_name
||
1060 memory_startswith(iovec
->iov_base
, iovec
->iov_len
, "COREDUMP_SIGNAL_NAME=");
1063 /* The basic fields from argv[] should always be there, refuse early if not. */
1064 for (int i
= 0; i
< _META_ARGV_REQUIRED
; i
++)
1065 if (!context
->meta
[i
])
1066 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1067 "A required (%s) has not been sent, aborting.", meta_field_names
[i
]);
1070 r
= parse_pid(context
->meta
[META_ARGV_PID
], &parsed_pid
);
1072 return log_error_errno(r
, "Failed to parse PID \"%s\": %m", context
->meta
[META_ARGV_PID
]);
1073 if (pidref_is_set(&context
->pidref
)) {
1074 if (context
->pidref
.pid
!= parsed_pid
)
1075 return log_error_errno(r
, "Passed PID " PID_FMT
" does not match passed " PID_FMT
": %m",
1076 parsed_pid
, context
->pidref
.pid
);
1078 r
= pidref_set_pid(&context
->pidref
, parsed_pid
);
1080 return log_error_errno(r
, "Failed to initialize pidref from pid " PID_FMT
": %m", parsed_pid
);
1083 r
= parse_uid(context
->meta
[META_ARGV_UID
], &context
->uid
);
1085 return log_error_errno(r
, "Failed to parse UID \"%s\": %m", context
->meta
[META_ARGV_UID
]);
1087 r
= parse_gid(context
->meta
[META_ARGV_GID
], &context
->gid
);
1089 return log_error_errno(r
, "Failed to parse GID \"%s\": %m", context
->meta
[META_ARGV_GID
]);
1091 r
= parse_signo(context
->meta
[META_ARGV_SIGNAL
], &context
->signo
);
1093 log_warning_errno(r
, "Failed to parse signal number \"%s\", ignoring: %m", context
->meta
[META_ARGV_SIGNAL
]);
1095 r
= safe_atou64(context
->meta
[META_ARGV_RLIMIT
], &context
->rlimit
);
1097 log_warning_errno(r
, "Failed to parse resource limit \"%s\", ignoring: %m", context
->meta
[META_ARGV_RLIMIT
]);
1099 /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to SUID_DUMP_SAFE (2),
1100 * if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */
1101 if (context
->meta
[META_ARGV_DUMPABLE
]) {
1102 r
= safe_atou(context
->meta
[META_ARGV_DUMPABLE
], &context
->dumpable
);
1104 return log_error_errno(r
, "Failed to parse dumpable field \"%s\": %m", context
->meta
[META_ARGV_DUMPABLE
]);
1105 if (context
->dumpable
> SUID_DUMP_SAFE
)
1106 log_notice("Got unexpected %%d/dumpable value %u.", context
->dumpable
);
1109 unit
= context
->meta
[META_UNIT
];
1110 context
->is_pid1
= streq(context
->meta
[META_ARGV_PID
], "1") || streq_ptr(unit
, SPECIAL_INIT_SCOPE
);
1111 context
->is_journald
= streq_ptr(unit
, SPECIAL_JOURNALD_SERVICE
);
1113 /* After parsing everything, let's also synthesize a new iovw field for the textual signal name if it
1114 * isn't already set. */
1115 if (SIGNAL_VALID(context
->signo
) && !have_signal_name
)
1116 (void) iovw_put_string_field(iovw
, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(context
->signo
));
/* process_socket(): receive one coredump submission on the socket fd and pass it to
 * submit_coredump().
 *
 * Visible steps: size a buffer from next_datagram_size_fd() (plus one byte for a trailing NUL),
 * read a datagram with recvmsg_safe(), and either append it to iovw via iovw_put() (payload) or
 * take the single SCM_RIGHTS fd it carries (zero-length sentinel). The local state variable
 * records which fd is expected next: the coredump fd goes to input_fd, the pidfd to
 * context.pidref, the mount tree fd to context.mount_tree_fd. Afterwards the collected fields
 * are parsed with context_parse_iovw() and mandatory context.meta[] entries are checked; a
 * missing one is reported with a synthetic EINVAL.
 *
 * Returns the result of submit_coredump(), or the value produced by log_error_errno() on the
 * error paths shown (presumably a negative errno-style code; confirm against the log helpers).
 *
 * NOTE(review): the embedded line numbers in this listing have gaps, so some short lines
 * (loop headers, error checks, closing braces) are not visible here; the control flow between
 * the statements shown is inferred from the existing comments and should be confirmed. */
1121 static int process_socket(int fd
) {
1122 _cleanup_(iovw_done_free
) struct iovec_wrapper iovw
= {};
1123 _cleanup_(context_done
) Context context
= CONTEXT_NULL
;
1124 _cleanup_close_
int input_fd
= -EBADF
;
1127 STATE_INPUT_FD_DONE
,
1129 } state
= STATE_PAYLOAD
;
1136 log_debug("Processing coredump received via socket...");
1139 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control
;
1140 struct msghdr mh
= {
1141 .msg_control
= &control
,
1142 .msg_controllen
= sizeof(control
),
1147 l
= next_datagram_size_fd(fd
);
1149 return log_error_errno(l
, "Failed to determine datagram size to read: %m");
/* One extra byte is allocated so that a NUL terminator can be stored after the payload below. */
1151 _cleanup_(iovec_done
) struct iovec iovec
= {
1153 .iov_base
= malloc(l
+ 1),
1155 if (!iovec
.iov_base
)
1158 mh
.msg_iov
= &iovec
;
1160 n
= recvmsg_safe(fd
, &mh
, MSG_CMSG_CLOEXEC
);
1162 return log_error_errno(n
, "Failed to receive datagram: %m");
1164 /* The final zero-length datagrams ("sentinels") carry file descriptors and tell us that
1165 * we're done. There are three sentinels: one with just the coredump fd, followed by one with
1166 * the pidfd, and finally one with the mount tree fd. The latter two or the last one may be
1167 * omitted (which is supported for compatibility with older systemd versions, in particular to
1168 * facilitate cross-container coredumping). */
1170 struct cmsghdr
*found
;
/* Look for a control message that carries exactly one fd. */
1172 found
= cmsg_find(&mh
, SOL_SOCKET
, SCM_RIGHTS
, CMSG_LEN(sizeof(int)));
1174 /* This is zero length message but it either doesn't carry a single
1175 * descriptor, or it has more than one. This is a protocol violation so let's
1178 * Well, not quite! In practice there's one more complication: EOF on
1179 * SOCK_SEQPACKET is not distinguishable from a zero length datagram. Hence
1180 * if we get a zero length datagram without fds we consider it EOF, and
1181 * that's permissible for the final two fds. Hence let's be strict on the
1182 * first fd, but lenient on the other two. */
1184 if (!cmsg_find(&mh
, SOL_SOCKET
, SCM_RIGHTS
, (socklen_t
) -1) && state
!= STATE_PAYLOAD
)
1185 /* No fds, and already got the first fd → we are done. */
1188 cmsg_close_all(&mh
);
1189 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG
),
1190 "Received zero length message with zero or more than one file descriptor(s), expected one.");
/* First sentinel: remember the coredump fd and expect the pidfd next. */
1196 assert(input_fd
< 0);
1197 input_fd
= *CMSG_TYPED_DATA(found
, int);
1198 state
= STATE_INPUT_FD_DONE
;
/* Second sentinel: the received fd is handed to pidref_set_pidfd_consume() to set up context.pidref. */
1201 case STATE_INPUT_FD_DONE
:
1202 assert(!pidref_is_set(&context
.pidref
));
1204 r
= pidref_set_pidfd_consume(&context
.pidref
, *CMSG_TYPED_DATA(found
, int));
1206 return log_error_errno(r
, "Failed to initialize pidref: %m");
1208 state
= STATE_PID_FD_DONE
;
/* Third sentinel: the received fd is stored as the mount tree fd. */
1211 case STATE_PID_FD_DONE
:
1212 assert(context
.mount_tree_fd
< 0);
1213 context
.mount_tree_fd
= *CMSG_TYPED_DATA(found
, int);
1214 /* We have all FDs we need so we are done. */
1221 cmsg_close_all(&mh
);
1223 /* Only zero length messages are allowed after the first message that carried a file descriptor. */
1224 if (state
!= STATE_PAYLOAD
)
1225 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG
), "Received unexpected message with non-zero length.");
1227 /* Payload messages should not carry fds */
1228 if (cmsg_find(&mh
, SOL_SOCKET
, SCM_RIGHTS
, (socklen_t
) -1))
1229 return log_error_errno(SYNTHETIC_ERRNO(EBADMSG
),
1230 "Received payload message with file descriptor(s), expected none.");
1232 /* Add trailing NUL byte, in case these are strings */
1233 ((char*) iovec
.iov_base
)[n
] = 0;
1234 iovec
.iov_len
= (size_t) n
;
1236 if (iovw_put(&iovw
, iovec
.iov_base
, iovec
.iov_len
) < 0)
1242 /* Make sure we got all data we really need */
1243 assert(input_fd
>= 0);
1245 r
= context_parse_iovw(&context
, &iovw
);
1249 /* Make sure we received all the expected fields. We support being called by an *older*
1250 * systemd-coredump from the outside, so we require only the basic set of fields that
1251 * was being sent when the support for sending to containers over a socket was added
1252 * in a108c43e36d3ceb6e34efe37c014fc2cda856000. */
1259 META_ARGV_TIMESTAMP
,
1263 if (!context
.meta
[i
])
1264 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1265 "Mandatory argument %s not received on socket, aborting.",
1266 meta_field_names
[i
]);
1268 return submit_coredump(&context
, &iovw
, input_fd
);
/* send_iovec(): forward the collected fields and file descriptors to the coredump service.
 *
 * Creates an AF_UNIX SOCK_SEQPACKET socket, connects it to /run/systemd/coredump, then walks
 * iovw, pointing msg_iov at entry i and calling sendmsg() with MSG_NOSIGNAL. On EMSGSIZE for a
 * non-empty field the data is sent from a private two-element array instead: a copy of the
 * entry whose length is halved before retrying, plus a three-dot marker, so that the iovec
 * owned by the caller is left unmodified. A send failure is reported with log_error_errno().
 *
 * After the fields, fds are passed with send_one_fd(): input_fd first, then pidref->fd if
 * pidref is set and has an fd, then mount_tree_fd if it is non-negative.
 *
 * Parameters:
 *   iovw          - fields to send; only read here
 *   input_fd      - coredump fd; asserted to be >= 0
 *   pidref        - process reference whose fd is sent as the optional second fd
 *   mount_tree_fd - optional third fd; a negative value means there is none
 *
 * NOTE(review): short lines are missing from this listing (see the gaps in the embedded line
 * numbers), so the retry loop structure and the final return value are not visible here. */
1271 static int send_iovec(const struct iovec_wrapper
*iovw
, int input_fd
, PidRef
*pidref
, int mount_tree_fd
) {
1272 _cleanup_close_
int fd
= -EBADF
;
1276 assert(input_fd
>= 0);
1278 fd
= socket(AF_UNIX
, SOCK_SEQPACKET
|SOCK_CLOEXEC
, 0);
1280 return log_error_errno(errno
, "Failed to create coredump socket: %m");
1282 r
= connect_unix_path(fd
, AT_FDCWD
, "/run/systemd/coredump");
1284 return log_error_errno(r
, "Failed to connect to coredump service: %m");
1286 for (size_t i
= 0; i
< iovw
->count
; i
++) {
1287 struct msghdr mh
= {
1288 .msg_iov
= iovw
->iovec
+ i
,
/* Scratch array used only when the field has to be truncated (see below). */
1291 struct iovec copy
[2];
1294 if (sendmsg(fd
, &mh
, MSG_NOSIGNAL
) >= 0)
1297 if (errno
== EMSGSIZE
&& mh
.msg_iov
[0].iov_len
> 0) {
1298 /* This field didn't fit? That's a pity. Given that this is
1299 * just metadata, let's truncate the field at half, and try
1300 * again. We append three dots, in order to show that this is
1303 if (mh
.msg_iov
!= copy
) {
1304 /* We don't want to modify the caller's iovec, hence
1305 * let's create our own array, consisting of two new
1306 * iovecs, where the first is a (truncated) copy of
1307 * what we want to send, and the second one contains
1308 * the trailing dots. */
1309 copy
[0] = iovw
->iovec
[i
];
1310 copy
[1] = IOVEC_MAKE(((const char[]){'.', '.', '.'}), 3);
1316 copy
[0].iov_len
/= 2; /* halve it, and try again */
1320 return log_error_errno(errno
, "Failed to send coredump datagram: %m");
1324 /* First sentinel: the coredump fd */
1325 r
= send_one_fd(fd
, input_fd
, 0);
1327 return log_error_errno(r
, "Failed to send coredump fd: %m");
1329 /* The optional second sentinel: the pidfd */
1330 if (!pidref_is_set(pidref
) || pidref
->fd
< 0) /* If we have no pidfd, stop now */
1333 r
= send_one_fd(fd
, pidref
->fd
, 0);
1335 return log_error_errno(r
, "Failed to send pidfd: %m");
1337 /* The optional third sentinel: the mount tree fd */
1338 if (mount_tree_fd
< 0) /* If we have no mount tree, stop now */
1341 r
= send_one_fd(fd
, mount_tree_fd
, 0);
1343 return log_error_errno(r
, "Failed to send mount tree fd: %m");
/* gather_pid_metadata_from_argv(): turn the metadata passed in argv[] into fields in iovw and
 * cache it in the context.
 *
 * Requires at least _META_ARGV_REQUIRED arguments (otherwise a synthetic EINVAL is logged and
 * returned) and looks at no more than _META_ARGV_MAX of them. Special cases visible below:
 *   - META_ARGV_TIMESTAMP: six zeroes are appended with strjoin()
 *   - META_ARGV_PID:       the string is parsed into local_pidref with pidref_set_pidstr()
 *   - META_ARGV_PIDFD:     the string is parsed with parse_fd(), installed into context->pidref
 *                          via pidref_set_pidfd(), and context->got_pidfd is set
 * Fields are added with iovw_put_string_field() under meta_field_names[i]. Afterwards
 * context_parse_iovw() is called, local_pidref is moved into context->pidref if that is still
 * unset, and kernel_fd is closed with safe_close().
 *
 * NOTE(review): the context parameter is used throughout but its declaration line is not
 * visible in this listing (presumably a Context pointer between iovw and argc; confirm). Other
 * short lines are missing too, so which statements are skipped for which argument cannot be
 * confirmed from here. */
1348 static int gather_pid_metadata_from_argv(
1349 struct iovec_wrapper
*iovw
,
1351 int argc
, char **argv
) {
1353 _cleanup_(pidref_done
) PidRef local_pidref
= PIDREF_NULL
;
1354 int r
, kernel_fd
= -EBADF
;
1359 /* We gather all metadata that were passed via argv[] into an array of iovecs that
1360 * we'll forward to the socket unit.
1362 * We require at least _META_ARGV_REQUIRED args, but will accept more.
1363 * We know how to parse _META_ARGV_MAX args. The rest will be ignored. */
1365 if (argc
< _META_ARGV_REQUIRED
)
1366 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1367 "Not enough arguments passed by the kernel (%i, expected between %i and %i).",
1368 argc
, _META_ARGV_REQUIRED
, _META_ARGV_MAX
);
1370 for (int i
= 0; i
< MIN(argc
, _META_ARGV_MAX
); i
++) {
/* buf is declared with _cleanup_free_ and receives the strjoin() result in the timestamp case. */
1371 _cleanup_free_
char *buf
= NULL
;
1372 const char *t
= argv
[i
];
1374 if (i
== META_ARGV_TIMESTAMP
) {
1375 /* The journal fields contain the timestamp padded with six
1376 * zeroes, so that the kernel-supplied 1s granularity timestamps
1377 * becomes 1μs granularity, i.e. the granularity systemd usually
1379 buf
= strjoin(argv
[i
], "000000");
1386 if (i
== META_ARGV_PID
) {
1387 /* Store this so that we can check whether the core will be forwarded to a container
1388 * even when the kernel doesn't provide a pidfd. Can be dropped once baseline is
1390 r
= pidref_set_pidstr(&local_pidref
, t
);
1392 return log_error_errno(r
, "Failed to initialize pidref from pid %s: %m", t
);
1395 if (i
== META_ARGV_PIDFD
) {
1396 /* If the current kernel doesn't support the %F specifier (which resolves to a
1397 * pidfd), but we included it in the core_pattern expression, we'll receive an empty
1398 * string here. Deal with that gracefully. */
1402 assert(!pidref_is_set(&context
->pidref
));
1403 assert(kernel_fd
< 0);
1405 kernel_fd
= parse_fd(t
);
1407 return log_error_errno(kernel_fd
, "Failed to parse pidfd \"%s\": %m", t
);
1409 r
= pidref_set_pidfd(&context
->pidref
, kernel_fd
);
1411 return log_error_errno(r
, "Failed to initialize pidref from pidfd %d: %m", kernel_fd
);
1413 context
->got_pidfd
= 1;
1415 /* If there are containers involved with different versions of the code they might
1416 * not be using pidfds, so it would be wrong to set the metadata, skip it. */
1417 r
= pidref_in_same_namespace(/* pid1 = */ NULL
, &context
->pidref
, NAMESPACE_PID
);
1419 log_debug_errno(r
, "Failed to check pidns of crashing process, ignoring: %m");
1423 /* We don't print the fd number in the journal as it's meaningless, but we still
1424 * record that the parsing was done with a kernel-provided fd as it means it's safe
1425 * from races, which is valuable information to provide in the journal record. */
1429 r
= iovw_put_string_field(iovw
, meta_field_names
[i
], t
);
1434 /* Cache some of the process metadata we collected so far and that we'll need to
1436 r
= context_parse_iovw(context
, iovw
);
1440 /* If the kernel didn't give us a PIDFD, then use the one derived from the
1441 * PID immediately, given we have it. */
1442 if (!pidref_is_set(&context
->pidref
))
1443 context
->pidref
= TAKE_PIDREF(local_pidref
);
1445 /* Close the kernel-provided FD as the last thing after everything else succeeded. */
1446 kernel_fd
= safe_close(kernel_fd
);
/* gather_pid_metadata_from_procfs(): collect runtime metadata about the process referenced by
 * context->pidref and append it to iovw as COREDUMP_* fields.
 *
 * COMM is mandatory: a failure of pidref_get_comm() is logged as an error and returned. The
 * other lookups (executable, units, session, owner UID, slice, command line, cgroup, open fds,
 * several per-process procfs files, cwd, root, container command line, environment) add their
 * field only when the lookup succeeds, and the results of adding those fields are ignored via
 * (void) casts. Once everything is gathered, pidref_verify() re-checks the pidref and a failure
 * is returned as an error.
 *
 * Returns the result of context_parse_iovw() on success.
 *
 * NOTE(review): declarations of the locals (pid, t, p, size, owner_uid, r) and several short
 * lines are not visible in this listing. */
1451 static int gather_pid_metadata_from_procfs(struct iovec_wrapper
*iovw
, Context
*context
) {
1462 /* Note that if we fail on oom later on, we do not roll-back changes to the iovec
1463 * structure. (It remains valid, with the first iovec fields initialized.) */
1465 pid
= context
->pidref
.pid
;
1467 /* The following is mandatory */
1468 r
= pidref_get_comm(&context
->pidref
, &t
);
1470 return log_error_errno(r
, "Failed to get COMM: %m");
1472 r
= iovw_put_string_field_free(iovw
, "COREDUMP_COMM=", t
);
1476 /* The following are optional, but we use them if present. */
1477 r
= get_process_exe(pid
, &t
);
1479 r
= iovw_put_string_field_free(iovw
, "COREDUMP_EXE=", t
);
1481 log_warning_errno(r
, "Failed to get EXE, ignoring: %m");
1483 if (cg_pidref_get_unit(&context
->pidref
, &t
) >= 0)
1484 (void) iovw_put_string_field_free(iovw
, "COREDUMP_UNIT=", t
);
1486 if (cg_pid_get_user_unit(pid
, &t
) >= 0)
1487 (void) iovw_put_string_field_free(iovw
, "COREDUMP_USER_UNIT=", t
);
1489 if (cg_pidref_get_session(&context
->pidref
, &t
) >= 0)
1490 (void) iovw_put_string_field_free(iovw
, "COREDUMP_SESSION=", t
);
1492 if (cg_pidref_get_owner_uid(&context
->pidref
, &owner_uid
) >= 0) {
1493 r
= asprintf(&t
, UID_FMT
, owner_uid
);
1495 (void) iovw_put_string_field_free(iovw
, "COREDUMP_OWNER_UID=", t
);
1498 if (sd_pid_get_slice(pid
, &t
) >= 0)
1499 (void) iovw_put_string_field_free(iovw
, "COREDUMP_SLICE=", t
);
1501 if (pidref_get_cmdline(&context
->pidref
, SIZE_MAX
, PROCESS_CMDLINE_QUOTE_POSIX
, &t
) >= 0)
1502 (void) iovw_put_string_field_free(iovw
, "COREDUMP_CMDLINE=", t
);
1504 if (cg_pid_get_path_shifted(pid
, NULL
, &t
) >= 0)
1505 (void) iovw_put_string_field_free(iovw
, "COREDUMP_CGROUP=", t
);
1507 if (compose_open_fds(pid
, &t
) >= 0)
1508 (void) iovw_put_string_field_free(iovw
, "COREDUMP_OPEN_FDS=", t
);
/* The following blocks each read one per-process procfs file in full and attach its contents. */
1510 p
= procfs_file_alloca(pid
, "status");
1511 if (read_full_file(p
, &t
, /* ret_size= */ NULL
) >= 0)
1512 (void) iovw_put_string_field_free(iovw
, "COREDUMP_PROC_STATUS=", t
);
1514 p
= procfs_file_alloca(pid
, "maps");
1515 if (read_full_file(p
, &t
, /* ret_size= */ NULL
) >= 0)
1516 (void) iovw_put_string_field_free(iovw
, "COREDUMP_PROC_MAPS=", t
);
1518 p
= procfs_file_alloca(pid
, "limits"); /* this uses 'seq_file' in kernel, use read_full_file_at() */
1519 if (read_full_file(p
, &t
, /* ret_size= */ NULL
) >= 0)
1520 (void) iovw_put_string_field_free(iovw
, "COREDUMP_PROC_LIMITS=", t
);
1522 p
= procfs_file_alloca(pid
, "cgroup");
1523 if (read_full_file(p
, &t
, /* ret_size= */ NULL
) >= 0)
1524 (void) iovw_put_string_field_free(iovw
, "COREDUMP_PROC_CGROUP=", t
);
1526 p
= procfs_file_alloca(pid
, "mountinfo");
1527 if (read_full_file(p
, &t
, /* ret_size= */ NULL
) >= 0)
1528 (void) iovw_put_string_field_free(iovw
, "COREDUMP_PROC_MOUNTINFO=", t
);
1530 /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */
1531 p
= procfs_file_alloca(pid
, "auxv");
1532 if (read_full_file(p
, &t
, &size
) >= 0) {
/* The size is requested here and the data is copied with an explicit length below. The new
 * buffer holds the field name, size bytes of data and one terminating NUL; the length passed
 * to iovw_consume() does not count that NUL. */
1533 char *buf
= malloc(strlen("COREDUMP_PROC_AUXV=") + size
+ 1);
1535 /* Add a dummy terminator to make context_parse_iovw() happy. */
1536 *mempcpy_typesafe(stpcpy(buf
, "COREDUMP_PROC_AUXV="), t
, size
) = '\0';
1537 (void) iovw_consume(iovw
, buf
, size
+ strlen("COREDUMP_PROC_AUXV="));
1543 if (get_process_cwd(pid
, &t
) >= 0)
1544 (void) iovw_put_string_field_free(iovw
, "COREDUMP_CWD=", t
);
1546 if (get_process_root(pid
, &t
) >= 0) {
1547 bool proc_self_root_is_slash
;
/* Evaluate the comparison before t is passed to iovw_put_string_field_free() below. */
1549 proc_self_root_is_slash
= strcmp(t
, "/") == 0;
1551 (void) iovw_put_string_field_free(iovw
, "COREDUMP_ROOT=", t
);
1553 /* If the process' root is "/", then there is a chance it has
1554 * mounted its own root and hence is containerized. */
1555 if (proc_self_root_is_slash
&& get_process_container_parent_cmdline(&context
->pidref
, &t
) > 0)
1556 (void) iovw_put_string_field_free(iovw
, "COREDUMP_CONTAINER_CMDLINE=", t
);
1559 if (get_process_environ(pid
, &t
) >= 0)
1560 (void) iovw_put_string_field_free(iovw
, "COREDUMP_ENVIRON=", t
);
1562 /* Now that we have parsed info from /proc/ ensure the pidfd is still valid before continuing. */
1563 r
= pidref_verify(&context
->pidref
);
1565 return log_error_errno(r
, "PIDFD validation failed: %m");
1567 /* We successfully acquired all metadata. */
1568 return context_parse_iovw(context
, iovw
);
/* send_ucred(): send the given credentials as ancillary data on transport_fd.
 *
 * Builds a single control message with level SOL_SOCKET, type SCM_CREDENTIALS and length
 * CMSG_LEN(sizeof(struct ucred)), copies *ucred into its data area and calls sendmsg() with
 * MSG_NOSIGNAL. No payload iovec is set up in the lines shown.
 *
 * Parameters:
 *   transport_fd - socket to send on; asserted to be >= 0
 *   ucred        - credentials to copy into the control message
 *
 * Returns RET_NERRNO() applied to the sendmsg() result. */
1571 static int send_ucred(int transport_fd
, const struct ucred
*ucred
) {
1572 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred
))) control
= {};
1573 struct msghdr mh
= {
1574 .msg_control
= &control
,
1575 .msg_controllen
= sizeof(control
),
1577 struct cmsghdr
*cmsg
;
1579 assert(transport_fd
>= 0);
/* The control buffer is sized for exactly one ucred, so the first header is the only one. */
1582 cmsg
= CMSG_FIRSTHDR(&mh
);
1583 *cmsg
= (struct cmsghdr
) {
1584 .cmsg_level
= SOL_SOCKET
,
1585 .cmsg_type
= SCM_CREDENTIALS
,
1586 .cmsg_len
= CMSG_LEN(sizeof(struct ucred
)),
1588 memcpy(CMSG_DATA(cmsg
), ucred
, sizeof(struct ucred
));
1590 return RET_NERRNO(sendmsg(transport_fd
, &mh
, MSG_NOSIGNAL
));
/* receive_ucred(): counterpart of send_ucred(); read credentials from ancillary data.
 *
 * Receives one message on transport_fd with recvmsg_safe() and walks its control messages,
 * picking the one with level SOL_SOCKET, type SCM_CREDENTIALS and length
 * CMSG_LEN(sizeof(struct ucred)). The matching data is copied into *ret_ucred.
 *
 * Parameters:
 *   transport_fd - socket to receive on; asserted to be >= 0
 *   ret_ucred    - output; written with the received credentials
 *
 * NOTE(review): the handling of a receive error and of the case where no matching control
 * message is found is on lines that are not visible in this listing; confirm before relying
 * on it. */
1593 static int receive_ucred(int transport_fd
, struct ucred
*ret_ucred
) {
1594 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred
))) control
= {};
1595 struct msghdr mh
= {
1596 .msg_control
= &control
,
1597 .msg_controllen
= sizeof(control
),
1599 struct cmsghdr
*cmsg
= NULL
;
1600 struct ucred
*ucred
= NULL
;
1603 assert(transport_fd
>= 0);
1606 n
= recvmsg_safe(transport_fd
, &mh
, 0);
/* Accept only a credentials message of exactly the expected size. */
1610 CMSG_FOREACH(cmsg
, &mh
)
1611 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1612 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1613 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
))) {
1616 ucred
= CMSG_TYPED_DATA(cmsg
, struct ucred
);
1622 *ret_ucred
= *ucred
;
/* can_forward_coredump(): decide whether the coredump may be forwarded, based on the cgroup
 * of the process given as pid.
 *
 * Steps visible below: an early check on context->got_pidfd and context->dumpable (see the
 * existing comment for the attack this guards against), then the cgroup path of pid is looked
 * up with cg_pidref_get_path(), its parent directory is taken with path_extract_directory(),
 * the unit path is resolved with cg_path_get_unit_path(), and cg_is_delegated() is consulted.
 *
 * Parameters:
 *   context - supplies got_pidfd and dumpable
 *   pid     - process reference to check; asserted to be set and not remote
 *
 * Returns the result of cg_has_coredump_receive(unit) when all earlier steps pass.
 *
 * NOTE(review): the return statements of the early exits are not visible in this listing;
 * presumably the early check refuses forwarding when there is no pidfd and the process was
 * not plainly dumpable (confirm). */
1627 static int can_forward_coredump(Context
*context
, const PidRef
*pid
) {
1628 _cleanup_free_
char *cgroup
= NULL
, *path
= NULL
, *unit
= NULL
;
1632 assert(pidref_is_set(pid
));
1633 assert(!pidref_is_remote(pid
));
1635 /* We need to avoid a situation where the attacker crashes a SUID process or a root daemon and
1636 * quickly replaces it with a namespaced process and we forward the coredump to the attacker, into
1637 * the namespace. With %F/pidfd we can reliably check the namespace of the original process, hence we
1638 * can allow forwarding. */
1639 if (!context
->got_pidfd
&& context
->dumpable
!= SUID_DUMP_USER
)
1642 r
= cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &cgroup
);
1646 r
= path_extract_directory(cgroup
, &path
);
1650 r
= cg_path_get_unit_path(path
, &unit
);
1654 /* No valid units in this path. */
1659 /* We require that this process belongs to a delegated cgroup
1660 * (i.e. Delegate=yes), with CoredumpReceive=yes also. */
1661 r
= cg_is_delegated(unit
);
1665 return cg_has_coredump_receive(unit
);
/* forward_coredump_to_container(): hand the coredump over to the coredump service running
 * inside the namespaces of the crashed process.
 *
 * Steps visible below:
 *   1. Find the PID namespace leader with namespace_get_leader() and ask
 *      can_forward_coredump() whether forwarding is allowed.
 *   2. Create an AF_UNIX SOCK_DGRAM socket pair and enable SO_PASSCRED on pair[1].
 *   3. Open the namespaces of the leader and fork into them with namespace_fork().
 *   4. The code that ends in _exit() calls (presumably the forked child branch; the branch
 *      line itself is not visible) closes pair[0], checks that /run/systemd/coredump is
 *      writable, receives the translated credentials on pair[1], rebuilds the field list
 *      (patching some fields from the received ucred), gathers procfs metadata and sends
 *      everything with send_iovec(), passing STDIN_FILENO as the coredump fd and no mount
 *      tree fd.
 *   5. The remaining code closes pair[1], sends the ucred on pair[0] and waits for the
 *      process with wait_for_terminate_and_check(); an exit status other than EXIT_SUCCESS
 *      becomes a synthetic EPROTO.
 *
 * Failures on the non-child side are logged at debug level via log_debug_errno().
 *
 * NOTE(review): short lines (error checks, branch headers, the final return) are missing
 * from this listing, so the exact branch structure should be confirmed. */
1668 static int forward_coredump_to_container(Context
*context
) {
1669 _cleanup_close_
int pidnsfd
= -EBADF
, mntnsfd
= -EBADF
, netnsfd
= -EBADF
, usernsfd
= -EBADF
, rootfd
= -EBADF
;
1670 _cleanup_close_pair_
int pair
[2] = EBADF_PAIR
;
/* Credentials as seen from here; receive_ucred() below overwrites this with the values read
 * from the socket pair. */
1672 struct ucred ucred
= {
1673 .pid
= context
->pidref
.pid
,
1674 .uid
= context
->uid
,
1675 .gid
= context
->gid
,
1681 _cleanup_(pidref_done
) PidRef leader_pid
= PIDREF_NULL
;
1682 r
= namespace_get_leader(&context
->pidref
, NAMESPACE_PID
, &leader_pid
);
1684 return log_debug_errno(r
, "Failed to get namespace leader: %m");
1686 r
= can_forward_coredump(context
, &leader_pid
);
1688 return log_debug_errno(r
, "Failed to check if coredump can be forwarded: %m");
1690 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT
),
1691 "Coredump will not be forwarded because no target cgroup was found.");
1693 r
= RET_NERRNO(socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, pair
));
1695 return log_debug_errno(r
, "Failed to create socket pair: %m");
1697 r
= setsockopt_int(pair
[1], SOL_SOCKET
, SO_PASSCRED
, true);
1699 return log_debug_errno(r
, "Failed to set SO_PASSCRED: %m");
1701 r
= pidref_namespace_open(&leader_pid
, &pidnsfd
, &mntnsfd
, &netnsfd
, &usernsfd
, &rootfd
);
1703 return log_debug_errno(r
, "Failed to open namespaces of PID " PID_FMT
": %m", leader_pid
.pid
);
1705 r
= namespace_fork("(sd-coredumpns)", "(sd-coredump)", NULL
, 0,
1706 FORK_RESET_SIGNALS
|FORK_DEATHSIG_SIGTERM
,
1707 pidnsfd
, mntnsfd
, netnsfd
, usernsfd
, rootfd
, &child
);
1709 return log_debug_errno(r
, "Failed to fork into namespaces of PID " PID_FMT
": %m", leader_pid
.pid
);
/* From here until _exit(EXIT_SUCCESS): only pair[1] is used; errors terminate via _exit(). */
1711 pair
[0] = safe_close(pair
[0]);
1713 r
= access_nofollow("/run/systemd/coredump", W_OK
);
1715 log_debug_errno(r
, "Cannot find coredump socket, exiting: %m");
1716 _exit(EXIT_FAILURE
);
1719 r
= receive_ucred(pair
[1], &ucred
);
1721 log_debug_errno(r
, "Failed to receive ucred and fd: %m");
1722 _exit(EXIT_FAILURE
);
1725 _cleanup_(iovw_free_freep
) struct iovec_wrapper
*iovw
= iovw_new();
1728 _exit(EXIT_FAILURE
);
1731 (void) iovw_put_string_field(iovw
, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR
);
1732 (void) iovw_put_string_field(iovw
, "PRIORITY=", STRINGIFY(LOG_CRIT
));
1733 (void) iovw_put_string_field(iovw
, "COREDUMP_FORWARDED=", "1");
1735 for (int i
= 0; i
< _META_ARGV_MAX
; i
++) {
1736 char buf
[DECIMAL_STR_MAX(pid_t
)];
1737 const char *t
= context
->meta
[i
];
1739 /* Patch some of the fields with the translated ucred data */
1743 xsprintf(buf
, PID_FMT
, ucred
.pid
);
1748 xsprintf(buf
, UID_FMT
, ucred
.uid
);
1753 xsprintf(buf
, GID_FMT
, ucred
.gid
);
1761 r
= iovw_put_string_field(iovw
, meta_field_names
[i
], t
);
1763 log_debug_errno(r
, "Failed to construct iovec: %m");
1764 _exit(EXIT_FAILURE
);
/* A separate context is parsed from the rebuilt field list and used for the procfs lookups. */
1768 _cleanup_(context_done
) Context child_context
= CONTEXT_NULL
;
1769 r
= context_parse_iovw(&child_context
, iovw
);
1771 log_debug_errno(r
, "Failed to save context: %m");
1772 _exit(EXIT_FAILURE
);
1775 r
= gather_pid_metadata_from_procfs(iovw
, &child_context
);
1777 log_debug_errno(r
, "Failed to gather metadata from procfs: %m");
1778 _exit(EXIT_FAILURE
);
1781 r
= send_iovec(iovw
, STDIN_FILENO
, &context
->pidref
, /* mount_tree_fd= */ -EBADF
);
1783 log_debug_errno(r
, "Failed to send iovec to coredump socket: %m");
1784 _exit(EXIT_FAILURE
);
1787 _exit(EXIT_SUCCESS
);
1790 pair
[1] = safe_close(pair
[1]);
1792 /* We need to translate the PID, UID, and GID of the crashing process
1793 * to the container's namespaces. Do this by sending an SCM_CREDENTIALS
1794 * message on a socket pair, and read the result when we join the
1795 * container. The kernel will perform the translation for us. */
1796 r
= send_ucred(pair
[0], &ucred
);
1798 return log_debug_errno(r
, "Failed to send metadata to container: %m");
1800 r
= wait_for_terminate_and_check("(sd-coredumpns)", child
, 0);
1802 return log_debug_errno(r
, "Failed to wait for child to terminate: %m");
1803 if (r
!= EXIT_SUCCESS
)
1804 return log_debug_errno(SYNTHETIC_ERRNO(EPROTO
), "Failed to process coredump in container.");
/* acquire_pid_mount_tree_fd(): obtain an fd for a clone of the mount tree of the crashed
 * process and store it in *ret_fd.
 *
 * When HAVE_DWFL_SET_SYSROOT is not set, *ret_fd is set to -EOPNOTSUPP and a debug message is
 * logged. Otherwise, if arg_enter_namespace is false, *ret_fd is set to -EHOSTDOWN. In the
 * remaining case a socket pair is created, the namespaces of the process are opened with
 * pidref_namespace_open(), and namespace_fork() is used; the code ending in _exit() calls
 * (presumably the forked child; the branch line is not visible) clones the mount tree with
 * open_tree() and sends the fd over pair[1], while the other side receives it from pair[0]
 * with receive_one_fd() and moves it into *ret_fd.
 *
 * Parameters:
 *   context - const context of the crashed process
 *   ret_fd  - output; receives the fd or one of the negative markers above
 *
 * NOTE(review): several argument lines of the pidref_namespace_open() and namespace_fork()
 * calls, the preprocessor else/endif lines and the return statements are not visible in this
 * listing. */
1809 static int acquire_pid_mount_tree_fd(const Context
*context
, int *ret_fd
) {
1810 /* Don't bother preparing environment if we can't pass it to libdwfl. */
1811 #if !HAVE_DWFL_SET_SYSROOT
1812 *ret_fd
= -EOPNOTSUPP
;
1813 log_debug("dwfl_set_sysroot() is not supported.");
1815 _cleanup_close_
int mntns_fd
= -EBADF
, root_fd
= -EBADF
, fd
= -EBADF
;
1816 _cleanup_close_pair_
int pair
[2] = EBADF_PAIR
;
1822 if (!arg_enter_namespace
) {
1823 *ret_fd
= -EHOSTDOWN
;
1824 log_debug("EnterNamespace=no so we won't use mount tree of the crashed process for generating backtrace.");
1828 if (socketpair(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
, 0, pair
) < 0)
1829 return log_error_errno(errno
, "Failed to create socket pair: %m");
1831 r
= pidref_namespace_open(
1833 /* ret_pidns_fd= */ NULL
,
1835 /* ret_netns_fd= */ NULL
,
1836 /* ret_userns_fd= */ NULL
,
1839 return log_error_errno(r
, "Failed to open mount namespace of crashing process: %m");
1841 r
= namespace_fork("(sd-mount-tree-ns)",
1843 /* except_fds= */ NULL
,
1844 /* n_except_fds= */ 0,
1845 FORK_RESET_SIGNALS
|FORK_DEATHSIG_SIGKILL
|FORK_LOG
|FORK_WAIT
,
1846 /* pidns_fd= */ -EBADF
,
1848 /* netns_fd= */ -EBADF
,
1849 /* userns_fd= */ -EBADF
,
/* Sending side: clone the mount tree and pass the resulting fd over pair[1]. */
1855 pair
[0] = safe_close(pair
[0]);
1857 fd
= open_tree(-EBADF
, "/", AT_NO_AUTOMOUNT
| AT_RECURSIVE
| AT_SYMLINK_NOFOLLOW
| OPEN_TREE_CLOEXEC
| OPEN_TREE_CLONE
);
1859 log_error_errno(errno
, "Failed to clone mount tree: %m");
1860 _exit(EXIT_FAILURE
);
1863 r
= send_one_fd(pair
[1], fd
, 0);
1865 log_error_errno(r
, "Failed to send mount tree to parent: %m");
1866 _exit(EXIT_FAILURE
);
1869 _exit(EXIT_SUCCESS
);
/* Receiving side: FORK_WAIT is passed above, so presumably the sender has already exited by
 * now and a non-blocking receive is sufficient (confirm against namespace_fork()). */
1872 pair
[1] = safe_close(pair
[1]);
1874 fd
= receive_one_fd(pair
[0], MSG_DONTWAIT
);
1876 return log_error_errno(fd
, "Failed to receive mount tree: %m");
1878 *ret_fd
= TAKE_FD(fd
);
/* process_kernel(): handle a coredump when invoked with the metadata in argv[] and the core
 * on STDIN_FILENO.
 *
 * Steps visible below:
 *   1. rearrange_stdio() keeps STDIN_FILENO and rebinds the other two standard streams.
 *   2. Metadata is gathered from argv[] (skipping argv[0]) and then from procfs.
 *   3. Unless the crashed process is journald, logging is switched to
 *      LOG_TARGET_JOURNAL_OR_KMSG, and a short summary is logged.
 *   4. pidref_in_same_namespace() is consulted; forward_coredump_to_container() and
 *      acquire_pid_mount_tree_fd() are called (under conditions that are on lines not visible
 *      in this listing; presumably only when the process lives in another PID namespace).
 *   5. If PID 1 crashed, further coredump collection is disabled.
 *   6. MESSAGE_ID= and PRIORITY= fields are added, then the coredump is either submitted
 *      directly with submit_coredump() (journald or PID 1) or sent to the coredump service
 *      with send_iovec().
 *
 * Parameters:
 *   argc, argv - arguments as received by run()
 *
 * NOTE(review): the allocation of iovw and the error checks after each call are on lines
 * that are not visible in this listing. */
1883 static int process_kernel(int argc
, char *argv
[]) {
1884 _cleanup_(iovw_free_freep
) struct iovec_wrapper
*iovw
= NULL
;
1885 _cleanup_(context_done
) Context context
= CONTEXT_NULL
;
1888 /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds
1889 * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to
1891 r
= rearrange_stdio(STDIN_FILENO
, -EBADF
, -EBADF
);
1893 return log_error_errno(r
, "Failed to connect stdout/stderr to /dev/null: %m");
1895 log_debug("Processing coredump received from the kernel...");
1901 /* Collect all process metadata passed by the kernel through argv[] */
1902 r
= gather_pid_metadata_from_argv(iovw
, &context
, argc
- 1, argv
+ 1);
1906 /* Collect the rest of the process metadata retrieved from the runtime */
1907 r
= gather_pid_metadata_from_procfs(iovw
, &context
);
1911 if (!context
.is_journald
)
1912 /* OK, now we know it's not the journal, hence we can make use of it now. */
1913 log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG
);
1915 /* Log minimal metadata now, so it is not lost if the system is about to shut down. */
1916 log_info("Process %s (%s) of user %s terminated abnormally with signal %s/%s, processing...",
1917 context
.meta
[META_ARGV_PID
], context
.meta
[META_COMM
],
1918 context
.meta
[META_ARGV_UID
], context
.meta
[META_ARGV_SIGNAL
],
1919 signal_to_string(context
.signo
));
1921 r
= pidref_in_same_namespace(/* pid1 = */ NULL
, &context
.pidref
, NAMESPACE_PID
);
1923 log_debug_errno(r
, "Failed to check pidns of crashing process, ignoring: %m");
1925 /* If this fails, fallback to the old behavior so that
1926 * there is still some record of the crash. */
1927 r
= forward_coredump_to_container(&context
);
1931 r
= acquire_pid_mount_tree_fd(&context
, &context
.mount_tree_fd
);
1933 log_warning_errno(r
, "Failed to access the mount tree of a container, ignoring: %m");
1936 /* If this is PID 1, disable coredump collection, we'll unlikely be able to process
1939 * FIXME: maybe we should disable coredumps generation from the beginning and
1940 * re-enable it only when we know it's either safe (i.e. we're not running OOM) or
1941 * it's not PID 1 ? */
1942 if (context
.is_pid1
) {
1943 log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
1944 disable_coredumps();
1947 (void) iovw_put_string_field(iovw
, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR
);
1948 (void) iovw_put_string_field(iovw
, "PRIORITY=", STRINGIFY(LOG_CRIT
));
/* journald and PID 1 crashes are processed in this process rather than sent to the socket. */
1950 if (context
.is_journald
|| context
.is_pid1
)
1951 return submit_coredump(&context
, iovw
, STDIN_FILENO
);
1953 return send_iovec(iovw
, STDIN_FILENO
, &context
.pidref
, context
.mount_tree_fd
);
/* process_backtrace(): log a backtrace record built from argv[] metadata plus a journal
 * entry read from STDIN_FILENO.
 *
 * Steps visible below: MESSAGE_ID= (backtrace) and PRIORITY= fields are added, metadata is
 * gathered from argv[] (skipping the program name and the --backtrace option) and from
 * procfs, and the journal importer is fed from stdin until it reports a complete entry or end
 * of data. If end of data was reached, a warning is logged and a MESSAGE= field is
 * synthesized from the cached metadata; the field list is finally written with
 * sd_journal_sendv().
 *
 * Parameters:
 *   argc, argv - arguments as received by run(); argv[1] is the --backtrace option
 *
 * NOTE(review): the allocation of iovw, the loop header around the importer call, the
 * else-branch structure around iovw_append() and the error checks are on lines that are not
 * visible in this listing. */
1956 static int process_backtrace(int argc
, char *argv
[]) {
1957 _cleanup_(journal_importer_cleanup
) JournalImporter importer
= JOURNAL_IMPORTER_INIT(STDIN_FILENO
);
1958 _cleanup_(iovw_free_freep
) struct iovec_wrapper
*iovw
= NULL
;
1959 _cleanup_(context_done
) Context context
= CONTEXT_NULL
;
1965 log_debug("Processing backtrace on stdin...");
1971 (void) iovw_put_string_field(iovw
, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR
);
1972 (void) iovw_put_string_field(iovw
, "PRIORITY=", STRINGIFY(LOG_CRIT
));
1974 /* Collect all process metadata from argv[] by making sure to skip the
1975 * '--backtrace' option */
1976 r
= gather_pid_metadata_from_argv(iovw
, &context
, argc
- 2, argv
+ 2);
1980 /* Collect the rest of the process metadata retrieved from the runtime */
1981 r
= gather_pid_metadata_from_procfs(iovw
, &context
);
1986 r
= journal_importer_process_data(&importer
);
1988 return log_error_errno(r
, "Failed to parse journal entry on stdin: %m");
1989 if (r
== 1 || /* complete entry */
1990 journal_importer_eof(&importer
)) /* end of data */
1994 if (journal_importer_eof(&importer
)) {
1995 log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
/* Build a replacement message from the metadata cached in the context. */
1997 message
= strjoina("Process ", context
.meta
[META_ARGV_PID
],
1998 " (", context
.meta
[META_COMM
], ")"
1999 " of user ", context
.meta
[META_ARGV_UID
],
2000 " failed with ", context
.meta
[META_ARGV_SIGNAL
]);
2002 r
= iovw_put_string_field(iovw
, "MESSAGE=", message
);
2006 /* The imported iovecs are not supposed to be freed by us so let's copy and merge them at the
2007 * end of the array. */
2008 r
= iovw_append(iovw
, &importer
.iovw
);
2013 r
= sd_journal_sendv(iovw
->iovec
, iovw
->count
);
2015 return log_error_errno(r
, "Failed to log backtrace: %m");
/* run(): entry point logic; sets up logging and dispatches to one of the three modes.
 *
 * Logging first goes to LOG_TARGET_KMSG, the process marks itself non-dumpable with
 * set_dumpable(SUID_DUMP_DISABLE), and the configuration is parsed with errors ignored. The
 * number of passed file descriptors from sd_listen_fds() then selects the mode:
 * process_backtrace() when argv[1] is --backtrace, process_kernel() otherwise, or
 * process_socket() on SD_LISTEN_FDS_START. Any other fd count is rejected with a synthetic
 * EINVAL.
 *
 * NOTE(review): the conditions on the fd count are on lines not visible in this listing;
 * per the existing comment, presumably no fd selects the kernel/backtrace modes and exactly
 * one fd selects the socket mode (confirm). */
2020 static int run(int argc
, char *argv
[]) {
2023 /* First, log to a safe place, since we don't know what crashed and it might
2024 * be journald which we'd rather not log to then. */
2026 log_set_target_and_open(LOG_TARGET_KMSG
);
2028 /* Make sure we never enter a loop */
2029 (void) set_dumpable(SUID_DUMP_DISABLE
);
2031 /* Ignore all parse errors */
2032 (void) parse_config();
2034 log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage
));
2035 log_debug("Selected compression %s.", yes_no(arg_compress
));
2037 r
= sd_listen_fds(false);
2039 return log_error_errno(r
, "Failed to determine the number of file descriptors: %m");
2041 /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
2042 * are invoked from the kernel as coredump handler. */
2044 if (streq_ptr(argv
[1], "--backtrace"))
2045 return process_backtrace(argc
, argv
);
2047 return process_kernel(argc
, argv
);
2049 return process_socket(SD_LISTEN_FDS_START
);
2051 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2052 "Received unexpected number of file descriptors.");
/* Invokes the DEFINE_MAIN_FUNCTION macro with run as its argument; presumably this expands
 * to the program main() that calls run() (the macro is not defined in this file, confirm
 * against main-func.h). */
2055 DEFINE_MAIN_FUNCTION(run
);