]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/coredump/coredump.c
Merge pull request #25132 from yuwata/core-device-inactivate-removed-device-on-switch...
[thirdparty/systemd.git] / src / coredump / coredump.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdio.h>
5 #include <sys/prctl.h>
6 #include <sys/statvfs.h>
7 #include <sys/xattr.h>
8 #include <unistd.h>
9
10 #include "sd-daemon.h"
11 #include "sd-journal.h"
12 #include "sd-login.h"
13 #include "sd-messages.h"
14
15 #include "acl-util.h"
16 #include "alloc-util.h"
17 #include "bus-error.h"
18 #include "capability-util.h"
19 #include "cgroup-util.h"
20 #include "compress.h"
21 #include "conf-parser.h"
22 #include "copy.h"
23 #include "coredump-util.h"
24 #include "coredump-vacuum.h"
25 #include "dirent-util.h"
26 #include "elf-util.h"
27 #include "escape.h"
28 #include "fd-util.h"
29 #include "fileio.h"
30 #include "fs-util.h"
31 #include "io-util.h"
32 #include "journal-importer.h"
33 #include "journal-send.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "main-func.h"
37 #include "memory-util.h"
38 #include "mkdir-label.h"
39 #include "parse-util.h"
40 #include "process-util.h"
41 #include "signal-util.h"
42 #include "socket-util.h"
43 #include "special.h"
44 #include "stat-util.h"
45 #include "string-table.h"
46 #include "string-util.h"
47 #include "strv.h"
48 #include "sync-util.h"
49 #include "tmpfile-util.h"
50 #include "uid-alloc-range.h"
51 #include "user-util.h"
52
53 /* The maximum size up to which we process coredumps. We use 1G on 32bit systems, and 32G on 64bit systems */
54 #if __SIZEOF_POINTER__ == 4
55 #define PROCESS_SIZE_MAX ((uint64_t) (1LLU*1024LLU*1024LLU*1024LLU))
56 #elif __SIZEOF_POINTER__ == 8
57 #define PROCESS_SIZE_MAX ((uint64_t) (32LLU*1024LLU*1024LLU*1024LLU))
58 #else
59 #error "Unexpected pointer size"
60 #endif
61
62 /* The maximum size up to which we leave the coredump around on disk */
63 #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
64
65 /* The maximum size up to which we store the coredump in the journal */
66 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
67 #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
68 #else
69 /* oss-fuzz limits memory usage. */
70 #define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
71 #endif
72
73 /* When checking for available memory and setting lower limits, don't
74 * go below 4MB for writing core files to storage. */
75 #define PROCESS_SIZE_MIN (4U*1024U*1024U)
76
77 /* Make sure to not make this larger than the maximum journal entry
78 * size. See DATA_SIZE_MAX in journal-importer.h. */
79 assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
80
enum {
        /* Indexes into the per-process metadata cache (Context.meta[]).
         *
         * The first group mirrors, in order, the fields the kernel hands us via argv[] according to the
         * pattern configured in /proc/sys/kernel/core_pattern (see man:core(5)). */

        META_ARGV_PID,          /* %P: as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of signal causing dump */
        META_ARGV_TIMESTAMP,    /* %t: time of dump, expressed as seconds since the Epoch (we expand this to µs granularity) */
        META_ARGV_RLIMIT,       /* %c: core file size soft resource limit */
        META_ARGV_HOSTNAME,     /* %h: hostname */
        _META_ARGV_MAX,

        /* Fields cached because they are needed quickly for naming coredump files and attaching
         * xattrs. Unlike the argv[] group they are collected from the runtime environment. They are
         * still mandatory. */

        META_COMM = _META_ARGV_MAX,
        _META_MANDATORY_MAX,

        /* Same idea as the previous group, but we do not fail if one of these is missing. */

        META_EXE = _META_MANDATORY_MAX,
        META_UNIT,
        _META_MAX
};
112
113 static const char * const meta_field_names[_META_MAX] = {
114 [META_ARGV_PID] = "COREDUMP_PID=",
115 [META_ARGV_UID] = "COREDUMP_UID=",
116 [META_ARGV_GID] = "COREDUMP_GID=",
117 [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=",
118 [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
119 [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
120 [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
121 [META_COMM] = "COREDUMP_COMM=",
122 [META_EXE] = "COREDUMP_EXE=",
123 [META_UNIT] = "COREDUMP_UNIT=",
124 };
125
126 typedef struct Context {
127 const char *meta[_META_MAX];
128 pid_t pid;
129 bool is_pid1;
130 bool is_journald;
131 } Context;
132
/* Where the coredump itself is kept, as selected by the Storage= setting. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,          /* "none" */
        COREDUMP_STORAGE_EXTERNAL,      /* "external" */
        COREDUMP_STORAGE_JOURNAL,       /* "journal" */
        _COREDUMP_STORAGE_MAX,
        _COREDUMP_STORAGE_INVALID = -EINVAL,
} CoredumpStorage;
140
141 static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
142 [COREDUMP_STORAGE_NONE] = "none",
143 [COREDUMP_STORAGE_EXTERNAL] = "external",
144 [COREDUMP_STORAGE_JOURNAL] = "journal",
145 };
146
147 DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
148 static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting");
149
150 static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL;
151 static bool arg_compress = true;
152 static uint64_t arg_process_size_max = PROCESS_SIZE_MAX;
153 static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
154 static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
155 static uint64_t arg_keep_free = UINT64_MAX;
156 static uint64_t arg_max_use = UINT64_MAX;
157
158 static int parse_config(void) {
159 static const ConfigTableItem items[] = {
160 { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
161 { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
162 { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
163 { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
164 { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
165 { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
166 { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
167 {}
168 };
169
170 return config_parse_many_nulstr(
171 PKGSYSCONFDIR "/coredump.conf",
172 CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
173 "Coredump\0",
174 config_item_table_lookup, items,
175 CONFIG_PARSE_WARN,
176 NULL,
177 NULL);
178 }
179
180 static uint64_t storage_size_max(void) {
181 if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
182 return arg_external_size_max;
183 if (arg_storage == COREDUMP_STORAGE_JOURNAL)
184 return arg_journal_size_max;
185 assert(arg_storage == COREDUMP_STORAGE_NONE);
186 return 0;
187 }
188
/* Grants the crashing user read access to the coredump via an ACL entry. System users, dynamic users
 * and "nobody" get no entry. Does nothing when built without ACL support.
 *
 * Returns 0 on success (or when there is nothing to do), negative errno-style value on failure. */
static int fix_acl(int fd, uid_t uid) {

#if HAVE_ACL
        bool skip;
        int k;

        assert(fd >= 0);
        assert(uid_is_valid(uid));

        skip = uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
        if (!skip) {
                /* Make sure normal users can read (but not write or delete) their own coredumps */
                k = fd_add_uid_acl_permission(fd, uid, ACL_READ);
                if (k < 0)
                        return log_error_errno(k, "Failed to adjust ACL of the coredump: %m");
        }
#endif

        return 0;
}
208
209 static int fix_xattr(int fd, const Context *context) {
210
211 static const char * const xattrs[_META_MAX] = {
212 [META_ARGV_PID] = "user.coredump.pid",
213 [META_ARGV_UID] = "user.coredump.uid",
214 [META_ARGV_GID] = "user.coredump.gid",
215 [META_ARGV_SIGNAL] = "user.coredump.signal",
216 [META_ARGV_TIMESTAMP] = "user.coredump.timestamp",
217 [META_ARGV_RLIMIT] = "user.coredump.rlimit",
218 [META_ARGV_HOSTNAME] = "user.coredump.hostname",
219 [META_COMM] = "user.coredump.comm",
220 [META_EXE] = "user.coredump.exe",
221 };
222
223 int r = 0;
224
225 assert(fd >= 0);
226
227 /* Attach some metadata to coredumps via extended
228 * attributes. Just because we can. */
229
230 for (unsigned i = 0; i < _META_MAX; i++) {
231 int k;
232
233 if (isempty(context->meta[i]) || !xattrs[i])
234 continue;
235
236 k = fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE);
237 if (k < 0 && r == 0)
238 r = -errno;
239 }
240
241 return r;
242 }
243
/* Escapes the characters '.', '/' and ' ' so that the string can be used as a file name component. */
#define filename_escape(s) xescape((s), "./ ")

/* Returns a printable name for a temporary file: the name itself, or a placeholder if the file has
 * no name (s is NULL). */
static const char *coredump_tmpfile_name(const char *s) {
        if (!s)
                return "(unnamed temporary file)";

        return s;
}
249
250 static int fix_permissions(
251 int fd,
252 const char *filename,
253 const char *target,
254 const Context *context,
255 uid_t uid) {
256
257 int r;
258
259 assert(fd >= 0);
260 assert(target);
261 assert(context);
262
263 /* Ignore errors on these */
264 (void) fchmod(fd, 0640);
265 (void) fix_acl(fd, uid);
266 (void) fix_xattr(fd, context);
267
268 r = fsync_full(fd);
269 if (r < 0)
270 return log_error_errno(r, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename));
271
272 r = link_tmpfile(fd, filename, target);
273 if (r < 0)
274 return log_error_errno(r, "Failed to move coredump %s into place: %m", target);
275
276 return 0;
277 }
278
279 static int maybe_remove_external_coredump(const char *filename, uint64_t size) {
280
281 /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */
282
283 if (arg_storage == COREDUMP_STORAGE_EXTERNAL &&
284 size <= arg_external_size_max)
285 return 0;
286
287 if (!filename)
288 return 1;
289
290 if (unlink(filename) < 0 && errno != ENOENT)
291 return log_error_errno(errno, "Failed to unlink %s: %m", filename);
292
293 return 1;
294 }
295
296 static int make_filename(const Context *context, char **ret) {
297 _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL;
298 sd_id128_t boot = {};
299 int r;
300
301 assert(context);
302
303 c = filename_escape(context->meta[META_COMM]);
304 if (!c)
305 return -ENOMEM;
306
307 u = filename_escape(context->meta[META_ARGV_UID]);
308 if (!u)
309 return -ENOMEM;
310
311 r = sd_id128_get_boot(&boot);
312 if (r < 0)
313 return r;
314
315 p = filename_escape(context->meta[META_ARGV_PID]);
316 if (!p)
317 return -ENOMEM;
318
319 t = filename_escape(context->meta[META_ARGV_TIMESTAMP]);
320 if (!t)
321 return -ENOMEM;
322
323 if (asprintf(ret,
324 "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s",
325 c,
326 u,
327 SD_ID128_FORMAT_VAL(boot),
328 p,
329 t) < 0)
330 return -ENOMEM;
331
332 return 0;
333 }
334
335 static int save_external_coredump(
336 const Context *context,
337 int input_fd,
338 char **ret_filename,
339 int *ret_node_fd,
340 int *ret_data_fd,
341 uint64_t *ret_size,
342 uint64_t *ret_compressed_size,
343 bool *ret_truncated) {
344
345 _cleanup_(unlink_and_freep) char *tmp = NULL;
346 _cleanup_free_ char *fn = NULL;
347 _cleanup_close_ int fd = -1;
348 uint64_t rlimit, process_limit, max_size;
349 bool truncated, storage_on_tmpfs;
350 struct stat st;
351 uid_t uid;
352 int r;
353
354 assert(context);
355 assert(ret_filename);
356 assert(ret_node_fd);
357 assert(ret_data_fd);
358 assert(ret_size);
359 assert(ret_compressed_size);
360 assert(ret_truncated);
361
362 r = parse_uid(context->meta[META_ARGV_UID], &uid);
363 if (r < 0)
364 return log_error_errno(r, "Failed to parse UID: %m");
365
366 r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit);
367 if (r < 0)
368 return log_error_errno(r, "Failed to parse resource limit '%s': %m",
369 context->meta[META_ARGV_RLIMIT]);
370 if (rlimit < page_size())
371 /* Is coredumping disabled? Then don't bother saving/processing the
372 * coredump. Anything below PAGE_SIZE cannot give a readable coredump
373 * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but
374 * is usually the same as PAGE_SIZE. */
375 return log_info_errno(SYNTHETIC_ERRNO(EBADSLT),
376 "Resource limits disable core dumping for process %s (%s).",
377 context->meta[META_ARGV_PID], context->meta[META_COMM]);
378
379 process_limit = MAX(arg_process_size_max, storage_size_max());
380 if (process_limit == 0)
381 return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT),
382 "Limits for coredump processing and storage are both 0, not dumping core.");
383
384 /* Never store more than the process configured, or than we actually shall keep or process */
385 max_size = MIN(rlimit, process_limit);
386
387 r = make_filename(context, &fn);
388 if (r < 0)
389 return log_error_errno(r, "Failed to determine coredump file name: %m");
390
391 (void) mkdir_parents_label(fn, 0755);
392
393 fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp);
394 if (fd < 0)
395 return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn);
396
397 /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on
398 * the service or the slice it belongs to. This is common on low-resources systems,
399 * to avoid crashing processes to take away too many system resources.
400 * Check the cgroup settings, and set max_size to a bit less than half of the
401 * available memory left to the process.
402 * Then, attempt to write the core file uncompressed first - if the write gets
403 * interrupted, we know we won't be able to write it all, so instead compress what
404 * was written so far, delete the uncompressed truncated core, and then continue
405 * compressing from STDIN. Given the compressed core cannot be larger than the
406 * uncompressed one, and 1KB for metadata is accounted for in the calculation, we
407 * should be able to at least store the full compressed core file. */
408
409 storage_on_tmpfs = fd_is_temporary_fs(fd) > 0;
410 if (storage_on_tmpfs && arg_compress) {
411 _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
412 uint64_t cgroup_limit = UINT64_MAX;
413 struct statvfs sv;
414
415 /* If we can't get the cgroup limit, just ignore it, but don't fail,
416 * try anyway with the config settings. */
417 r = sd_bus_default_system(&bus);
418 if (r < 0)
419 log_info_errno(r, "Failed to connect to system bus, skipping MemoryAvailable check: %m");
420 else {
421 _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
422
423 r = sd_bus_get_property_trivial(
424 bus,
425 "org.freedesktop.systemd1",
426 "/org/freedesktop/systemd1/unit/self",
427 "org.freedesktop.systemd1.Service",
428 "MemoryAvailable",
429 &error,
430 't', &cgroup_limit);
431 if (r < 0)
432 log_warning_errno(r,
433 "Failed to query MemoryAvailable for current unit, "
434 "falling back to static config settings: %s",
435 bus_error_message(&error, r));
436 }
437
438 max_size = MIN(cgroup_limit, max_size);
439 max_size = LESS_BY(max_size, 1024U) / 2; /* Account for 1KB metadata overhead for compressing */
440 max_size = MAX(PROCESS_SIZE_MIN, max_size); /* Impose a lower minimum */
441
442 /* tmpfs might get full quickly, so check the available space too.
443 * But don't worry about errors here, failing to access the storage
444 * location will be better logged when writing to it. */
445 if (statvfs("/var/lib/systemd/coredump/", &sv) >= 0)
446 max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size);
447
448 log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup memory limits.", max_size);
449 }
450
451 r = copy_bytes(input_fd, fd, max_size, 0);
452 if (r < 0)
453 return log_error_errno(r, "Cannot store coredump of %s (%s): %m",
454 context->meta[META_ARGV_PID], context->meta[META_COMM]);
455 truncated = r == 1;
456
457 #if HAVE_COMPRESSION
458 if (arg_compress) {
459 _cleanup_(unlink_and_freep) char *tmp_compressed = NULL;
460 _cleanup_free_ char *fn_compressed = NULL;
461 _cleanup_close_ int fd_compressed = -1;
462 uint64_t uncompressed_size = 0;
463
464 if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
465 return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn);
466
467 fn_compressed = strjoin(fn, default_compression_extension());
468 if (!fn_compressed)
469 return log_oom();
470
471 fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed);
472 if (fd_compressed < 0)
473 return log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed);
474
475 r = compress_stream(fd, fd_compressed, max_size, &uncompressed_size);
476 if (r < 0)
477 return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));
478
479 if (truncated && storage_on_tmpfs) {
480 uint64_t partial_uncompressed_size = 0;
481
482 /* Uncompressed write was truncated and we are writing to tmpfs: delete
483 * the uncompressed core, and compress the remaining part from STDIN. */
484
485 tmp = unlink_and_free(tmp);
486 fd = safe_close(fd);
487
488 r = compress_stream(input_fd, fd_compressed, max_size, &partial_uncompressed_size);
489 if (r < 0)
490 return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));
491 uncompressed_size += partial_uncompressed_size;
492 }
493
494 r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
495 if (r < 0)
496 return r;
497
498 if (fstat(fd_compressed, &st) < 0)
499 return log_error_errno(errno,
500 "Failed to fstat core file %s: %m",
501 coredump_tmpfile_name(tmp_compressed));
502
503 *ret_filename = TAKE_PTR(fn_compressed); /* compressed */
504 *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */
505 *ret_compressed_size = (uint64_t) st.st_size; /* compressed */
506 *ret_data_fd = TAKE_FD(fd);
507 *ret_size = uncompressed_size;
508 *ret_truncated = truncated;
509 tmp_compressed = mfree(tmp_compressed);
510
511 return 0;
512 }
513 #endif
514
515 if (truncated)
516 log_struct(LOG_INFO,
517 LOG_MESSAGE("Core file was truncated to %"PRIu64" bytes.", max_size),
518 "SIZE_LIMIT=%"PRIu64, max_size,
519 "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
520
521 r = fix_permissions(fd, tmp, fn, context, uid);
522 if (r < 0)
523 return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn);
524
525 if (fstat(fd, &st) < 0)
526 return log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp));
527
528 if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
529 return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn);
530
531 *ret_filename = TAKE_PTR(fn);
532 *ret_data_fd = TAKE_FD(fd);
533 *ret_size = (uint64_t) st.st_size;
534 *ret_truncated = truncated;
535
536 return 0;
537 }
538
539 static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) {
540 _cleanup_free_ char *field = NULL;
541 ssize_t n;
542
543 assert(fd >= 0);
544 assert(ret);
545 assert(ret_size);
546
547 if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
548 return log_warning_errno(errno, "Failed to seek: %m");
549
550 field = malloc(9 + size);
551 if (!field) {
552 log_warning("Failed to allocate memory for coredump, coredump will not be stored.");
553 return -ENOMEM;
554 }
555
556 memcpy(field, "COREDUMP=", 9);
557
558 n = read(fd, field + 9, size);
559 if (n < 0)
560 return log_error_errno((int) n, "Failed to read core data: %m");
561 if ((size_t) n < size)
562 return log_error_errno(SYNTHETIC_ERRNO(EIO),
563 "Core data too short.");
564
565 *ret = TAKE_PTR(field);
566 *ret_size = size + 9;
567
568 return 0;
569 }
570
571 /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
572 * 0:/dev/pts/23
573 * pos: 0
574 * flags: 0100002
575 *
576 * 1:/dev/pts/23
577 * pos: 0
578 * flags: 0100002
579 *
580 * 2:/dev/pts/23
581 * pos: 0
582 * flags: 0100002
583 * EOF
584 */
585 static int compose_open_fds(pid_t pid, char **open_fds) {
586 _cleanup_closedir_ DIR *proc_fd_dir = NULL;
587 _cleanup_close_ int proc_fdinfo_fd = -1;
588 _cleanup_free_ char *buffer = NULL;
589 _cleanup_fclose_ FILE *stream = NULL;
590 const char *fddelim = "", *path;
591 size_t size = 0;
592 int r;
593
594 assert(pid >= 0);
595 assert(open_fds != NULL);
596
597 path = procfs_file_alloca(pid, "fd");
598 proc_fd_dir = opendir(path);
599 if (!proc_fd_dir)
600 return -errno;
601
602 proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH);
603 if (proc_fdinfo_fd < 0)
604 return -errno;
605
606 stream = open_memstream_unlocked(&buffer, &size);
607 if (!stream)
608 return -ENOMEM;
609
610 FOREACH_DIRENT(de, proc_fd_dir, return -errno) {
611 _cleanup_fclose_ FILE *fdinfo = NULL;
612 _cleanup_free_ char *fdname = NULL;
613 _cleanup_close_ int fd = -1;
614
615 r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname);
616 if (r < 0)
617 return r;
618
619 fprintf(stream, "%s%s:%s\n", fddelim, de->d_name, fdname);
620 fddelim = "\n";
621
622 /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
623 fd = openat(proc_fdinfo_fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY);
624 if (fd < 0)
625 continue;
626
627 fdinfo = take_fdopen(&fd, "r");
628 if (!fdinfo)
629 continue;
630
631 for (;;) {
632 _cleanup_free_ char *line = NULL;
633
634 r = read_line(fdinfo, LONG_LINE_MAX, &line);
635 if (r < 0)
636 return r;
637 if (r == 0)
638 break;
639
640 fputs(line, stream);
641 fputc('\n', stream);
642 }
643 }
644
645 errno = 0;
646 stream = safe_fclose(stream);
647
648 if (errno > 0)
649 return -errno;
650
651 *open_fds = TAKE_PTR(buffer);
652
653 return 0;
654 }
655
656 static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) {
657 const char *p;
658 struct stat stbuf;
659 _cleanup_close_ int proc_ns_dir_fd = -1;
660
661 p = procfs_file_alloca(pid, "ns");
662
663 proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY);
664 if (proc_ns_dir_fd < 0)
665 return -errno;
666
667 if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0)
668 return -errno;
669
670 *ns = stbuf.st_ino;
671 return 0;
672 }
673
/* Walks up the chain of parent processes of 'pid' until it finds one that lives in a different
 * mount namespace than 'pid' itself.
 *
 * On success stores the PID of that ancestor in *ret and returns 0. Returns -ENOENT if the top of
 * the process tree is reached without finding one, or another negative errno-style value on
 * failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *ret) {
        ino_t own_mntns;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        for (pid_t current = pid;;) {
                ino_t parent_mntns;
                pid_t parent;

                r = get_process_ppid(current, &parent);
                if (r == -EADDRNOTAVAIL)
                        /* Reached the top (i.e. typically PID 1, but could also be a process whose
                         * parent is not in our pidns) */
                        return -ENOENT;
                if (r < 0)
                        return r;

                r = get_process_ns(parent, "mnt", &parent_mntns);
                if (r < 0)
                        return r;

                if (parent_mntns != own_mntns) {
                        *ret = parent;
                        return 0;
                }

                /* Same namespace still, continue one level up */
                current = parent;
        }
}
705
706 /* Returns 1 if the parent was found.
707 * Returns 0 if there is not a process we can call the pid's
708 * container parent (the pid's process isn't 'containerized').
709 * Returns a negative number on errors.
710 */
711 static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
712 pid_t container_pid;
713 const char *proc_root_path;
714 struct stat root_stat, proc_root_stat;
715 int r;
716
717 /* To compare inodes of / and /proc/[pid]/root */
718 if (stat("/", &root_stat) < 0)
719 return -errno;
720
721 proc_root_path = procfs_file_alloca(pid, "root");
722 if (stat(proc_root_path, &proc_root_stat) < 0)
723 return -errno;
724
725 /* The process uses system root. */
726 if (stat_inode_same(&proc_root_stat, &root_stat)) {
727 *cmdline = NULL;
728 return 0;
729 }
730
731 r = get_mount_namespace_leader(pid, &container_pid);
732 if (r < 0)
733 return r;
734
735 r = get_process_cmdline(container_pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, cmdline);
736 if (r < 0)
737 return r;
738
739 return 1;
740 }
741
742 static int change_uid_gid(const Context *context) {
743 uid_t uid;
744 gid_t gid;
745 int r;
746
747 r = parse_uid(context->meta[META_ARGV_UID], &uid);
748 if (r < 0)
749 return r;
750
751 if (uid_is_system(uid)) {
752 const char *user = "systemd-coredump";
753
754 r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
755 if (r < 0) {
756 log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user);
757 uid = gid = 0;
758 }
759 } else {
760 r = parse_gid(context->meta[META_ARGV_GID], &gid);
761 if (r < 0)
762 return r;
763 }
764
765 return drop_privileges(uid, gid, 0);
766 }
767
768 static int submit_coredump(
769 Context *context,
770 struct iovec_wrapper *iovw,
771 int input_fd) {
772
773 _cleanup_(json_variant_unrefp) JsonVariant *json_metadata = NULL;
774 _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
775 _cleanup_free_ char *filename = NULL, *coredump_data = NULL;
776 _cleanup_free_ char *stacktrace = NULL;
777 char *core_message;
778 const char *module_name;
779 uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX;
780 bool truncated = false;
781 JsonVariant *module_json;
782 int r;
783
784 assert(context);
785 assert(iovw);
786 assert(input_fd >= 0);
787
788 /* Vacuum before we write anything again */
789 (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);
790
791 /* Always stream the coredump to disk, if that's possible */
792 r = save_external_coredump(context, input_fd,
793 &filename, &coredump_node_fd, &coredump_fd,
794 &coredump_size, &coredump_compressed_size, &truncated);
795 if (r < 0)
796 /* Skip whole core dumping part */
797 goto log;
798
799 /* If we don't want to keep the coredump on disk, remove it now, as later on we
800 * will lack the privileges for it. However, we keep the fd to it, so that we can
801 * still process it and log it. */
802 r = maybe_remove_external_coredump(filename, coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size);
803 if (r < 0)
804 return r;
805 if (r == 0)
806 (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename);
807 else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
808 log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
809 coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size, arg_external_size_max);
810
811 /* Vacuum again, but exclude the coredump we just created */
812 (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
813
814 /* Now, let's drop privileges to become the user who owns the segfaulted process
815 * and allocate the coredump memory under the user's uid. This also ensures that
816 * the credentials journald will see are the ones of the coredumping user, thus
817 * making sure the user gets access to the core dump. Let's also get rid of all
818 * capabilities, if we run as root, we won't need them anymore. */
819 r = change_uid_gid(context);
820 if (r < 0)
821 return log_error_errno(r, "Failed to drop privileges: %m");
822
823 /* Try to get a stack trace if we can */
824 if (coredump_size > arg_process_size_max)
825 log_debug("Not generating stack trace: core size %"PRIu64" is greater "
826 "than %"PRIu64" (the configured maximum)",
827 coredump_size, arg_process_size_max);
828 else if (coredump_fd >= 0) {
829 bool skip = startswith(context->meta[META_COMM], "systemd-coredum"); /* COMM is 16 bytes usually */
830
831 (void) parse_elf_object(coredump_fd,
832 context->meta[META_EXE],
833 /* fork_disable_dump= */ skip, /* avoid loops */
834 &stacktrace,
835 &json_metadata);
836 }
837
838 log:
839 core_message = strjoina("Process ", context->meta[META_ARGV_PID],
840 " (", context->meta[META_COMM], ") of user ",
841 context->meta[META_ARGV_UID], " dumped core.",
842 context->is_journald && filename ? "\nCoredump diverted to " : NULL,
843 context->is_journald && filename ? filename : NULL);
844
845 core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace);
846
847 if (context->is_journald)
848 /* We might not be able to log to the journal, so let's always print the message to another
849 * log target. The target was set previously to something safe. */
850 log_dispatch(LOG_ERR, 0, core_message);
851
852 (void) iovw_put_string_field(iovw, "MESSAGE=", core_message);
853
854 if (truncated)
855 (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1");
856
857 /* If we managed to parse any ELF metadata (build-id, ELF package meta),
858 * attach it as journal metadata. */
859 if (json_metadata) {
860 _cleanup_free_ char *formatted_json = NULL;
861
862 r = json_variant_format(json_metadata, 0, &formatted_json);
863 if (r < 0)
864 return log_error_errno(r, "Failed to format JSON package metadata: %m");
865
866 (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_JSON=", formatted_json);
867 }
868
869 /* In the unlikely scenario that context->meta[META_EXE] is not available,
870 * let's avoid guessing the module name and skip the loop. */
871 if (context->meta[META_EXE])
872 JSON_VARIANT_OBJECT_FOREACH(module_name, module_json, json_metadata) {
873 JsonVariant *t;
874
875 /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */
876 if (!path_equal_filename(module_name, context->meta[META_EXE]))
877 continue;
878
879 t = json_variant_by_key(module_json, "name");
880 if (t)
881 (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_NAME=", json_variant_string(t));
882
883 t = json_variant_by_key(module_json, "version");
884 if (t)
885 (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_VERSION=", json_variant_string(t));
886 }
887
888 /* Optionally store the entire coredump in the journal */
889 if (arg_storage == COREDUMP_STORAGE_JOURNAL && coredump_fd >= 0) {
890 if (coredump_size <= arg_journal_size_max) {
891 size_t sz = 0;
892
893 /* Store the coredump itself in the journal */
894
895 r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
896 if (r >= 0) {
897 if (iovw_put(iovw, coredump_data, sz) >= 0)
898 TAKE_PTR(coredump_data);
899 } else
900 log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
901 } else
902 log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
903 coredump_size, arg_journal_size_max);
904 }
905
906 /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the
907 * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write
908 * the coredump to the socket. */
909
910 if (context->is_journald) {
911 r = journal_fd_nonblock(true);
912 if (r < 0)
913 return log_error_errno(r, "Failed to make journal socket non-blocking: %m");
914 }
915
916 r = sd_journal_sendv(iovw->iovec, iovw->count);
917
918 if (context->is_journald) {
919 int k;
920
921 k = journal_fd_nonblock(false);
922 if (k < 0)
923 return log_error_errno(k, "Failed to make journal socket blocking: %m");
924 }
925
926 if (r == -EAGAIN && context->is_journald)
927 log_warning_errno(r, "Failed to log journal coredump, ignoring: %m");
928 else if (r < 0)
929 return log_error_errno(r, "Failed to log coredump: %m");
930
931 return 0;
932 }
933
934 static int save_context(Context *context, const struct iovec_wrapper *iovw) {
935 const char *unit;
936 int r;
937
938 assert(context);
939 assert(iovw);
940 assert(iovw->count >= _META_ARGV_MAX);
941
942 /* The context does not allocate any memory on its own */
943
944 for (size_t n = 0; n < iovw->count; n++) {
945 struct iovec *iovec = iovw->iovec + n;
946
947 for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) {
948 char *p;
949
950 /* Note that these strings are NUL terminated, because we made sure that a
951 * trailing NUL byte is in the buffer, though not included in the iov_len
952 * count (see process_socket() and gather_pid_metadata_*()) */
953 assert(((char*) iovec->iov_base)[iovec->iov_len] == 0);
954
955 p = startswith(iovec->iov_base, meta_field_names[i]);
956 if (p) {
957 context->meta[i] = p;
958 break;
959 }
960 }
961 }
962
963 if (!context->meta[META_ARGV_PID])
964 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
965 "Failed to find the PID of crashing process");
966
967 r = parse_pid(context->meta[META_ARGV_PID], &context->pid);
968 if (r < 0)
969 return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]);
970
971 unit = context->meta[META_UNIT];
972 context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE);
973 context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE);
974
975 return 0;
976 }
977
978 static int process_socket(int fd) {
979 _cleanup_close_ int input_fd = -1;
980 Context context = {};
981 struct iovec_wrapper iovw = {};
982 struct iovec iovec;
983 int r;
984
985 assert(fd >= 0);
986
987 log_setup();
988
989 log_debug("Processing coredump received on stdin...");
990
991 for (;;) {
992 CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control;
993 struct msghdr mh = {
994 .msg_control = &control,
995 .msg_controllen = sizeof(control),
996 .msg_iovlen = 1,
997 };
998 ssize_t n;
999 ssize_t l;
1000
1001 l = next_datagram_size_fd(fd);
1002 if (l < 0) {
1003 r = log_error_errno(l, "Failed to determine datagram size to read: %m");
1004 goto finish;
1005 }
1006
1007 iovec.iov_len = l;
1008 iovec.iov_base = malloc(l + 1);
1009 if (!iovec.iov_base) {
1010 r = log_oom();
1011 goto finish;
1012 }
1013
1014 mh.msg_iov = &iovec;
1015
1016 n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC);
1017 if (n < 0) {
1018 free(iovec.iov_base);
1019 r = log_error_errno(n, "Failed to receive datagram: %m");
1020 goto finish;
1021 }
1022
1023 /* The final zero-length datagram carries the file descriptor and tells us
1024 * that we're done. */
1025 if (n == 0) {
1026 struct cmsghdr *found;
1027
1028 free(iovec.iov_base);
1029
1030 found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
1031 if (!found) {
1032 cmsg_close_all(&mh);
1033 r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
1034 "Coredump file descriptor missing.");
1035 goto finish;
1036 }
1037
1038 assert(input_fd < 0);
1039 input_fd = *(int*) CMSG_DATA(found);
1040 break;
1041 } else
1042 cmsg_close_all(&mh);
1043
1044 /* Add trailing NUL byte, in case these are strings */
1045 ((char*) iovec.iov_base)[n] = 0;
1046 iovec.iov_len = (size_t) n;
1047
1048 r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len);
1049 if (r < 0)
1050 goto finish;
1051 }
1052
1053 /* Make sure we got all data we really need */
1054 assert(input_fd >= 0);
1055
1056 r = save_context(&context, &iovw);
1057 if (r < 0)
1058 goto finish;
1059
1060 /* Make sure we received at least all fields we need. */
1061 for (int i = 0; i < _META_MANDATORY_MAX; i++)
1062 if (!context.meta[i]) {
1063 r = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1064 "A mandatory argument (%i) has not been sent, aborting.",
1065 i);
1066 goto finish;
1067 }
1068
1069 r = submit_coredump(&context, &iovw, input_fd);
1070
1071 finish:
1072 iovw_free_contents(&iovw, true);
1073 return r;
1074 }
1075
1076 static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
1077 _cleanup_close_ int fd = -1;
1078 int r;
1079
1080 assert(iovw);
1081 assert(input_fd >= 0);
1082
1083 fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
1084 if (fd < 0)
1085 return log_error_errno(errno, "Failed to create coredump socket: %m");
1086
1087 r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/coredump");
1088 if (r < 0)
1089 return log_error_errno(r, "Failed to connect to coredump service: %m");
1090
1091 for (size_t i = 0; i < iovw->count; i++) {
1092 struct msghdr mh = {
1093 .msg_iov = iovw->iovec + i,
1094 .msg_iovlen = 1,
1095 };
1096 struct iovec copy[2];
1097
1098 for (;;) {
1099 if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0)
1100 break;
1101
1102 if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) {
1103 /* This field didn't fit? That's a pity. Given that this is
1104 * just metadata, let's truncate the field at half, and try
1105 * again. We append three dots, in order to show that this is
1106 * truncated. */
1107
1108 if (mh.msg_iov != copy) {
1109 /* We don't want to modify the caller's iovec, hence
1110 * let's create our own array, consisting of two new
1111 * iovecs, where the first is a (truncated) copy of
1112 * what we want to send, and the second one contains
1113 * the trailing dots. */
1114 copy[0] = iovw->iovec[i];
1115 copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3);
1116
1117 mh.msg_iov = copy;
1118 mh.msg_iovlen = 2;
1119 }
1120
1121 copy[0].iov_len /= 2; /* halve it, and try again */
1122 continue;
1123 }
1124
1125 return log_error_errno(errno, "Failed to send coredump datagram: %m");
1126 }
1127 }
1128
1129 r = send_one_fd(fd, input_fd, 0);
1130 if (r < 0)
1131 return log_error_errno(r, "Failed to send coredump fd: %m");
1132
1133 return 0;
1134 }
1135
1136 static int gather_pid_metadata_from_argv(
1137 struct iovec_wrapper *iovw,
1138 Context *context,
1139 int argc, char **argv) {
1140
1141 _cleanup_free_ char *free_timestamp = NULL;
1142 int r, signo;
1143 char *t;
1144
1145 /* We gather all metadata that were passed via argv[] into an array of iovecs that
1146 * we'll forward to the socket unit */
1147
1148 if (argc < _META_ARGV_MAX)
1149 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1150 "Not enough arguments passed by the kernel (%i, expected %i).",
1151 argc, _META_ARGV_MAX);
1152
1153 for (int i = 0; i < _META_ARGV_MAX; i++) {
1154
1155 t = argv[i];
1156
1157 switch (i) {
1158
1159 case META_ARGV_TIMESTAMP:
1160 /* The journal fields contain the timestamp padded with six
1161 * zeroes, so that the kernel-supplied 1s granularity timestamps
1162 * becomes 1µs granularity, i.e. the granularity systemd usually
1163 * operates in. */
1164 t = free_timestamp = strjoin(argv[i], "000000");
1165 if (!t)
1166 return log_oom();
1167 break;
1168
1169 case META_ARGV_SIGNAL:
1170 /* For signal, record its pretty name too */
1171 if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo))
1172 (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG",
1173 signal_to_string(signo));
1174 break;
1175
1176 default:
1177 break;
1178 }
1179
1180 r = iovw_put_string_field(iovw, meta_field_names[i], t);
1181 if (r < 0)
1182 return r;
1183 }
1184
1185 /* Cache some of the process metadata we collected so far and that we'll need to
1186 * access soon */
1187 return save_context(context, iovw);
1188 }
1189
1190 static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
1191 uid_t owner_uid;
1192 pid_t pid;
1193 char *t;
1194 const char *p;
1195 int r;
1196
1197 /* Note that if we fail on oom later on, we do not roll-back changes to the iovec
1198 * structure. (It remains valid, with the first iovec fields initialized.) */
1199
1200 pid = context->pid;
1201
1202 /* The following is mandatory */
1203 r = get_process_comm(pid, &t);
1204 if (r < 0)
1205 return log_error_errno(r, "Failed to get COMM: %m");
1206
1207 r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t);
1208 if (r < 0)
1209 return r;
1210
1211 /* The following are optional, but we use them if present. */
1212 r = get_process_exe(pid, &t);
1213 if (r >= 0)
1214 r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t);
1215 if (r < 0)
1216 log_warning_errno(r, "Failed to get EXE, ignoring: %m");
1217
1218 if (cg_pid_get_unit(pid, &t) >= 0)
1219 (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t);
1220
1221 if (cg_pid_get_user_unit(pid, &t) >= 0)
1222 (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t);
1223
1224 if (sd_pid_get_session(pid, &t) >= 0)
1225 (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t);
1226
1227 if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
1228 r = asprintf(&t, UID_FMT, owner_uid);
1229 if (r > 0)
1230 (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t);
1231 }
1232
1233 if (sd_pid_get_slice(pid, &t) >= 0)
1234 (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t);
1235
1236 if (get_process_cmdline(pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, &t) >= 0)
1237 (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t);
1238
1239 if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0)
1240 (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t);
1241
1242 if (compose_open_fds(pid, &t) >= 0)
1243 (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t);
1244
1245 p = procfs_file_alloca(pid, "status");
1246 if (read_full_virtual_file(p, &t, NULL) >= 0)
1247 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t);
1248
1249 p = procfs_file_alloca(pid, "maps");
1250 if (read_full_virtual_file(p, &t, NULL) >= 0)
1251 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t);
1252
1253 p = procfs_file_alloca(pid, "limits");
1254 if (read_full_virtual_file(p, &t, NULL) >= 0)
1255 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t);
1256
1257 p = procfs_file_alloca(pid, "cgroup");
1258 if (read_full_virtual_file(p, &t, NULL) >=0)
1259 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t);
1260
1261 p = procfs_file_alloca(pid, "mountinfo");
1262 if (read_full_virtual_file(p, &t, NULL) >=0)
1263 (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t);
1264
1265 if (get_process_cwd(pid, &t) >= 0)
1266 (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t);
1267
1268 if (get_process_root(pid, &t) >= 0) {
1269 bool proc_self_root_is_slash;
1270
1271 proc_self_root_is_slash = strcmp(t, "/") == 0;
1272
1273 (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t);
1274
1275 /* If the process' root is "/", then there is a chance it has
1276 * mounted own root and hence being containerized. */
1277 if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0)
1278 (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t);
1279 }
1280
1281 if (get_process_environ(pid, &t) >= 0)
1282 (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t);
1283
1284 /* we successfully acquired all metadata */
1285 return save_context(context, iovw);
1286 }
1287
1288 static int process_kernel(int argc, char* argv[]) {
1289 Context context = {};
1290 struct iovec_wrapper *iovw;
1291 int r;
1292
1293 /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds
1294 * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to
1295 * /dev/null. */
1296 r = rearrange_stdio(STDIN_FILENO, -1, -1);
1297 if (r < 0)
1298 return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m");
1299
1300 log_debug("Processing coredump received from the kernel...");
1301
1302 iovw = iovw_new();
1303 if (!iovw)
1304 return log_oom();
1305
1306 (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR);
1307 (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
1308
1309 /* Collect all process metadata passed by the kernel through argv[] */
1310 r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1);
1311 if (r < 0)
1312 goto finish;
1313
1314 /* Collect the rest of the process metadata retrieved from the runtime */
1315 r = gather_pid_metadata(iovw, &context);
1316 if (r < 0)
1317 goto finish;
1318
1319 if (!context.is_journald) {
1320 /* OK, now we know it's not the journal, hence we can make use of it now. */
1321 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1322 log_open();
1323 }
1324
1325 /* If this is PID 1 disable coredump collection, we'll unlikely be able to process
1326 * it later on.
1327 *
1328 * FIXME: maybe we should disable coredumps generation from the beginning and
1329 * re-enable it only when we know it's either safe (ie we're not running OOM) or
1330 * it's not pid1 ? */
1331 if (context.is_pid1) {
1332 log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
1333 disable_coredumps();
1334 }
1335
1336 if (context.is_journald || context.is_pid1)
1337 r = submit_coredump(&context, iovw, STDIN_FILENO);
1338 else
1339 r = send_iovec(iovw, STDIN_FILENO);
1340
1341 finish:
1342 iovw = iovw_free_free(iovw);
1343 return r;
1344 }
1345
1346 static int process_backtrace(int argc, char *argv[]) {
1347 Context context = {};
1348 struct iovec_wrapper *iovw;
1349 char *message;
1350 int r;
1351 _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO);
1352
1353 log_debug("Processing backtrace on stdin...");
1354
1355 iovw = iovw_new();
1356 if (!iovw)
1357 return log_oom();
1358
1359 (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR);
1360 (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
1361
1362 /* Collect all process metadata from argv[] by making sure to skip the
1363 * '--backtrace' option */
1364 r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2);
1365 if (r < 0)
1366 goto finish;
1367
1368 /* Collect the rest of the process metadata retrieved from the runtime */
1369 r = gather_pid_metadata(iovw, &context);
1370 if (r < 0)
1371 goto finish;
1372
1373 for (;;) {
1374 r = journal_importer_process_data(&importer);
1375 if (r < 0) {
1376 log_error_errno(r, "Failed to parse journal entry on stdin: %m");
1377 goto finish;
1378 }
1379 if (r == 1 || /* complete entry */
1380 journal_importer_eof(&importer)) /* end of data */
1381 break;
1382 }
1383
1384 if (journal_importer_eof(&importer)) {
1385 log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
1386
1387 message = strjoina("Process ", context.meta[META_ARGV_PID],
1388 " (", context.meta[META_COMM], ")"
1389 " of user ", context.meta[META_ARGV_UID],
1390 " failed with ", context.meta[META_ARGV_SIGNAL]);
1391
1392 r = iovw_put_string_field(iovw, "MESSAGE=", message);
1393 if (r < 0)
1394 return r;
1395 } else {
1396 /* The imported iovecs are not supposed to be freed by us so let's store
1397 * them at the end of the array so we can skip them while freeing the
1398 * rest. */
1399 for (size_t i = 0; i < importer.iovw.count; i++) {
1400 struct iovec *iovec = importer.iovw.iovec + i;
1401
1402 iovw_put(iovw, iovec->iov_base, iovec->iov_len);
1403 }
1404 }
1405
1406 r = sd_journal_sendv(iovw->iovec, iovw->count);
1407 if (r < 0)
1408 log_error_errno(r, "Failed to log backtrace: %m");
1409
1410 finish:
1411 iovw->count -= importer.iovw.count;
1412 iovw = iovw_free_free(iovw);
1413 return r;
1414 }
1415
1416 static int run(int argc, char *argv[]) {
1417 int r;
1418
1419 /* First, log to a safe place, since we don't know what crashed and it might
1420 * be journald which we'd rather not log to then. */
1421
1422 log_set_target(LOG_TARGET_KMSG);
1423 log_open();
1424
1425 /* Make sure we never enter a loop */
1426 (void) prctl(PR_SET_DUMPABLE, 0);
1427
1428 /* Ignore all parse errors */
1429 (void) parse_config();
1430
1431 log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage));
1432 log_debug("Selected compression %s.", yes_no(arg_compress));
1433
1434 r = sd_listen_fds(false);
1435 if (r < 0)
1436 return log_error_errno(r, "Failed to determine the number of file descriptors: %m");
1437
1438 /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
1439 * are invoked from the kernel as coredump handler. */
1440 if (r == 0) {
1441 if (streq_ptr(argv[1], "--backtrace"))
1442 return process_backtrace(argc, argv);
1443 else
1444 return process_kernel(argc, argv);
1445 } else if (r == 1)
1446 return process_socket(SD_LISTEN_FDS_START);
1447
1448 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1449 "Received unexpected number of file descriptors.");
1450 }
1451
1452 DEFINE_MAIN_FUNCTION(run);