]>
Commit | Line | Data |
---|---|---|
1 | /*** | |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2012 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
20 | #include <errno.h> | |
21 | #include <stdio.h> | |
22 | #include <sys/prctl.h> | |
23 | #include <sys/xattr.h> | |
24 | #include <unistd.h> | |
25 | ||
26 | #ifdef HAVE_ELFUTILS | |
27 | #include <dwarf.h> | |
28 | #include <elfutils/libdwfl.h> | |
29 | #endif | |
30 | ||
31 | #include "sd-daemon.h" | |
32 | #include "sd-journal.h" | |
33 | #include "sd-login.h" | |
34 | #include "sd-messages.h" | |
35 | ||
36 | #include "acl-util.h" | |
37 | #include "alloc-util.h" | |
38 | #include "capability-util.h" | |
39 | #include "cgroup-util.h" | |
40 | #include "compress.h" | |
41 | #include "conf-parser.h" | |
42 | #include "copy.h" | |
43 | #include "coredump-vacuum.h" | |
44 | #include "dirent-util.h" | |
45 | #include "escape.h" | |
46 | #include "fd-util.h" | |
47 | #include "fileio.h" | |
48 | #include "fs-util.h" | |
49 | #include "io-util.h" | |
50 | #include "journal-importer.h" | |
51 | #include "log.h" | |
52 | #include "macro.h" | |
53 | #include "missing.h" | |
54 | #include "mkdir.h" | |
55 | #include "parse-util.h" | |
56 | #include "process-util.h" | |
57 | #include "signal-util.h" | |
58 | #include "socket-util.h" | |
59 | #include "special.h" | |
60 | #include "stacktrace.h" | |
61 | #include "string-table.h" | |
62 | #include "string-util.h" | |
63 | #include "strv.h" | |
64 | #include "user-util.h" | |
65 | #include "util.h" | |
66 | ||
67 | /* The maximum size up to which we process coredumps */ | |
68 | #define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU)) | |
69 | ||
70 | /* The maximum size up to which we leave the coredump around on disk */ | |
71 | #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX | |
72 | ||
73 | /* The maximum size up to which we store the coredump in the journal */ | |
74 | #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU)) | |
75 | ||
76 | /* Make sure to not make this larger than the maximum journal entry | |
77 | * size. See DATA_SIZE_MAX in journald-native.c. */ | |
78 | assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX); | |
79 | ||
enum {
        /* Indexes into the array of special fields describing the crashed process. The same
         * indexes are used for naming coredump files, for attaching xattrs, and for argv[].
         *
         * Our pattern for the kernel.core_pattern sysctl (see man:sysctl(8)) makes the kernel
         * pass all fields up to and including CONTEXT_RLIMIT as individual arguments in argv[].
         * "comm" is more complicated: the kernel passes it as one or more arguments starting at
         * index CONTEXT_COMM, i.e. the full "comm" is found at index CONTEXT_COMM only if it
         * contains no spaces (the common case). Since that mapping cannot be reversed we prefer
         * reading "comm" from /proc, and use argv[CONTEXT_COMM...] only as fallback.
         *
         * Ownership in the internal context[] array: entries before CONTEXT_COMM point into
         * argv[] and must not be freed. Entries at CONTEXT_COMM and above are allocated by us and
         * have to be freed at the end.
         */
        CONTEXT_PID,
        CONTEXT_UID,
        CONTEXT_GID,
        CONTEXT_SIGNAL,
        CONTEXT_TIMESTAMP,
        CONTEXT_RLIMIT,
        CONTEXT_COMM,
        CONTEXT_EXE,
        CONTEXT_UNIT,
        _CONTEXT_MAX
};
108 | ||
/* Selects where the coredump payload is kept; configured through the "Storage" setting. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,          /* "none" */
        COREDUMP_STORAGE_EXTERNAL,      /* "external": keep the core as a file on disk */
        COREDUMP_STORAGE_JOURNAL,       /* "journal": attach the core to the journal entry */
        _COREDUMP_STORAGE_MAX,          /* number of valid values, used to size lookup tables */
        _COREDUMP_STORAGE_INVALID = -1
} CoredumpStorage;
116 | ||
117 | static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = { | |
118 | [COREDUMP_STORAGE_NONE] = "none", | |
119 | [COREDUMP_STORAGE_EXTERNAL] = "external", | |
120 | [COREDUMP_STORAGE_JOURNAL] = "journal", | |
121 | }; | |
122 | ||
123 | DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage); | |
124 | static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting"); | |
125 | ||
126 | static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL; | |
127 | static bool arg_compress = true; | |
128 | static uint64_t arg_process_size_max = PROCESS_SIZE_MAX; | |
129 | static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX; | |
130 | static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX; | |
131 | static uint64_t arg_keep_free = (uint64_t) -1; | |
132 | static uint64_t arg_max_use = (uint64_t) -1; | |
133 | ||
134 | static int parse_config(void) { | |
135 | static const ConfigTableItem items[] = { | |
136 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, | |
137 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
138 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, | |
139 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64, 0, &arg_external_size_max }, | |
140 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, | |
141 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, | |
142 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
143 | {} | |
144 | }; | |
145 | ||
146 | return config_parse_many_nulstr(PKGSYSCONFDIR "/coredump.conf", | |
147 | CONF_PATHS_NULSTR("systemd/coredump.conf.d"), | |
148 | "Coredump\0", | |
149 | config_item_table_lookup, items, | |
150 | false, NULL); | |
151 | } | |
152 | ||
153 | static inline uint64_t storage_size_max(void) { | |
154 | return arg_storage == COREDUMP_STORAGE_EXTERNAL ? arg_external_size_max : arg_journal_size_max; | |
155 | } | |
156 | ||
/* Adds an ACL entry to the file referenced by fd that grants read access to the user uid, so that
 * regular users may read (but neither write nor delete) their own coredumps. Does nothing for
 * system users (uid <= SYSTEM_UID_MAX) or when built without ACL support. Returns 0 on success, a
 * negative errno-style value on failure. */
static int fix_acl(int fd, uid_t uid) {

#ifdef HAVE_ACL
        _cleanup_(acl_freep) acl_t acl = NULL;
        acl_permset_t perms;
        acl_entry_t user_entry;
        int r;

        assert(fd >= 0);

        /* No extra entry for system users */
        if (uid <= SYSTEM_UID_MAX)
                return 0;

        acl = acl_get_fd(fd);
        if (!acl)
                return log_error_errno(errno, "Failed to get ACL: %m");

        /* Append a new "user:<uid>" entry ... */
        if (acl_create_entry(&acl, &user_entry) < 0 ||
            acl_set_tag_type(user_entry, ACL_USER) < 0 ||
            acl_set_qualifier(user_entry, &uid) < 0)
                return log_error_errno(errno, "Failed to patch ACL: %m");

        /* ... that carries the read permission only */
        if (acl_get_permset(user_entry, &perms) < 0 ||
            acl_add_perm(perms, ACL_READ) < 0)
                return log_warning_errno(errno, "Failed to patch ACL: %m");

        r = calc_acl_mask_if_needed(&acl);
        if (r < 0)
                return log_warning_errno(r, "Failed to patch ACL: %m");

        if (acl_set_fd(fd, acl) < 0)
                return log_error_errno(errno, "Failed to apply ACL: %m");
#endif

        return 0;
}
196 | ||
197 | static int fix_xattr(int fd, const char *context[_CONTEXT_MAX]) { | |
198 | ||
199 | static const char * const xattrs[_CONTEXT_MAX] = { | |
200 | [CONTEXT_PID] = "user.coredump.pid", | |
201 | [CONTEXT_UID] = "user.coredump.uid", | |
202 | [CONTEXT_GID] = "user.coredump.gid", | |
203 | [CONTEXT_SIGNAL] = "user.coredump.signal", | |
204 | [CONTEXT_TIMESTAMP] = "user.coredump.timestamp", | |
205 | [CONTEXT_RLIMIT] = "user.coredump.rlimit", | |
206 | [CONTEXT_COMM] = "user.coredump.comm", | |
207 | [CONTEXT_EXE] = "user.coredump.exe", | |
208 | }; | |
209 | ||
210 | int r = 0; | |
211 | unsigned i; | |
212 | ||
213 | assert(fd >= 0); | |
214 | ||
215 | /* Attach some metadata to coredumps via extended | |
216 | * attributes. Just because we can. */ | |
217 | ||
218 | for (i = 0; i < _CONTEXT_MAX; i++) { | |
219 | int k; | |
220 | ||
221 | if (isempty(context[i]) || !xattrs[i]) | |
222 | continue; | |
223 | ||
224 | k = fsetxattr(fd, xattrs[i], context[i], strlen(context[i]), XATTR_CREATE); | |
225 | if (k < 0 && r == 0) | |
226 | r = -errno; | |
227 | } | |
228 | ||
229 | return r; | |
230 | } | |
231 | ||
/* Escapes ".", "/" and " " in s via xescape(), for use as a component of a coredump file name */
#define filename_escape(s) xescape((s), "./ ")

/* Returns s, or a placeholder suitable for log messages if the temporary file has no name */
static inline const char *coredump_tmpfile_name(const char *s) {
        if (!s)
                return "(unnamed temporary file)";

        return s;
}
237 | ||
238 | static int fix_permissions( | |
239 | int fd, | |
240 | const char *filename, | |
241 | const char *target, | |
242 | const char *context[_CONTEXT_MAX], | |
243 | uid_t uid) { | |
244 | ||
245 | int r; | |
246 | ||
247 | assert(fd >= 0); | |
248 | assert(target); | |
249 | assert(context); | |
250 | ||
251 | /* Ignore errors on these */ | |
252 | (void) fchmod(fd, 0640); | |
253 | (void) fix_acl(fd, uid); | |
254 | (void) fix_xattr(fd, context); | |
255 | ||
256 | if (fsync(fd) < 0) | |
257 | return log_error_errno(errno, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename)); | |
258 | ||
259 | r = link_tmpfile(fd, filename, target); | |
260 | if (r < 0) | |
261 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
262 | ||
263 | return 0; | |
264 | } | |
265 | ||
266 | static int maybe_remove_external_coredump(const char *filename, uint64_t size) { | |
267 | ||
268 | /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ | |
269 | ||
270 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && | |
271 | size <= arg_external_size_max) | |
272 | return 0; | |
273 | ||
274 | if (!filename) | |
275 | return 1; | |
276 | ||
277 | if (unlink(filename) < 0 && errno != ENOENT) | |
278 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
279 | ||
280 | return 1; | |
281 | } | |
282 | ||
283 | static int make_filename(const char *context[_CONTEXT_MAX], char **ret) { | |
284 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; | |
285 | sd_id128_t boot = {}; | |
286 | int r; | |
287 | ||
288 | assert(context); | |
289 | ||
290 | c = filename_escape(context[CONTEXT_COMM]); | |
291 | if (!c) | |
292 | return -ENOMEM; | |
293 | ||
294 | u = filename_escape(context[CONTEXT_UID]); | |
295 | if (!u) | |
296 | return -ENOMEM; | |
297 | ||
298 | r = sd_id128_get_boot(&boot); | |
299 | if (r < 0) | |
300 | return r; | |
301 | ||
302 | p = filename_escape(context[CONTEXT_PID]); | |
303 | if (!p) | |
304 | return -ENOMEM; | |
305 | ||
306 | t = filename_escape(context[CONTEXT_TIMESTAMP]); | |
307 | if (!t) | |
308 | return -ENOMEM; | |
309 | ||
310 | if (asprintf(ret, | |
311 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s000000", | |
312 | c, | |
313 | u, | |
314 | SD_ID128_FORMAT_VAL(boot), | |
315 | p, | |
316 | t) < 0) | |
317 | return -ENOMEM; | |
318 | ||
319 | return 0; | |
320 | } | |
321 | ||
322 | static int save_external_coredump( | |
323 | const char *context[_CONTEXT_MAX], | |
324 | int input_fd, | |
325 | char **ret_filename, | |
326 | int *ret_node_fd, | |
327 | int *ret_data_fd, | |
328 | uint64_t *ret_size, | |
329 | bool *ret_truncated) { | |
330 | ||
331 | _cleanup_free_ char *fn = NULL, *tmp = NULL; | |
332 | _cleanup_close_ int fd = -1; | |
333 | uint64_t rlimit, max_size; | |
334 | struct stat st; | |
335 | uid_t uid; | |
336 | int r; | |
337 | ||
338 | assert(context); | |
339 | assert(ret_filename); | |
340 | assert(ret_node_fd); | |
341 | assert(ret_data_fd); | |
342 | assert(ret_size); | |
343 | ||
344 | r = parse_uid(context[CONTEXT_UID], &uid); | |
345 | if (r < 0) | |
346 | return log_error_errno(r, "Failed to parse UID: %m"); | |
347 | ||
348 | r = safe_atou64(context[CONTEXT_RLIMIT], &rlimit); | |
349 | if (r < 0) | |
350 | return log_error_errno(r, "Failed to parse resource limit: %s", context[CONTEXT_RLIMIT]); | |
351 | if (rlimit < page_size()) { | |
352 | /* Is coredumping disabled? Then don't bother saving/processing the coredump. | |
353 | * Anything below PAGE_SIZE cannot give a readable coredump (the kernel uses | |
354 | * ELF_EXEC_PAGESIZE which is not easily accessible, but is usually the same as PAGE_SIZE. */ | |
355 | log_info("Resource limits disable core dumping for process %s (%s).", | |
356 | context[CONTEXT_PID], context[CONTEXT_COMM]); | |
357 | return -EBADSLT; | |
358 | } | |
359 | ||
360 | /* Never store more than the process configured, or than we actually shall keep or process */ | |
361 | max_size = MIN(rlimit, MAX(arg_process_size_max, storage_size_max())); | |
362 | ||
363 | r = make_filename(context, &fn); | |
364 | if (r < 0) | |
365 | return log_error_errno(r, "Failed to determine coredump file name: %m"); | |
366 | ||
367 | mkdir_p_label("/var/lib/systemd/coredump", 0755); | |
368 | ||
369 | fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp); | |
370 | if (fd < 0) | |
371 | return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); | |
372 | ||
373 | r = copy_bytes(input_fd, fd, max_size, 0); | |
374 | if (r < 0) { | |
375 | log_error_errno(r, "Cannot store coredump of %s (%s): %m", context[CONTEXT_PID], context[CONTEXT_COMM]); | |
376 | goto fail; | |
377 | } | |
378 | *ret_truncated = r == 1; | |
379 | if (*ret_truncated) | |
380 | log_struct(LOG_INFO, | |
381 | LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size), | |
382 | "SIZE_LIMIT=%zu", max_size, | |
383 | "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR, | |
384 | NULL); | |
385 | ||
386 | if (fstat(fd, &st) < 0) { | |
387 | log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); | |
388 | goto fail; | |
389 | } | |
390 | ||
391 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) { | |
392 | log_error_errno(errno, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp)); | |
393 | goto fail; | |
394 | } | |
395 | ||
396 | #if defined(HAVE_XZ) || defined(HAVE_LZ4) | |
397 | /* If we will remove the coredump anyway, do not compress. */ | |
398 | if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) { | |
399 | ||
400 | _cleanup_free_ char *fn_compressed = NULL, *tmp_compressed = NULL; | |
401 | _cleanup_close_ int fd_compressed = -1; | |
402 | ||
403 | fn_compressed = strappend(fn, COMPRESSED_EXT); | |
404 | if (!fn_compressed) { | |
405 | log_oom(); | |
406 | goto uncompressed; | |
407 | } | |
408 | ||
409 | fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed); | |
410 | if (fd_compressed < 0) { | |
411 | log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed); | |
412 | goto uncompressed; | |
413 | } | |
414 | ||
415 | r = compress_stream(fd, fd_compressed, -1); | |
416 | if (r < 0) { | |
417 | log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
418 | goto fail_compressed; | |
419 | } | |
420 | ||
421 | r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid); | |
422 | if (r < 0) | |
423 | goto fail_compressed; | |
424 | ||
425 | /* OK, this worked, we can get rid of the uncompressed version now */ | |
426 | if (tmp) | |
427 | unlink_noerrno(tmp); | |
428 | ||
429 | *ret_filename = fn_compressed; /* compressed */ | |
430 | *ret_node_fd = fd_compressed; /* compressed */ | |
431 | *ret_data_fd = fd; /* uncompressed */ | |
432 | *ret_size = (uint64_t) st.st_size; /* uncompressed */ | |
433 | ||
434 | fn_compressed = NULL; | |
435 | fd = fd_compressed = -1; | |
436 | ||
437 | return 0; | |
438 | ||
439 | fail_compressed: | |
440 | if (tmp_compressed) | |
441 | (void) unlink(tmp_compressed); | |
442 | } | |
443 | ||
444 | uncompressed: | |
445 | #endif | |
446 | ||
447 | r = fix_permissions(fd, tmp, fn, context, uid); | |
448 | if (r < 0) | |
449 | goto fail; | |
450 | ||
451 | *ret_filename = fn; | |
452 | *ret_data_fd = fd; | |
453 | *ret_node_fd = -1; | |
454 | *ret_size = (uint64_t) st.st_size; | |
455 | ||
456 | fn = NULL; | |
457 | fd = -1; | |
458 | ||
459 | return 0; | |
460 | ||
461 | fail: | |
462 | if (tmp) | |
463 | (void) unlink(tmp); | |
464 | return r; | |
465 | } | |
466 | ||
467 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
468 | _cleanup_free_ char *field = NULL; | |
469 | ssize_t n; | |
470 | ||
471 | assert(fd >= 0); | |
472 | assert(ret); | |
473 | assert(ret_size); | |
474 | ||
475 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) | |
476 | return log_warning_errno(errno, "Failed to seek: %m"); | |
477 | ||
478 | field = malloc(9 + size); | |
479 | if (!field) { | |
480 | log_warning("Failed to allocate memory for coredump, coredump will not be stored."); | |
481 | return -ENOMEM; | |
482 | } | |
483 | ||
484 | memcpy(field, "COREDUMP=", 9); | |
485 | ||
486 | n = read(fd, field + 9, size); | |
487 | if (n < 0) | |
488 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
489 | if ((size_t) n < size) { | |
490 | log_error("Core data too short."); | |
491 | return -EIO; | |
492 | } | |
493 | ||
494 | *ret = field; | |
495 | *ret_size = size + 9; | |
496 | ||
497 | field = NULL; | |
498 | ||
499 | return 0; | |
500 | } | |
501 | ||
502 | /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines: | |
503 | * 0:/dev/pts/23 | |
504 | * pos: 0 | |
505 | * flags: 0100002 | |
506 | * | |
507 | * 1:/dev/pts/23 | |
508 | * pos: 0 | |
509 | * flags: 0100002 | |
510 | * | |
511 | * 2:/dev/pts/23 | |
512 | * pos: 0 | |
513 | * flags: 0100002 | |
514 | * EOF | |
515 | */ | |
516 | static int compose_open_fds(pid_t pid, char **open_fds) { | |
517 | _cleanup_closedir_ DIR *proc_fd_dir = NULL; | |
518 | _cleanup_close_ int proc_fdinfo_fd = -1; | |
519 | _cleanup_free_ char *buffer = NULL; | |
520 | _cleanup_fclose_ FILE *stream = NULL; | |
521 | const char *fddelim = "", *path; | |
522 | struct dirent *dent = NULL; | |
523 | size_t size = 0; | |
524 | int r = 0; | |
525 | ||
526 | assert(pid >= 0); | |
527 | assert(open_fds != NULL); | |
528 | ||
529 | path = procfs_file_alloca(pid, "fd"); | |
530 | proc_fd_dir = opendir(path); | |
531 | if (!proc_fd_dir) | |
532 | return -errno; | |
533 | ||
534 | proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH); | |
535 | if (proc_fdinfo_fd < 0) | |
536 | return -errno; | |
537 | ||
538 | stream = open_memstream(&buffer, &size); | |
539 | if (!stream) | |
540 | return -ENOMEM; | |
541 | ||
542 | FOREACH_DIRENT(dent, proc_fd_dir, return -errno) { | |
543 | _cleanup_fclose_ FILE *fdinfo = NULL; | |
544 | _cleanup_free_ char *fdname = NULL; | |
545 | char line[LINE_MAX]; | |
546 | int fd; | |
547 | ||
548 | r = readlinkat_malloc(dirfd(proc_fd_dir), dent->d_name, &fdname); | |
549 | if (r < 0) | |
550 | return r; | |
551 | ||
552 | fprintf(stream, "%s%s:%s\n", fddelim, dent->d_name, fdname); | |
553 | fddelim = "\n"; | |
554 | ||
555 | /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */ | |
556 | fd = openat(proc_fdinfo_fd, dent->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY); | |
557 | if (fd < 0) | |
558 | continue; | |
559 | ||
560 | fdinfo = fdopen(fd, "re"); | |
561 | if (fdinfo == NULL) { | |
562 | close(fd); | |
563 | continue; | |
564 | } | |
565 | ||
566 | FOREACH_LINE(line, fdinfo, break) { | |
567 | fputs_unlocked(line, stream); | |
568 | if (!endswith(line, "\n")) | |
569 | fputc('\n', stream); | |
570 | } | |
571 | } | |
572 | ||
573 | errno = 0; | |
574 | stream = safe_fclose(stream); | |
575 | ||
576 | if (errno > 0) | |
577 | return -errno; | |
578 | ||
579 | *open_fds = buffer; | |
580 | buffer = NULL; | |
581 | ||
582 | return 0; | |
583 | } | |
584 | ||
585 | static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) { | |
586 | const char *p; | |
587 | struct stat stbuf; | |
588 | _cleanup_close_ int proc_ns_dir_fd; | |
589 | ||
590 | p = procfs_file_alloca(pid, "ns"); | |
591 | ||
592 | proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY); | |
593 | if (proc_ns_dir_fd < 0) | |
594 | return -errno; | |
595 | ||
596 | if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0) | |
597 | return -errno; | |
598 | ||
599 | *ns = stbuf.st_ino; | |
600 | return 0; | |
601 | } | |
602 | ||
/* Walks up the parent chain of pid and stores in *container_pid the first ancestor whose mount
 * namespace differs from the one of pid. Returns 0 on success, -ENOENT if PID 1 is reached while
 * still in the same mount namespace, or another negative errno on failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) {
        pid_t child = pid, parent = 0;
        ino_t own_mntns;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        for (;;) {
                ino_t parent_mntns;

                r = get_process_ppid(child, &parent);
                if (r < 0)
                        return r;

                r = get_process_ns(parent, "mnt", &parent_mntns);
                if (r < 0)
                        return r;

                /* First ancestor outside of our mount namespace: that's the one we look for */
                if (parent_mntns != own_mntns) {
                        *container_pid = parent;
                        return 0;
                }

                /* Arrived at the top of the process tree without leaving the namespace */
                if (parent == 1)
                        return -ENOENT;

                child = parent;
        }
}
635 | ||
/* Determines the command line of the process that acts as container parent of pid.
 *
 * Returns 1 if such a parent was found, in which case *cmdline is set to its command line.
 * Returns 0 if the process is not 'containerized', i.e. "/" and /proc/[pid]/root have the same
 * inode number; *cmdline is set to NULL then.
 * Returns a negative errno on failure.
 */
static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
        struct stat host_root, process_root;
        const char *process_root_path;
        pid_t leader;
        int r;

        /* Compare the inode of our own root directory with the one the process sees */
        if (stat("/", &host_root) < 0)
                return -errno;

        process_root_path = procfs_file_alloca(pid, "root");
        if (stat(process_root_path, &process_root) < 0)
                return -errno;

        if (process_root.st_ino == host_root.st_ino) {
                /* The process uses system root. */
                *cmdline = NULL;
                return 0;
        }

        r = get_mount_namespace_leader(pid, &leader);
        if (r < 0)
                return r;

        r = get_process_cmdline(leader, 0, false, cmdline);
        if (r < 0)
                return r;

        return 1;
}
671 | ||
672 | static int change_uid_gid(const char *context[]) { | |
673 | uid_t uid; | |
674 | gid_t gid; | |
675 | int r; | |
676 | ||
677 | r = parse_uid(context[CONTEXT_UID], &uid); | |
678 | if (r < 0) | |
679 | return r; | |
680 | ||
681 | if (uid <= SYSTEM_UID_MAX) { | |
682 | const char *user = "systemd-coredump"; | |
683 | ||
684 | r = get_user_creds(&user, &uid, &gid, NULL, NULL); | |
685 | if (r < 0) { | |
686 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
687 | uid = gid = 0; | |
688 | } | |
689 | } else { | |
690 | r = parse_gid(context[CONTEXT_GID], &gid); | |
691 | if (r < 0) | |
692 | return r; | |
693 | } | |
694 | ||
695 | return drop_privileges(uid, gid, 0); | |
696 | } | |
697 | ||
698 | static bool is_journald_crash(const char *context[_CONTEXT_MAX]) { | |
699 | assert(context); | |
700 | ||
701 | return streq_ptr(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE); | |
702 | } | |
703 | ||
704 | static bool is_pid1_crash(const char *context[_CONTEXT_MAX]) { | |
705 | assert(context); | |
706 | ||
707 | return streq_ptr(context[CONTEXT_UNIT], SPECIAL_INIT_SCOPE) || | |
708 | streq_ptr(context[CONTEXT_PID], "1"); | |
709 | } | |
710 | ||
711 | #define SUBMIT_COREDUMP_FIELDS 4 | |
712 | ||
713 | static int submit_coredump( | |
714 | const char *context[_CONTEXT_MAX], | |
715 | struct iovec *iovec, | |
716 | size_t n_iovec_allocated, | |
717 | size_t n_iovec, | |
718 | int input_fd) { | |
719 | ||
720 | _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; | |
721 | _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL; | |
722 | uint64_t coredump_size = UINT64_MAX; | |
723 | bool truncated = false, journald_crash; | |
724 | int r; | |
725 | ||
726 | assert(context); | |
727 | assert(iovec); | |
728 | assert(n_iovec_allocated >= n_iovec + SUBMIT_COREDUMP_FIELDS); | |
729 | assert(input_fd >= 0); | |
730 | ||
731 | journald_crash = is_journald_crash(context); | |
732 | ||
733 | /* Vacuum before we write anything again */ | |
734 | (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); | |
735 | ||
736 | /* Always stream the coredump to disk, if that's possible */ | |
737 | r = save_external_coredump(context, input_fd, | |
738 | &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated); | |
739 | if (r < 0) | |
740 | /* Skip whole core dumping part */ | |
741 | goto log; | |
742 | ||
743 | /* If we don't want to keep the coredump on disk, remove it now, as later on we will lack the privileges for | |
744 | * it. However, we keep the fd to it, so that we can still process it and log it. */ | |
745 | r = maybe_remove_external_coredump(filename, coredump_size); | |
746 | if (r < 0) | |
747 | return r; | |
748 | if (r == 0) { | |
749 | const char *coredump_filename; | |
750 | ||
751 | coredump_filename = strjoina("COREDUMP_FILENAME=", filename); | |
752 | IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename); | |
753 | } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) | |
754 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", | |
755 | coredump_size, arg_external_size_max); | |
756 | ||
757 | /* Vacuum again, but exclude the coredump we just created */ | |
758 | (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); | |
759 | ||
760 | /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the coredump | |
761 | * memory under the user's uid. This also ensures that the credentials journald will see are the ones of the | |
762 | * coredumping user, thus making sure the user gets access to the core dump. Let's also get rid of all | |
763 | * capabilities, if we run as root, we won't need them anymore. */ | |
764 | r = change_uid_gid(context); | |
765 | if (r < 0) | |
766 | return log_error_errno(r, "Failed to drop privileges: %m"); | |
767 | ||
768 | #ifdef HAVE_ELFUTILS | |
769 | /* Try to get a strack trace if we can */ | |
770 | if (coredump_size <= arg_process_size_max) { | |
771 | _cleanup_free_ char *stacktrace = NULL; | |
772 | ||
773 | r = coredump_make_stack_trace(coredump_fd, context[CONTEXT_EXE], &stacktrace); | |
774 | if (r >= 0) | |
775 | core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], | |
776 | " (", context[CONTEXT_COMM], ") of user ", | |
777 | context[CONTEXT_UID], " dumped core.", | |
778 | journald_crash ? "\nCoredump diverted to " : "", | |
779 | journald_crash ? filename : "", | |
780 | "\n\n", stacktrace); | |
781 | else if (r == -EINVAL) | |
782 | log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno())); | |
783 | else | |
784 | log_warning_errno(r, "Failed to generate stack trace: %m"); | |
785 | } else | |
786 | log_debug("Not generating stack trace: core size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", | |
787 | coredump_size, arg_process_size_max); | |
788 | ||
789 | if (!core_message) | |
790 | #endif | |
791 | log: | |
792 | core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], | |
793 | " (", context[CONTEXT_COMM], ") of user ", | |
794 | context[CONTEXT_UID], " dumped core.", | |
795 | journald_crash ? "\nCoredump diverted to " : NULL, | |
796 | journald_crash ? filename : NULL); | |
797 | if (!core_message) | |
798 | return log_oom(); | |
799 | ||
800 | if (journald_crash) { | |
801 | /* We cannot log to the journal, so just print the MESSAGE. | |
802 | * The target was set previously to something safe. */ | |
803 | log_dispatch(LOG_ERR, 0, core_message); | |
804 | return 0; | |
805 | } | |
806 | ||
807 | IOVEC_SET_STRING(iovec[n_iovec++], core_message); | |
808 | ||
809 | if (truncated) | |
810 | IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1"); | |
811 | ||
812 | /* Optionally store the entire coredump in the journal */ | |
813 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) { | |
814 | if (coredump_size <= arg_journal_size_max) { | |
815 | size_t sz = 0; | |
816 | ||
817 | /* Store the coredump itself in the journal */ | |
818 | ||
819 | r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); | |
820 | if (r >= 0) { | |
821 | iovec[n_iovec].iov_base = coredump_data; | |
822 | iovec[n_iovec].iov_len = sz; | |
823 | n_iovec++; | |
824 | } else | |
825 | log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); | |
826 | } else | |
827 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", | |
828 | coredump_size, arg_journal_size_max); | |
829 | } | |
830 | ||
831 | assert(n_iovec <= n_iovec_allocated); | |
832 | ||
833 | r = sd_journal_sendv(iovec, n_iovec); | |
834 | if (r < 0) | |
835 | return log_error_errno(r, "Failed to log coredump: %m"); | |
836 | ||
837 | return 0; | |
838 | } | |
839 | ||
840 | static void map_context_fields(const struct iovec *iovec, const char* context[]) { | |
841 | ||
842 | static const char * const context_field_names[] = { | |
843 | [CONTEXT_PID] = "COREDUMP_PID=", | |
844 | [CONTEXT_UID] = "COREDUMP_UID=", | |
845 | [CONTEXT_GID] = "COREDUMP_GID=", | |
846 | [CONTEXT_SIGNAL] = "COREDUMP_SIGNAL=", | |
847 | [CONTEXT_TIMESTAMP] = "COREDUMP_TIMESTAMP=", | |
848 | [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=", | |
849 | [CONTEXT_COMM] = "COREDUMP_COMM=", | |
850 | [CONTEXT_EXE] = "COREDUMP_EXE=", | |
851 | }; | |
852 | ||
853 | unsigned i; | |
854 | ||
855 | assert(iovec); | |
856 | assert(context); | |
857 | ||
858 | for (i = 0; i < ELEMENTSOF(context_field_names); i++) { | |
859 | size_t l; | |
860 | ||
861 | if (!context_field_names[i]) | |
862 | continue; | |
863 | ||
864 | l = strlen(context_field_names[i]); | |
865 | if (iovec->iov_len < l) | |
866 | continue; | |
867 | ||
868 | if (memcmp(iovec->iov_base, context_field_names[i], l) != 0) | |
869 | continue; | |
870 | ||
871 | /* Note that these strings are NUL terminated, because we made sure that a trailing NUL byte is in the | |
872 | * buffer, though not included in the iov_len count. (see below) */ | |
873 | context[i] = (char*) iovec->iov_base + l; | |
874 | break; | |
875 | } | |
876 | } | |
877 | ||
878 | static int process_socket(int fd) { | |
879 | _cleanup_close_ int coredump_fd = -1; | |
880 | struct iovec *iovec = NULL; | |
881 | size_t n_iovec = 0, n_allocated = 0, i, k; | |
882 | const char *context[_CONTEXT_MAX] = {}; | |
883 | int r; | |
884 | ||
885 | assert(fd >= 0); | |
886 | ||
887 | log_set_target(LOG_TARGET_AUTO); | |
888 | log_parse_environment(); | |
889 | log_open(); | |
890 | ||
891 | log_debug("Processing coredump received on stdin..."); | |
892 | ||
893 | for (;;) { | |
894 | union { | |
895 | struct cmsghdr cmsghdr; | |
896 | uint8_t buf[CMSG_SPACE(sizeof(int))]; | |
897 | } control = {}; | |
898 | struct msghdr mh = { | |
899 | .msg_control = &control, | |
900 | .msg_controllen = sizeof(control), | |
901 | .msg_iovlen = 1, | |
902 | }; | |
903 | ssize_t n; | |
904 | ssize_t l; | |
905 | ||
906 | if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + SUBMIT_COREDUMP_FIELDS)) { | |
907 | r = log_oom(); | |
908 | goto finish; | |
909 | } | |
910 | ||
911 | l = next_datagram_size_fd(fd); | |
912 | if (l < 0) { | |
913 | r = log_error_errno(l, "Failed to determine datagram size to read: %m"); | |
914 | goto finish; | |
915 | } | |
916 | ||
917 | assert(l >= 0); | |
918 | ||
919 | iovec[n_iovec].iov_len = l; | |
920 | iovec[n_iovec].iov_base = malloc(l + 1); | |
921 | if (!iovec[n_iovec].iov_base) { | |
922 | r = log_oom(); | |
923 | goto finish; | |
924 | } | |
925 | ||
926 | mh.msg_iov = iovec + n_iovec; | |
927 | ||
928 | n = recvmsg(fd, &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC); | |
929 | if (n < 0) { | |
930 | free(iovec[n_iovec].iov_base); | |
931 | r = log_error_errno(errno, "Failed to receive datagram: %m"); | |
932 | goto finish; | |
933 | } | |
934 | ||
935 | if (n == 0) { | |
936 | struct cmsghdr *cmsg, *found = NULL; | |
937 | /* The final zero-length datagram carries the file descriptor and tells us that we're done. */ | |
938 | ||
939 | free(iovec[n_iovec].iov_base); | |
940 | ||
941 | CMSG_FOREACH(cmsg, &mh) { | |
942 | if (cmsg->cmsg_level == SOL_SOCKET && | |
943 | cmsg->cmsg_type == SCM_RIGHTS && | |
944 | cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { | |
945 | assert(!found); | |
946 | found = cmsg; | |
947 | } | |
948 | } | |
949 | ||
950 | if (!found) { | |
951 | log_error("Coredump file descriptor missing."); | |
952 | r = -EBADMSG; | |
953 | goto finish; | |
954 | } | |
955 | ||
956 | assert(coredump_fd < 0); | |
957 | coredump_fd = *(int*) CMSG_DATA(found); | |
958 | break; | |
959 | } | |
960 | ||
961 | /* Add trailing NUL byte, in case these are strings */ | |
962 | ((char*) iovec[n_iovec].iov_base)[n] = 0; | |
963 | iovec[n_iovec].iov_len = (size_t) n; | |
964 | ||
965 | cmsg_close_all(&mh); | |
966 | map_context_fields(iovec + n_iovec, context); | |
967 | n_iovec++; | |
968 | } | |
969 | ||
970 | if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + SUBMIT_COREDUMP_FIELDS)) { | |
971 | r = log_oom(); | |
972 | goto finish; | |
973 | } | |
974 | ||
975 | /* Make sure we got all data we really need */ | |
976 | assert(context[CONTEXT_PID]); | |
977 | assert(context[CONTEXT_UID]); | |
978 | assert(context[CONTEXT_GID]); | |
979 | assert(context[CONTEXT_SIGNAL]); | |
980 | assert(context[CONTEXT_TIMESTAMP]); | |
981 | assert(context[CONTEXT_RLIMIT]); | |
982 | assert(context[CONTEXT_COMM]); | |
983 | assert(coredump_fd >= 0); | |
984 | ||
985 | /* Small quirk: the journal fields contain the timestamp padded with six zeroes, so that the kernel-supplied 1s | |
986 | * granularity timestamps becomes 1µs granularity, i.e. the granularity systemd usually operates in. Since we | |
987 | * are reconstructing the original kernel context, we chop this off again, here. */ | |
988 | k = strlen(context[CONTEXT_TIMESTAMP]); | |
989 | if (k > 6) | |
990 | context[CONTEXT_TIMESTAMP] = strndupa(context[CONTEXT_TIMESTAMP], k - 6); | |
991 | ||
992 | r = submit_coredump(context, iovec, n_allocated, n_iovec, coredump_fd); | |
993 | ||
994 | finish: | |
995 | for (i = 0; i < n_iovec; i++) | |
996 | free(iovec[i].iov_base); | |
997 | free(iovec); | |
998 | ||
999 | return r; | |
1000 | } | |
1001 | ||
1002 | static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) { | |
1003 | ||
1004 | static const union sockaddr_union sa = { | |
1005 | .un.sun_family = AF_UNIX, | |
1006 | .un.sun_path = "/run/systemd/coredump", | |
1007 | }; | |
1008 | _cleanup_close_ int fd = -1; | |
1009 | size_t i; | |
1010 | int r; | |
1011 | ||
1012 | assert(iovec || n_iovec <= 0); | |
1013 | assert(input_fd >= 0); | |
1014 | ||
1015 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
1016 | if (fd < 0) | |
1017 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1018 | ||
1019 | if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) | |
1020 | return log_error_errno(errno, "Failed to connect to coredump service: %m"); | |
1021 | ||
1022 | for (i = 0; i < n_iovec; i++) { | |
1023 | struct msghdr mh = { | |
1024 | .msg_iov = (struct iovec*) iovec + i, | |
1025 | .msg_iovlen = 1, | |
1026 | }; | |
1027 | struct iovec copy[2]; | |
1028 | ||
1029 | for (;;) { | |
1030 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1031 | break; | |
1032 | ||
1033 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
1034 | /* This field didn't fit? That's a pity. Given that this is just metadata, | |
1035 | * let's truncate the field at half, and try again. We append three dots, in | |
1036 | * order to show that this is truncated. */ | |
1037 | ||
1038 | if (mh.msg_iov != copy) { | |
1039 | /* We don't want to modify the caller's iovec, hence let's create our | |
1040 | * own array, consisting of two new iovecs, where the first is a | |
1041 | * (truncated) copy of what we want to send, and the second one | |
1042 | * contains the trailing dots. */ | |
1043 | copy[0] = iovec[i]; | |
1044 | copy[1] = (struct iovec) { | |
1045 | .iov_base = (char[]) { '.', '.', '.' }, | |
1046 | .iov_len = 3, | |
1047 | }; | |
1048 | ||
1049 | mh.msg_iov = copy; | |
1050 | mh.msg_iovlen = 2; | |
1051 | } | |
1052 | ||
1053 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1054 | continue; | |
1055 | } | |
1056 | ||
1057 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); | |
1058 | } | |
1059 | } | |
1060 | ||
1061 | r = send_one_fd(fd, input_fd, 0); | |
1062 | if (r < 0) | |
1063 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1064 | ||
1065 | return 0; | |
1066 | } | |
1067 | ||
/* Joins 'field' and 'value' with strappend() and, if that yields a string, stores it in the next free
 * iovec slot and advances *n_iovec. Returns the joined string, or NULL if strappend() failed (in which
 * case neither iovec nor *n_iovec is touched). */
static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char *field, const char *value) {
        char *joined = strappend(field, value);

        if (!joined)
                return NULL;

        IOVEC_SET_STRING(iovec[(*n_iovec)++], joined);
        return joined;
}
1076 | ||
/* Same as set_iovec_field(), but takes ownership of 'value': it is free()d before returning, whether or
 * not the field could be added. Returns what set_iovec_field() returned. */
static char* set_iovec_field_free(struct iovec iovec[27], size_t *n_iovec, const char *field, char *value) {
        char *result = set_iovec_field(iovec, n_iovec, field, value);

        free(value);
        return result;
}
1084 | ||
1085 | static int gather_pid_metadata( | |
1086 | char* context[_CONTEXT_MAX], | |
1087 | char **comm_fallback, | |
1088 | struct iovec *iovec, size_t *n_iovec) { | |
1089 | ||
1090 | /* We need 26 empty slots in iovec! | |
1091 | * | |
1092 | * Note that if we fail on oom later on, we do not roll-back changes to the iovec structure. (It remains valid, | |
1093 | * with the first n_iovec fields initialized.) */ | |
1094 | ||
1095 | uid_t owner_uid; | |
1096 | pid_t pid; | |
1097 | char *t; | |
1098 | const char *p; | |
1099 | int r, signo; | |
1100 | ||
1101 | r = parse_pid(context[CONTEXT_PID], &pid); | |
1102 | if (r < 0) | |
1103 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context[CONTEXT_PID]); | |
1104 | ||
1105 | r = get_process_comm(pid, &context[CONTEXT_COMM]); | |
1106 | if (r < 0) { | |
1107 | log_warning_errno(r, "Failed to get COMM, falling back to the command line: %m"); | |
1108 | context[CONTEXT_COMM] = strv_join(comm_fallback, " "); | |
1109 | if (!context[CONTEXT_COMM]) | |
1110 | return log_oom(); | |
1111 | } | |
1112 | ||
1113 | r = get_process_exe(pid, &context[CONTEXT_EXE]); | |
1114 | if (r < 0) | |
1115 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); | |
1116 | ||
1117 | if (cg_pid_get_unit(pid, &context[CONTEXT_UNIT]) >= 0) { | |
1118 | if (!is_journald_crash((const char**) context)) { | |
1119 | /* OK, now we know it's not the journal, hence we can make use of it now. */ | |
1120 | log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); | |
1121 | log_open(); | |
1122 | } | |
1123 | ||
1124 | /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */ | |
1125 | if (is_pid1_crash((const char**) context)) { | |
1126 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1127 | (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0); | |
1128 | } | |
1129 | ||
1130 | set_iovec_field(iovec, n_iovec, "COREDUMP_UNIT=", context[CONTEXT_UNIT]); | |
1131 | } | |
1132 | ||
1133 | if (cg_pid_get_user_unit(pid, &t) >= 0) | |
1134 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_USER_UNIT=", t); | |
1135 | ||
1136 | /* The next few are mandatory */ | |
1137 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_PID=", context[CONTEXT_PID])) | |
1138 | return log_oom(); | |
1139 | ||
1140 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_UID=", context[CONTEXT_UID])) | |
1141 | return log_oom(); | |
1142 | ||
1143 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_GID=", context[CONTEXT_GID])) | |
1144 | return log_oom(); | |
1145 | ||
1146 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL=", context[CONTEXT_SIGNAL])) | |
1147 | return log_oom(); | |
1148 | ||
1149 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_RLIMIT=", context[CONTEXT_RLIMIT])) | |
1150 | return log_oom(); | |
1151 | ||
1152 | if (!set_iovec_field(iovec, n_iovec, "COREDUMP_COMM=", context[CONTEXT_COMM])) | |
1153 | return log_oom(); | |
1154 | ||
1155 | if (context[CONTEXT_EXE] && | |
1156 | !set_iovec_field(iovec, n_iovec, "COREDUMP_EXE=", context[CONTEXT_EXE])) | |
1157 | return log_oom(); | |
1158 | ||
1159 | if (sd_pid_get_session(pid, &t) >= 0) | |
1160 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_SESSION=", t); | |
1161 | ||
1162 | if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) { | |
1163 | r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid); | |
1164 | if (r > 0) | |
1165 | IOVEC_SET_STRING(iovec[(*n_iovec)++], t); | |
1166 | } | |
1167 | ||
1168 | if (sd_pid_get_slice(pid, &t) >= 0) | |
1169 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_SLICE=", t); | |
1170 | ||
1171 | if (get_process_cmdline(pid, 0, false, &t) >= 0) | |
1172 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_CMDLINE=", t); | |
1173 | ||
1174 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) | |
1175 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_CGROUP=", t); | |
1176 | ||
1177 | if (compose_open_fds(pid, &t) >= 0) | |
1178 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_OPEN_FDS=", t); | |
1179 | ||
1180 | p = procfs_file_alloca(pid, "status"); | |
1181 | if (read_full_file(p, &t, NULL) >= 0) | |
1182 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_STATUS=", t); | |
1183 | ||
1184 | p = procfs_file_alloca(pid, "maps"); | |
1185 | if (read_full_file(p, &t, NULL) >= 0) | |
1186 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_MAPS=", t); | |
1187 | ||
1188 | p = procfs_file_alloca(pid, "limits"); | |
1189 | if (read_full_file(p, &t, NULL) >= 0) | |
1190 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_LIMITS=", t); | |
1191 | ||
1192 | p = procfs_file_alloca(pid, "cgroup"); | |
1193 | if (read_full_file(p, &t, NULL) >=0) | |
1194 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_CGROUP=", t); | |
1195 | ||
1196 | p = procfs_file_alloca(pid, "mountinfo"); | |
1197 | if (read_full_file(p, &t, NULL) >=0) | |
1198 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_MOUNTINFO=", t); | |
1199 | ||
1200 | if (get_process_cwd(pid, &t) >= 0) | |
1201 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_CWD=", t); | |
1202 | ||
1203 | if (get_process_root(pid, &t) >= 0) { | |
1204 | bool proc_self_root_is_slash; | |
1205 | ||
1206 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
1207 | ||
1208 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_ROOT=", t); | |
1209 | ||
1210 | /* If the process' root is "/", then there is a chance it has | |
1211 | * mounted own root and hence being containerized. */ | |
1212 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) | |
1213 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_CONTAINER_CMDLINE=", t); | |
1214 | } | |
1215 | ||
1216 | if (get_process_environ(pid, &t) >= 0) | |
1217 | set_iovec_field_free(iovec, n_iovec, "COREDUMP_ENVIRON=", t); | |
1218 | ||
1219 | t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL); | |
1220 | if (t) | |
1221 | IOVEC_SET_STRING(iovec[(*n_iovec)++], t); | |
1222 | ||
1223 | if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo)) | |
1224 | set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo)); | |
1225 | ||
1226 | return 0; /* we successfully acquired all metadata */ | |
1227 | } | |
1228 | ||
1229 | static int process_kernel(int argc, char* argv[]) { | |
1230 | ||
1231 | char* context[_CONTEXT_MAX] = {}; | |
1232 | struct iovec iovec[28 + SUBMIT_COREDUMP_FIELDS]; | |
1233 | size_t i, n_iovec, n_to_free = 0; | |
1234 | int r; | |
1235 | ||
1236 | log_debug("Processing coredump received from the kernel..."); | |
1237 | ||
1238 | if (argc < CONTEXT_COMM + 1) { | |
1239 | log_error("Not enough arguments passed by the kernel (%i, expected %i).", argc - 1, CONTEXT_COMM + 1 - 1); | |
1240 | return -EINVAL; | |
1241 | } | |
1242 | ||
1243 | context[CONTEXT_PID] = argv[1 + CONTEXT_PID]; | |
1244 | context[CONTEXT_UID] = argv[1 + CONTEXT_UID]; | |
1245 | context[CONTEXT_GID] = argv[1 + CONTEXT_GID]; | |
1246 | context[CONTEXT_SIGNAL] = argv[1 + CONTEXT_SIGNAL]; | |
1247 | context[CONTEXT_TIMESTAMP] = argv[1 + CONTEXT_TIMESTAMP]; | |
1248 | context[CONTEXT_RLIMIT] = argv[1 + CONTEXT_RLIMIT]; | |
1249 | ||
1250 | r = gather_pid_metadata(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free); | |
1251 | if (r < 0) | |
1252 | goto finish; | |
1253 | ||
1254 | n_iovec = n_to_free; | |
1255 | ||
1256 | IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR); | |
1257 | ||
1258 | assert_cc(2 == LOG_CRIT); | |
1259 | IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2"); | |
1260 | ||
1261 | assert(n_iovec <= ELEMENTSOF(iovec)); | |
1262 | ||
1263 | if (is_journald_crash((const char**) context) || is_pid1_crash((const char**) context)) | |
1264 | r = submit_coredump((const char**) context, | |
1265 | iovec, ELEMENTSOF(iovec), n_iovec, | |
1266 | STDIN_FILENO); | |
1267 | else | |
1268 | r = send_iovec(iovec, n_iovec, STDIN_FILENO); | |
1269 | ||
1270 | finish: | |
1271 | for (i = 0; i < n_to_free; i++) | |
1272 | free(iovec[i].iov_base); | |
1273 | ||
1274 | /* Those fields are allocated by gather_pid_metadata */ | |
1275 | free(context[CONTEXT_COMM]); | |
1276 | free(context[CONTEXT_EXE]); | |
1277 | free(context[CONTEXT_UNIT]); | |
1278 | ||
1279 | return r; | |
1280 | } | |
1281 | ||
1282 | static int process_backtrace(int argc, char *argv[]) { | |
1283 | char *context[_CONTEXT_MAX] = {}; | |
1284 | _cleanup_free_ char *message = NULL; | |
1285 | _cleanup_free_ struct iovec *iovec = NULL; | |
1286 | size_t n_iovec, n_allocated, n_to_free = 0, i; | |
1287 | int r; | |
1288 | JournalImporter importer = { | |
1289 | .fd = STDIN_FILENO, | |
1290 | }; | |
1291 | ||
1292 | log_debug("Processing backtrace on stdin..."); | |
1293 | ||
1294 | if (argc < CONTEXT_COMM + 1) { | |
1295 | log_error("Not enough arguments passed (%i, expected %i).", argc - 1, CONTEXT_COMM + 1 - 1); | |
1296 | return -EINVAL; | |
1297 | } | |
1298 | ||
1299 | context[CONTEXT_PID] = argv[2 + CONTEXT_PID]; | |
1300 | context[CONTEXT_UID] = argv[2 + CONTEXT_UID]; | |
1301 | context[CONTEXT_GID] = argv[2 + CONTEXT_GID]; | |
1302 | context[CONTEXT_SIGNAL] = argv[2 + CONTEXT_SIGNAL]; | |
1303 | context[CONTEXT_TIMESTAMP] = argv[2 + CONTEXT_TIMESTAMP]; | |
1304 | context[CONTEXT_RLIMIT] = argv[2 + CONTEXT_RLIMIT]; | |
1305 | ||
1306 | n_allocated = 33 + COREDUMP_STORAGE_EXTERNAL; | |
1307 | /* 25 metadata, 2 static, +unknown input, 4 storage, rounded up */ | |
1308 | iovec = new(struct iovec, n_allocated); | |
1309 | if (!iovec) | |
1310 | return log_oom(); | |
1311 | ||
1312 | r = gather_pid_metadata(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free); | |
1313 | if (r < 0) | |
1314 | goto finish; | |
1315 | if (r > 0) { | |
1316 | /* This was a special crash, and has already been processed. */ | |
1317 | r = 0; | |
1318 | goto finish; | |
1319 | } | |
1320 | n_iovec = n_to_free; | |
1321 | ||
1322 | for (;;) { | |
1323 | r = journal_importer_process_data(&importer); | |
1324 | if (r < 0) { | |
1325 | log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
1326 | goto finish; | |
1327 | } | |
1328 | if (r == 1 || /* complete entry */ | |
1329 | journal_importer_eof(&importer)) /* end of data */ | |
1330 | break; | |
1331 | } | |
1332 | ||
1333 | if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + importer.iovw.count + 2)) | |
1334 | return log_oom(); | |
1335 | ||
1336 | if (journal_importer_eof(&importer)) { | |
1337 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
1338 | ||
1339 | message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], | |
1340 | " (", context[CONTEXT_COMM], ")" | |
1341 | " of user ", context[CONTEXT_UID], | |
1342 | " failed with ", context[CONTEXT_SIGNAL]); | |
1343 | if (!message) { | |
1344 | r = log_oom(); | |
1345 | goto finish; | |
1346 | } | |
1347 | IOVEC_SET_STRING(iovec[n_iovec++], message); | |
1348 | } else { | |
1349 | for (i = 0; i < importer.iovw.count; i++) | |
1350 | iovec[n_iovec++] = importer.iovw.iovec[i]; | |
1351 | } | |
1352 | ||
1353 | IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR); | |
1354 | assert_cc(2 == LOG_CRIT); | |
1355 | IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2"); | |
1356 | ||
1357 | assert(n_iovec <= n_allocated); | |
1358 | ||
1359 | r = sd_journal_sendv(iovec, n_iovec); | |
1360 | if (r < 0) | |
1361 | log_error_errno(r, "Failed to log backtrace: %m"); | |
1362 | ||
1363 | finish: | |
1364 | for (i = 0; i < n_to_free; i++) | |
1365 | free(iovec[i].iov_base); | |
1366 | ||
1367 | /* Those fields are allocated by gather_pid_metadata */ | |
1368 | free(context[CONTEXT_COMM]); | |
1369 | free(context[CONTEXT_EXE]); | |
1370 | free(context[CONTEXT_UNIT]); | |
1371 | ||
1372 | return r; | |
1373 | } | |
1374 | ||
1375 | int main(int argc, char *argv[]) { | |
1376 | int r; | |
1377 | ||
1378 | /* First, log to a safe place, since we don't know what crashed and it might | |
1379 | * be journald which we'd rather not log to then. */ | |
1380 | ||
1381 | log_set_target(LOG_TARGET_KMSG); | |
1382 | log_open(); | |
1383 | ||
1384 | /* Make sure we never enter a loop */ | |
1385 | (void) prctl(PR_SET_DUMPABLE, 0); | |
1386 | ||
1387 | /* Ignore all parse errors */ | |
1388 | (void) parse_config(); | |
1389 | ||
1390 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); | |
1391 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
1392 | ||
1393 | r = sd_listen_fds(false); | |
1394 | if (r < 0) { | |
1395 | log_error_errno(r, "Failed to determine number of file descriptor: %m"); | |
1396 | goto finish; | |
1397 | } | |
1398 | ||
1399 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we | |
1400 | * are invoked from the kernel as coredump handler. */ | |
1401 | if (r == 0) { | |
1402 | if (streq_ptr(argv[1], "--backtrace")) | |
1403 | r = process_backtrace(argc, argv); | |
1404 | else | |
1405 | r = process_kernel(argc, argv); | |
1406 | } else if (r == 1) | |
1407 | r = process_socket(SD_LISTEN_FDS_START); | |
1408 | else { | |
1409 | log_error("Received unexpected number of file descriptors."); | |
1410 | r = -EINVAL; | |
1411 | } | |
1412 | ||
1413 | finish: | |
1414 | return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; | |
1415 | } |