]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | ||
3 | #include <elf.h> | |
4 | #include <stdio.h> | |
5 | #include <sys/mount.h> | |
6 | #include <sys/statvfs.h> | |
7 | #include <sys/xattr.h> | |
8 | #include <unistd.h> | |
9 | ||
10 | #include "sd-bus.h" | |
11 | #include "sd-daemon.h" | |
12 | #include "sd-journal.h" | |
13 | #include "sd-json.h" | |
14 | #include "sd-login.h" | |
15 | #include "sd-messages.h" | |
16 | ||
17 | #include "acl-util.h" | |
18 | #include "alloc-util.h" | |
19 | #include "bus-error.h" | |
20 | #include "capability-util.h" | |
21 | #include "cgroup-util.h" | |
22 | #include "compress.h" | |
23 | #include "conf-parser.h" | |
24 | #include "copy.h" | |
25 | #include "coredump-util.h" | |
26 | #include "coredump-vacuum.h" | |
27 | #include "dirent-util.h" | |
28 | #include "elf-util.h" | |
29 | #include "errno-util.h" | |
30 | #include "escape.h" | |
31 | #include "fd-util.h" | |
32 | #include "fileio.h" | |
33 | #include "fs-util.h" | |
34 | #include "io-util.h" | |
35 | #include "iovec-util.h" | |
36 | #include "journal-importer.h" | |
37 | #include "journal-send.h" | |
38 | #include "json-util.h" | |
39 | #include "log.h" | |
40 | #include "main-func.h" | |
41 | #include "memory-util.h" | |
42 | #include "memstream-util.h" | |
43 | #include "mkdir-label.h" | |
44 | #include "namespace-util.h" | |
45 | #include "parse-util.h" | |
46 | #include "path-util.h" | |
47 | #include "pidref.h" | |
48 | #include "process-util.h" | |
49 | #include "signal-util.h" | |
50 | #include "socket-util.h" | |
51 | #include "special.h" | |
52 | #include "stat-util.h" | |
53 | #include "string-table.h" | |
54 | #include "string-util.h" | |
55 | #include "tmpfile-util.h" | |
56 | #include "uid-classification.h" | |
57 | #include "user-util.h" | |
58 | ||
/* The maximum size up to which we process coredumps. We use 1G on 32-bit systems, and 32G on 64-bit
 * systems. This is the built-in default for ProcessSizeMax=. */
#if __SIZEOF_POINTER__ == 4
#define PROCESS_SIZE_MAX ((uint64_t) (1LLU*1024LLU*1024LLU*1024LLU))
#elif __SIZEOF_POINTER__ == 8
#define PROCESS_SIZE_MAX ((uint64_t) (32LLU*1024LLU*1024LLU*1024LLU))
#else
#error "Unexpected pointer size"
#endif

/* The maximum size up to which we leave the coredump around on disk. This is the built-in default for
 * ExternalSizeMax=. */
#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX

/* The maximum size up to which we store the coredump in the journal (767M in regular builds). This is
 * both the built-in default for JournalSizeMax= and the upper bound parse_config() clamps it to. */
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
#else
/* oss-fuzz limits memory usage, hence only 10M in fuzzing builds. */
#define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
#endif

/* When checking for available memory and setting lower limits, don't
 * go below 4MB for writing core files to storage. */
#define PROCESS_SIZE_MIN (4U*1024U*1024U)

/* Make sure to not make this larger than the maximum journal entry
 * size. See DATA_SIZE_MAX in journal-importer.h. */
assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);

/* Directory that attach_mount_tree() creates and then moves the supplied mount tree onto. */
#define MOUNT_TREE_ROOT "/run/systemd/mount-rootfs"
88 | ||
/* Indexes into Context.meta[] / Context.meta_size[] (and into the per-index string tables such as
 * meta_field_names[]). The numeric values follow the argv[] order of the core_pattern, so the order of
 * the META_ARGV_* entries must not be changed. */
typedef enum {
        /* We use these as array indexes for our process metadata cache.
         *
         * The first indices of the cache store the same metadata as the ones passed by the kernel via
         * argv[], i.e. the strings specified in our pattern defined in /proc/sys/kernel/core_pattern,
         * see core(5). */

        META_ARGV_PID,          /* %P: as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of signal causing dump */
        META_ARGV_TIMESTAMP,    /* %t: time of dump, expressed as seconds since the Epoch (we expand this to μs granularity) */
        META_ARGV_RLIMIT,       /* %c: core file size soft resource limit */
        _META_ARGV_REQUIRED,    /* number of leading fields that are always expected to be present */
        /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */
        META_ARGV_HOSTNAME = _META_ARGV_REQUIRED,  /* %h: hostname */
        META_ARGV_DUMPABLE,     /* %d: as set by the kernel */
        META_ARGV_PIDFD,        /* %F: pidfd of the process, since v6.16 */
        /* If new fields are added, they should be added here, to maintain compatibility
         * with callers which don't know about the new fields. */
        _META_ARGV_MAX,

        /* The following indexes are cached for a couple of special fields we use (and
         * thereby need to be retrieved quickly) for naming coredump files, and attaching
         * xattrs. Unlike the previous ones they are retrieved from the runtime
         * environment. */

        META_COMM = _META_ARGV_MAX,

        /* The rest are similar to the previous ones except that we won't fail if one of
         * them is missing in a message sent over the socket. */

        META_EXE,
        META_UNIT,
        META_PROC_AUXV,
        _META_MAX
} meta_argv_t;
126 | ||
/* Field name prefixes (each including the trailing '='), one per meta_argv_t index. Every index
 * below _META_MAX has an entry here. */
static const char * const meta_field_names[_META_MAX] = {
        [META_ARGV_PID]       = "COREDUMP_PID=",
        [META_ARGV_UID]       = "COREDUMP_UID=",
        [META_ARGV_GID]       = "COREDUMP_GID=",
        [META_ARGV_SIGNAL]    = "COREDUMP_SIGNAL=",
        [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
        [META_ARGV_RLIMIT]    = "COREDUMP_RLIMIT=",
        [META_ARGV_HOSTNAME]  = "COREDUMP_HOSTNAME=",
        [META_ARGV_DUMPABLE]  = "COREDUMP_DUMPABLE=",
        [META_ARGV_PIDFD]     = "COREDUMP_BY_PIDFD=",
        [META_COMM]           = "COREDUMP_COMM=",
        [META_EXE]            = "COREDUMP_EXE=",
        [META_UNIT]           = "COREDUMP_UNIT=",
        [META_PROC_AUXV]      = "COREDUMP_PROC_AUXV=",
};
142 | ||
/* State describing one coredump being handled. Initialize with CONTEXT_NULL and release with
 * context_done(). */
typedef struct Context {
        PidRef pidref;           /* released by context_done() */
        uid_t uid;               /* used for the ACL entry in fix_acl() and for dropping privileges in change_uid_gid() */
        gid_t gid;               /* used for dropping privileges in change_uid_gid() */
        unsigned dumpable;       /* compared against SUID_DUMP_USER when deciding whether the user may read the core */
        int signo;
        uint64_t rlimit;         /* caps how much of the core is stored; values below the page size disable dumping */
        bool is_pid1;            /* if set (and storage is not "none"), the external coredump is never removed */
        bool is_journald;        /* same as is_pid1 */
        bool got_pidfd;
        int mount_tree_fd;       /* closed by context_done() */

        /* These point into external memory, are not owned by this object. Both arrays are indexed by
         * meta_argv_t; meta_size[] is passed along with meta[] where a length is needed (e.g. for
         * META_PROC_AUXV). */
        const char *meta[_META_MAX];
        size_t meta_size[_META_MAX];
} Context;

/* Initializer for an empty Context: no pidref, invalid uid/gid, no mount tree fd. All other fields are
 * zero-initialized. */
#define CONTEXT_NULL                            \
        (Context) {                             \
                .pidref = PIDREF_NULL,          \
                .uid = UID_INVALID,             \
                .gid = GID_INVALID,             \
                .mount_tree_fd = -EBADF,        \
        }
167 | ||
/* Where coredumps are stored, as selected by the Storage= setting. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,      /* storage_size_max() is 0 in this mode */
        COREDUMP_STORAGE_EXTERNAL,  /* limited by arg_external_size_max */
        COREDUMP_STORAGE_JOURNAL,   /* limited by arg_journal_size_max */
        _COREDUMP_STORAGE_MAX,
        _COREDUMP_STORAGE_INVALID = -EINVAL,
} CoredumpStorage;

/* Configuration file spelling of each CoredumpStorage value. */
static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
        [COREDUMP_STORAGE_NONE]     = "none",
        [COREDUMP_STORAGE_EXTERNAL] = "external",
        [COREDUMP_STORAGE_JOURNAL]  = "journal",
};

/* Generates the string <-> enum lookup helpers and config_parse_coredump_storage(), the parser used
 * for Storage= in parse_config(). */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage);
184 | ||
/* Settings with their built-in defaults; parse_config() overrides them from the [Coredump] section of
 * coredump.conf. */
static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL;   /* Storage= */
static bool arg_compress = true;                                  /* Compress= */
static uint64_t arg_process_size_max = PROCESS_SIZE_MAX;          /* ProcessSizeMax= */
static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;        /* ExternalSizeMax= */
static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;          /* JournalSizeMax=, clamped to JOURNAL_SIZE_MAX */
static uint64_t arg_keep_free = UINT64_MAX;                       /* KeepFree= */
static uint64_t arg_max_use = UINT64_MAX;                         /* MaxUse= */
#if HAVE_DWFL_SET_SYSROOT
static bool arg_enter_namespace = false;                          /* EnterNamespace=, only available with dwfl sysroot support */
#endif
195 | ||
196 | static void context_done(Context *c) { | |
197 | assert(c); | |
198 | ||
199 | pidref_done(&c->pidref); | |
200 | c->mount_tree_fd = safe_close(c->mount_tree_fd); | |
201 | } | |
202 | ||
203 | static int parse_config(void) { | |
204 | static const ConfigTableItem items[] = { | |
205 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, | |
206 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
207 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, | |
208 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, | |
209 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, | |
210 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, | |
211 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
212 | #if HAVE_DWFL_SET_SYSROOT | |
213 | { "Coredump", "EnterNamespace", config_parse_bool, 0, &arg_enter_namespace }, | |
214 | #else | |
215 | { "Coredump", "EnterNamespace", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, | |
216 | #endif | |
217 | {} | |
218 | }; | |
219 | ||
220 | int r; | |
221 | ||
222 | r = config_parse_standard_file_with_dropins( | |
223 | "systemd/coredump.conf", | |
224 | "Coredump\0", | |
225 | config_item_table_lookup, | |
226 | items, | |
227 | CONFIG_PARSE_WARN, | |
228 | /* userdata= */ NULL); | |
229 | if (r < 0) | |
230 | return r; | |
231 | ||
232 | /* Let's make sure we fix up the maximum size we send to the journal here on the client side, for | |
233 | * efficiency reasons. journald wouldn't accept anything larger anyway. */ | |
234 | if (arg_journal_size_max > JOURNAL_SIZE_MAX) { | |
235 | log_warning("JournalSizeMax= set to larger value (%s) than journald would accept (%s), lowering automatically.", | |
236 | FORMAT_BYTES(arg_journal_size_max), FORMAT_BYTES(JOURNAL_SIZE_MAX)); | |
237 | arg_journal_size_max = JOURNAL_SIZE_MAX; | |
238 | } | |
239 | ||
240 | return 0; | |
241 | } | |
242 | ||
243 | static uint64_t storage_size_max(void) { | |
244 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL) | |
245 | return arg_external_size_max; | |
246 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) | |
247 | return arg_journal_size_max; | |
248 | assert(arg_storage == COREDUMP_STORAGE_NONE); | |
249 | return 0; | |
250 | } | |
251 | ||
252 | static int fix_acl(int fd, uid_t uid, bool allow_user) { | |
253 | assert(fd >= 0); | |
254 | assert(uid_is_valid(uid)); | |
255 | ||
256 | #if HAVE_ACL | |
257 | int r; | |
258 | ||
259 | /* We don't allow users to read coredumps if the uid or capabilities were changed. */ | |
260 | if (!allow_user) | |
261 | return 0; | |
262 | ||
263 | if (uid_is_system(uid) || uid_is_dynamic(uid) || uid_is_greeter(uid) || uid == UID_NOBODY) | |
264 | return 0; | |
265 | ||
266 | /* Make sure normal users can read (but not write or delete) their own coredumps */ | |
267 | r = fd_add_uid_acl_permission(fd, uid, ACL_READ); | |
268 | if (r < 0) | |
269 | return log_error_errno(r, "Failed to adjust ACL of the coredump: %m"); | |
270 | #endif | |
271 | ||
272 | return 0; | |
273 | } | |
274 | ||
275 | static int fix_xattr(int fd, const Context *context) { | |
276 | static const char * const xattrs[_META_MAX] = { | |
277 | [META_ARGV_PID] = "user.coredump.pid", | |
278 | [META_ARGV_UID] = "user.coredump.uid", | |
279 | [META_ARGV_GID] = "user.coredump.gid", | |
280 | [META_ARGV_SIGNAL] = "user.coredump.signal", | |
281 | [META_ARGV_TIMESTAMP] = "user.coredump.timestamp", | |
282 | [META_ARGV_RLIMIT] = "user.coredump.rlimit", | |
283 | [META_ARGV_HOSTNAME] = "user.coredump.hostname", | |
284 | [META_COMM] = "user.coredump.comm", | |
285 | [META_EXE] = "user.coredump.exe", | |
286 | }; | |
287 | ||
288 | int r = 0; | |
289 | ||
290 | assert(fd >= 0); | |
291 | ||
292 | /* Attach some metadata to coredumps via extended attributes. Just because we can. */ | |
293 | ||
294 | for (unsigned i = 0; i < _META_MAX; i++) { | |
295 | int k; | |
296 | ||
297 | if (isempty(context->meta[i]) || !xattrs[i]) | |
298 | continue; | |
299 | ||
300 | k = RET_NERRNO(fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE)); | |
301 | RET_GATHER(r, k); | |
302 | } | |
303 | ||
304 | return r; | |
305 | } | |
306 | ||
/* Escapes a string for use as a component of a coredump file name, by passing it to xescape() together
 * with the character set "./ ". */
#define filename_escape(s) xescape((s), "./ ")

/* Returns a printable name for a temporary coredump file: s itself, or a fixed placeholder if s is
 * NULL (i.e. the temporary file has no name). */
static const char *coredump_tmpfile_name(const char *s) {
        if (s)
                return s;

        return "(unnamed temporary file)";
}
312 | ||
313 | static int fix_permissions_and_link( | |
314 | int fd, | |
315 | const char *filename, | |
316 | const char *target, | |
317 | const Context *context, | |
318 | bool allow_user) { | |
319 | ||
320 | int r; | |
321 | ||
322 | assert(fd >= 0); | |
323 | assert(target); | |
324 | assert(context); | |
325 | ||
326 | /* Ignore errors on these */ | |
327 | (void) fchmod(fd, 0640); | |
328 | (void) fix_acl(fd, context->uid, allow_user); | |
329 | (void) fix_xattr(fd, context); | |
330 | ||
331 | r = link_tmpfile(fd, filename, target, LINK_TMPFILE_SYNC); | |
332 | if (r < 0) | |
333 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
334 | ||
335 | return 0; | |
336 | } | |
337 | ||
338 | static int maybe_remove_external_coredump( | |
339 | const Context *c, | |
340 | const char *filename, | |
341 | uint64_t size) { | |
342 | ||
343 | assert(c); | |
344 | ||
345 | /* Returns true if might remove, false if will not remove, < 0 on error. */ | |
346 | ||
347 | if (arg_storage != COREDUMP_STORAGE_NONE && | |
348 | (c->is_pid1 || c->is_journald)) /* Always keep around in case of journald/pid1, since we cannot rely on the journal to accept them */ | |
349 | return false; | |
350 | ||
351 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && | |
352 | size <= arg_external_size_max) | |
353 | return false; | |
354 | ||
355 | if (!filename) | |
356 | return true; | |
357 | ||
358 | if (unlink(filename) < 0 && errno != ENOENT) | |
359 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
360 | ||
361 | return true; | |
362 | } | |
363 | ||
364 | static int make_filename(const Context *context, char **ret) { | |
365 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; | |
366 | sd_id128_t boot = {}; | |
367 | int r; | |
368 | ||
369 | assert(context); | |
370 | ||
371 | c = filename_escape(context->meta[META_COMM]); | |
372 | if (!c) | |
373 | return -ENOMEM; | |
374 | ||
375 | u = filename_escape(context->meta[META_ARGV_UID]); | |
376 | if (!u) | |
377 | return -ENOMEM; | |
378 | ||
379 | r = sd_id128_get_boot(&boot); | |
380 | if (r < 0) | |
381 | return r; | |
382 | ||
383 | p = filename_escape(context->meta[META_ARGV_PID]); | |
384 | if (!p) | |
385 | return -ENOMEM; | |
386 | ||
387 | t = filename_escape(context->meta[META_ARGV_TIMESTAMP]); | |
388 | if (!t) | |
389 | return -ENOMEM; | |
390 | ||
391 | if (asprintf(ret, | |
392 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s", | |
393 | c, | |
394 | u, | |
395 | SD_ID128_FORMAT_VAL(boot), | |
396 | p, | |
397 | t) < 0) | |
398 | return -ENOMEM; | |
399 | ||
400 | return 0; | |
401 | } | |
402 | ||
403 | static int grant_user_access(int core_fd, const Context *context) { | |
404 | int at_secure = -1; | |
405 | uid_t uid = UID_INVALID, euid = UID_INVALID; | |
406 | uid_t gid = GID_INVALID, egid = GID_INVALID; | |
407 | int r; | |
408 | ||
409 | assert(core_fd >= 0); | |
410 | assert(context); | |
411 | ||
412 | if (!context->meta[META_PROC_AUXV]) | |
413 | return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions."); | |
414 | ||
415 | uint8_t elf[EI_NIDENT]; | |
416 | errno = 0; | |
417 | if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf)) | |
418 | return log_warning_errno(errno_or_else(EIO), | |
419 | "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno)); | |
420 | ||
421 | if (elf[EI_MAG0] != ELFMAG0 || | |
422 | elf[EI_MAG1] != ELFMAG1 || | |
423 | elf[EI_MAG2] != ELFMAG2 || | |
424 | elf[EI_MAG3] != ELFMAG3 || | |
425 | elf[EI_VERSION] != EV_CURRENT) | |
426 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
427 | "Core file does not have ELF header, not adjusting permissions."); | |
428 | if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) || | |
429 | !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB)) | |
430 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
431 | "Core file has strange ELF class, not adjusting permissions."); | |
432 | ||
433 | if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN)) | |
434 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
435 | "Core file has non-native endianness, not adjusting permissions."); | |
436 | ||
437 | r = parse_auxv(LOG_WARNING, | |
438 | /* elf_class= */ elf[EI_CLASS], | |
439 | context->meta[META_PROC_AUXV], | |
440 | context->meta_size[META_PROC_AUXV], | |
441 | &at_secure, &uid, &euid, &gid, &egid); | |
442 | if (r < 0) | |
443 | return r; | |
444 | ||
445 | /* We allow access if %d/dumpable on the command line was exactly 1, we got all the data, | |
446 | * at_secure is not set, and the uid/gid match euid/egid. */ | |
447 | bool ret = | |
448 | context->dumpable == SUID_DUMP_USER && | |
449 | at_secure == 0 && | |
450 | uid != UID_INVALID && euid != UID_INVALID && uid == euid && | |
451 | gid != GID_INVALID && egid != GID_INVALID && gid == egid; | |
452 | log_debug("Will %s access (dumpable=%u uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", | |
453 | ret ? "permit" : "restrict", | |
454 | context->dumpable, | |
455 | uid, euid, gid, egid, yes_no(at_secure)); | |
456 | return ret; | |
457 | } | |
458 | ||
/* Reads the core from input_fd and stores it in a file below /var/lib/systemd/coredump/ (the name
 * comes from make_filename()), compressing it if Compress= is on and compression support is built in.
 *
 * On success returns 0 and fills in:
 *   ret_filename         path the coredump was linked at (the compressed name when compressing)
 *   ret_node_fd          fd of the compressed file, or -EBADF when stored uncompressed
 *   ret_data_fd          fd of the uncompressed data; when compressing on tmpfs after a truncated
 *                        write the uncompressed file has been closed already, so this is whatever
 *                        safe_close() returned (presumably an invalid fd — callers should check)
 *   ret_size             number of uncompressed bytes
 *   ret_compressed_size  size of the compressed file, or UINT64_MAX when stored uncompressed
 *   ret_truncated        whether the initial uncompressed copy hit the size limit
 *
 * Returns a negative errno on failure; -EBADSLT in particular if the process' core rlimit is below
 * the page size, or if both the processing and the storage limit are 0. */
static int save_external_coredump(
                const Context *context,
                int input_fd,
                char **ret_filename,
                int *ret_node_fd,
                int *ret_data_fd,
                uint64_t *ret_size,
                uint64_t *ret_compressed_size,
                bool *ret_truncated) {

        _cleanup_(unlink_and_freep) char *tmp = NULL;
        _cleanup_free_ char *fn = NULL;
        _cleanup_close_ int fd = -EBADF;
        uint64_t process_limit, max_size;
        bool truncated, storage_on_tmpfs;
        struct stat st;
        int r;

        assert(context);
        assert(ret_filename);
        assert(ret_node_fd);
        assert(ret_data_fd);
        assert(ret_size);
        assert(ret_compressed_size);
        assert(ret_truncated);

        if (context->rlimit < page_size())
                /* Is coredumping disabled? Then don't bother saving/processing the
                 * coredump. Anything below PAGE_SIZE cannot give a readable coredump
                 * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but
                 * is usually the same as PAGE_SIZE). */
                return log_info_errno(SYNTHETIC_ERRNO(EBADSLT),
                                      "Resource limits disable core dumping for process %s (%s).",
                                      context->meta[META_ARGV_PID], context->meta[META_COMM]);

        /* We need the data if either processing or storing it is enabled, hence take the larger limit */
        process_limit = MAX(arg_process_size_max, storage_size_max());
        if (process_limit == 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT),
                                       "Limits for coredump processing and storage are both 0, not dumping core.");

        /* Never store more than the process configured, or than we actually shall keep or process */
        max_size = MIN(context->rlimit, process_limit);

        r = make_filename(context, &fn);
        if (r < 0)
                return log_error_errno(r, "Failed to determine coredump file name: %m");

        (void) mkdir_parents_label(fn, 0755);

        fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp);
        if (fd < 0)
                return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn);

        /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on
         * the service or the slice it belongs to. This is common on low-resources systems,
         * to avoid crashing processes to take away too many system resources.
         * Check the cgroup settings, and set max_size to a bit less than half of the
         * available memory left to the process.
         * Then, attempt to write the core file uncompressed first - if the write gets
         * interrupted, we know we won't be able to write it all, so instead compress what
         * was written so far, delete the uncompressed truncated core, and then continue
         * compressing from STDIN. Given the compressed core cannot be larger than the
         * uncompressed one, and 1KB for metadata is accounted for in the calculation, we
         * should be able to at least store the full compressed core file. */

        storage_on_tmpfs = fd_is_temporary_fs(fd) > 0;
        if (storage_on_tmpfs && arg_compress) {
                _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
                uint64_t cgroup_limit = UINT64_MAX; /* i.e. no limit, unless the query below succeeds */
                struct statvfs sv;

                /* If we can't get the cgroup limit, just ignore it, but don't fail,
                 * try anyway with the config settings. */
                r = sd_bus_default_system(&bus);
                if (r < 0)
                        log_info_errno(r, "Failed to connect to system bus, skipping MemoryAvailable check: %m");
                else {
                        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;

                        r = sd_bus_get_property_trivial(
                                        bus,
                                        "org.freedesktop.systemd1",
                                        "/org/freedesktop/systemd1/unit/self",
                                        "org.freedesktop.systemd1.Service",
                                        "MemoryAvailable",
                                        &error,
                                        't', &cgroup_limit);
                        if (r < 0)
                                log_warning_errno(r,
                                                  "Failed to query MemoryAvailable for current unit, "
                                                  "falling back to static config settings: %s",
                                                  bus_error_message(&error, r));
                }

                /* First, ensure we are not going to go over the cgroup limit */
                max_size = MIN(cgroup_limit, max_size);
                /* tmpfs might get full quickly, so check the available space too. But don't worry about
                 * errors here, failing to access the storage location will be better logged when writing to
                 * it. */
                if (fstatvfs(fd, &sv) >= 0)
                        max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size);
                /* Impose a lower minimum, otherwise we will miss the basic headers. */
                max_size = MAX(PROCESS_SIZE_MIN, max_size);
                /* Ensure we can always switch to compressing on the fly in case we are running out of space
                 * by keeping half of the space/memory available, plus 1KB metadata overhead from the
                 * compression algorithm. */
                max_size = LESS_BY(max_size, 1024U) / 2;

                log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup and/or filesystem limits.", max_size);
        }

        /* Store the core uncompressed first; a return value of 1 is taken to mean the limit was hit
         * before the input ended. */
        r = copy_bytes(input_fd, fd, max_size, 0);
        if (r < 0)
                return log_error_errno(r, "Cannot store coredump of %s (%s): %m",
                                       context->meta[META_ARGV_PID], context->meta[META_COMM]);
        truncated = r == 1;

        /* Errors from grant_user_access() are negative, hence they result in restricted access */
        bool allow_user = grant_user_access(fd, context) > 0;

#if HAVE_COMPRESSION
        if (arg_compress) {
                _cleanup_(unlink_and_freep) char *tmp_compressed = NULL;
                _cleanup_free_ char *fn_compressed = NULL;
                _cleanup_close_ int fd_compressed = -EBADF;
                uint64_t uncompressed_size = 0;

                /* Rewind the uncompressed file so that it can be read back as compression input */
                if (lseek(fd, 0, SEEK_SET) < 0)
                        return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn);

                fn_compressed = strjoin(fn, default_compression_extension());
                if (!fn_compressed)
                        return log_oom();

                fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed);
                if (fd_compressed < 0)
                        return log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed);

                r = compress_stream(fd, fd_compressed, max_size, &uncompressed_size);
                if (r < 0)
                        return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));

                if (truncated && storage_on_tmpfs) {
                        uint64_t partial_uncompressed_size = 0;

                        /* Uncompressed write was truncated and we are writing to tmpfs: delete
                         * the uncompressed core, and compress the remaining part from STDIN. */

                        tmp = unlink_and_free(tmp);
                        fd = safe_close(fd);

                        r = compress_stream(input_fd, fd_compressed, max_size, &partial_uncompressed_size);
                        if (r < 0)
                                return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));
                        uncompressed_size += partial_uncompressed_size;
                }

                /* Only the compressed file is linked into place; fix_permissions_and_link() logs on failure */
                r = fix_permissions_and_link(fd_compressed, tmp_compressed, fn_compressed, context, allow_user);
                if (r < 0)
                        return r;

                if (fstat(fd_compressed, &st) < 0)
                        return log_error_errno(errno,
                                               "Failed to fstat core file %s: %m",
                                               coredump_tmpfile_name(tmp_compressed));

                *ret_filename = TAKE_PTR(fn_compressed);       /* compressed */
                *ret_node_fd = TAKE_FD(fd_compressed);         /* compressed */
                *ret_data_fd = TAKE_FD(fd);
                *ret_size = uncompressed_size;
                *ret_compressed_size = (uint64_t) st.st_size;  /* compressed */
                *ret_truncated = truncated;

                return 0;
        }
#endif

        /* Uncompressed storage from here on */
        if (truncated)
                log_struct(LOG_INFO,
                           LOG_MESSAGE("Core file was truncated to %"PRIu64" bytes.", max_size),
                           LOG_ITEM("SIZE_LIMIT=%"PRIu64, max_size),
                           LOG_MESSAGE_ID(SD_MESSAGE_TRUNCATED_CORE_STR));

        r = fix_permissions_and_link(fd, tmp, fn, context, allow_user);
        if (r < 0)
                return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn);

        if (fstat(fd, &st) < 0)
                return log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp));

        /* Hand the fd back rewound, so that the caller can read the core from the start */
        if (lseek(fd, 0, SEEK_SET) < 0)
                return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn);

        *ret_filename = TAKE_PTR(fn);
        *ret_node_fd = -EBADF;
        *ret_data_fd = TAKE_FD(fd);
        *ret_size = (uint64_t) st.st_size;
        *ret_compressed_size = UINT64_MAX;
        *ret_truncated = truncated;

        return 0;
}
660 | ||
661 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
662 | _cleanup_free_ char *field = NULL; | |
663 | ssize_t n; | |
664 | ||
665 | assert(fd >= 0); | |
666 | assert(ret); | |
667 | assert(ret_size); | |
668 | ||
669 | if (lseek(fd, 0, SEEK_SET) < 0) | |
670 | return log_warning_errno(errno, "Failed to seek: %m"); | |
671 | ||
672 | field = malloc(9 + size); | |
673 | if (!field) | |
674 | return log_warning_errno(SYNTHETIC_ERRNO(ENOMEM), | |
675 | "Failed to allocate memory for coredump, coredump will not be stored."); | |
676 | ||
677 | memcpy(field, "COREDUMP=", 9); | |
678 | ||
679 | /* NB: simple read() would fail for overly large coredumps, since read() on Linux can only deal with | |
680 | * 0x7ffff000 bytes max. Hence call things in a loop. */ | |
681 | n = loop_read(fd, field + 9, size, /* do_poll= */ false); | |
682 | if (n < 0) | |
683 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
684 | if ((size_t) n < size) | |
685 | return log_error_errno(SYNTHETIC_ERRNO(EIO), "Core data too short."); | |
686 | ||
687 | *ret = TAKE_PTR(field); | |
688 | *ret_size = size + 9; | |
689 | ||
690 | return 0; | |
691 | } | |
692 | ||
693 | /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines: | |
694 | * 0:/dev/pts/23 | |
695 | * pos: 0 | |
696 | * flags: 0100002 | |
697 | * | |
698 | * 1:/dev/pts/23 | |
699 | * pos: 0 | |
700 | * flags: 0100002 | |
701 | * | |
702 | * 2:/dev/pts/23 | |
703 | * pos: 0 | |
704 | * flags: 0100002 | |
705 | * EOF | |
706 | */ | |
707 | static int compose_open_fds(pid_t pid, char **ret) { | |
708 | _cleanup_(memstream_done) MemStream m = {}; | |
709 | _cleanup_closedir_ DIR *proc_fd_dir = NULL; | |
710 | _cleanup_close_ int proc_fdinfo_fd = -EBADF; | |
711 | const char *fddelim = "", *path; | |
712 | FILE *stream; | |
713 | int r; | |
714 | ||
715 | assert(pid >= 0); | |
716 | assert(ret); | |
717 | ||
718 | path = procfs_file_alloca(pid, "fd"); | |
719 | proc_fd_dir = opendir(path); | |
720 | if (!proc_fd_dir) | |
721 | return -errno; | |
722 | ||
723 | proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH); | |
724 | if (proc_fdinfo_fd < 0) | |
725 | return -errno; | |
726 | ||
727 | stream = memstream_init(&m); | |
728 | if (!stream) | |
729 | return -ENOMEM; | |
730 | ||
731 | FOREACH_DIRENT(de, proc_fd_dir, return -errno) { | |
732 | _cleanup_fclose_ FILE *fdinfo = NULL; | |
733 | _cleanup_free_ char *fdname = NULL; | |
734 | _cleanup_close_ int fd = -EBADF; | |
735 | ||
736 | r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname); | |
737 | if (r < 0) | |
738 | return r; | |
739 | ||
740 | fprintf(stream, "%s%s:%s\n", fddelim, de->d_name, fdname); | |
741 | fddelim = "\n"; | |
742 | ||
743 | /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */ | |
744 | fd = openat(proc_fdinfo_fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY); | |
745 | if (fd < 0) | |
746 | continue; | |
747 | ||
748 | fdinfo = take_fdopen(&fd, "r"); | |
749 | if (!fdinfo) | |
750 | continue; | |
751 | ||
752 | for (;;) { | |
753 | _cleanup_free_ char *line = NULL; | |
754 | ||
755 | r = read_line(fdinfo, LONG_LINE_MAX, &line); | |
756 | if (r < 0) | |
757 | return r; | |
758 | if (r == 0) | |
759 | break; | |
760 | ||
761 | fputs(line, stream); | |
762 | fputc('\n', stream); | |
763 | } | |
764 | } | |
765 | ||
766 | return memstream_finalize(&m, ret, NULL); | |
767 | } | |
768 | ||
769 | /* Returns 1 if the parent was found. | |
770 | * Returns 0 if there is not a process we can call the pid's | |
771 | * container parent (the pid's process isn't 'containerized'). | |
772 | * Returns a negative number on errors. | |
773 | */ | |
774 | static int get_process_container_parent_cmdline(PidRef *pid, char** ret_cmdline) { | |
775 | int r; | |
776 | ||
777 | assert(pidref_is_set(pid)); | |
778 | assert(!pidref_is_remote(pid)); | |
779 | ||
780 | r = pidref_from_same_root_fs(pid, &PIDREF_MAKE_FROM_PID(1)); | |
781 | if (r < 0) | |
782 | return r; | |
783 | if (r > 0) { | |
784 | /* The process uses system root. */ | |
785 | *ret_cmdline = NULL; | |
786 | return 0; | |
787 | } | |
788 | ||
789 | _cleanup_(pidref_done) PidRef container_pid = PIDREF_NULL; | |
790 | r = namespace_get_leader(pid, NAMESPACE_MOUNT, &container_pid); | |
791 | if (r < 0) | |
792 | return r; | |
793 | ||
794 | r = pidref_get_cmdline(&container_pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, ret_cmdline); | |
795 | if (r < 0) | |
796 | return r; | |
797 | ||
798 | return 1; | |
799 | } | |
800 | ||
801 | static int change_uid_gid(const Context *context) { | |
802 | int r; | |
803 | ||
804 | assert(context); | |
805 | ||
806 | uid_t uid = context->uid; | |
807 | gid_t gid = context->gid; | |
808 | ||
809 | if (uid_is_system(uid)) { | |
810 | const char *user = "systemd-coredump"; | |
811 | ||
812 | r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); | |
813 | if (r < 0) { | |
814 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
815 | uid = gid = 0; | |
816 | } | |
817 | } | |
818 | ||
819 | return drop_privileges(uid, gid, 0); | |
820 | } | |
821 | ||
822 | static int attach_mount_tree(int mount_tree_fd) { | |
823 | int r; | |
824 | ||
825 | assert(mount_tree_fd >= 0); | |
826 | ||
827 | r = detach_mount_namespace(); | |
828 | if (r < 0) | |
829 | return log_warning_errno(r, "Failed to detach mount namespace: %m"); | |
830 | ||
831 | r = mkdir_p_label(MOUNT_TREE_ROOT, 0555); | |
832 | if (r < 0) | |
833 | return log_warning_errno(r, "Failed to create directory: %m"); | |
834 | ||
835 | r = mount_setattr(mount_tree_fd, "", AT_EMPTY_PATH, | |
836 | &(struct mount_attr) { | |
837 | /* MOUNT_ATTR_NOSYMFOLLOW is left out on purpose to allow libdwfl to resolve symlinks. | |
838 | * libdwfl will use openat2() with RESOLVE_IN_ROOT so there is no risk of symlink escape. | |
839 | * https://sourceware.org/git/?p=elfutils.git;a=patch;h=06f0520f9a78b07c11c343181d552791dd630346 */ | |
840 | .attr_set = MOUNT_ATTR_RDONLY|MOUNT_ATTR_NOSUID|MOUNT_ATTR_NODEV|MOUNT_ATTR_NOEXEC, | |
841 | .propagation = MS_SLAVE, | |
842 | }, sizeof(struct mount_attr)); | |
843 | if (r < 0) | |
844 | return log_warning_errno(errno, "Failed to change properties of mount tree: %m"); | |
845 | ||
846 | r = move_mount(mount_tree_fd, "", -EBADF, MOUNT_TREE_ROOT, MOVE_MOUNT_F_EMPTY_PATH); | |
847 | if (r < 0) | |
848 | return log_warning_errno(errno, "Failed to attach mount tree: %m"); | |
849 | ||
850 | return 0; | |
851 | } | |
852 | ||
/* Stores and logs one coredump.
 *
 * The core read from input_fd is streamed to disk via save_external_coredump(), the optional mount
 * tree from the context is attached, privileges are dropped, a stack trace and package metadata
 * are extracted if possible, and finally all fields collected in iovw (plus the ones added here)
 * are sent to the journal with sd_journal_sendv().
 *
 * The order of the steps matters: removing the on-disk file and attaching the mount tree happen
 * before change_uid_gid(), everything that touches the core contents happens after it.
 *
 * Returns 0 on success (this includes the case where journald itself is the crashed process and
 * sending fails with -EAGAIN), a negative value on failure. */
static int submit_coredump(
                const Context *context,
                struct iovec_wrapper *iovw,
                int input_fd) {

        _cleanup_(sd_json_variant_unrefp) sd_json_variant *json_metadata = NULL;
        _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF;
        _cleanup_free_ char *filename = NULL, *coredump_data = NULL, *stacktrace = NULL;
        const char *module_name, *root = NULL;
        uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX;
        bool truncated = false, written = false;
        sd_json_variant *module_json;
        int r;

        assert(context);
        assert(iovw);
        assert(input_fd >= 0);

        /* Vacuum before we write anything again */
        (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);

        /* Always stream the coredump to disk, if that's possible */
        written = save_external_coredump(
                        context, input_fd,
                        &filename, &coredump_node_fd, &coredump_fd,
                        &coredump_size, &coredump_compressed_size, &truncated) >= 0;
        if (written) {
                /* If we could write it to disk we can now process it. */
                /* If we don't want to keep the coredump on disk, remove it now, as later on we
                 * will lack the privileges for it. However, we keep the fd to it, so that we can
                 * still process it and log it. */
                r = maybe_remove_external_coredump(
                                context,
                                filename,
                                coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size);
                if (r < 0)
                        return r;
                if (r == 0)
                        /* The file was kept: advertise its path in the journal entry. */
                        (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename);
                else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
                        log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
                                 coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size, arg_external_size_max);

                /* Vacuum again, but exclude the coredump we just created */
                (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
        }

        /* Attaching the mount tree is best-effort: if it fails, root stays NULL. */
        if (context->mount_tree_fd >= 0 && attach_mount_tree(context->mount_tree_fd) >= 0)
                root = MOUNT_TREE_ROOT;

        /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the
         * coredump memory under the user's uid. This also ensures that the credentials journald will see are
         * the ones of the coredumping user, thus making sure the user gets access to the core dump. Let's
         * also get rid of all capabilities, if we run as root, we won't need them anymore. */
        r = change_uid_gid(context);
        if (r < 0)
                return log_error_errno(r, "Failed to drop privileges: %m");

        if (written) {
                /* Try to get a stack trace if we can */
                if (coredump_size > arg_process_size_max)
                        log_debug("Not generating stack trace: core size %"PRIu64" is greater "
                                  "than %"PRIu64" (the configured maximum)",
                                  coredump_size, arg_process_size_max);
                else if (coredump_fd >= 0) {
                        bool skip = startswith(context->meta[META_COMM], "systemd-coredum"); /* COMM is 16 bytes usually */

                        /* Failures are ignored here; stacktrace and json_metadata then simply stay NULL. */
                        (void) parse_elf_object(coredump_fd,
                                                context->meta[META_EXE],
                                                root,
                                                /* fork_disable_dump= */ skip, /* avoid loops */
                                                &stacktrace,
                                                &json_metadata);
                }
        }

        /* Compose the human-readable MESSAGE= field. */
        _cleanup_free_ char *core_message = NULL;
        core_message = strjoin(
                        "Process ", context->meta[META_ARGV_PID],
                        " (", context->meta[META_COMM],
                        ") of user ", context->meta[META_ARGV_UID],
                        written ? " dumped core." : " terminated abnormally without generating a coredump.");
        if (!core_message)
                return log_oom();

        if (context->is_journald && filename)
                if (!strextend(&core_message, "\nCoredump diverted to ", filename))
                        return log_oom();

        if (stacktrace)
                if (!strextend(&core_message, "\n\n", stacktrace))
                        return log_oom();

        if (context->is_journald)
                /* We might not be able to log to the journal, so let's always print the message to another
                 * log target. The target was set previously to something safe. */
                log_dispatch(LOG_ERR, 0, core_message);

        (void) iovw_put_string_field(iovw, "MESSAGE=", core_message);

        if (truncated)
                (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1");

        /* If we managed to parse any ELF metadata (build-id, ELF package meta),
         * attach it as journal metadata. */
        if (json_metadata) {
                _cleanup_free_ char *formatted_json = NULL;

                r = sd_json_variant_format(json_metadata, 0, &formatted_json);
                if (r < 0)
                        return log_error_errno(r, "Failed to format JSON package metadata: %m");

                (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_JSON=", formatted_json);
        }

        /* In the unlikely scenario that context->meta[META_EXE] is not available,
         * let's avoid guessing the module name and skip the loop. */
        if (context->meta[META_EXE])
                JSON_VARIANT_OBJECT_FOREACH(module_name, module_json, json_metadata) {
                        sd_json_variant *t;

                        /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */
                        if (!path_equal_filename(module_name, context->meta[META_EXE]))
                                continue;

                        t = sd_json_variant_by_key(module_json, "name");
                        if (t)
                                (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_NAME=", sd_json_variant_string(t));

                        t = sd_json_variant_by_key(module_json, "version");
                        if (t)
                                (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_VERSION=", sd_json_variant_string(t));
                }

        /* Optionally store the entire coredump in the journal */
        if (arg_storage == COREDUMP_STORAGE_JOURNAL && coredump_fd >= 0) {
                if (coredump_size <= arg_journal_size_max) {
                        size_t sz = 0;

                        /* Store the coredump itself in the journal */

                        r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
                        if (r >= 0) {
                                /* On success the buffer is referenced by iovw, hence disarm our own cleanup of it. */
                                if (iovw_put(iovw, coredump_data, sz) >= 0)
                                        TAKE_PTR(coredump_data);
                        } else
                                log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
                } else
                        log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
                                 coredump_size, arg_journal_size_max);
        }

        /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the
         * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write
         * the coredump to the socket. */

        if (context->is_journald) {
                r = journal_fd_nonblock(true);
                if (r < 0)
                        return log_error_errno(r, "Failed to make journal socket non-blocking: %m");
        }

        /* The result is evaluated only after the socket has been switched back to blocking mode below. */
        r = sd_journal_sendv(iovw->iovec, iovw->count);

        if (context->is_journald) {
                int k;

                k = journal_fd_nonblock(false);
                if (k < 0)
                        return log_error_errno(k, "Failed to make journal socket blocking: %m");
        }

        /* -EAGAIN is tolerated only if journald itself crashed; every other failure is fatal. */
        if (r == -EAGAIN && context->is_journald)
                log_warning_errno(r, "Failed to log journal coredump, ignoring: %m");
        else if (r < 0)
                return log_error_errno(r, "Failed to log coredump: %m");

        return 0;
}
1032 | ||
1033 | static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) { | |
1034 | const char *unit; | |
1035 | int r; | |
1036 | ||
1037 | assert(context); | |
1038 | assert(iovw); | |
1039 | ||
1040 | /* Converts the data in the iovec array iovw into separate fields. Fills in context->meta[] (for | |
1041 | * which no memory is allocated, it just contains direct pointers into the iovec array memory). */ | |
1042 | ||
1043 | bool have_signal_name = false; | |
1044 | FOREACH_ARRAY(iovec, iovw->iovec, iovw->count) { | |
1045 | for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) { | |
1046 | /* Note that these strings are NUL-terminated, because we made sure that a | |
1047 | * trailing NUL byte is in the buffer, though not included in the iov_len | |
1048 | * count (see process_socket() and gather_pid_metadata_*()). */ | |
1049 | assert(((char*) iovec->iov_base)[iovec->iov_len] == 0); | |
1050 | ||
1051 | const char *p = memory_startswith(iovec->iov_base, iovec->iov_len, meta_field_names[i]); | |
1052 | if (p) { | |
1053 | context->meta[i] = p; | |
1054 | context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]); | |
1055 | break; | |
1056 | } | |
1057 | } | |
1058 | ||
1059 | have_signal_name = have_signal_name || | |
1060 | memory_startswith(iovec->iov_base, iovec->iov_len, "COREDUMP_SIGNAL_NAME="); | |
1061 | } | |
1062 | ||
1063 | /* The basic fields from argv[] should always be there, refuse early if not. */ | |
1064 | for (int i = 0; i < _META_ARGV_REQUIRED; i++) | |
1065 | if (!context->meta[i]) | |
1066 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1067 | "A required (%s) has not been sent, aborting.", meta_field_names[i]); | |
1068 | ||
1069 | pid_t parsed_pid; | |
1070 | r = parse_pid(context->meta[META_ARGV_PID], &parsed_pid); | |
1071 | if (r < 0) | |
1072 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); | |
1073 | if (pidref_is_set(&context->pidref)) { | |
1074 | if (context->pidref.pid != parsed_pid) | |
1075 | return log_error_errno(r, "Passed PID " PID_FMT " does not match passed " PID_FMT ": %m", | |
1076 | parsed_pid, context->pidref.pid); | |
1077 | } else { | |
1078 | r = pidref_set_pid(&context->pidref, parsed_pid); | |
1079 | if (r < 0) | |
1080 | return log_error_errno(r, "Failed to initialize pidref from pid " PID_FMT ": %m", parsed_pid); | |
1081 | } | |
1082 | ||
1083 | r = parse_uid(context->meta[META_ARGV_UID], &context->uid); | |
1084 | if (r < 0) | |
1085 | return log_error_errno(r, "Failed to parse UID \"%s\": %m", context->meta[META_ARGV_UID]); | |
1086 | ||
1087 | r = parse_gid(context->meta[META_ARGV_GID], &context->gid); | |
1088 | if (r < 0) | |
1089 | return log_error_errno(r, "Failed to parse GID \"%s\": %m", context->meta[META_ARGV_GID]); | |
1090 | ||
1091 | r = parse_signo(context->meta[META_ARGV_SIGNAL], &context->signo); | |
1092 | if (r < 0) | |
1093 | log_warning_errno(r, "Failed to parse signal number \"%s\", ignoring: %m", context->meta[META_ARGV_SIGNAL]); | |
1094 | ||
1095 | r = safe_atou64(context->meta[META_ARGV_RLIMIT], &context->rlimit); | |
1096 | if (r < 0) | |
1097 | log_warning_errno(r, "Failed to parse resource limit \"%s\", ignoring: %m", context->meta[META_ARGV_RLIMIT]); | |
1098 | ||
1099 | /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to SUID_DUMP_SAFE (2), | |
1100 | * if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */ | |
1101 | if (context->meta[META_ARGV_DUMPABLE]) { | |
1102 | r = safe_atou(context->meta[META_ARGV_DUMPABLE], &context->dumpable); | |
1103 | if (r < 0) | |
1104 | return log_error_errno(r, "Failed to parse dumpable field \"%s\": %m", context->meta[META_ARGV_DUMPABLE]); | |
1105 | if (context->dumpable > SUID_DUMP_SAFE) | |
1106 | log_notice("Got unexpected %%d/dumpable value %u.", context->dumpable); | |
1107 | } | |
1108 | ||
1109 | unit = context->meta[META_UNIT]; | |
1110 | context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); | |
1111 | context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); | |
1112 | ||
1113 | /* After parsing everything, let's also synthesize a new iovw field for the textual signal name if it | |
1114 | * isn't already set. */ | |
1115 | if (SIGNAL_VALID(context->signo) && !have_signal_name) | |
1116 | (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(context->signo)); | |
1117 | ||
1118 | return 0; | |
1119 | } | |
1120 | ||
/* Receives one coredump over the socket fd and submits it.
 *
 * The sender first transmits a series of non-empty datagrams, each of which is stored (with an
 * extra trailing NUL byte) as one entry of the iovec array. These are followed by up to three
 * zero-length datagrams that each carry exactly one file descriptor: the coredump fd, the pidfd and
 * the mount tree fd, in that order. The 'state' variable tracks which of these has been seen.
 *
 * Once the receive loop is left the fields are parsed with context_parse_iovw(), the presence of
 * the mandatory fields is verified, and the core is handed to submit_coredump().
 *
 * Returns the result of submit_coredump() on success, a negative value on failure. */
static int process_socket(int fd) {
        _cleanup_(iovw_done_free) struct iovec_wrapper iovw = {};
        _cleanup_(context_done) Context context = CONTEXT_NULL;
        _cleanup_close_ int input_fd = -EBADF;
        enum {
                STATE_PAYLOAD,
                STATE_INPUT_FD_DONE,
                STATE_PID_FD_DONE,
        } state = STATE_PAYLOAD;
        int r;

        assert(fd >= 0);

        log_setup();

        log_debug("Processing coredump received via socket...");

        for (;;) {
                CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control;
                struct msghdr mh = {
                        .msg_control = &control,
                        .msg_controllen = sizeof(control),
                        .msg_iovlen = 1,
                };
                ssize_t n, l;

                l = next_datagram_size_fd(fd);
                if (l < 0)
                        return log_error_errno(l, "Failed to determine datagram size to read: %m");

                /* One extra byte is allocated for the trailing NUL byte added further down. */
                _cleanup_(iovec_done) struct iovec iovec = {
                        .iov_len = l,
                        .iov_base = malloc(l + 1),
                };
                if (!iovec.iov_base)
                        return log_oom();

                mh.msg_iov = &iovec;

                n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC);
                if (n < 0)
                        return log_error_errno(n, "Failed to receive datagram: %m");

                /* The final zero-length datagrams ("sentinels") carry file descriptors and tell us that
                 * we're done. There are three sentinels: one with just the coredump fd, followed by one with
                 * the pidfd, and finally one with the mount tree fd. The latter two or the last one may be
                 * omitted (which is supported for compatibility with older systemd version, in particular to
                 * facilitate cross-container coredumping). */
                if (n == 0) {
                        struct cmsghdr *found;

                        found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
                        if (!found) {
                                /* This is zero length message but it either doesn't carry a single
                                 * descriptor, or it has more than one. This is a protocol violation so let's
                                 * bail out.
                                 *
                                 * Well, not quite! In practice there's one more complication: EOF on
                                 * SOCK_SEQPACKET is not distinguishable from a zero length datagram. Hence
                                 * if we get a zero length datagram without fds we consider it EOF, and
                                 * that's permissible for the final two fds. Hence let's be strict on the
                                 * first fd, but lenient on the other two. */

                                if (!cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, (socklen_t) -1) && state != STATE_PAYLOAD)
                                        /* No fds, and already got the first fd → we are done. */
                                        break;

                                cmsg_close_all(&mh);
                                return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                       "Received zero length message with zero or more than one file descriptor(s), expected one.");
                        }

                        /* Take possession of the received fd, depending on which sentinel this is. The first
                         * two cases go on receiving, the last one leaves the loop. */
                        switch (state) {

                        case STATE_PAYLOAD:
                                assert(input_fd < 0);
                                input_fd = *CMSG_TYPED_DATA(found, int);
                                state = STATE_INPUT_FD_DONE;
                                continue;

                        case STATE_INPUT_FD_DONE:
                                assert(!pidref_is_set(&context.pidref));

                                r = pidref_set_pidfd_consume(&context.pidref, *CMSG_TYPED_DATA(found, int));
                                if (r < 0)
                                        return log_error_errno(r, "Failed to initialize pidref: %m");

                                state = STATE_PID_FD_DONE;
                                continue;

                        case STATE_PID_FD_DONE:
                                assert(context.mount_tree_fd < 0);
                                context.mount_tree_fd = *CMSG_TYPED_DATA(found, int);
                                /* We have all FDs we need so we are done. */
                                break;
                        }

                        break;
                }

                /* From here on we deal with a non-empty datagram: close any fds that came along with it
                 * before validating it. */
                cmsg_close_all(&mh);

                /* Only zero length messages are allowed after the first message that carried a file descriptor. */
                if (state != STATE_PAYLOAD)
                        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Received unexpected message with non-zero length.");

                /* Payload messages should not carry fds */
                if (cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, (socklen_t) -1))
                        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "Received payload message with file descriptor(s), expected none.");

                /* Add trailing NUL byte, in case these are strings */
                ((char*) iovec.iov_base)[n] = 0;
                iovec.iov_len = (size_t) n;

                if (iovw_put(&iovw, iovec.iov_base, iovec.iov_len) < 0)
                        return log_oom();

                /* The buffer is referenced by iovw now, hence disarm the cleanup handler of our local iovec. */
                TAKE_STRUCT(iovec);
        }

        /* Make sure we got all data we really need */
        assert(input_fd >= 0);

        r = context_parse_iovw(&context, &iovw);
        if (r < 0)
                return r;

        /* Make sure we received all the expected fields. We support being called by an *older*
         * systemd-coredump from the outside, so we require only the basic set of fields that
         * was being sent when the support for sending to containers over a socket was added
         * in a108c43e36d3ceb6e34efe37c014fc2cda856000. */
        meta_argv_t i;
        FOREACH_ARGUMENT(i,
                         META_ARGV_PID,
                         META_ARGV_UID,
                         META_ARGV_GID,
                         META_ARGV_SIGNAL,
                         META_ARGV_TIMESTAMP,
                         META_ARGV_RLIMIT,
                         META_ARGV_HOSTNAME,
                         META_COMM)
                if (!context.meta[i])
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Mandatory argument %s not received on socket, aborting.",
                                               meta_field_names[i]);

        return submit_coredump(&context, &iovw, input_fd);
}
1270 | ||
1271 | static int send_iovec(const struct iovec_wrapper *iovw, int input_fd, PidRef *pidref, int mount_tree_fd) { | |
1272 | _cleanup_close_ int fd = -EBADF; | |
1273 | int r; | |
1274 | ||
1275 | assert(iovw); | |
1276 | assert(input_fd >= 0); | |
1277 | ||
1278 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
1279 | if (fd < 0) | |
1280 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1281 | ||
1282 | r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/coredump"); | |
1283 | if (r < 0) | |
1284 | return log_error_errno(r, "Failed to connect to coredump service: %m"); | |
1285 | ||
1286 | for (size_t i = 0; i < iovw->count; i++) { | |
1287 | struct msghdr mh = { | |
1288 | .msg_iov = iovw->iovec + i, | |
1289 | .msg_iovlen = 1, | |
1290 | }; | |
1291 | struct iovec copy[2]; | |
1292 | ||
1293 | for (;;) { | |
1294 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1295 | break; | |
1296 | ||
1297 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
1298 | /* This field didn't fit? That's a pity. Given that this is | |
1299 | * just metadata, let's truncate the field at half, and try | |
1300 | * again. We append three dots, in order to show that this is | |
1301 | * truncated. */ | |
1302 | ||
1303 | if (mh.msg_iov != copy) { | |
1304 | /* We don't want to modify the caller's iovec, hence | |
1305 | * let's create our own array, consisting of two new | |
1306 | * iovecs, where the first is a (truncated) copy of | |
1307 | * what we want to send, and the second one contains | |
1308 | * the trailing dots. */ | |
1309 | copy[0] = iovw->iovec[i]; | |
1310 | copy[1] = IOVEC_MAKE(((const char[]){'.', '.', '.'}), 3); | |
1311 | ||
1312 | mh.msg_iov = copy; | |
1313 | mh.msg_iovlen = 2; | |
1314 | } | |
1315 | ||
1316 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1317 | continue; | |
1318 | } | |
1319 | ||
1320 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); | |
1321 | } | |
1322 | } | |
1323 | ||
1324 | /* First sentinel: the coredump fd */ | |
1325 | r = send_one_fd(fd, input_fd, 0); | |
1326 | if (r < 0) | |
1327 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1328 | ||
1329 | /* The optional second sentinel: the pidfd */ | |
1330 | if (!pidref_is_set(pidref) || pidref->fd < 0) /* If we have no pidfd, stop now */ | |
1331 | return 0; | |
1332 | ||
1333 | r = send_one_fd(fd, pidref->fd, 0); | |
1334 | if (r < 0) | |
1335 | return log_error_errno(r, "Failed to send pidfd: %m"); | |
1336 | ||
1337 | /* The optional third sentinel: the mount tree fd */ | |
1338 | if (mount_tree_fd < 0) /* If we have no mount tree, stop now */ | |
1339 | return 0; | |
1340 | ||
1341 | r = send_one_fd(fd, mount_tree_fd, 0); | |
1342 | if (r < 0) | |
1343 | return log_error_errno(r, "Failed to send mount tree fd: %m"); | |
1344 | ||
1345 | return 0; | |
1346 | } | |
1347 | ||
1348 | static int gather_pid_metadata_from_argv( | |
1349 | struct iovec_wrapper *iovw, | |
1350 | Context *context, | |
1351 | int argc, char **argv) { | |
1352 | ||
1353 | _cleanup_(pidref_done) PidRef local_pidref = PIDREF_NULL; | |
1354 | int r, kernel_fd = -EBADF; | |
1355 | ||
1356 | assert(iovw); | |
1357 | assert(context); | |
1358 | ||
1359 | /* We gather all metadata that were passed via argv[] into an array of iovecs that | |
1360 | * we'll forward to the socket unit. | |
1361 | * | |
1362 | * We require at least _META_ARGV_REQUIRED args, but will accept more. | |
1363 | * We know how to parse _META_ARGV_MAX args. The rest will be ignored. */ | |
1364 | ||
1365 | if (argc < _META_ARGV_REQUIRED) | |
1366 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1367 | "Not enough arguments passed by the kernel (%i, expected between %i and %i).", | |
1368 | argc, _META_ARGV_REQUIRED, _META_ARGV_MAX); | |
1369 | ||
1370 | for (int i = 0; i < MIN(argc, _META_ARGV_MAX); i++) { | |
1371 | _cleanup_free_ char *buf = NULL; | |
1372 | const char *t = argv[i]; | |
1373 | ||
1374 | if (i == META_ARGV_TIMESTAMP) { | |
1375 | /* The journal fields contain the timestamp padded with six | |
1376 | * zeroes, so that the kernel-supplied 1s granularity timestamps | |
1377 | * becomes 1μs granularity, i.e. the granularity systemd usually | |
1378 | * operates in. */ | |
1379 | buf = strjoin(argv[i], "000000"); | |
1380 | if (!buf) | |
1381 | return log_oom(); | |
1382 | ||
1383 | t = buf; | |
1384 | } | |
1385 | ||
1386 | if (i == META_ARGV_PID) { | |
1387 | /* Store this so that we can check whether the core will be forwarded to a container | |
1388 | * even when the kernel doesn't provide a pidfd. Can be dropped once baseline is | |
1389 | * >= v6.16. */ | |
1390 | r = pidref_set_pidstr(&local_pidref, t); | |
1391 | if (r < 0) | |
1392 | return log_error_errno(r, "Failed to initialize pidref from pid %s: %m", t); | |
1393 | } | |
1394 | ||
1395 | if (i == META_ARGV_PIDFD) { | |
1396 | /* If the current kernel doesn't support the %F specifier (which resolves to a | |
1397 | * pidfd), but we included it in the core_pattern expression, we'll receive an empty | |
1398 | * string here. Deal with that gracefully. */ | |
1399 | if (isempty(t)) | |
1400 | continue; | |
1401 | ||
1402 | assert(!pidref_is_set(&context->pidref)); | |
1403 | assert(kernel_fd < 0); | |
1404 | ||
1405 | kernel_fd = parse_fd(t); | |
1406 | if (kernel_fd < 0) | |
1407 | return log_error_errno(kernel_fd, "Failed to parse pidfd \"%s\": %m", t); | |
1408 | ||
1409 | r = pidref_set_pidfd(&context->pidref, kernel_fd); | |
1410 | if (r < 0) | |
1411 | return log_error_errno(r, "Failed to initialize pidref from pidfd %d: %m", kernel_fd); | |
1412 | ||
1413 | context->got_pidfd = 1; | |
1414 | ||
1415 | /* If there are containers involved with different versions of the code they might | |
1416 | * not be using pidfds, so it would be wrong to set the metadata, skip it. */ | |
1417 | r = pidref_in_same_namespace(/* pid1 = */ NULL, &context->pidref, NAMESPACE_PID); | |
1418 | if (r < 0) | |
1419 | log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m"); | |
1420 | if (r <= 0) | |
1421 | continue; | |
1422 | ||
1423 | /* We don't print the fd number in the journal as it's meaningless, but we still | |
1424 | * record that the parsing was done with a kernel-provided fd as it means it's safe | |
1425 | * from races, which is valuable information to provide in the journal record. */ | |
1426 | t = "1"; | |
1427 | } | |
1428 | ||
1429 | r = iovw_put_string_field(iovw, meta_field_names[i], t); | |
1430 | if (r < 0) | |
1431 | return r; | |
1432 | } | |
1433 | ||
1434 | /* Cache some of the process metadata we collected so far and that we'll need to | |
1435 | * access soon. */ | |
1436 | r = context_parse_iovw(context, iovw); | |
1437 | if (r < 0) | |
1438 | return r; | |
1439 | ||
1440 | /* If the kernel didn't give us a PIDFD, then use the one derived from the | |
1441 | * PID immediately, given we have it. */ | |
1442 | if (!pidref_is_set(&context->pidref)) | |
1443 | context->pidref = TAKE_PIDREF(local_pidref); | |
1444 | ||
1445 | /* Close the kernel-provided FD as the last thing after everything else succeeded. */ | |
1446 | kernel_fd = safe_close(kernel_fd); | |
1447 | ||
1448 | return 0; | |
1449 | } | |
1450 | ||
1451 | static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *context) { | |
1452 | uid_t owner_uid; | |
1453 | pid_t pid; | |
1454 | char *t; | |
1455 | size_t size; | |
1456 | const char *p; | |
1457 | int r; | |
1458 | ||
1459 | assert(iovw); | |
1460 | assert(context); | |
1461 | ||
1462 | /* Note that if we fail on oom later on, we do not roll-back changes to the iovec | |
1463 | * structure. (It remains valid, with the first iovec fields initialized.) */ | |
1464 | ||
1465 | pid = context->pidref.pid; | |
1466 | ||
1467 | /* The following is mandatory */ | |
1468 | r = pidref_get_comm(&context->pidref, &t); | |
1469 | if (r < 0) | |
1470 | return log_error_errno(r, "Failed to get COMM: %m"); | |
1471 | ||
1472 | r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t); | |
1473 | if (r < 0) | |
1474 | return r; | |
1475 | ||
1476 | /* The following are optional, but we use them if present. */ | |
1477 | r = get_process_exe(pid, &t); | |
1478 | if (r >= 0) | |
1479 | r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t); | |
1480 | if (r < 0) | |
1481 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); | |
1482 | ||
1483 | if (cg_pidref_get_unit(&context->pidref, &t) >= 0) | |
1484 | (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t); | |
1485 | ||
1486 | if (cg_pid_get_user_unit(pid, &t) >= 0) | |
1487 | (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t); | |
1488 | ||
1489 | if (cg_pidref_get_session(&context->pidref, &t) >= 0) | |
1490 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t); | |
1491 | ||
1492 | if (cg_pidref_get_owner_uid(&context->pidref, &owner_uid) >= 0) { | |
1493 | r = asprintf(&t, UID_FMT, owner_uid); | |
1494 | if (r > 0) | |
1495 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t); | |
1496 | } | |
1497 | ||
1498 | if (sd_pid_get_slice(pid, &t) >= 0) | |
1499 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t); | |
1500 | ||
1501 | if (pidref_get_cmdline(&context->pidref, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, &t) >= 0) | |
1502 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t); | |
1503 | ||
1504 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) | |
1505 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t); | |
1506 | ||
1507 | if (compose_open_fds(pid, &t) >= 0) | |
1508 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t); | |
1509 | ||
1510 | p = procfs_file_alloca(pid, "status"); | |
1511 | if (read_full_file(p, &t, /* ret_size= */ NULL) >= 0) | |
1512 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t); | |
1513 | ||
1514 | p = procfs_file_alloca(pid, "maps"); | |
1515 | if (read_full_file(p, &t, /* ret_size= */ NULL) >= 0) | |
1516 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t); | |
1517 | ||
1518 | p = procfs_file_alloca(pid, "limits"); /* this uses 'seq_file' in kernel, use read_full_file_at() */ | |
1519 | if (read_full_file(p, &t, /* ret_size= */ NULL) >= 0) | |
1520 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t); | |
1521 | ||
1522 | p = procfs_file_alloca(pid, "cgroup"); | |
1523 | if (read_full_file(p, &t, /* ret_size= */ NULL) >= 0) | |
1524 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t); | |
1525 | ||
1526 | p = procfs_file_alloca(pid, "mountinfo"); | |
1527 | if (read_full_file(p, &t, /* ret_size= */ NULL) >= 0) | |
1528 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t); | |
1529 | ||
1530 | /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */ | |
1531 | p = procfs_file_alloca(pid, "auxv"); | |
1532 | if (read_full_file(p, &t, &size) >= 0) { | |
1533 | char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1); | |
1534 | if (buf) { | |
1535 | /* Add a dummy terminator to make context_parse_iovw() happy. */ | |
1536 | *mempcpy_typesafe(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size) = '\0'; | |
1537 | (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV=")); | |
1538 | } | |
1539 | ||
1540 | free(t); | |
1541 | } | |
1542 | ||
1543 | if (get_process_cwd(pid, &t) >= 0) | |
1544 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t); | |
1545 | ||
1546 | if (get_process_root(pid, &t) >= 0) { | |
1547 | bool proc_self_root_is_slash; | |
1548 | ||
1549 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
1550 | ||
1551 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t); | |
1552 | ||
1553 | /* If the process' root is "/", then there is a chance it has | |
1554 | * mounted own root and hence being containerized. */ | |
1555 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(&context->pidref, &t) > 0) | |
1556 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t); | |
1557 | } | |
1558 | ||
1559 | if (get_process_environ(pid, &t) >= 0) | |
1560 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t); | |
1561 | ||
1562 | /* Now that we have parsed info from /proc/ ensure the pidfd is still valid before continuing. */ | |
1563 | r = pidref_verify(&context->pidref); | |
1564 | if (r < 0) | |
1565 | return log_error_errno(r, "PIDFD validation failed: %m"); | |
1566 | ||
1567 | /* We successfully acquired all metadata. */ | |
1568 | return context_parse_iovw(context, iovw); | |
1569 | } | |
1570 | ||
1571 | static int send_ucred(int transport_fd, const struct ucred *ucred) { | |
1572 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {}; | |
1573 | struct msghdr mh = { | |
1574 | .msg_control = &control, | |
1575 | .msg_controllen = sizeof(control), | |
1576 | }; | |
1577 | struct cmsghdr *cmsg; | |
1578 | ||
1579 | assert(transport_fd >= 0); | |
1580 | assert(ucred); | |
1581 | ||
1582 | cmsg = CMSG_FIRSTHDR(&mh); | |
1583 | *cmsg = (struct cmsghdr) { | |
1584 | .cmsg_level = SOL_SOCKET, | |
1585 | .cmsg_type = SCM_CREDENTIALS, | |
1586 | .cmsg_len = CMSG_LEN(sizeof(struct ucred)), | |
1587 | }; | |
1588 | memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred)); | |
1589 | ||
1590 | return RET_NERRNO(sendmsg(transport_fd, &mh, MSG_NOSIGNAL)); | |
1591 | } | |
1592 | ||
1593 | static int receive_ucred(int transport_fd, struct ucred *ret_ucred) { | |
1594 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {}; | |
1595 | struct msghdr mh = { | |
1596 | .msg_control = &control, | |
1597 | .msg_controllen = sizeof(control), | |
1598 | }; | |
1599 | struct cmsghdr *cmsg = NULL; | |
1600 | struct ucred *ucred = NULL; | |
1601 | ssize_t n; | |
1602 | ||
1603 | assert(transport_fd >= 0); | |
1604 | assert(ret_ucred); | |
1605 | ||
1606 | n = recvmsg_safe(transport_fd, &mh, 0); | |
1607 | if (n < 0) | |
1608 | return n; | |
1609 | ||
1610 | CMSG_FOREACH(cmsg, &mh) | |
1611 | if (cmsg->cmsg_level == SOL_SOCKET && | |
1612 | cmsg->cmsg_type == SCM_CREDENTIALS && | |
1613 | cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) { | |
1614 | ||
1615 | assert(!ucred); | |
1616 | ucred = CMSG_TYPED_DATA(cmsg, struct ucred); | |
1617 | } | |
1618 | ||
1619 | if (!ucred) | |
1620 | return -EIO; | |
1621 | ||
1622 | *ret_ucred = *ucred; | |
1623 | ||
1624 | return 0; | |
1625 | } | |
1626 | ||
1627 | static int can_forward_coredump(Context *context, const PidRef *pid) { | |
1628 | _cleanup_free_ char *cgroup = NULL, *path = NULL, *unit = NULL; | |
1629 | int r; | |
1630 | ||
1631 | assert(context); | |
1632 | assert(pidref_is_set(pid)); | |
1633 | assert(!pidref_is_remote(pid)); | |
1634 | ||
1635 | /* We need to avoid a situation where the attacker crashes a SUID process or a root daemon and | |
1636 | * quickly replaces it with a namespaced process and we forward the coredump to the attacker, into | |
1637 | * the namespace. With %F/pidfd we can reliably check the namespace of the original process, hence we | |
1638 | * can allow forwarding. */ | |
1639 | if (!context->got_pidfd && context->dumpable != SUID_DUMP_USER) | |
1640 | return false; | |
1641 | ||
1642 | r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
1643 | if (r < 0) | |
1644 | return r; | |
1645 | ||
1646 | r = path_extract_directory(cgroup, &path); | |
1647 | if (r < 0) | |
1648 | return r; | |
1649 | ||
1650 | r = cg_path_get_unit_path(path, &unit); | |
1651 | if (r == -ENOMEM) | |
1652 | return log_oom(); | |
1653 | if (r == -ENXIO) | |
1654 | /* No valid units in this path. */ | |
1655 | return false; | |
1656 | if (r < 0) | |
1657 | return r; | |
1658 | ||
1659 | /* We require that this process belongs to a delegated cgroup | |
1660 | * (i.e. Delegate=yes), with CoredumpReceive=yes also. */ | |
1661 | r = cg_is_delegated(unit); | |
1662 | if (r <= 0) | |
1663 | return r; | |
1664 | ||
1665 | return cg_has_coredump_receive(unit); | |
1666 | } | |
1667 | ||
1668 | static int forward_coredump_to_container(Context *context) { | |
1669 | _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF; | |
1670 | _cleanup_close_pair_ int pair[2] = EBADF_PAIR; | |
1671 | pid_t child; | |
1672 | struct ucred ucred = { | |
1673 | .pid = context->pidref.pid, | |
1674 | .uid = context->uid, | |
1675 | .gid = context->gid, | |
1676 | }; | |
1677 | int r; | |
1678 | ||
1679 | assert(context); | |
1680 | ||
1681 | _cleanup_(pidref_done) PidRef leader_pid = PIDREF_NULL; | |
1682 | r = namespace_get_leader(&context->pidref, NAMESPACE_PID, &leader_pid); | |
1683 | if (r < 0) | |
1684 | return log_debug_errno(r, "Failed to get namespace leader: %m"); | |
1685 | ||
1686 | r = can_forward_coredump(context, &leader_pid); | |
1687 | if (r < 0) | |
1688 | return log_debug_errno(r, "Failed to check if coredump can be forwarded: %m"); | |
1689 | if (r == 0) | |
1690 | return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), | |
1691 | "Coredump will not be forwarded because no target cgroup was found."); | |
1692 | ||
1693 | r = RET_NERRNO(socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair)); | |
1694 | if (r < 0) | |
1695 | return log_debug_errno(r, "Failed to create socket pair: %m"); | |
1696 | ||
1697 | r = setsockopt_int(pair[1], SOL_SOCKET, SO_PASSCRED, true); | |
1698 | if (r < 0) | |
1699 | return log_debug_errno(r, "Failed to set SO_PASSCRED: %m"); | |
1700 | ||
1701 | r = pidref_namespace_open(&leader_pid, &pidnsfd, &mntnsfd, &netnsfd, &usernsfd, &rootfd); | |
1702 | if (r < 0) | |
1703 | return log_debug_errno(r, "Failed to open namespaces of PID " PID_FMT ": %m", leader_pid.pid); | |
1704 | ||
1705 | r = namespace_fork("(sd-coredumpns)", "(sd-coredump)", NULL, 0, | |
1706 | FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM, | |
1707 | pidnsfd, mntnsfd, netnsfd, usernsfd, rootfd, &child); | |
1708 | if (r < 0) | |
1709 | return log_debug_errno(r, "Failed to fork into namespaces of PID " PID_FMT ": %m", leader_pid.pid); | |
1710 | if (r == 0) { | |
1711 | pair[0] = safe_close(pair[0]); | |
1712 | ||
1713 | r = access_nofollow("/run/systemd/coredump", W_OK); | |
1714 | if (r < 0) { | |
1715 | log_debug_errno(r, "Cannot find coredump socket, exiting: %m"); | |
1716 | _exit(EXIT_FAILURE); | |
1717 | } | |
1718 | ||
1719 | r = receive_ucred(pair[1], &ucred); | |
1720 | if (r < 0) { | |
1721 | log_debug_errno(r, "Failed to receive ucred and fd: %m"); | |
1722 | _exit(EXIT_FAILURE); | |
1723 | } | |
1724 | ||
1725 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = iovw_new(); | |
1726 | if (!iovw) { | |
1727 | log_oom(); | |
1728 | _exit(EXIT_FAILURE); | |
1729 | } | |
1730 | ||
1731 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); | |
1732 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
1733 | (void) iovw_put_string_field(iovw, "COREDUMP_FORWARDED=", "1"); | |
1734 | ||
1735 | for (int i = 0; i < _META_ARGV_MAX; i++) { | |
1736 | char buf[DECIMAL_STR_MAX(pid_t)]; | |
1737 | const char *t = context->meta[i]; | |
1738 | ||
1739 | /* Patch some of the fields with the translated ucred data */ | |
1740 | switch (i) { | |
1741 | ||
1742 | case META_ARGV_PID: | |
1743 | xsprintf(buf, PID_FMT, ucred.pid); | |
1744 | t = buf; | |
1745 | break; | |
1746 | ||
1747 | case META_ARGV_UID: | |
1748 | xsprintf(buf, UID_FMT, ucred.uid); | |
1749 | t = buf; | |
1750 | break; | |
1751 | ||
1752 | case META_ARGV_GID: | |
1753 | xsprintf(buf, GID_FMT, ucred.gid); | |
1754 | t = buf; | |
1755 | break; | |
1756 | ||
1757 | default: | |
1758 | ; | |
1759 | } | |
1760 | ||
1761 | r = iovw_put_string_field(iovw, meta_field_names[i], t); | |
1762 | if (r < 0) { | |
1763 | log_debug_errno(r, "Failed to construct iovec: %m"); | |
1764 | _exit(EXIT_FAILURE); | |
1765 | } | |
1766 | } | |
1767 | ||
1768 | _cleanup_(context_done) Context child_context = CONTEXT_NULL; | |
1769 | r = context_parse_iovw(&child_context, iovw); | |
1770 | if (r < 0) { | |
1771 | log_debug_errno(r, "Failed to save context: %m"); | |
1772 | _exit(EXIT_FAILURE); | |
1773 | } | |
1774 | ||
1775 | r = gather_pid_metadata_from_procfs(iovw, &child_context); | |
1776 | if (r < 0) { | |
1777 | log_debug_errno(r, "Failed to gather metadata from procfs: %m"); | |
1778 | _exit(EXIT_FAILURE); | |
1779 | } | |
1780 | ||
1781 | r = send_iovec(iovw, STDIN_FILENO, &context->pidref, /* mount_tree_fd= */ -EBADF); | |
1782 | if (r < 0) { | |
1783 | log_debug_errno(r, "Failed to send iovec to coredump socket: %m"); | |
1784 | _exit(EXIT_FAILURE); | |
1785 | } | |
1786 | ||
1787 | _exit(EXIT_SUCCESS); | |
1788 | } | |
1789 | ||
1790 | pair[1] = safe_close(pair[1]); | |
1791 | ||
1792 | /* We need to translate the PID, UID, and GID of the crashing process | |
1793 | * to the container's namespaces. Do this by sending an SCM_CREDENTIALS | |
1794 | * message on a socket pair, and read the result when we join the | |
1795 | * container. The kernel will perform the translation for us. */ | |
1796 | r = send_ucred(pair[0], &ucred); | |
1797 | if (r < 0) | |
1798 | return log_debug_errno(r, "Failed to send metadata to container: %m"); | |
1799 | ||
1800 | r = wait_for_terminate_and_check("(sd-coredumpns)", child, 0); | |
1801 | if (r < 0) | |
1802 | return log_debug_errno(r, "Failed to wait for child to terminate: %m"); | |
1803 | if (r != EXIT_SUCCESS) | |
1804 | return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to process coredump in container."); | |
1805 | ||
1806 | return 0; | |
1807 | } | |
1808 | ||
1809 | static int acquire_pid_mount_tree_fd(const Context *context, int *ret_fd) { | |
1810 | /* Don't bother preparing environment if we can't pass it to libdwfl. */ | |
1811 | #if !HAVE_DWFL_SET_SYSROOT | |
1812 | *ret_fd = -EOPNOTSUPP; | |
1813 | log_debug("dwfl_set_sysroot() is not supported."); | |
1814 | #else | |
1815 | _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, fd = -EBADF; | |
1816 | _cleanup_close_pair_ int pair[2] = EBADF_PAIR; | |
1817 | int r; | |
1818 | ||
1819 | assert(context); | |
1820 | assert(ret_fd); | |
1821 | ||
1822 | if (!arg_enter_namespace) { | |
1823 | *ret_fd = -EHOSTDOWN; | |
1824 | log_debug("EnterNamespace=no so we won't use mount tree of the crashed process for generating backtrace."); | |
1825 | return 0; | |
1826 | } | |
1827 | ||
1828 | if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair) < 0) | |
1829 | return log_error_errno(errno, "Failed to create socket pair: %m"); | |
1830 | ||
1831 | r = pidref_namespace_open( | |
1832 | &context->pidref, | |
1833 | /* ret_pidns_fd= */ NULL, | |
1834 | &mntns_fd, | |
1835 | /* ret_netns_fd= */ NULL, | |
1836 | /* ret_userns_fd= */ NULL, | |
1837 | &root_fd); | |
1838 | if (r < 0) | |
1839 | return log_error_errno(r, "Failed to open mount namespace of crashing process: %m"); | |
1840 | ||
1841 | r = namespace_fork("(sd-mount-tree-ns)", | |
1842 | "(sd-mount-tree)", | |
1843 | /* except_fds= */ NULL, | |
1844 | /* n_except_fds= */ 0, | |
1845 | FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_LOG|FORK_WAIT, | |
1846 | /* pidns_fd= */ -EBADF, | |
1847 | mntns_fd, | |
1848 | /* netns_fd= */ -EBADF, | |
1849 | /* userns_fd= */ -EBADF, | |
1850 | root_fd, | |
1851 | NULL); | |
1852 | if (r < 0) | |
1853 | return r; | |
1854 | if (r == 0) { | |
1855 | pair[0] = safe_close(pair[0]); | |
1856 | ||
1857 | fd = open_tree(-EBADF, "/", AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); | |
1858 | if (fd < 0) { | |
1859 | log_error_errno(errno, "Failed to clone mount tree: %m"); | |
1860 | _exit(EXIT_FAILURE); | |
1861 | } | |
1862 | ||
1863 | r = send_one_fd(pair[1], fd, 0); | |
1864 | if (r < 0) { | |
1865 | log_error_errno(r, "Failed to send mount tree to parent: %m"); | |
1866 | _exit(EXIT_FAILURE); | |
1867 | } | |
1868 | ||
1869 | _exit(EXIT_SUCCESS); | |
1870 | } | |
1871 | ||
1872 | pair[1] = safe_close(pair[1]); | |
1873 | ||
1874 | fd = receive_one_fd(pair[0], MSG_DONTWAIT); | |
1875 | if (fd < 0) | |
1876 | return log_error_errno(fd, "Failed to receive mount tree: %m"); | |
1877 | ||
1878 | *ret_fd = TAKE_FD(fd); | |
1879 | #endif | |
1880 | return 0; | |
1881 | } | |
1882 | ||
1883 | static int process_kernel(int argc, char *argv[]) { | |
1884 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; | |
1885 | _cleanup_(context_done) Context context = CONTEXT_NULL; | |
1886 | int r; | |
1887 | ||
1888 | /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds | |
1889 | * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to | |
1890 | * /dev/null. */ | |
1891 | r = rearrange_stdio(STDIN_FILENO, -EBADF, -EBADF); | |
1892 | if (r < 0) | |
1893 | return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m"); | |
1894 | ||
1895 | log_debug("Processing coredump received from the kernel..."); | |
1896 | ||
1897 | iovw = iovw_new(); | |
1898 | if (!iovw) | |
1899 | return log_oom(); | |
1900 | ||
1901 | /* Collect all process metadata passed by the kernel through argv[] */ | |
1902 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1); | |
1903 | if (r < 0) | |
1904 | return r; | |
1905 | ||
1906 | /* Collect the rest of the process metadata retrieved from the runtime */ | |
1907 | r = gather_pid_metadata_from_procfs(iovw, &context); | |
1908 | if (r < 0) | |
1909 | return r; | |
1910 | ||
1911 | if (!context.is_journald) | |
1912 | /* OK, now we know it's not the journal, hence we can make use of it now. */ | |
1913 | log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG); | |
1914 | ||
1915 | /* Log minimal metadata now, so it is not lost if the system is about to shut down. */ | |
1916 | log_info("Process %s (%s) of user %s terminated abnormally with signal %s/%s, processing...", | |
1917 | context.meta[META_ARGV_PID], context.meta[META_COMM], | |
1918 | context.meta[META_ARGV_UID], context.meta[META_ARGV_SIGNAL], | |
1919 | signal_to_string(context.signo)); | |
1920 | ||
1921 | r = pidref_in_same_namespace(/* pid1 = */ NULL, &context.pidref, NAMESPACE_PID); | |
1922 | if (r < 0) | |
1923 | log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m"); | |
1924 | if (r == 0) { | |
1925 | /* If this fails, fallback to the old behavior so that | |
1926 | * there is still some record of the crash. */ | |
1927 | r = forward_coredump_to_container(&context); | |
1928 | if (r >= 0) | |
1929 | return 0; | |
1930 | ||
1931 | r = acquire_pid_mount_tree_fd(&context, &context.mount_tree_fd); | |
1932 | if (r < 0) | |
1933 | log_warning_errno(r, "Failed to access the mount tree of a container, ignoring: %m"); | |
1934 | } | |
1935 | ||
1936 | /* If this is PID 1, disable coredump collection, we'll unlikely be able to process | |
1937 | * it later on. | |
1938 | * | |
1939 | * FIXME: maybe we should disable coredumps generation from the beginning and | |
1940 | * re-enable it only when we know it's either safe (i.e. we're not running OOM) or | |
1941 | * it's not PID 1 ? */ | |
1942 | if (context.is_pid1) { | |
1943 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1944 | disable_coredumps(); | |
1945 | } | |
1946 | ||
1947 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); | |
1948 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
1949 | ||
1950 | if (context.is_journald || context.is_pid1) | |
1951 | return submit_coredump(&context, iovw, STDIN_FILENO); | |
1952 | ||
1953 | return send_iovec(iovw, STDIN_FILENO, &context.pidref, context.mount_tree_fd); | |
1954 | } | |
1955 | ||
1956 | static int process_backtrace(int argc, char *argv[]) { | |
1957 | _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO); | |
1958 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; | |
1959 | _cleanup_(context_done) Context context = CONTEXT_NULL; | |
1960 | char *message; | |
1961 | int r; | |
1962 | ||
1963 | assert(argc >= 2); | |
1964 | ||
1965 | log_debug("Processing backtrace on stdin..."); | |
1966 | ||
1967 | iovw = iovw_new(); | |
1968 | if (!iovw) | |
1969 | return log_oom(); | |
1970 | ||
1971 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR); | |
1972 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
1973 | ||
1974 | /* Collect all process metadata from argv[] by making sure to skip the | |
1975 | * '--backtrace' option */ | |
1976 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2); | |
1977 | if (r < 0) | |
1978 | return r; | |
1979 | ||
1980 | /* Collect the rest of the process metadata retrieved from the runtime */ | |
1981 | r = gather_pid_metadata_from_procfs(iovw, &context); | |
1982 | if (r < 0) | |
1983 | return r; | |
1984 | ||
1985 | for (;;) { | |
1986 | r = journal_importer_process_data(&importer); | |
1987 | if (r < 0) | |
1988 | return log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
1989 | if (r == 1 || /* complete entry */ | |
1990 | journal_importer_eof(&importer)) /* end of data */ | |
1991 | break; | |
1992 | } | |
1993 | ||
1994 | if (journal_importer_eof(&importer)) { | |
1995 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
1996 | ||
1997 | message = strjoina("Process ", context.meta[META_ARGV_PID], | |
1998 | " (", context.meta[META_COMM], ")" | |
1999 | " of user ", context.meta[META_ARGV_UID], | |
2000 | " failed with ", context.meta[META_ARGV_SIGNAL]); | |
2001 | ||
2002 | r = iovw_put_string_field(iovw, "MESSAGE=", message); | |
2003 | if (r < 0) | |
2004 | return r; | |
2005 | } else { | |
2006 | /* The imported iovecs are not supposed to be freed by us so let's copy and merge them at the | |
2007 | * end of the array. */ | |
2008 | r = iovw_append(iovw, &importer.iovw); | |
2009 | if (r < 0) | |
2010 | return r; | |
2011 | } | |
2012 | ||
2013 | r = sd_journal_sendv(iovw->iovec, iovw->count); | |
2014 | if (r < 0) | |
2015 | return log_error_errno(r, "Failed to log backtrace: %m"); | |
2016 | ||
2017 | return 0; | |
2018 | } | |
2019 | ||
2020 | static int run(int argc, char *argv[]) { | |
2021 | int r; | |
2022 | ||
2023 | /* First, log to a safe place, since we don't know what crashed and it might | |
2024 | * be journald which we'd rather not log to then. */ | |
2025 | ||
2026 | log_set_target_and_open(LOG_TARGET_KMSG); | |
2027 | ||
2028 | /* Make sure we never enter a loop */ | |
2029 | (void) set_dumpable(SUID_DUMP_DISABLE); | |
2030 | ||
2031 | /* Ignore all parse errors */ | |
2032 | (void) parse_config(); | |
2033 | ||
2034 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); | |
2035 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
2036 | ||
2037 | r = sd_listen_fds(false); | |
2038 | if (r < 0) | |
2039 | return log_error_errno(r, "Failed to determine the number of file descriptors: %m"); | |
2040 | ||
2041 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we | |
2042 | * are invoked from the kernel as coredump handler. */ | |
2043 | if (r == 0) { | |
2044 | if (streq_ptr(argv[1], "--backtrace")) | |
2045 | return process_backtrace(argc, argv); | |
2046 | else | |
2047 | return process_kernel(argc, argv); | |
2048 | } else if (r == 1) | |
2049 | return process_socket(SD_LISTEN_FDS_START); | |
2050 | ||
2051 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
2052 | "Received unexpected number of file descriptors."); | |
2053 | } | |
2054 | ||
2055 | DEFINE_MAIN_FUNCTION(run); |