]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
f5e04665 LP |
2 | |
3 | #include <errno.h> | |
803a3464 LP |
4 | #include <stdio.h> |
5 | #include <sys/prctl.h> | |
587f2a5e | 6 | #include <sys/statvfs.h> |
3e4d0f6c | 7 | #include <sys/auxv.h> |
cacd6403 | 8 | #include <sys/xattr.h> |
4f5dd394 | 9 | #include <unistd.h> |
f5e04665 | 10 | |
73a99163 | 11 | #include "sd-daemon.h" |
f11943c5 LP |
12 | #include "sd-journal.h" |
13 | #include "sd-login.h" | |
73a99163 | 14 | #include "sd-messages.h" |
4f5dd394 LP |
15 | |
16 | #include "acl-util.h" | |
b5efdb8a | 17 | #include "alloc-util.h" |
587f2a5e | 18 | #include "bus-error.h" |
430f0182 | 19 | #include "capability-util.h" |
ba1261bc | 20 | #include "cgroup-util.h" |
4f5dd394 | 21 | #include "compress.h" |
34c10968 LP |
22 | #include "conf-parser.h" |
23 | #include "copy.h" | |
c8715007 | 24 | #include "coredump-util.h" |
f11943c5 | 25 | #include "coredump-vacuum.h" |
a0956174 | 26 | #include "dirent-util.h" |
ea680f05 | 27 | #include "elf-util.h" |
4f5dd394 | 28 | #include "escape.h" |
3ffd4af2 | 29 | #include "fd-util.h" |
4f5dd394 | 30 | #include "fileio.h" |
f4f15635 | 31 | #include "fs-util.h" |
afc5dbf3 | 32 | #include "io-util.h" |
b18453ed | 33 | #include "journal-importer.h" |
5edf875b | 34 | #include "journal-send.h" |
4f5dd394 LP |
35 | #include "log.h" |
36 | #include "macro.h" | |
5e332028 | 37 | #include "main-func.h" |
0a970718 | 38 | #include "memory-util.h" |
35cd0ba5 | 39 | #include "mkdir-label.h" |
6bedfcbb | 40 | #include "parse-util.h" |
0b452006 | 41 | #include "process-util.h" |
d14bcb4e | 42 | #include "signal-util.h" |
3c171f0b | 43 | #include "socket-util.h" |
4f5dd394 | 44 | #include "special.h" |
587f2a5e | 45 | #include "stat-util.h" |
8b43440b | 46 | #include "string-table.h" |
07630cea | 47 | #include "string-util.h" |
4f5dd394 | 48 | #include "strv.h" |
bf819d3a | 49 | #include "sync-util.h" |
e4de7287 | 50 | #include "tmpfile-util.h" |
b085d224 | 51 | #include "uid-alloc-range.h" |
b1d4f8e1 | 52 | #include "user-util.h" |
34727273 | 53 | |
e677041e LP |
54 | /* The maximum size up to which we process coredumps. We use 1G on 32bit systems, and 32G on 64bit systems */ |
55 | #if __SIZEOF_POINTER__ == 4 | |
56 | #define PROCESS_SIZE_MAX ((uint64_t) (1LLU*1024LLU*1024LLU*1024LLU)) | |
57 | #elif __SIZEOF_POINTER__ == 8 | |
58 | #define PROCESS_SIZE_MAX ((uint64_t) (32LLU*1024LLU*1024LLU*1024LLU)) | |
59 | #else | |
60 | #error "Unexpected pointer size" | |
61 | #endif | |
34c10968 | 62 | |
bdfd7b2c | 63 | /* The maximum size up to which we leave the coredump around on disk */ |
34c10968 LP |
64 | #define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX |
65 | ||
bdfd7b2c | 66 | /* The maximum size up to which we store the coredump in the journal */ |
25cad95c | 67 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
34c10968 | 68 | #define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU)) |
25cad95c YW |
69 | #else |
70 | /* oss-fuzz limits memory usage. */ | |
71 | #define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU)) | |
72 | #endif | |
f5e04665 | 73 | |
587f2a5e LB |
74 | /* When checking for available memory and setting lower limits, don't |
75 | * go below 4MB for writing core files to storage. */ | |
76 | #define PROCESS_SIZE_MIN (4U*1024U*1024U) | |
77 | ||
c4aa09b0 | 78 | /* Make sure to not make this larger than the maximum journal entry |
27f931d1 | 79 | * size. See DATA_SIZE_MAX in journal-importer.h. */ |
874bc134 | 80 | assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX); |
f5e04665 LP |
81 | |
enum {
        /* Indexes into our per-process metadata cache.
         *
         * The leading entries hold the same metadata, in the same order, as the strings the kernel
         * passes us via argv[] according to the pattern we set in /proc/sys/kernel/core_pattern
         * (see man:core(5)). */

        META_ARGV_PID,          /* %P: as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of signal causing dump */
        META_ARGV_TIMESTAMP,    /* %t: time of dump, expressed as seconds since the Epoch (we expand this to µs granularity) */
        META_ARGV_RLIMIT,       /* %c: core file size soft resource limit */
        META_ARGV_HOSTNAME,     /* %h: hostname */
        _META_ARGV_MAX,

        /* Next come a few special fields we cache because we need them quickly when naming
         * coredump files and attaching xattrs. In contrast to the entries above these are
         * collected from the runtime environment rather than from argv[]. */

        META_COMM = _META_ARGV_MAX,
        _META_MANDATORY_MAX,

        /* The remaining fields are like the previous group, except that a missing one is not
         * treated as a failure. */

        META_EXE = _META_MANDATORY_MAX,
        META_UNIT,
        META_PROC_AUXV,
        _META_MAX
};
114 | ||
f46c706b | 115 | static const char * const meta_field_names[_META_MAX] = { |
510a1466 ZJS |
116 | [META_ARGV_PID] = "COREDUMP_PID=", |
117 | [META_ARGV_UID] = "COREDUMP_UID=", | |
118 | [META_ARGV_GID] = "COREDUMP_GID=", | |
119 | [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=", | |
120 | [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=", | |
121 | [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=", | |
122 | [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=", | |
123 | [META_COMM] = "COREDUMP_COMM=", | |
124 | [META_EXE] = "COREDUMP_EXE=", | |
125 | [META_UNIT] = "COREDUMP_UNIT=", | |
3e4d0f6c | 126 | [META_PROC_AUXV] = "COREDUMP_PROC_AUXV=", |
f46c706b FB |
127 | }; |
128 | ||
129 | typedef struct Context { | |
130 | const char *meta[_META_MAX]; | |
3e4d0f6c | 131 | size_t meta_size[_META_MAX]; |
f46c706b FB |
132 | pid_t pid; |
133 | bool is_pid1; | |
134 | bool is_journald; | |
135 | } Context; | |
136 | ||
34c10968 LP |
/* Where a collected coredump ends up; selected by the Storage= setting in coredump.conf. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,      /* storage_size_max() reports a limit of 0 for this mode */
        COREDUMP_STORAGE_EXTERNAL,  /* limited by arg_external_size_max */
        COREDUMP_STORAGE_JOURNAL,   /* limited by arg_journal_size_max */
        _COREDUMP_STORAGE_MAX,
        _COREDUMP_STORAGE_INVALID = -EINVAL,
} CoredumpStorage;
144 | ||
34c10968 | 145 | static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = { |
510a1466 | 146 | [COREDUMP_STORAGE_NONE] = "none", |
34c10968 | 147 | [COREDUMP_STORAGE_EXTERNAL] = "external", |
510a1466 | 148 | [COREDUMP_STORAGE_JOURNAL] = "journal", |
34c10968 LP |
149 | }; |
150 | ||
151 | DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage); | |
8c9571d0 | 152 | static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting"); |
34727273 ZJS |
153 | |
154 | static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL; | |
8c9571d0 | 155 | static bool arg_compress = true; |
59f448cf LP |
156 | static uint64_t arg_process_size_max = PROCESS_SIZE_MAX; |
157 | static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX; | |
6e2b4a69 | 158 | static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX; |
f5fbe71d YW |
159 | static uint64_t arg_keep_free = UINT64_MAX; |
160 | static uint64_t arg_max_use = UINT64_MAX; | |
34c10968 LP |
161 | |
162 | static int parse_config(void) { | |
34c10968 | 163 | static const ConfigTableItem items[] = { |
510a1466 ZJS |
164 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, |
165 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
166 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, | |
167 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, | |
168 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, | |
169 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, | |
170 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
34c10968 LP |
171 | {} |
172 | }; | |
173 | ||
07e0ffc8 FB |
174 | return config_parse_config_file("coredump.conf", "Coredump\0", |
175 | config_item_table_lookup, items, | |
176 | CONFIG_PARSE_WARN, NULL); | |
34c10968 LP |
177 | } |
178 | ||
a1e92eee | 179 | static uint64_t storage_size_max(void) { |
ee0449fd ZJS |
180 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
181 | return arg_external_size_max; | |
182 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) | |
183 | return arg_journal_size_max; | |
184 | assert(arg_storage == COREDUMP_STORAGE_NONE); | |
185 | return 0; | |
73a99163 ZJS |
186 | } |
187 | ||
3e4d0f6c ZJS |
/* Grants the crashing user read access to the coredump via an ACL entry, if permitted.
 *
 * fd         - open coredump file
 * uid        - owner of the crashed process
 * allow_user - false if the user must not be able to read the coredump
 *
 * Returns 0 on success or when nothing had to be done, a negative errno if adding the ACL entry
 * failed. Without HAVE_ACL this only validates its arguments. */
static int fix_acl(int fd, uid_t uid, bool allow_user) {
        assert(fd >= 0);
        assert(uid_is_valid(uid));

#if HAVE_ACL
        /* We don't allow users to read coredumps if the uid or capabilities were changed, and
         * system users, dynamic users and "nobody" never get an entry. */
        bool wants_acl =
                allow_user &&
                !uid_is_system(uid) &&
                !uid_is_dynamic(uid) &&
                uid != UID_NOBODY;

        if (wants_acl) {
                /* Make sure normal users can read (but not write or delete) their own coredumps */
                int r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
                if (r < 0)
                        return log_error_errno(r, "Failed to adjust ACL of the coredump: %m");
        }
#endif

        return 0;
}
210 | ||
f46c706b FB |
211 | static int fix_xattr(int fd, const Context *context) { |
212 | ||
213 | static const char * const xattrs[_META_MAX] = { | |
510a1466 ZJS |
214 | [META_ARGV_PID] = "user.coredump.pid", |
215 | [META_ARGV_UID] = "user.coredump.uid", | |
216 | [META_ARGV_GID] = "user.coredump.gid", | |
217 | [META_ARGV_SIGNAL] = "user.coredump.signal", | |
218 | [META_ARGV_TIMESTAMP] = "user.coredump.timestamp", | |
219 | [META_ARGV_RLIMIT] = "user.coredump.rlimit", | |
220 | [META_ARGV_HOSTNAME] = "user.coredump.hostname", | |
221 | [META_COMM] = "user.coredump.comm", | |
222 | [META_EXE] = "user.coredump.exe", | |
0cd77f97 LP |
223 | }; |
224 | ||
34c10968 LP |
225 | int r = 0; |
226 | ||
b59233e6 LP |
227 | assert(fd >= 0); |
228 | ||
1eef15b1 | 229 | /* Attach some metadata to coredumps via extended |
34c10968 LP |
230 | * attributes. Just because we can. */ |
231 | ||
fe96c0f8 | 232 | for (unsigned i = 0; i < _META_MAX; i++) { |
1eef15b1 ZJS |
233 | int k; |
234 | ||
f46c706b | 235 | if (isempty(context->meta[i]) || !xattrs[i]) |
0cd77f97 | 236 | continue; |
34c10968 | 237 | |
f46c706b | 238 | k = fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE); |
1eef15b1 | 239 | if (k < 0 && r == 0) |
34c10968 | 240 | r = -errno; |
0cd77f97 | 241 | } |
34c10968 LP |
242 | |
243 | return r; | |
244 | } | |
245 | ||
/* Prepares a metadata string for use as a file name component by passing it through xescape()
 * with "./ " as the character set (NOTE(review): the exact escape format is defined by xescape()
 * in escape.h — confirm there). */
#define filename_escape(str) xescape((str), "./ ")
34c10968 | 247 | |
/* Returns a printable name for a temporary file path that may be NULL: the path itself if there is
 * one, a fixed placeholder otherwise. */
static const char *coredump_tmpfile_name(const char *s) {
        if (!s)
                return "(unnamed temporary file)";

        return s;
}
251 | ||
b59233e6 LP |
252 | static int fix_permissions( |
253 | int fd, | |
254 | const char *filename, | |
255 | const char *target, | |
f46c706b | 256 | const Context *context, |
3e4d0f6c ZJS |
257 | uid_t uid, |
258 | bool allow_user) { | |
b59233e6 | 259 | |
03532f0a LP |
260 | int r; |
261 | ||
b59233e6 | 262 | assert(fd >= 0); |
b59233e6 | 263 | assert(target); |
3c171f0b | 264 | assert(context); |
cfd652ed ZJS |
265 | |
266 | /* Ignore errors on these */ | |
3c171f0b | 267 | (void) fchmod(fd, 0640); |
3e4d0f6c | 268 | (void) fix_acl(fd, uid, allow_user); |
3c171f0b | 269 | (void) fix_xattr(fd, context); |
cfd652ed | 270 | |
bf819d3a LP |
271 | r = fsync_full(fd); |
272 | if (r < 0) | |
273 | return log_error_errno(r, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename)); | |
8ac2f74f | 274 | |
23e208e7 | 275 | r = link_tmpfile(fd, filename, target, /* replace= */ false); |
03532f0a LP |
276 | if (r < 0) |
277 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
cfd652ed ZJS |
278 | |
279 | return 0; | |
280 | } | |
281 | ||
59f448cf | 282 | static int maybe_remove_external_coredump(const char *filename, uint64_t size) { |
cfd652ed | 283 | |
b59233e6 | 284 | /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ |
cfd652ed | 285 | |
fc6cec86 | 286 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && |
cfd652ed ZJS |
287 | size <= arg_external_size_max) |
288 | return 0; | |
289 | ||
290 | if (!filename) | |
291 | return 1; | |
292 | ||
4a62c710 MS |
293 | if (unlink(filename) < 0 && errno != ENOENT) |
294 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
cfd652ed ZJS |
295 | |
296 | return 1; | |
297 | } | |
298 | ||
f46c706b | 299 | static int make_filename(const Context *context, char **ret) { |
b59233e6 | 300 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; |
a7f7d1bd | 301 | sd_id128_t boot = {}; |
34c10968 LP |
302 | int r; |
303 | ||
3c171f0b | 304 | assert(context); |
34c10968 | 305 | |
f46c706b | 306 | c = filename_escape(context->meta[META_COMM]); |
34c10968 | 307 | if (!c) |
b59233e6 | 308 | return -ENOMEM; |
34c10968 | 309 | |
f46c706b | 310 | u = filename_escape(context->meta[META_ARGV_UID]); |
0dc5d23c | 311 | if (!u) |
b59233e6 | 312 | return -ENOMEM; |
34c10968 LP |
313 | |
314 | r = sd_id128_get_boot(&boot); | |
b59233e6 | 315 | if (r < 0) |
34c10968 | 316 | return r; |
34c10968 | 317 | |
f46c706b | 318 | p = filename_escape(context->meta[META_ARGV_PID]); |
b59233e6 LP |
319 | if (!p) |
320 | return -ENOMEM; | |
321 | ||
f46c706b | 322 | t = filename_escape(context->meta[META_ARGV_TIMESTAMP]); |
b59233e6 LP |
323 | if (!t) |
324 | return -ENOMEM; | |
325 | ||
326 | if (asprintf(ret, | |
64a5384f | 327 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s", |
34c10968 | 328 | c, |
0dc5d23c | 329 | u, |
34c10968 LP |
330 | SD_ID128_FORMAT_VAL(boot), |
331 | p, | |
b59233e6 LP |
332 | t) < 0) |
333 | return -ENOMEM; | |
334 | ||
335 | return 0; | |
336 | } | |
337 | ||
3e4d0f6c ZJS |
/* Extracts AT_SECURE and the real/effective uid and gid from an auxiliary vector made of 64bit
 * (type, value) pairs.
 *
 * auxv       - the raw vector, may be NULL only if size_bytes is 0
 * size_bytes - size of the vector in bytes, must be a multiple of one pair
 *
 * Returns 0 once a proper AT_NULL terminator (with value 0) was found, -EIO if the size does not
 * cover whole pairs, -ENODATA if the terminator is missing or malformed. */
static int parse_auxv64(
                const uint64_t *auxv,
                size_t size_bytes,
                int *at_secure,
                uid_t *uid,
                uid_t *euid,
                gid_t *gid,
                gid_t *egid) {

        size_t n_words;

        assert(auxv || size_bytes == 0);

        if (size_bytes % (2 * sizeof(uint64_t)) != 0)
                return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Incomplete auxv structure (%zu bytes).", size_bytes);

        n_words = size_bytes / sizeof(uint64_t);

        /* Note that we set output variables even on error. */

        for (size_t i = 0; i + 1 < n_words; i += 2) {
                uint64_t type = auxv[i], value = auxv[i + 1];

                switch (type) {

                case AT_SECURE:
                        *at_secure = value != 0;
                        break;

                case AT_UID:
                        *uid = value;
                        break;

                case AT_EUID:
                        *euid = value;
                        break;

                case AT_GID:
                        *gid = value;
                        break;

                case AT_EGID:
                        *egid = value;
                        break;

                case AT_NULL:
                        /* The terminator must carry a zero value, otherwise the data is bogus */
                        if (value == 0)
                                return 0;
                        goto fail;

                default:
                        break;
                }
        }

fail:
        return log_warning_errno(SYNTHETIC_ERRNO(ENODATA),
                                 "AT_NULL terminator not found, cannot parse auxv structure.");
}
382 | ||
/* Extracts AT_SECURE and the real/effective uid and gid from an auxiliary vector made of 32bit
 * (type, value) pairs.
 *
 * auxv       - the raw vector, may be NULL only if size_bytes is 0
 * size_bytes - size of the vector in bytes, must be a multiple of one pair
 *
 * Returns 0 once a proper AT_NULL terminator (with value 0) was found, -EIO if the size does not
 * cover whole pairs, -ENODATA if the terminator is missing or malformed. */
static int parse_auxv32(
                const uint32_t *auxv,
                size_t size_bytes,
                int *at_secure,
                uid_t *uid,
                uid_t *euid,
                gid_t *gid,
                gid_t *egid) {

        size_t n_words;

        assert(auxv || size_bytes == 0);

        n_words = size_bytes / sizeof(uint32_t);

        if (size_bytes % (2 * sizeof(uint32_t)) != 0)
                return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Incomplete auxv structure (%zu bytes).", size_bytes);

        /* Note that we set output variables even on error. */

        for (size_t i = 0; i + 1 < n_words; i += 2) {
                uint32_t type = auxv[i], value = auxv[i + 1];

                switch (type) {

                case AT_SECURE:
                        *at_secure = value != 0;
                        break;

                case AT_UID:
                        *uid = value;
                        break;

                case AT_EUID:
                        *euid = value;
                        break;

                case AT_GID:
                        *gid = value;
                        break;

                case AT_EGID:
                        *egid = value;
                        break;

                case AT_NULL:
                        /* The terminator must carry a zero value, otherwise the data is bogus */
                        if (value == 0)
                                return 0;
                        goto fail;

                default:
                        break;
                }
        }

fail:
        return log_warning_errno(SYNTHETIC_ERRNO(ENODATA),
                                 "AT_NULL terminator not found, cannot parse auxv structure.");
}
427 | ||
428 | static int grant_user_access(int core_fd, const Context *context) { | |
429 | int at_secure = -1; | |
430 | uid_t uid = UID_INVALID, euid = UID_INVALID; | |
431 | uid_t gid = GID_INVALID, egid = GID_INVALID; | |
432 | int r; | |
433 | ||
434 | assert(core_fd >= 0); | |
435 | assert(context); | |
436 | ||
437 | if (!context->meta[META_PROC_AUXV]) | |
438 | return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions."); | |
439 | ||
440 | uint8_t elf[EI_NIDENT]; | |
441 | errno = 0; | |
442 | if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf)) | |
443 | return log_warning_errno(errno_or_else(EIO), | |
444 | "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno)); | |
445 | ||
446 | if (elf[EI_MAG0] != ELFMAG0 || | |
447 | elf[EI_MAG1] != ELFMAG1 || | |
448 | elf[EI_MAG2] != ELFMAG2 || | |
449 | elf[EI_MAG3] != ELFMAG3 || | |
450 | elf[EI_VERSION] != EV_CURRENT) | |
451 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
452 | "Core file does not have ELF header, not adjusting permissions."); | |
453 | if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) || | |
454 | !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB)) | |
455 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
456 | "Core file has strange ELF class, not adjusting permissions."); | |
457 | ||
458 | if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN)) | |
459 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
460 | "Core file has non-native endianness, not adjusting permissions."); | |
461 | ||
462 | if (elf[EI_CLASS] == ELFCLASS64) | |
463 | r = parse_auxv64((const uint64_t*) context->meta[META_PROC_AUXV], | |
464 | context->meta_size[META_PROC_AUXV], | |
465 | &at_secure, &uid, &euid, &gid, &egid); | |
466 | else | |
467 | r = parse_auxv32((const uint32_t*) context->meta[META_PROC_AUXV], | |
468 | context->meta_size[META_PROC_AUXV], | |
469 | &at_secure, &uid, &euid, &gid, &egid); | |
470 | if (r < 0) | |
471 | return r; | |
472 | ||
473 | /* We allow access if we got all the data and at_secure is not set and | |
474 | * the uid/gid matches euid/egid. */ | |
475 | bool ret = | |
476 | at_secure == 0 && | |
477 | uid != UID_INVALID && euid != UID_INVALID && uid == euid && | |
478 | gid != GID_INVALID && egid != GID_INVALID && gid == egid; | |
479 | log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", | |
480 | ret ? "permit" : "restrict", | |
481 | uid, euid, gid, egid, yes_no(at_secure)); | |
482 | return ret; | |
483 | } | |
484 | ||
b59233e6 | 485 | static int save_external_coredump( |
f46c706b | 486 | const Context *context, |
3c171f0b | 487 | int input_fd, |
b59233e6 | 488 | char **ret_filename, |
5f3e0a74 HW |
489 | int *ret_node_fd, |
490 | int *ret_data_fd, | |
0cd4e913 | 491 | uint64_t *ret_size, |
587f2a5e | 492 | uint64_t *ret_compressed_size, |
cc4419ed | 493 | bool *ret_truncated) { |
b59233e6 | 494 | |
587f2a5e LB |
495 | _cleanup_(unlink_and_freep) char *tmp = NULL; |
496 | _cleanup_free_ char *fn = NULL; | |
254d1313 | 497 | _cleanup_close_ int fd = -EBADF; |
ee0449fd | 498 | uint64_t rlimit, process_limit, max_size; |
587f2a5e | 499 | bool truncated, storage_on_tmpfs; |
b59233e6 | 500 | struct stat st; |
3c171f0b | 501 | uid_t uid; |
b59233e6 LP |
502 | int r; |
503 | ||
3c171f0b | 504 | assert(context); |
b59233e6 | 505 | assert(ret_filename); |
5f3e0a74 HW |
506 | assert(ret_node_fd); |
507 | assert(ret_data_fd); | |
b59233e6 | 508 | assert(ret_size); |
587f2a5e LB |
509 | assert(ret_compressed_size); |
510 | assert(ret_truncated); | |
b59233e6 | 511 | |
f46c706b | 512 | r = parse_uid(context->meta[META_ARGV_UID], &uid); |
3c171f0b LP |
513 | if (r < 0) |
514 | return log_error_errno(r, "Failed to parse UID: %m"); | |
515 | ||
f46c706b | 516 | r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit); |
bdfd7b2c | 517 | if (r < 0) |
f46c706b FB |
518 | return log_error_errno(r, "Failed to parse resource limit '%s': %m", |
519 | context->meta[META_ARGV_RLIMIT]); | |
d7a0f1f4 | 520 | if (rlimit < page_size()) |
f46c706b | 521 | /* Is coredumping disabled? Then don't bother saving/processing the |
3a559f22 | 522 | * coredump. Anything below PAGE_SIZE cannot give a readable coredump |
f46c706b FB |
523 | * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but |
524 | * is usually the same as PAGE_SIZE. */ | |
baaa35ad ZJS |
525 | return log_info_errno(SYNTHETIC_ERRNO(EBADSLT), |
526 | "Resource limits disable core dumping for process %s (%s).", | |
f46c706b | 527 | context->meta[META_ARGV_PID], context->meta[META_COMM]); |
bdfd7b2c | 528 | |
ee0449fd | 529 | process_limit = MAX(arg_process_size_max, storage_size_max()); |
baaa35ad ZJS |
530 | if (process_limit == 0) |
531 | return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT), | |
532 | "Limits for coredump processing and storage are both 0, not dumping core."); | |
ee0449fd | 533 | |
bdfd7b2c | 534 | /* Never store more than the process configured, or than we actually shall keep or process */ |
ee0449fd | 535 | max_size = MIN(rlimit, process_limit); |
bdfd7b2c | 536 | |
3c171f0b | 537 | r = make_filename(context, &fn); |
23bbb0de MS |
538 | if (r < 0) |
539 | return log_error_errno(r, "Failed to determine coredump file name: %m"); | |
34c10968 | 540 | |
1fbe8d0c | 541 | (void) mkdir_parents_label(fn, 0755); |
803a3464 | 542 | |
03532f0a | 543 | fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp); |
4a62c710 | 544 | if (fd < 0) |
03532f0a | 545 | return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); |
803a3464 | 546 | |
587f2a5e LB |
547 | /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on |
548 | * the service or the slice it belongs to. This is common on low-resources systems, | |
549 | * to avoid crashing processes to take away too many system resources. | |
550 | * Check the cgroup settings, and set max_size to a bit less than half of the | |
551 | * available memory left to the process. | |
552 | * Then, attempt to write the core file uncompressed first - if the write gets | |
553 | * interrupted, we know we won't be able to write it all, so instead compress what | |
554 | * was written so far, delete the uncompressed truncated core, and then continue | |
555 | * compressing from STDIN. Given the compressed core cannot be larger than the | |
556 | * uncompressed one, and 1KB for metadata is accounted for in the calculation, we | |
557 | * should be able to at least store the full compressed core file. */ | |
558 | ||
559 | storage_on_tmpfs = fd_is_temporary_fs(fd) > 0; | |
560 | if (storage_on_tmpfs && arg_compress) { | |
561 | _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; | |
562 | uint64_t cgroup_limit = UINT64_MAX; | |
563 | struct statvfs sv; | |
564 | ||
565 | /* If we can't get the cgroup limit, just ignore it, but don't fail, | |
566 | * try anyway with the config settings. */ | |
567 | r = sd_bus_default_system(&bus); | |
568 | if (r < 0) | |
569 | log_info_errno(r, "Failed to connect to system bus, skipping MemoryAvailable check: %m"); | |
570 | else { | |
571 | _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; | |
572 | ||
573 | r = sd_bus_get_property_trivial( | |
574 | bus, | |
575 | "org.freedesktop.systemd1", | |
576 | "/org/freedesktop/systemd1/unit/self", | |
577 | "org.freedesktop.systemd1.Service", | |
578 | "MemoryAvailable", | |
579 | &error, | |
580 | 't', &cgroup_limit); | |
581 | if (r < 0) | |
582 | log_warning_errno(r, | |
583 | "Failed to query MemoryAvailable for current unit, " | |
584 | "falling back to static config settings: %s", | |
585 | bus_error_message(&error, r)); | |
586 | } | |
803a3464 | 587 | |
587f2a5e LB |
588 | max_size = MIN(cgroup_limit, max_size); |
589 | max_size = LESS_BY(max_size, 1024U) / 2; /* Account for 1KB metadata overhead for compressing */ | |
590 | max_size = MAX(PROCESS_SIZE_MIN, max_size); /* Impose a lower minimum */ | |
591 | ||
592 | /* tmpfs might get full quickly, so check the available space too. | |
593 | * But don't worry about errors here, failing to access the storage | |
594 | * location will be better logged when writing to it. */ | |
8facac5f | 595 | if (fstatvfs(fd, &sv) >= 0) |
587f2a5e | 596 | max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size); |
34c10968 | 597 | |
587f2a5e | 598 | log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup memory limits.", max_size); |
7849c2ac TA |
599 | } |
600 | ||
587f2a5e LB |
601 | r = copy_bytes(input_fd, fd, max_size, 0); |
602 | if (r < 0) | |
603 | return log_error_errno(r, "Cannot store coredump of %s (%s): %m", | |
604 | context->meta[META_ARGV_PID], context->meta[META_COMM]); | |
605 | truncated = r == 1; | |
cfd652ed | 606 | |
3e4d0f6c ZJS |
607 | bool allow_user = grant_user_access(fd, context) > 0; |
608 | ||
587f2a5e LB |
609 | #if HAVE_COMPRESSION |
610 | if (arg_compress) { | |
611 | _cleanup_(unlink_and_freep) char *tmp_compressed = NULL; | |
612 | _cleanup_free_ char *fn_compressed = NULL; | |
254d1313 | 613 | _cleanup_close_ int fd_compressed = -EBADF; |
587f2a5e LB |
614 | uint64_t uncompressed_size = 0; |
615 | ||
616 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) | |
617 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); | |
cfd652ed | 618 | |
ee00684c | 619 | fn_compressed = strjoin(fn, default_compression_extension()); |
587f2a5e LB |
620 | if (!fn_compressed) |
621 | return log_oom(); | |
cfd652ed | 622 | |
03532f0a | 623 | fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed); |
587f2a5e LB |
624 | if (fd_compressed < 0) |
625 | return log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed); | |
cfd652ed | 626 | |
587f2a5e LB |
627 | r = compress_stream(fd, fd_compressed, max_size, &uncompressed_size); |
628 | if (r < 0) | |
629 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
630 | ||
631 | if (truncated && storage_on_tmpfs) { | |
632 | uint64_t partial_uncompressed_size = 0; | |
633 | ||
634 | /* Uncompressed write was truncated and we are writing to tmpfs: delete | |
635 | * the uncompressed core, and compress the remaining part from STDIN. */ | |
636 | ||
637 | tmp = unlink_and_free(tmp); | |
638 | fd = safe_close(fd); | |
639 | ||
640 | r = compress_stream(input_fd, fd_compressed, max_size, &partial_uncompressed_size); | |
641 | if (r < 0) | |
642 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
643 | uncompressed_size += partial_uncompressed_size; | |
b59233e6 LP |
644 | } |
645 | ||
3e4d0f6c | 646 | r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid, allow_user); |
cfd652ed | 647 | if (r < 0) |
587f2a5e | 648 | return r; |
b59233e6 | 649 | |
587f2a5e LB |
650 | if (fstat(fd_compressed, &st) < 0) |
651 | return log_error_errno(errno, | |
652 | "Failed to fstat core file %s: %m", | |
653 | coredump_tmpfile_name(tmp_compressed)); | |
cfd652ed | 654 | |
587f2a5e LB |
655 | *ret_filename = TAKE_PTR(fn_compressed); /* compressed */ |
656 | *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */ | |
657 | *ret_compressed_size = (uint64_t) st.st_size; /* compressed */ | |
658 | *ret_data_fd = TAKE_FD(fd); | |
659 | *ret_size = uncompressed_size; | |
660 | *ret_truncated = truncated; | |
661 | tmp_compressed = mfree(tmp_compressed); | |
cfd652ed | 662 | |
cfd652ed | 663 | return 0; |
34c10968 | 664 | } |
3b1a55e1 | 665 | #endif |
5f3e0a74 | 666 | |
587f2a5e LB |
667 | if (truncated) |
668 | log_struct(LOG_INFO, | |
08e86b15 DDM |
669 | LOG_MESSAGE("Core file was truncated to %"PRIu64" bytes.", max_size), |
670 | "SIZE_LIMIT=%"PRIu64, max_size, | |
587f2a5e LB |
671 | "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR); |
672 | ||
3e4d0f6c | 673 | r = fix_permissions(fd, tmp, fn, context, uid, allow_user); |
cfd652ed | 674 | if (r < 0) |
587f2a5e LB |
675 | return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn); |
676 | ||
677 | if (fstat(fd, &st) < 0) | |
678 | return log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); | |
679 | ||
680 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) | |
681 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); | |
34c10968 | 682 | |
0cfb0971 | 683 | *ret_filename = TAKE_PTR(fn); |
1cc6c93a | 684 | *ret_data_fd = TAKE_FD(fd); |
59f448cf | 685 | *ret_size = (uint64_t) st.st_size; |
587f2a5e | 686 | *ret_truncated = truncated; |
34c10968 | 687 | |
34c10968 | 688 | return 0; |
34c10968 LP |
689 | } |
690 | ||
691 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
692 | _cleanup_free_ char *field = NULL; | |
693 | ssize_t n; | |
694 | ||
8d4e028f | 695 | assert(fd >= 0); |
34c10968 LP |
696 | assert(ret); |
697 | assert(ret_size); | |
698 | ||
4a62c710 MS |
699 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) |
700 | return log_warning_errno(errno, "Failed to seek: %m"); | |
803a3464 | 701 | |
34c10968 LP |
702 | field = malloc(9 + size); |
703 | if (!field) { | |
cfd652ed | 704 | log_warning("Failed to allocate memory for coredump, coredump will not be stored."); |
34c10968 LP |
705 | return -ENOMEM; |
706 | } | |
707 | ||
708 | memcpy(field, "COREDUMP=", 9); | |
709 | ||
710 | n = read(fd, field + 9, size); | |
23bbb0de MS |
711 | if (n < 0) |
712 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
baaa35ad ZJS |
713 | if ((size_t) n < size) |
714 | return log_error_errno(SYNTHETIC_ERRNO(EIO), | |
715 | "Core data too short."); | |
34c10968 | 716 | |
1cc6c93a | 717 | *ret = TAKE_PTR(field); |
34c10968 LP |
718 | *ret_size = size + 9; |
719 | ||
34c10968 LP |
720 | return 0; |
721 | } | |
803a3464 | 722 | |
3f132692 JF |
723 | /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines: |
724 | * 0:/dev/pts/23 | |
725 | * pos: 0 | |
726 | * flags: 0100002 | |
727 | * | |
728 | * 1:/dev/pts/23 | |
729 | * pos: 0 | |
730 | * flags: 0100002 | |
731 | * | |
732 | * 2:/dev/pts/23 | |
733 | * pos: 0 | |
734 | * flags: 0100002 | |
735 | * EOF | |
736 | */ | |
737 | static int compose_open_fds(pid_t pid, char **open_fds) { | |
4d84bc2f | 738 | _cleanup_closedir_ DIR *proc_fd_dir = NULL; |
254d1313 | 739 | _cleanup_close_ int proc_fdinfo_fd = -EBADF; |
4d84bc2f | 740 | _cleanup_free_ char *buffer = NULL; |
3f132692 | 741 | _cleanup_fclose_ FILE *stream = NULL; |
59059b4a | 742 | const char *fddelim = "", *path; |
4d84bc2f | 743 | size_t size = 0; |
7b26ea6f | 744 | int r; |
3f132692 JF |
745 | |
746 | assert(pid >= 0); | |
747 | assert(open_fds != NULL); | |
748 | ||
59059b4a | 749 | path = procfs_file_alloca(pid, "fd"); |
3f132692 | 750 | proc_fd_dir = opendir(path); |
59059b4a ZJS |
751 | if (!proc_fd_dir) |
752 | return -errno; | |
3f132692 | 753 | |
4d84bc2f | 754 | proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH); |
59059b4a ZJS |
755 | if (proc_fdinfo_fd < 0) |
756 | return -errno; | |
3f132692 | 757 | |
2fe21124 | 758 | stream = open_memstream_unlocked(&buffer, &size); |
3f132692 JF |
759 | if (!stream) |
760 | return -ENOMEM; | |
761 | ||
af3b864d | 762 | FOREACH_DIRENT(de, proc_fd_dir, return -errno) { |
3f132692 | 763 | _cleanup_fclose_ FILE *fdinfo = NULL; |
4d84bc2f | 764 | _cleanup_free_ char *fdname = NULL; |
254d1313 | 765 | _cleanup_close_ int fd = -EBADF; |
3f132692 | 766 | |
af3b864d | 767 | r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname); |
3f132692 JF |
768 | if (r < 0) |
769 | return r; | |
770 | ||
af3b864d | 771 | fprintf(stream, "%s%s:%s\n", fddelim, de->d_name, fdname); |
3f132692 JF |
772 | fddelim = "\n"; |
773 | ||
774 | /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */ | |
af3b864d | 775 | fd = openat(proc_fdinfo_fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY); |
59059b4a | 776 | if (fd < 0) |
3f132692 JF |
777 | continue; |
778 | ||
b46c3e49 VC |
779 | fdinfo = take_fdopen(&fd, "r"); |
780 | if (!fdinfo) | |
3f132692 JF |
781 | continue; |
782 | ||
7b26ea6f LP |
783 | for (;;) { |
784 | _cleanup_free_ char *line = NULL; | |
785 | ||
786 | r = read_line(fdinfo, LONG_LINE_MAX, &line); | |
787 | if (r < 0) | |
788 | return r; | |
789 | if (r == 0) | |
790 | break; | |
791 | ||
0d536673 | 792 | fputs(line, stream); |
7b26ea6f | 793 | fputc('\n', stream); |
4d84bc2f | 794 | } |
3f132692 JF |
795 | } |
796 | ||
4d84bc2f | 797 | errno = 0; |
74ca738f | 798 | stream = safe_fclose(stream); |
4d84bc2f | 799 | |
b3267152 | 800 | if (errno > 0) |
4d84bc2f LP |
801 | return -errno; |
802 | ||
ae2a15bc | 803 | *open_fds = TAKE_PTR(buffer); |
4d84bc2f | 804 | |
3f132692 JF |
805 | return 0; |
806 | } | |
807 | ||
7ed03ce6 JF |
808 | static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) { |
809 | const char *p; | |
810 | struct stat stbuf; | |
254d1313 | 811 | _cleanup_close_ int proc_ns_dir_fd = -EBADF; |
7ed03ce6 JF |
812 | |
813 | p = procfs_file_alloca(pid, "ns"); | |
814 | ||
815 | proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY); | |
816 | if (proc_ns_dir_fd < 0) | |
817 | return -errno; | |
818 | ||
819 | if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0) | |
820 | return -errno; | |
821 | ||
822 | *ns = stbuf.st_ino; | |
823 | return 0; | |
824 | } | |
825 | ||
/* Walks up the chain of parent processes starting at 'pid' until it finds the
 * first ancestor whose mount namespace differs from that of 'pid', and stores
 * that ancestor's PID in *ret.
 *
 * Returns 0 on success, -ENOENT if the top of the process tree is reached
 * without finding such an ancestor, or another negative errno-style value on
 * failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *ret) {
        ino_t own_mntns;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        while (true) {
                ino_t ancestor_mntns;
                pid_t ancestor;

                r = get_process_ppid(pid, &ancestor);
                if (r == -EADDRNOTAVAIL)
                        /* Reached the top (i.e. typically PID 1, but could also be a process
                         * whose parent is not in our pidns) */
                        return -ENOENT;
                if (r < 0)
                        return r;

                r = get_process_ns(ancestor, "mnt", &ancestor_mntns);
                if (r < 0)
                        return r;

                if (ancestor_mntns != own_mntns) {
                        *ret = ancestor;
                        return 0;
                }

                /* Same namespace, continue one level further up. */
                pid = ancestor;
        }
}
857 | ||
858 | /* Returns 1 if the parent was found. | |
859 | * Returns 0 if there is not a process we can call the pid's | |
860 | * container parent (the pid's process isn't 'containerized'). | |
861 | * Returns a negative number on errors. | |
862 | */ | |
863 | static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { | |
7ed03ce6 JF |
864 | pid_t container_pid; |
865 | const char *proc_root_path; | |
866 | struct stat root_stat, proc_root_stat; | |
83844031 | 867 | int r; |
7ed03ce6 JF |
868 | |
869 | /* To compare inodes of / and /proc/[pid]/root */ | |
870 | if (stat("/", &root_stat) < 0) | |
871 | return -errno; | |
872 | ||
873 | proc_root_path = procfs_file_alloca(pid, "root"); | |
874 | if (stat(proc_root_path, &proc_root_stat) < 0) | |
875 | return -errno; | |
876 | ||
877 | /* The process uses system root. */ | |
c20c77ef | 878 | if (stat_inode_same(&proc_root_stat, &root_stat)) { |
7ed03ce6 JF |
879 | *cmdline = NULL; |
880 | return 0; | |
881 | } | |
882 | ||
883 | r = get_mount_namespace_leader(pid, &container_pid); | |
884 | if (r < 0) | |
885 | return r; | |
886 | ||
5dd55303 | 887 | r = get_process_cmdline(container_pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, cmdline); |
d3cba4ea EV |
888 | if (r < 0) |
889 | return r; | |
890 | ||
891 | return 1; | |
7ed03ce6 JF |
892 | } |
893 | ||
f46c706b | 894 | static int change_uid_gid(const Context *context) { |
3c171f0b LP |
895 | uid_t uid; |
896 | gid_t gid; | |
897 | int r; | |
34c10968 | 898 | |
f46c706b | 899 | r = parse_uid(context->meta[META_ARGV_UID], &uid); |
3c171f0b LP |
900 | if (r < 0) |
901 | return r; | |
8c8549db | 902 | |
28add648 | 903 | if (uid_is_system(uid)) { |
888e378d LP |
904 | const char *user = "systemd-coredump"; |
905 | ||
fafff8f1 | 906 | r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); |
888e378d LP |
907 | if (r < 0) { |
908 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
909 | uid = gid = 0; | |
910 | } | |
911 | } else { | |
f46c706b | 912 | r = parse_gid(context->meta[META_ARGV_GID], &gid); |
888e378d LP |
913 | if (r < 0) |
914 | return r; | |
915 | } | |
3c171f0b LP |
916 | |
917 | return drop_privileges(uid, gid, 0); | |
918 | } | |
8c8549db | 919 | |
3c171f0b | 920 | static int submit_coredump( |
3e4d0f6c | 921 | const Context *context, |
9a435388 | 922 | struct iovec_wrapper *iovw, |
3c171f0b | 923 | int input_fd) { |
34c10968 | 924 | |
c546154a | 925 | _cleanup_(json_variant_unrefp) JsonVariant *json_metadata = NULL; |
254d1313 | 926 | _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF; |
9a435388 | 927 | _cleanup_free_ char *filename = NULL, *coredump_data = NULL; |
51d3783d | 928 | _cleanup_free_ char *stacktrace = NULL; |
9a435388 | 929 | char *core_message; |
c546154a | 930 | const char *module_name; |
587f2a5e | 931 | uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX; |
f46c706b | 932 | bool truncated = false; |
c546154a | 933 | JsonVariant *module_json; |
3c171f0b | 934 | int r; |
83844031 | 935 | |
3c171f0b | 936 | assert(context); |
9a435388 | 937 | assert(iovw); |
3c171f0b | 938 | assert(input_fd >= 0); |
f5e04665 | 939 | |
3c171f0b LP |
940 | /* Vacuum before we write anything again */ |
941 | (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); | |
803a3464 | 942 | |
3c171f0b | 943 | /* Always stream the coredump to disk, if that's possible */ |
0cd4e913 | 944 | r = save_external_coredump(context, input_fd, |
587f2a5e LB |
945 | &filename, &coredump_node_fd, &coredump_fd, |
946 | &coredump_size, &coredump_compressed_size, &truncated); | |
3c171f0b LP |
947 | if (r < 0) |
948 | /* Skip whole core dumping part */ | |
949 | goto log; | |
950 | ||
51d3783d FB |
951 | /* If we don't want to keep the coredump on disk, remove it now, as later on we |
952 | * will lack the privileges for it. However, we keep the fd to it, so that we can | |
953 | * still process it and log it. */ | |
587f2a5e | 954 | r = maybe_remove_external_coredump(filename, coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size); |
3c171f0b LP |
955 | if (r < 0) |
956 | return r; | |
633c3e8a | 957 | if (r == 0) |
2a3bebd0 | 958 | (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename); |
633c3e8a | 959 | else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
5206a724 | 960 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
587f2a5e | 961 | coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size, arg_external_size_max); |
f5e04665 | 962 | |
3c171f0b LP |
963 | /* Vacuum again, but exclude the coredump we just created */ |
964 | (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); | |
8c9571d0 | 965 | |
51d3783d FB |
966 | /* Now, let's drop privileges to become the user who owns the segfaulted process |
967 | * and allocate the coredump memory under the user's uid. This also ensures that | |
968 | * the credentials journald will see are the ones of the coredumping user, thus | |
969 | * making sure the user gets access to the core dump. Let's also get rid of all | |
3c171f0b LP |
970 | * capabilities, if we run as root, we won't need them anymore. */ |
971 | r = change_uid_gid(context); | |
972 | if (r < 0) | |
973 | return log_error_errno(r, "Failed to drop privileges: %m"); | |
34c10968 | 974 | |
5238e957 | 975 | /* Try to get a stack trace if we can */ |
c790632c | 976 | if (coredump_size > arg_process_size_max) |
51d3783d FB |
977 | log_debug("Not generating stack trace: core size %"PRIu64" is greater " |
978 | "than %"PRIu64" (the configured maximum)", | |
6e9ef603 | 979 | coredump_size, arg_process_size_max); |
c790632c ZJS |
980 | else if (coredump_fd >= 0) { |
981 | bool skip = startswith(context->meta[META_COMM], "systemd-coredum"); /* COMM is 16 bytes usually */ | |
982 | ||
61aea456 LB |
983 | (void) parse_elf_object(coredump_fd, |
984 | context->meta[META_EXE], | |
c790632c | 985 | /* fork_disable_dump= */ skip, /* avoid loops */ |
61aea456 LB |
986 | &stacktrace, |
987 | &json_metadata); | |
c790632c | 988 | } |
51d3783d | 989 | |
3c171f0b | 990 | log: |
f46c706b FB |
991 | core_message = strjoina("Process ", context->meta[META_ARGV_PID], |
992 | " (", context->meta[META_COMM], ") of user ", | |
993 | context->meta[META_ARGV_UID], " dumped core.", | |
994 | context->is_journald && filename ? "\nCoredump diverted to " : NULL, | |
995 | context->is_journald && filename ? filename : NULL); | |
51d3783d | 996 | |
9a435388 | 997 | core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace); |
92e92d71 | 998 | |
5edf875b DDM |
999 | if (context->is_journald) |
1000 | /* We might not be able to log to the journal, so let's always print the message to another | |
1001 | * log target. The target was set previously to something safe. */ | |
9a435388 | 1002 | log_dispatch(LOG_ERR, 0, core_message); |
92e92d71 | 1003 | |
2a3bebd0 | 1004 | (void) iovw_put_string_field(iovw, "MESSAGE=", core_message); |
3c171f0b | 1005 | |
0cd4e913 | 1006 | if (truncated) |
2a3bebd0 | 1007 | (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1"); |
0cd4e913 | 1008 | |
c546154a LB |
1009 | /* If we managed to parse any ELF metadata (build-id, ELF package meta), |
1010 | * attach it as journal metadata. */ | |
1011 | if (json_metadata) { | |
1012 | _cleanup_free_ char *formatted_json = NULL; | |
1013 | ||
1014 | r = json_variant_format(json_metadata, 0, &formatted_json); | |
1015 | if (r < 0) | |
1016 | return log_error_errno(r, "Failed to format JSON package metadata: %m"); | |
1017 | ||
671769c9 | 1018 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_JSON=", formatted_json); |
c546154a LB |
1019 | } |
1020 | ||
c790632c ZJS |
1021 | /* In the unlikely scenario that context->meta[META_EXE] is not available, |
1022 | * let's avoid guessing the module name and skip the loop. */ | |
1023 | if (context->meta[META_EXE]) | |
1024 | JSON_VARIANT_OBJECT_FOREACH(module_name, module_json, json_metadata) { | |
1025 | JsonVariant *t; | |
c546154a | 1026 | |
c790632c ZJS |
1027 | /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */ |
1028 | if (!path_equal_filename(module_name, context->meta[META_EXE])) | |
1029 | continue; | |
c546154a | 1030 | |
c790632c ZJS |
1031 | t = json_variant_by_key(module_json, "name"); |
1032 | if (t) | |
1033 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_NAME=", json_variant_string(t)); | |
1f2abb79 | 1034 | |
c790632c ZJS |
1035 | t = json_variant_by_key(module_json, "version"); |
1036 | if (t) | |
1037 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_VERSION=", json_variant_string(t)); | |
1038 | } | |
c546154a | 1039 | |
3c171f0b | 1040 | /* Optionally store the entire coredump in the journal */ |
587f2a5e | 1041 | if (arg_storage == COREDUMP_STORAGE_JOURNAL && coredump_fd >= 0) { |
6e9ef603 ZJS |
1042 | if (coredump_size <= arg_journal_size_max) { |
1043 | size_t sz = 0; | |
1044 | ||
1045 | /* Store the coredump itself in the journal */ | |
1046 | ||
1047 | r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); | |
9a435388 FB |
1048 | if (r >= 0) { |
1049 | if (iovw_put(iovw, coredump_data, sz) >= 0) | |
1050 | TAKE_PTR(coredump_data); | |
1051 | } else | |
6e9ef603 ZJS |
1052 | log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); |
1053 | } else | |
5206a724 | 1054 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
6e9ef603 | 1055 | coredump_size, arg_journal_size_max); |
f5e04665 LP |
1056 | } |
1057 | ||
5edf875b DDM |
1058 | /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the |
1059 | * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write | |
1060 | * the coredump to the socket. */ | |
1061 | ||
1062 | if (context->is_journald) { | |
1063 | r = journal_fd_nonblock(true); | |
1064 | if (r < 0) | |
1065 | return log_error_errno(r, "Failed to make journal socket non-blocking: %m"); | |
1066 | } | |
1067 | ||
9a435388 | 1068 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
5edf875b DDM |
1069 | |
1070 | if (context->is_journald) { | |
1071 | int k; | |
1072 | ||
1073 | k = journal_fd_nonblock(false); | |
1074 | if (k < 0) | |
1075 | return log_error_errno(k, "Failed to make journal socket blocking: %m"); | |
1076 | } | |
1077 | ||
1078 | if (r == -EAGAIN && context->is_journald) | |
1079 | log_warning_errno(r, "Failed to log journal coredump, ignoring: %m"); | |
1080 | else if (r < 0) | |
3c171f0b LP |
1081 | return log_error_errno(r, "Failed to log coredump: %m"); |
1082 | ||
1083 | return 0; | |
1084 | } | |
1085 | ||
f46c706b | 1086 | static int save_context(Context *context, const struct iovec_wrapper *iovw) { |
f46c706b FB |
1087 | const char *unit; |
1088 | int r; | |
3c171f0b | 1089 | |
3c171f0b | 1090 | assert(context); |
f46c706b FB |
1091 | assert(iovw); |
1092 | assert(iovw->count >= _META_ARGV_MAX); | |
3c171f0b | 1093 | |
f46c706b | 1094 | /* The context does not allocate any memory on its own */ |
3c171f0b | 1095 | |
fe96c0f8 | 1096 | for (size_t n = 0; n < iovw->count; n++) { |
f46c706b | 1097 | struct iovec *iovec = iovw->iovec + n; |
92e92d71 | 1098 | |
fe96c0f8 | 1099 | for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) { |
f46c706b FB |
1100 | /* Note that these strings are NUL terminated, because we made sure that a |
1101 | * trailing NUL byte is in the buffer, though not included in the iov_len | |
1102 | * count (see process_socket() and gather_pid_metadata_*()) */ | |
1103 | assert(((char*) iovec->iov_base)[iovec->iov_len] == 0); | |
3c171f0b | 1104 | |
3e4d0f6c | 1105 | const char *p = startswith(iovec->iov_base, meta_field_names[i]); |
f46c706b FB |
1106 | if (p) { |
1107 | context->meta[i] = p; | |
3e4d0f6c | 1108 | context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]); |
f46c706b FB |
1109 | break; |
1110 | } | |
1111 | } | |
3c171f0b | 1112 | } |
f46c706b FB |
1113 | |
1114 | if (!context->meta[META_ARGV_PID]) | |
1115 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1116 | "Failed to find the PID of crashing process"); | |
1117 | ||
1118 | r = parse_pid(context->meta[META_ARGV_PID], &context->pid); | |
1119 | if (r < 0) | |
1120 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); | |
1121 | ||
1122 | unit = context->meta[META_UNIT]; | |
1123 | context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); | |
1124 | context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); | |
1125 | ||
1126 | return 0; | |
3c171f0b LP |
1127 | } |
1128 | ||
1129 | static int process_socket(int fd) { | |
254d1313 | 1130 | _cleanup_close_ int input_fd = -EBADF; |
f46c706b | 1131 | Context context = {}; |
9a435388 FB |
1132 | struct iovec_wrapper iovw = {}; |
1133 | struct iovec iovec; | |
fe96c0f8 | 1134 | int r; |
3c171f0b LP |
1135 | |
1136 | assert(fd >= 0); | |
1137 | ||
d2acb93d | 1138 | log_setup(); |
3c171f0b | 1139 | |
988e89ee ZJS |
1140 | log_debug("Processing coredump received on stdin..."); |
1141 | ||
3c171f0b | 1142 | for (;;) { |
fb29cdbe | 1143 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control; |
3c171f0b LP |
1144 | struct msghdr mh = { |
1145 | .msg_control = &control, | |
1146 | .msg_controllen = sizeof(control), | |
1147 | .msg_iovlen = 1, | |
1148 | }; | |
1149 | ssize_t n; | |
fe1ef0f8 | 1150 | ssize_t l; |
3c171f0b | 1151 | |
fe1ef0f8 EV |
1152 | l = next_datagram_size_fd(fd); |
1153 | if (l < 0) { | |
1154 | r = log_error_errno(l, "Failed to determine datagram size to read: %m"); | |
3c171f0b LP |
1155 | goto finish; |
1156 | } | |
1157 | ||
9a435388 FB |
1158 | iovec.iov_len = l; |
1159 | iovec.iov_base = malloc(l + 1); | |
1160 | if (!iovec.iov_base) { | |
3c171f0b LP |
1161 | r = log_oom(); |
1162 | goto finish; | |
1163 | } | |
1164 | ||
9a435388 | 1165 | mh.msg_iov = &iovec; |
3c171f0b | 1166 | |
3691bcf3 | 1167 | n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC); |
3c171f0b | 1168 | if (n < 0) { |
9a435388 | 1169 | free(iovec.iov_base); |
3691bcf3 | 1170 | r = log_error_errno(n, "Failed to receive datagram: %m"); |
3c171f0b LP |
1171 | goto finish; |
1172 | } | |
1173 | ||
9a435388 FB |
1174 | /* The final zero-length datagram carries the file descriptor and tells us |
1175 | * that we're done. */ | |
3c171f0b | 1176 | if (n == 0) { |
dac556fa | 1177 | struct cmsghdr *found; |
3c171f0b | 1178 | |
9a435388 | 1179 | free(iovec.iov_base); |
3c171f0b | 1180 | |
dac556fa | 1181 | found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); |
3c171f0b | 1182 | if (!found) { |
3691bcf3 LP |
1183 | cmsg_close_all(&mh); |
1184 | r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG), | |
1185 | "Coredump file descriptor missing."); | |
3c171f0b LP |
1186 | goto finish; |
1187 | } | |
1188 | ||
f8540bde FB |
1189 | assert(input_fd < 0); |
1190 | input_fd = *(int*) CMSG_DATA(found); | |
3c171f0b | 1191 | break; |
3691bcf3 LP |
1192 | } else |
1193 | cmsg_close_all(&mh); | |
3c171f0b LP |
1194 | |
1195 | /* Add trailing NUL byte, in case these are strings */ | |
9a435388 FB |
1196 | ((char*) iovec.iov_base)[n] = 0; |
1197 | iovec.iov_len = (size_t) n; | |
3c171f0b | 1198 | |
9a435388 FB |
1199 | r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len); |
1200 | if (r < 0) | |
1201 | goto finish; | |
34c10968 LP |
1202 | } |
1203 | ||
61233823 | 1204 | /* Make sure we got all data we really need */ |
f8540bde | 1205 | assert(input_fd >= 0); |
3c171f0b | 1206 | |
f46c706b FB |
1207 | r = save_context(&context, &iovw); |
1208 | if (r < 0) | |
1209 | goto finish; | |
1210 | ||
1211 | /* Make sure we received at least all fields we need. */ | |
fe96c0f8 | 1212 | for (int i = 0; i < _META_MANDATORY_MAX; i++) |
f46c706b FB |
1213 | if (!context.meta[i]) { |
1214 | r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1215 | "A mandatory argument (%i) has not been sent, aborting.", | |
1216 | i); | |
1217 | goto finish; | |
1218 | } | |
80002f66 | 1219 | |
f46c706b | 1220 | r = submit_coredump(&context, &iovw, input_fd); |
3c171f0b LP |
1221 | |
1222 | finish: | |
9a435388 | 1223 | iovw_free_contents(&iovw, true); |
3c171f0b LP |
1224 | return r; |
1225 | } | |
1226 | ||
9a435388 | 1227 | static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { |
254d1313 | 1228 | _cleanup_close_ int fd = -EBADF; |
3c171f0b LP |
1229 | int r; |
1230 | ||
9a435388 | 1231 | assert(iovw); |
3c171f0b LP |
1232 | assert(input_fd >= 0); |
1233 | ||
1234 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
1235 | if (fd < 0) | |
1236 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1237 | ||
1861986a LP |
1238 | r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/coredump"); |
1239 | if (r < 0) | |
1240 | return log_error_errno(r, "Failed to connect to coredump service: %m"); | |
3c171f0b | 1241 | |
fe96c0f8 | 1242 | for (size_t i = 0; i < iovw->count; i++) { |
fec603eb | 1243 | struct msghdr mh = { |
9a435388 | 1244 | .msg_iov = iovw->iovec + i, |
fec603eb LP |
1245 | .msg_iovlen = 1, |
1246 | }; | |
1247 | struct iovec copy[2]; | |
1248 | ||
1249 | for (;;) { | |
1250 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1251 | break; | |
1252 | ||
1253 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
f46c706b FB |
1254 | /* This field didn't fit? That's a pity. Given that this is |
1255 | * just metadata, let's truncate the field at half, and try | |
1256 | * again. We append three dots, in order to show that this is | |
1257 | * truncated. */ | |
fec603eb LP |
1258 | |
1259 | if (mh.msg_iov != copy) { | |
f46c706b FB |
1260 | /* We don't want to modify the caller's iovec, hence |
1261 | * let's create our own array, consisting of two new | |
1262 | * iovecs, where the first is a (truncated) copy of | |
1263 | * what we want to send, and the second one contains | |
1264 | * the trailing dots. */ | |
9a435388 | 1265 | copy[0] = iovw->iovec[i]; |
ed0cb346 | 1266 | copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3); |
fec603eb LP |
1267 | |
1268 | mh.msg_iov = copy; | |
1269 | mh.msg_iovlen = 2; | |
1270 | } | |
1271 | ||
1272 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1273 | continue; | |
1274 | } | |
3c171f0b | 1275 | |
3c171f0b | 1276 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); |
fec603eb | 1277 | } |
1eef15b1 ZJS |
1278 | } |
1279 | ||
3c171f0b LP |
1280 | r = send_one_fd(fd, input_fd, 0); |
1281 | if (r < 0) | |
1282 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1eef15b1 | 1283 | |
3c171f0b LP |
1284 | return 0; |
1285 | } | |
1eef15b1 | 1286 | |
64a5384f LP |
1287 | static int gather_pid_metadata_from_argv( |
1288 | struct iovec_wrapper *iovw, | |
1289 | Context *context, | |
1290 | int argc, char **argv) { | |
1291 | ||
f46c706b | 1292 | _cleanup_free_ char *free_timestamp = NULL; |
fe96c0f8 | 1293 | int r, signo; |
3c171f0b | 1294 | char *t; |
3c171f0b | 1295 | |
f46c706b FB |
1296 | /* We gather all metadata that were passed via argv[] into an array of iovecs that |
1297 | * we'll forward to the socket unit */ | |
3c171f0b | 1298 | |
f46c706b FB |
1299 | if (argc < _META_ARGV_MAX) |
1300 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1301 | "Not enough arguments passed by the kernel (%i, expected %i).", | |
1302 | argc, _META_ARGV_MAX); | |
3c171f0b | 1303 | |
fe96c0f8 | 1304 | for (int i = 0; i < _META_ARGV_MAX; i++) { |
3c171f0b | 1305 | |
f46c706b | 1306 | t = argv[i]; |
3c171f0b | 1307 | |
f46c706b | 1308 | switch (i) { |
64a5384f | 1309 | |
f46c706b FB |
1310 | case META_ARGV_TIMESTAMP: |
1311 | /* The journal fields contain the timestamp padded with six | |
1312 | * zeroes, so that the kernel-supplied 1s granularity timestamps | |
1313 | * becomes 1µs granularity, i.e. the granularity systemd usually | |
1314 | * operates in. */ | |
1315 | t = free_timestamp = strjoin(argv[i], "000000"); | |
1316 | if (!t) | |
1317 | return log_oom(); | |
1318 | break; | |
64a5384f | 1319 | |
f46c706b FB |
1320 | case META_ARGV_SIGNAL: |
1321 | /* For signal, record its pretty name too */ | |
1322 | if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo)) | |
2a3bebd0 FB |
1323 | (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG", |
1324 | signal_to_string(signo)); | |
f46c706b | 1325 | break; |
64a5384f | 1326 | |
f46c706b FB |
1327 | default: |
1328 | break; | |
c8091d92 LP |
1329 | } |
1330 | ||
f46c706b FB |
1331 | r = iovw_put_string_field(iovw, meta_field_names[i], t); |
1332 | if (r < 0) | |
1333 | return r; | |
8c8549db | 1334 | } |
803a3464 | 1335 | |
f46c706b FB |
1336 | /* Cache some of the process metadata we collected so far and that we'll need to |
1337 | * access soon */ | |
1338 | return save_context(context, iovw); | |
1339 | } | |
3c171f0b | 1340 | |
f46c706b FB |
1341 | static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) { |
1342 | uid_t owner_uid; | |
1343 | pid_t pid; | |
1344 | char *t; | |
3e4d0f6c | 1345 | size_t size; |
f46c706b FB |
1346 | const char *p; |
1347 | int r; | |
f5e04665 | 1348 | |
f46c706b FB |
1349 | /* Note that if we fail on oom later on, we do not roll-back changes to the iovec |
1350 | * structure. (It remains valid, with the first iovec fields initialized.) */ | |
f5e04665 | 1351 | |
f46c706b | 1352 | pid = context->pid; |
f5e04665 | 1353 | |
f46c706b FB |
1354 | /* The following is mandatory */ |
1355 | r = get_process_comm(pid, &t); | |
9a435388 | 1356 | if (r < 0) |
f46c706b | 1357 | return log_error_errno(r, "Failed to get COMM: %m"); |
f5e04665 | 1358 | |
f46c706b | 1359 | r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t); |
9a435388 FB |
1360 | if (r < 0) |
1361 | return r; | |
f45b8015 | 1362 | |
c790632c | 1363 | /* The following are optional, but we use them if present. */ |
2a3bebd0 FB |
1364 | r = get_process_exe(pid, &t); |
1365 | if (r >= 0) | |
1366 | r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t); | |
1367 | if (r < 0) | |
f46c706b | 1368 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); |
bdfd7b2c | 1369 | |
f46c706b | 1370 | if (cg_pid_get_unit(pid, &t) >= 0) |
2a3bebd0 | 1371 | (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t); |
f5e04665 | 1372 | |
f46c706b | 1373 | if (cg_pid_get_user_unit(pid, &t) >= 0) |
2a3bebd0 | 1374 | (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t); |
f46c706b | 1375 | |
9aa82023 | 1376 | if (sd_pid_get_session(pid, &t) >= 0) |
9a435388 | 1377 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t); |
f5e04665 | 1378 | |
a035f819 | 1379 | if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) { |
9a435388 | 1380 | r = asprintf(&t, UID_FMT, owner_uid); |
7de80bfe | 1381 | if (r > 0) |
9a435388 | 1382 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t); |
f5e04665 LP |
1383 | } |
1384 | ||
9aa82023 | 1385 | if (sd_pid_get_slice(pid, &t) >= 0) |
2a3bebd0 | 1386 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t); |
f5e04665 | 1387 | |
5dd55303 | 1388 | if (get_process_cmdline(pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, &t) >= 0) |
2a3bebd0 | 1389 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t); |
a035f819 | 1390 | |
9aa82023 | 1391 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) |
2a3bebd0 | 1392 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t); |
a035f819 | 1393 | |
9aa82023 | 1394 | if (compose_open_fds(pid, &t) >= 0) |
2a3bebd0 | 1395 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t); |
3f132692 JF |
1396 | |
1397 | p = procfs_file_alloca(pid, "status"); | |
627055ce | 1398 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1399 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t); |
3f132692 JF |
1400 | |
1401 | p = procfs_file_alloca(pid, "maps"); | |
627055ce | 1402 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1403 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t); |
3f132692 JF |
1404 | |
1405 | p = procfs_file_alloca(pid, "limits"); | |
627055ce | 1406 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1407 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t); |
3f132692 JF |
1408 | |
1409 | p = procfs_file_alloca(pid, "cgroup"); | |
3e4d0f6c | 1410 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1411 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t); |
3f132692 | 1412 | |
d7032b1f | 1413 | p = procfs_file_alloca(pid, "mountinfo"); |
3e4d0f6c | 1414 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1415 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t); |
d7032b1f | 1416 | |
3e4d0f6c ZJS |
1417 | /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */ |
1418 | p = procfs_file_alloca(pid, "auxv"); | |
1419 | if (read_full_virtual_file(p, &t, &size) >= 0) { | |
1420 | char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1); | |
1421 | if (buf) { | |
1422 | /* Add a dummy terminator to make save_context() happy. */ | |
1423 | *((uint8_t*) mempcpy(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size)) = '\0'; | |
1424 | (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV=")); | |
1425 | } | |
1426 | ||
1427 | free(t); | |
1428 | } | |
1429 | ||
9aa82023 | 1430 | if (get_process_cwd(pid, &t) >= 0) |
2a3bebd0 | 1431 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t); |
3f132692 JF |
1432 | |
1433 | if (get_process_root(pid, &t) >= 0) { | |
9aa82023 ZJS |
1434 | bool proc_self_root_is_slash; |
1435 | ||
1436 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
3f132692 | 1437 | |
2a3bebd0 | 1438 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t); |
7ed03ce6 JF |
1439 | |
1440 | /* If the process' root is "/", then there is a chance it has | |
1441 | * mounted own root and hence being containerized. */ | |
9aa82023 | 1442 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) |
2a3bebd0 | 1443 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t); |
3f132692 JF |
1444 | } |
1445 | ||
9aa82023 | 1446 | if (get_process_environ(pid, &t) >= 0) |
2a3bebd0 | 1447 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t); |
9aa82023 | 1448 | |
f46c706b FB |
1449 | /* we successfully acquired all metadata */ |
1450 | return save_context(context, iovw); | |
9aa82023 | 1451 | } |
3f132692 | 1452 | |
9aa82023 | 1453 | static int process_kernel(int argc, char* argv[]) { |
f46c706b | 1454 | Context context = {}; |
9a435388 | 1455 | struct iovec_wrapper *iovw; |
9aa82023 ZJS |
1456 | int r; |
1457 | ||
1f9d2a81 DDM |
1458 | /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds |
1459 | * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to | |
1460 | * /dev/null. */ | |
5bb1d7fb | 1461 | r = rearrange_stdio(STDIN_FILENO, -EBADF, -EBADF); |
1f9d2a81 DDM |
1462 | if (r < 0) |
1463 | return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m"); | |
1464 | ||
988e89ee ZJS |
1465 | log_debug("Processing coredump received from the kernel..."); |
1466 | ||
9a435388 FB |
1467 | iovw = iovw_new(); |
1468 | if (!iovw) | |
1469 | return log_oom(); | |
1470 | ||
2a3bebd0 FB |
1471 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); |
1472 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1473 | |
1474 | /* Collect all process metadata passed by the kernel through argv[] */ | |
1475 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1); | |
92e92d71 | 1476 | if (r < 0) |
86562420 | 1477 | goto finish; |
86562420 | 1478 | |
f46c706b FB |
1479 | /* Collect the rest of the process metadata retrieved from the runtime */ |
1480 | r = gather_pid_metadata(iovw, &context); | |
1481 | if (r < 0) | |
1482 | goto finish; | |
1483 | ||
1e344c1d | 1484 | if (!context.is_journald) |
f46c706b | 1485 | /* OK, now we know it's not the journal, hence we can make use of it now. */ |
1e344c1d | 1486 | log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG); |
f46c706b FB |
1487 | |
1488 | /* If this is PID 1 disable coredump collection, we'll unlikely be able to process | |
1489 | * it later on. | |
1490 | * | |
1491 | * FIXME: maybe we should disable coredumps generation from the beginning and | |
1492 | * re-enable it only when we know it's either safe (ie we're not running OOM) or | |
1493 | * it's not pid1 ? */ | |
1494 | if (context.is_pid1) { | |
1495 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1496 | disable_coredumps(); | |
1497 | } | |
34c10968 | 1498 | |
f46c706b FB |
1499 | if (context.is_journald || context.is_pid1) |
1500 | r = submit_coredump(&context, iovw, STDIN_FILENO); | |
92e92d71 | 1501 | else |
9a435388 | 1502 | r = send_iovec(iovw, STDIN_FILENO); |
9aa82023 ZJS |
1503 | |
1504 | finish: | |
9a435388 | 1505 | iovw = iovw_free_free(iovw); |
9aa82023 | 1506 | return r; |
3c171f0b | 1507 | } |
34c10968 | 1508 | |
988e89ee | 1509 | static int process_backtrace(int argc, char *argv[]) { |
f46c706b | 1510 | Context context = {}; |
9a435388 FB |
1511 | struct iovec_wrapper *iovw; |
1512 | char *message; | |
988e89ee | 1513 | int r; |
11e6d971 | 1514 | _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO); |
988e89ee ZJS |
1515 | |
1516 | log_debug("Processing backtrace on stdin..."); | |
1517 | ||
9a435388 FB |
1518 | iovw = iovw_new(); |
1519 | if (!iovw) | |
5b45a160 ZJS |
1520 | return log_oom(); |
1521 | ||
2a3bebd0 FB |
1522 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR); |
1523 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1524 | |
1525 | /* Collect all process metadata from argv[] by making sure to skip the | |
1526 | * '--backtrace' option */ | |
1527 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2); | |
988e89ee ZJS |
1528 | if (r < 0) |
1529 | goto finish; | |
aaeb2522 | 1530 | |
f46c706b FB |
1531 | /* Collect the rest of the process metadata retrieved from the runtime */ |
1532 | r = gather_pid_metadata(iovw, &context); | |
1533 | if (r < 0) | |
1534 | goto finish; | |
988e89ee | 1535 | |
86562420 | 1536 | for (;;) { |
5b45a160 ZJS |
1537 | r = journal_importer_process_data(&importer); |
1538 | if (r < 0) { | |
1539 | log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
1540 | goto finish; | |
1541 | } | |
d74dc4f2 ZJS |
1542 | if (r == 1 || /* complete entry */ |
1543 | journal_importer_eof(&importer)) /* end of data */ | |
5b45a160 | 1544 | break; |
988e89ee | 1545 | } |
988e89ee | 1546 | |
5b45a160 ZJS |
1547 | if (journal_importer_eof(&importer)) { |
1548 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
988e89ee | 1549 | |
f46c706b FB |
1550 | message = strjoina("Process ", context.meta[META_ARGV_PID], |
1551 | " (", context.meta[META_COMM], ")" | |
1552 | " of user ", context.meta[META_ARGV_UID], | |
1553 | " failed with ", context.meta[META_ARGV_SIGNAL]); | |
9a435388 FB |
1554 | |
1555 | r = iovw_put_string_field(iovw, "MESSAGE=", message); | |
1556 | if (r < 0) | |
1557 | return r; | |
5b45a160 | 1558 | } else { |
9a435388 FB |
1559 | /* The imported iovecs are not supposed to be freed by us so let's store |
1560 | * them at the end of the array so we can skip them while freeing the | |
1561 | * rest. */ | |
fe96c0f8 | 1562 | for (size_t i = 0; i < importer.iovw.count; i++) { |
9a435388 | 1563 | struct iovec *iovec = importer.iovw.iovec + i; |
988e89ee | 1564 | |
9a435388 FB |
1565 | iovw_put(iovw, iovec->iov_base, iovec->iov_len); |
1566 | } | |
1567 | } | |
988e89ee | 1568 | |
9a435388 | 1569 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
988e89ee ZJS |
1570 | if (r < 0) |
1571 | log_error_errno(r, "Failed to log backtrace: %m"); | |
1572 | ||
1573 | finish: | |
9a435388 FB |
1574 | iovw->count -= importer.iovw.count; |
1575 | iovw = iovw_free_free(iovw); | |
988e89ee ZJS |
1576 | return r; |
1577 | } | |
1578 | ||
4515a95e | 1579 | static int run(int argc, char *argv[]) { |
3c171f0b | 1580 | int r; |
fee80f69 | 1581 | |
9aa82023 ZJS |
1582 | /* First, log to a safe place, since we don't know what crashed and it might |
1583 | * be journald which we'd rather not log to then. */ | |
8d4e028f | 1584 | |
1e344c1d | 1585 | log_set_target_and_open(LOG_TARGET_KMSG); |
8d4e028f | 1586 | |
3c171f0b LP |
1587 | /* Make sure we never enter a loop */ |
1588 | (void) prctl(PR_SET_DUMPABLE, 0); | |
8d4e028f | 1589 | |
3c171f0b LP |
1590 | /* Ignore all parse errors */ |
1591 | (void) parse_config(); | |
fee80f69 | 1592 | |
3c171f0b LP |
1593 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); |
1594 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
fee80f69 | 1595 | |
3c171f0b | 1596 | r = sd_listen_fds(false); |
4515a95e ZJS |
1597 | if (r < 0) |
1598 | return log_error_errno(r, "Failed to determine the number of file descriptors: %m"); | |
fee80f69 | 1599 | |
9aa82023 ZJS |
1600 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we |
1601 | * are invoked from the kernel as coredump handler. */ | |
988e89ee ZJS |
1602 | if (r == 0) { |
1603 | if (streq_ptr(argv[1], "--backtrace")) | |
4515a95e | 1604 | return process_backtrace(argc, argv); |
988e89ee | 1605 | else |
4515a95e | 1606 | return process_kernel(argc, argv); |
988e89ee | 1607 | } else if (r == 1) |
4515a95e | 1608 | return process_socket(SD_LISTEN_FDS_START); |
f5e04665 | 1609 | |
baaa35ad ZJS |
1610 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
1611 | "Received unexpected number of file descriptors."); | |
f5e04665 | 1612 | } |
4515a95e ZJS |
1613 | |
/* Hooks run() up as the body of the program's entry point. NOTE(review): the macro is presumably the one
 * provided by main-func.h — check its definition for how run()'s return value becomes the exit status. */
1614 | DEFINE_MAIN_FUNCTION(run);