]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
f5e04665 LP |
2 | |
3 | #include <errno.h> | |
803a3464 LP |
4 | #include <stdio.h> |
5 | #include <sys/prctl.h> | |
cacd6403 | 6 | #include <sys/xattr.h> |
4f5dd394 | 7 | #include <unistd.h> |
f5e04665 | 8 | |
349cc4a5 | 9 | #if HAVE_ELFUTILS |
3c171f0b LP |
10 | #include <dwarf.h> |
11 | #include <elfutils/libdwfl.h> | |
4d229b31 UTL |
12 | #endif |
13 | ||
73a99163 | 14 | #include "sd-daemon.h" |
f11943c5 LP |
15 | #include "sd-journal.h" |
16 | #include "sd-login.h" | |
73a99163 | 17 | #include "sd-messages.h" |
4f5dd394 LP |
18 | |
19 | #include "acl-util.h" | |
b5efdb8a | 20 | #include "alloc-util.h" |
430f0182 | 21 | #include "capability-util.h" |
ba1261bc | 22 | #include "cgroup-util.h" |
4f5dd394 | 23 | #include "compress.h" |
34c10968 LP |
24 | #include "conf-parser.h" |
25 | #include "copy.h" | |
f11943c5 | 26 | #include "coredump-vacuum.h" |
a0956174 | 27 | #include "dirent-util.h" |
4f5dd394 | 28 | #include "escape.h" |
3ffd4af2 | 29 | #include "fd-util.h" |
4f5dd394 | 30 | #include "fileio.h" |
f4f15635 | 31 | #include "fs-util.h" |
afc5dbf3 | 32 | #include "io-util.h" |
b18453ed | 33 | #include "journal-importer.h" |
4f5dd394 LP |
34 | #include "log.h" |
35 | #include "macro.h" | |
5e332028 | 36 | #include "main-func.h" |
0a970718 | 37 | #include "memory-util.h" |
4f5dd394 | 38 | #include "mkdir.h" |
6bedfcbb | 39 | #include "parse-util.h" |
0b452006 | 40 | #include "process-util.h" |
d14bcb4e | 41 | #include "signal-util.h" |
3c171f0b | 42 | #include "socket-util.h" |
4f5dd394 LP |
43 | #include "special.h" |
44 | #include "stacktrace.h" | |
8b43440b | 45 | #include "string-table.h" |
07630cea | 46 | #include "string-util.h" |
4f5dd394 | 47 | #include "strv.h" |
e4de7287 | 48 | #include "tmpfile-util.h" |
b1d4f8e1 | 49 | #include "user-util.h" |
34727273 | 50 | |
/* The maximum size up to which we process coredumps (2 GiB) */
#define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU))

/* The maximum size up to which we leave the coredump around on disk (same as the processing limit) */
#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX

/* The maximum size up to which we store the coredump in the journal (767 MiB in regular builds) */
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
#else
/* oss-fuzz limits memory usage, hence only 10 MiB in fuzzing builds. */
#define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
#endif

/* Make sure to not make this larger than the maximum journal entry
 * size. See DATA_SIZE_MAX in journal-importer.h. */
assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
f5e04665 LP |
68 | |
enum {
        /* We use these as array indexes for our process metadata cache.
         *
         * The first indexes of the cache store the same metadata as the ones passed by
         * the kernel via argv[], i.e. the string array passed by the kernel according to
         * our pattern defined in /proc/sys/kernel/core_pattern (see man:core(5)). */

        META_ARGV_PID,          /* %P: as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of signal causing dump */
        META_ARGV_TIMESTAMP,    /* %t: time of dump, expressed as seconds since the Epoch */
        META_ARGV_RLIMIT,       /* %c: core file size soft resource limit */
        META_ARGV_HOSTNAME,     /* %h: hostname */
        _META_ARGV_MAX,         /* number of argv-provided fields; not a field itself */

        /* The following indexes are cached for a couple of special fields we use (and
         * thereby need to be retrieved quickly) for naming coredump files, and attaching
         * xattrs. Unlike the previous ones they are retrieved from the runtime
         * environment. */

        META_COMM = _META_ARGV_MAX,
        _META_MANDATORY_MAX,    /* every index below this one is a mandatory field */

        /* The rest are similar to the previous ones except that we won't fail if one of
         * them is missing. */

        META_EXE = _META_MANDATORY_MAX,
        META_UNIT,
        _META_MAX               /* total number of cached fields; used as array size */
};
100 | ||
f46c706b FB |
/* Journal field prefixes (including the trailing '=') for each cached metadata
 * entry, indexed by the META_* constants. */
static const char * const meta_field_names[_META_MAX] = {
        [META_ARGV_PID]       = "COREDUMP_PID=",
        [META_ARGV_UID]       = "COREDUMP_UID=",
        [META_ARGV_GID]       = "COREDUMP_GID=",
        [META_ARGV_SIGNAL]    = "COREDUMP_SIGNAL=",
        [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
        [META_ARGV_RLIMIT]    = "COREDUMP_RLIMIT=",
        [META_ARGV_HOSTNAME]  = "COREDUMP_HOSTNAME=",
        [META_COMM]           = "COREDUMP_COMM=",
        [META_EXE]            = "COREDUMP_EXE=",
        [META_UNIT]           = "COREDUMP_UNIT=",
};
113 | ||
/* Cached metadata about the crashed process. */
typedef struct Context {
        const char *meta[_META_MAX]; /* metadata strings, indexed by the META_* constants */
        pid_t pid;
        bool is_pid1;
        bool is_journald;            /* if set, submit_coredump() prints the message via log_dispatch()
                                      * instead of sending an entry to the journal */
} Context;
120 | ||
34c10968 LP |
/* Where the core file itself is kept; selected by the Storage= setting. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,      /* storage_size_max() reports 0 for this mode */
        COREDUMP_STORAGE_EXTERNAL,  /* limited by arg_external_size_max */
        COREDUMP_STORAGE_JOURNAL,   /* limited by arg_journal_size_max */
        _COREDUMP_STORAGE_MAX,      /* number of valid modes; used as array size */
        _COREDUMP_STORAGE_INVALID = -1
} CoredumpStorage;
128 | ||
34c10968 LP |
/* String names of the storage modes, indexed by CoredumpStorage value. */
static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
        [COREDUMP_STORAGE_NONE]     = "none",
        [COREDUMP_STORAGE_EXTERNAL] = "external",
        [COREDUMP_STORAGE_JOURNAL]  = "journal",
};
134 | ||
/* Generate the string <-> enum lookup helpers and the config parser for Storage=. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting");

/* Settings and their built-in defaults; parse_config() lists each of them as the
 * destination of one [Coredump] key. */
static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL;
static bool arg_compress = true;
static uint64_t arg_process_size_max = PROCESS_SIZE_MAX;
static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
/* (uint64_t) -1 is the initial value of the two vacuuming limits below;
 * presumably it means "not configured" to coredump_vacuum() — confirm there. */
static uint64_t arg_keep_free = (uint64_t) -1;
static uint64_t arg_max_use = (uint64_t) -1;
34c10968 LP |
145 | |
146 | static int parse_config(void) { | |
34c10968 | 147 | static const ConfigTableItem items[] = { |
8c9571d0 LP |
148 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, |
149 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
59f448cf LP |
150 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, |
151 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64, 0, &arg_external_size_max }, | |
8c9571d0 | 152 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, |
59f448cf LP |
153 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, |
154 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
34c10968 LP |
155 | {} |
156 | }; | |
157 | ||
43688c49 | 158 | return config_parse_many_nulstr(PKGSYSCONFDIR "/coredump.conf", |
da412854 YW |
159 | CONF_PATHS_NULSTR("systemd/coredump.conf.d"), |
160 | "Coredump\0", | |
161 | config_item_table_lookup, items, | |
bcde742e | 162 | CONFIG_PARSE_WARN, NULL); |
34c10968 LP |
163 | } |
164 | ||
a1e92eee | 165 | static uint64_t storage_size_max(void) { |
ee0449fd ZJS |
166 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
167 | return arg_external_size_max; | |
168 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) | |
169 | return arg_journal_size_max; | |
170 | assert(arg_storage == COREDUMP_STORAGE_NONE); | |
171 | return 0; | |
73a99163 ZJS |
172 | } |
173 | ||
34c10968 LP |
/* Adds an ACL entry to the file referenced by fd that grants the given user
 * read access. Nothing is changed for system, dynamic and "nobody" UIDs, and
 * the function does nothing when built without ACL support.
 *
 * Returns 0 on success (or if nothing was to be done), a negative errno-style
 * value otherwise. */
static int fix_acl(int fd, uid_t uid) {

#if HAVE_ACL
        _cleanup_(acl_freep) acl_t acl = NULL;
        acl_permset_t perms;
        acl_entry_t e;
        int k;

        assert(fd >= 0);

        if (uid_is_system(uid))
                return 0;
        if (uid_is_dynamic(uid))
                return 0;
        if (uid == UID_NOBODY)
                return 0;

        /* Regular users shall be able to read their own coredumps, but neither
         * modify nor remove them. */

        acl = acl_get_fd(fd);
        if (!acl)
                return log_error_errno(errno, "Failed to get ACL: %m");

        /* Append a new ACL_USER entry qualified with the user's UID */
        if (acl_create_entry(&acl, &e) < 0)
                return log_error_errno(errno, "Failed to patch ACL: %m");
        if (acl_set_tag_type(e, ACL_USER) < 0)
                return log_error_errno(errno, "Failed to patch ACL: %m");
        if (acl_set_qualifier(e, &uid) < 0)
                return log_error_errno(errno, "Failed to patch ACL: %m");

        /* Grant read permission (and nothing else) through that entry */
        if (acl_get_permset(e, &perms) < 0)
                return log_warning_errno(errno, "Failed to patch ACL: %m");
        if (acl_add_perm(perms, ACL_READ) < 0)
                return log_warning_errno(errno, "Failed to patch ACL: %m");

        k = calc_acl_mask_if_needed(&acl);
        if (k < 0)
                return log_warning_errno(k, "Failed to patch ACL: %m");

        if (acl_set_fd(fd, acl) < 0)
                return log_error_errno(errno, "Failed to apply ACL: %m");
#endif

        return 0;
}
213 | ||
f46c706b FB |
214 | static int fix_xattr(int fd, const Context *context) { |
215 | ||
216 | static const char * const xattrs[_META_MAX] = { | |
217 | [META_ARGV_PID] = "user.coredump.pid", | |
218 | [META_ARGV_UID] = "user.coredump.uid", | |
219 | [META_ARGV_GID] = "user.coredump.gid", | |
220 | [META_ARGV_SIGNAL] = "user.coredump.signal", | |
221 | [META_ARGV_TIMESTAMP] = "user.coredump.timestamp", | |
222 | [META_ARGV_RLIMIT] = "user.coredump.rlimit", | |
223 | [META_ARGV_HOSTNAME] = "user.coredump.hostname", | |
224 | [META_COMM] = "user.coredump.comm", | |
225 | [META_EXE] = "user.coredump.exe", | |
0cd77f97 LP |
226 | }; |
227 | ||
34c10968 | 228 | int r = 0; |
0cd77f97 | 229 | unsigned i; |
34c10968 | 230 | |
b59233e6 LP |
231 | assert(fd >= 0); |
232 | ||
1eef15b1 | 233 | /* Attach some metadata to coredumps via extended |
34c10968 LP |
234 | * attributes. Just because we can. */ |
235 | ||
f46c706b | 236 | for (i = 0; i < _META_MAX; i++) { |
1eef15b1 ZJS |
237 | int k; |
238 | ||
f46c706b | 239 | if (isempty(context->meta[i]) || !xattrs[i]) |
0cd77f97 | 240 | continue; |
34c10968 | 241 | |
f46c706b | 242 | k = fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE); |
1eef15b1 | 243 | if (k < 0 && r == 0) |
34c10968 | 244 | r = -errno; |
0cd77f97 | 245 | } |
34c10968 LP |
246 | |
247 | return r; | |
248 | } | |
249 | ||
/* Escapes '.', '/' and ' ' so that the string can be used as a file name component */
#define filename_escape(s) xescape((s), "./ ")

/* Returns s itself, or a placeholder suitable for log messages if s is NULL. */
static const char *coredump_tmpfile_name(const char *s) {
        if (!s)
                return "(unnamed temporary file)";

        return s;
}
255 | ||
b59233e6 LP |
256 | static int fix_permissions( |
257 | int fd, | |
258 | const char *filename, | |
259 | const char *target, | |
f46c706b | 260 | const Context *context, |
b59233e6 LP |
261 | uid_t uid) { |
262 | ||
03532f0a LP |
263 | int r; |
264 | ||
b59233e6 | 265 | assert(fd >= 0); |
b59233e6 | 266 | assert(target); |
3c171f0b | 267 | assert(context); |
cfd652ed ZJS |
268 | |
269 | /* Ignore errors on these */ | |
3c171f0b LP |
270 | (void) fchmod(fd, 0640); |
271 | (void) fix_acl(fd, uid); | |
272 | (void) fix_xattr(fd, context); | |
cfd652ed | 273 | |
4a62c710 | 274 | if (fsync(fd) < 0) |
0c773903 | 275 | return log_error_errno(errno, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename)); |
cfd652ed | 276 | |
8ac2f74f LP |
277 | (void) fsync_directory_of_file(fd); |
278 | ||
03532f0a LP |
279 | r = link_tmpfile(fd, filename, target); |
280 | if (r < 0) | |
281 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
cfd652ed ZJS |
282 | |
283 | return 0; | |
284 | } | |
285 | ||
59f448cf | 286 | static int maybe_remove_external_coredump(const char *filename, uint64_t size) { |
cfd652ed | 287 | |
b59233e6 | 288 | /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ |
cfd652ed | 289 | |
fc6cec86 | 290 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && |
cfd652ed ZJS |
291 | size <= arg_external_size_max) |
292 | return 0; | |
293 | ||
294 | if (!filename) | |
295 | return 1; | |
296 | ||
4a62c710 MS |
297 | if (unlink(filename) < 0 && errno != ENOENT) |
298 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
cfd652ed ZJS |
299 | |
300 | return 1; | |
301 | } | |
302 | ||
f46c706b | 303 | static int make_filename(const Context *context, char **ret) { |
b59233e6 | 304 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; |
a7f7d1bd | 305 | sd_id128_t boot = {}; |
34c10968 LP |
306 | int r; |
307 | ||
3c171f0b | 308 | assert(context); |
34c10968 | 309 | |
f46c706b | 310 | c = filename_escape(context->meta[META_COMM]); |
34c10968 | 311 | if (!c) |
b59233e6 | 312 | return -ENOMEM; |
34c10968 | 313 | |
f46c706b | 314 | u = filename_escape(context->meta[META_ARGV_UID]); |
0dc5d23c | 315 | if (!u) |
b59233e6 | 316 | return -ENOMEM; |
34c10968 LP |
317 | |
318 | r = sd_id128_get_boot(&boot); | |
b59233e6 | 319 | if (r < 0) |
34c10968 | 320 | return r; |
34c10968 | 321 | |
f46c706b | 322 | p = filename_escape(context->meta[META_ARGV_PID]); |
b59233e6 LP |
323 | if (!p) |
324 | return -ENOMEM; | |
325 | ||
f46c706b | 326 | t = filename_escape(context->meta[META_ARGV_TIMESTAMP]); |
b59233e6 LP |
327 | if (!t) |
328 | return -ENOMEM; | |
329 | ||
330 | if (asprintf(ret, | |
0dc5d23c | 331 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s000000", |
34c10968 | 332 | c, |
0dc5d23c | 333 | u, |
34c10968 LP |
334 | SD_ID128_FORMAT_VAL(boot), |
335 | p, | |
b59233e6 LP |
336 | t) < 0) |
337 | return -ENOMEM; | |
338 | ||
339 | return 0; | |
340 | } | |
341 | ||
342 | static int save_external_coredump( | |
f46c706b | 343 | const Context *context, |
3c171f0b | 344 | int input_fd, |
b59233e6 | 345 | char **ret_filename, |
5f3e0a74 HW |
346 | int *ret_node_fd, |
347 | int *ret_data_fd, | |
0cd4e913 | 348 | uint64_t *ret_size, |
cc4419ed | 349 | bool *ret_truncated) { |
b59233e6 LP |
350 | |
351 | _cleanup_free_ char *fn = NULL, *tmp = NULL; | |
352 | _cleanup_close_ int fd = -1; | |
ee0449fd | 353 | uint64_t rlimit, process_limit, max_size; |
b59233e6 | 354 | struct stat st; |
3c171f0b | 355 | uid_t uid; |
b59233e6 LP |
356 | int r; |
357 | ||
3c171f0b | 358 | assert(context); |
b59233e6 | 359 | assert(ret_filename); |
5f3e0a74 HW |
360 | assert(ret_node_fd); |
361 | assert(ret_data_fd); | |
b59233e6 LP |
362 | assert(ret_size); |
363 | ||
f46c706b | 364 | r = parse_uid(context->meta[META_ARGV_UID], &uid); |
3c171f0b LP |
365 | if (r < 0) |
366 | return log_error_errno(r, "Failed to parse UID: %m"); | |
367 | ||
f46c706b | 368 | r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit); |
bdfd7b2c | 369 | if (r < 0) |
f46c706b FB |
370 | return log_error_errno(r, "Failed to parse resource limit '%s': %m", |
371 | context->meta[META_ARGV_RLIMIT]); | |
6998b540 | 372 | if (rlimit < page_size()) { |
f46c706b FB |
373 | /* Is coredumping disabled? Then don't bother saving/processing the |
374 | * coredump. Anything below PAGE_SIZE cannot give a readable coredump | |
375 | * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but | |
376 | * is usually the same as PAGE_SIZE. */ | |
baaa35ad ZJS |
377 | return log_info_errno(SYNTHETIC_ERRNO(EBADSLT), |
378 | "Resource limits disable core dumping for process %s (%s).", | |
f46c706b | 379 | context->meta[META_ARGV_PID], context->meta[META_COMM]); |
bdfd7b2c LP |
380 | } |
381 | ||
ee0449fd | 382 | process_limit = MAX(arg_process_size_max, storage_size_max()); |
baaa35ad ZJS |
383 | if (process_limit == 0) |
384 | return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT), | |
385 | "Limits for coredump processing and storage are both 0, not dumping core."); | |
ee0449fd | 386 | |
bdfd7b2c | 387 | /* Never store more than the process configured, or than we actually shall keep or process */ |
ee0449fd | 388 | max_size = MIN(rlimit, process_limit); |
bdfd7b2c | 389 | |
3c171f0b | 390 | r = make_filename(context, &fn); |
23bbb0de MS |
391 | if (r < 0) |
392 | return log_error_errno(r, "Failed to determine coredump file name: %m"); | |
34c10968 | 393 | |
6e5dcce4 | 394 | (void) mkdir_p_label("/var/lib/systemd/coredump", 0755); |
803a3464 | 395 | |
03532f0a | 396 | fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp); |
4a62c710 | 397 | if (fd < 0) |
03532f0a | 398 | return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); |
803a3464 | 399 | |
1c876927 | 400 | r = copy_bytes(input_fd, fd, max_size, 0); |
73a99163 | 401 | if (r < 0) { |
f46c706b FB |
402 | log_error_errno(r, "Cannot store coredump of %s (%s): %m", |
403 | context->meta[META_ARGV_PID], context->meta[META_COMM]); | |
93240d3a | 404 | goto fail; |
0cd4e913 | 405 | } |
cc4419ed ZJS |
406 | *ret_truncated = r == 1; |
407 | if (*ret_truncated) | |
73a99163 ZJS |
408 | log_struct(LOG_INFO, |
409 | LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size), | |
410 | "SIZE_LIMIT=%zu", max_size, | |
a1230ff9 | 411 | "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR); |
803a3464 | 412 | |
34c10968 | 413 | if (fstat(fd, &st) < 0) { |
73a99163 | 414 | log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); |
34c10968 LP |
415 | goto fail; |
416 | } | |
417 | ||
7849c2ac | 418 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) { |
0c773903 | 419 | log_error_errno(errno, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp)); |
b59233e6 | 420 | goto fail; |
7849c2ac TA |
421 | } |
422 | ||
349cc4a5 | 423 | #if HAVE_XZ || HAVE_LZ4 |
cfd652ed | 424 | /* If we will remove the coredump anyway, do not compress. */ |
6e9ef603 | 425 | if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) { |
cfd652ed | 426 | |
b59233e6 LP |
427 | _cleanup_free_ char *fn_compressed = NULL, *tmp_compressed = NULL; |
428 | _cleanup_close_ int fd_compressed = -1; | |
cfd652ed | 429 | |
b910cc72 | 430 | fn_compressed = strjoin(fn, COMPRESSED_EXT); |
b59233e6 | 431 | if (!fn_compressed) { |
d89c8fdf | 432 | log_oom(); |
cfd652ed ZJS |
433 | goto uncompressed; |
434 | } | |
435 | ||
03532f0a LP |
436 | fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed); |
437 | if (fd_compressed < 0) { | |
438 | log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed); | |
b59233e6 | 439 | goto uncompressed; |
03532f0a | 440 | } |
cfd652ed | 441 | |
d89c8fdf | 442 | r = compress_stream(fd, fd_compressed, -1); |
b59233e6 | 443 | if (r < 0) { |
0c773903 | 444 | log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); |
b59233e6 LP |
445 | goto fail_compressed; |
446 | } | |
447 | ||
3c171f0b | 448 | r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid); |
cfd652ed | 449 | if (r < 0) |
b59233e6 LP |
450 | goto fail_compressed; |
451 | ||
452 | /* OK, this worked, we can get rid of the uncompressed version now */ | |
0c773903 EV |
453 | if (tmp) |
454 | unlink_noerrno(tmp); | |
cfd652ed | 455 | |
1cc6c93a YW |
456 | *ret_filename = TAKE_PTR(fn_compressed); /* compressed */ |
457 | *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */ | |
458 | *ret_data_fd = TAKE_FD(fd); /* uncompressed */ | |
59f448cf | 459 | *ret_size = (uint64_t) st.st_size; /* uncompressed */ |
cfd652ed | 460 | |
cfd652ed ZJS |
461 | return 0; |
462 | ||
b59233e6 | 463 | fail_compressed: |
0c773903 EV |
464 | if (tmp_compressed) |
465 | (void) unlink(tmp_compressed); | |
34c10968 | 466 | } |
cfd652ed ZJS |
467 | |
468 | uncompressed: | |
3b1a55e1 | 469 | #endif |
5f3e0a74 | 470 | |
3c171f0b | 471 | r = fix_permissions(fd, tmp, fn, context, uid); |
cfd652ed ZJS |
472 | if (r < 0) |
473 | goto fail; | |
34c10968 | 474 | |
1cc6c93a YW |
475 | *ret_filename = TAKE_PTR(fn); |
476 | *ret_data_fd = TAKE_FD(fd); | |
5f3e0a74 | 477 | *ret_node_fd = -1; |
59f448cf | 478 | *ret_size = (uint64_t) st.st_size; |
34c10968 | 479 | |
34c10968 LP |
480 | return 0; |
481 | ||
482 | fail: | |
0c773903 EV |
483 | if (tmp) |
484 | (void) unlink(tmp); | |
34c10968 LP |
485 | return r; |
486 | } | |
487 | ||
488 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
489 | _cleanup_free_ char *field = NULL; | |
490 | ssize_t n; | |
491 | ||
8d4e028f | 492 | assert(fd >= 0); |
34c10968 LP |
493 | assert(ret); |
494 | assert(ret_size); | |
495 | ||
4a62c710 MS |
496 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) |
497 | return log_warning_errno(errno, "Failed to seek: %m"); | |
803a3464 | 498 | |
34c10968 LP |
499 | field = malloc(9 + size); |
500 | if (!field) { | |
cfd652ed | 501 | log_warning("Failed to allocate memory for coredump, coredump will not be stored."); |
34c10968 LP |
502 | return -ENOMEM; |
503 | } | |
504 | ||
505 | memcpy(field, "COREDUMP=", 9); | |
506 | ||
507 | n = read(fd, field + 9, size); | |
23bbb0de MS |
508 | if (n < 0) |
509 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
baaa35ad ZJS |
510 | if ((size_t) n < size) |
511 | return log_error_errno(SYNTHETIC_ERRNO(EIO), | |
512 | "Core data too short."); | |
34c10968 | 513 | |
1cc6c93a | 514 | *ret = TAKE_PTR(field); |
34c10968 LP |
515 | *ret_size = size + 9; |
516 | ||
34c10968 LP |
517 | return 0; |
518 | } | |
803a3464 | 519 | |
3f132692 JF |
/* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
 * 0:/dev/pts/23
 * pos:    0
 * flags:  0100002
 *
 * 1:/dev/pts/23
 * pos:    0
 * flags:  0100002
 *
 * 2:/dev/pts/23
 * pos:    0
 * flags:  0100002
 * EOF
 *
 * On success returns 0 and stores the newly allocated text in *open_fds (the
 * caller must free it). Returns a negative errno-style value on failure, in
 * which case *open_fds is not modified.
 */
static int compose_open_fds(pid_t pid, char **open_fds) {
        _cleanup_closedir_ DIR *proc_fd_dir = NULL;
        _cleanup_close_ int proc_fdinfo_fd = -1;
        _cleanup_free_ char *buffer = NULL;
        _cleanup_fclose_ FILE *stream = NULL;
        const char *fddelim = "", *path;
        struct dirent *dent = NULL;
        size_t size = 0;
        int r;

        assert(pid >= 0);
        assert(open_fds != NULL);

        path = procfs_file_alloca(pid, "fd");
        proc_fd_dir = opendir(path);
        if (!proc_fd_dir)
                return -errno;

        /* Open the sibling "fdinfo" directory relative to the "fd" directory we already hold open */
        proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH);
        if (proc_fdinfo_fd < 0)
                return -errno;

        /* The output is accumulated in a memory stream backed by 'buffer' */
        stream = open_memstream_unlocked(&buffer, &size);
        if (!stream)
                return -ENOMEM;

        FOREACH_DIRENT(dent, proc_fd_dir, return -errno) {
                _cleanup_fclose_ FILE *fdinfo = NULL;
                _cleanup_free_ char *fdname = NULL;
                int fd;

                /* Each entry is a symlink; its target becomes the "N:target" header line */
                r = readlinkat_malloc(dirfd(proc_fd_dir), dent->d_name, &fdname);
                if (r < 0)
                        return r;

                /* The delimiter is empty for the first entry, so that blank lines only separate entries */
                fprintf(stream, "%s%s:%s\n", fddelim, dent->d_name, fdname);
                fddelim = "\n";

                /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
                fd = openat(proc_fdinfo_fd, dent->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY);
                if (fd < 0)
                        continue; /* no fdinfo available: keep just the header line */

                fdinfo = fdopen(fd, "r");
                if (!fdinfo) {
                        /* fdopen() did not take over the fd, hence close it ourselves */
                        safe_close(fd);
                        continue;
                }

                /* Copy the fdinfo file line by line */
                for (;;) {
                        _cleanup_free_ char *line = NULL;

                        r = read_line(fdinfo, LONG_LINE_MAX, &line);
                        if (r < 0)
                                return r;
                        if (r == 0)
                                break;

                        fputs(line, stream);
                        fputc('\n', stream);
                }
        }

        /* Close the stream before handing out the buffer.
         *
         * NOTE(review): the check below only detects a failure if safe_fclose()
         * leaves errno set when closing fails — confirm against its implementation. */
        errno = 0;
        stream = safe_fclose(stream);

        if (errno > 0)
                return -errno;

        *open_fds = TAKE_PTR(buffer);

        return 0;
}
607 | ||
7ed03ce6 JF |
608 | static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) { |
609 | const char *p; | |
610 | struct stat stbuf; | |
611 | _cleanup_close_ int proc_ns_dir_fd; | |
612 | ||
613 | p = procfs_file_alloca(pid, "ns"); | |
614 | ||
615 | proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY); | |
616 | if (proc_ns_dir_fd < 0) | |
617 | return -errno; | |
618 | ||
619 | if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0) | |
620 | return -errno; | |
621 | ||
622 | *ns = stbuf.st_ino; | |
623 | return 0; | |
624 | } | |
625 | ||
/* Walks up the chain of parent processes of 'pid' until a parent is found
 * whose mount namespace differs from the one of 'pid', and stores that
 * parent's PID in *container_pid.
 *
 * Returns 0 on success, -ENOENT if PID 1 is reached while still in the same
 * mount namespace, or another negative errno-style value on failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) {
        pid_t current = pid, parent = 0;
        ino_t own_mntns;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        for (;;) {
                ino_t parent_mntns;

                r = get_process_ppid(current, &parent);
                if (r < 0)
                        return r;

                r = get_process_ns(parent, "mnt", &parent_mntns);
                if (r < 0)
                        return r;

                if (own_mntns != parent_mntns) {
                        /* First ancestor living in a different mount namespace */
                        *container_pid = parent;
                        return 0;
                }

                /* Reached the top of the process tree without leaving the namespace */
                if (parent == 1)
                        return -ENOENT;

                current = parent;
        }
}
658 | ||
659 | /* Returns 1 if the parent was found. | |
660 | * Returns 0 if there is not a process we can call the pid's | |
661 | * container parent (the pid's process isn't 'containerized'). | |
662 | * Returns a negative number on errors. | |
663 | */ | |
664 | static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { | |
665 | int r = 0; | |
666 | pid_t container_pid; | |
667 | const char *proc_root_path; | |
668 | struct stat root_stat, proc_root_stat; | |
669 | ||
670 | /* To compare inodes of / and /proc/[pid]/root */ | |
671 | if (stat("/", &root_stat) < 0) | |
672 | return -errno; | |
673 | ||
674 | proc_root_path = procfs_file_alloca(pid, "root"); | |
675 | if (stat(proc_root_path, &proc_root_stat) < 0) | |
676 | return -errno; | |
677 | ||
678 | /* The process uses system root. */ | |
679 | if (proc_root_stat.st_ino == root_stat.st_ino) { | |
680 | *cmdline = NULL; | |
681 | return 0; | |
682 | } | |
683 | ||
684 | r = get_mount_namespace_leader(pid, &container_pid); | |
685 | if (r < 0) | |
686 | return r; | |
687 | ||
09c1dcee | 688 | r = get_process_cmdline(container_pid, SIZE_MAX, 0, cmdline); |
d3cba4ea EV |
689 | if (r < 0) |
690 | return r; | |
691 | ||
692 | return 1; | |
7ed03ce6 JF |
693 | } |
694 | ||
f46c706b | 695 | static int change_uid_gid(const Context *context) { |
3c171f0b LP |
696 | uid_t uid; |
697 | gid_t gid; | |
698 | int r; | |
34c10968 | 699 | |
f46c706b | 700 | r = parse_uid(context->meta[META_ARGV_UID], &uid); |
3c171f0b LP |
701 | if (r < 0) |
702 | return r; | |
8c8549db | 703 | |
888e378d LP |
704 | if (uid <= SYSTEM_UID_MAX) { |
705 | const char *user = "systemd-coredump"; | |
706 | ||
fafff8f1 | 707 | r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); |
888e378d LP |
708 | if (r < 0) { |
709 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
710 | uid = gid = 0; | |
711 | } | |
712 | } else { | |
f46c706b | 713 | r = parse_gid(context->meta[META_ARGV_GID], &gid); |
888e378d LP |
714 | if (r < 0) |
715 | return r; | |
716 | } | |
3c171f0b LP |
717 | |
718 | return drop_privileges(uid, gid, 0); | |
719 | } | |
8c8549db | 720 | |
3c171f0b | 721 | static int submit_coredump( |
f46c706b | 722 | Context *context, |
9a435388 | 723 | struct iovec_wrapper *iovw, |
3c171f0b | 724 | int input_fd) { |
34c10968 | 725 | |
5f3e0a74 | 726 | _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; |
9a435388 | 727 | _cleanup_free_ char *filename = NULL, *coredump_data = NULL; |
51d3783d | 728 | _cleanup_free_ char *stacktrace = NULL; |
9a435388 | 729 | char *core_message; |
a5ca3649 | 730 | uint64_t coredump_size = UINT64_MAX; |
f46c706b | 731 | bool truncated = false; |
3c171f0b | 732 | int r; |
f5e04665 | 733 | |
3c171f0b | 734 | assert(context); |
9a435388 | 735 | assert(iovw); |
3c171f0b | 736 | assert(input_fd >= 0); |
f5e04665 | 737 | |
3c171f0b LP |
738 | /* Vacuum before we write anything again */ |
739 | (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); | |
803a3464 | 740 | |
3c171f0b | 741 | /* Always stream the coredump to disk, if that's possible */ |
0cd4e913 ZJS |
742 | r = save_external_coredump(context, input_fd, |
743 | &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated); | |
3c171f0b LP |
744 | if (r < 0) |
745 | /* Skip whole core dumping part */ | |
746 | goto log; | |
747 | ||
51d3783d FB |
748 | /* If we don't want to keep the coredump on disk, remove it now, as later on we |
749 | * will lack the privileges for it. However, we keep the fd to it, so that we can | |
750 | * still process it and log it. */ | |
3c171f0b LP |
751 | r = maybe_remove_external_coredump(filename, coredump_size); |
752 | if (r < 0) | |
753 | return r; | |
754 | if (r == 0) { | |
2a3bebd0 | 755 | (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename); |
3c171f0b | 756 | |
6e9ef603 | 757 | } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
5206a724 | 758 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
6e9ef603 | 759 | coredump_size, arg_external_size_max); |
f5e04665 | 760 | |
3c171f0b LP |
761 | /* Vacuum again, but exclude the coredump we just created */ |
762 | (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); | |
8c9571d0 | 763 | |
51d3783d FB |
764 | /* Now, let's drop privileges to become the user who owns the segfaulted process |
765 | * and allocate the coredump memory under the user's uid. This also ensures that | |
766 | * the credentials journald will see are the ones of the coredumping user, thus | |
767 | * making sure the user gets access to the core dump. Let's also get rid of all | |
3c171f0b LP |
768 | * capabilities, if we run as root, we won't need them anymore. */ |
769 | r = change_uid_gid(context); | |
770 | if (r < 0) | |
771 | return log_error_errno(r, "Failed to drop privileges: %m"); | |
34c10968 | 772 | |
349cc4a5 | 773 | #if HAVE_ELFUTILS |
5238e957 | 774 | /* Try to get a stack trace if we can */ |
51d3783d FB |
775 | if (coredump_size > arg_process_size_max) { |
776 | log_debug("Not generating stack trace: core size %"PRIu64" is greater " | |
777 | "than %"PRIu64" (the configured maximum)", | |
6e9ef603 | 778 | coredump_size, arg_process_size_max); |
51d3783d | 779 | } else |
f46c706b | 780 | coredump_make_stack_trace(coredump_fd, context->meta[META_EXE], &stacktrace); |
3c171f0b | 781 | #endif |
51d3783d | 782 | |
3c171f0b | 783 | log: |
f46c706b FB |
784 | core_message = strjoina("Process ", context->meta[META_ARGV_PID], |
785 | " (", context->meta[META_COMM], ") of user ", | |
786 | context->meta[META_ARGV_UID], " dumped core.", | |
787 | context->is_journald && filename ? "\nCoredump diverted to " : NULL, | |
788 | context->is_journald && filename ? filename : NULL); | |
51d3783d | 789 | |
9a435388 | 790 | core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace); |
92e92d71 | 791 | |
f46c706b | 792 | if (context->is_journald) { |
4f62556d | 793 | /* We cannot log to the journal, so just print the message. |
92e92d71 | 794 | * The target was set previously to something safe. */ |
9a435388 | 795 | log_dispatch(LOG_ERR, 0, core_message); |
92e92d71 ZJS |
796 | return 0; |
797 | } | |
798 | ||
2a3bebd0 | 799 | (void) iovw_put_string_field(iovw, "MESSAGE=", core_message); |
3c171f0b | 800 | |
0cd4e913 | 801 | if (truncated) |
2a3bebd0 | 802 | (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1"); |
0cd4e913 | 803 | |
3c171f0b | 804 | /* Optionally store the entire coredump in the journal */ |
6e9ef603 ZJS |
805 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) { |
806 | if (coredump_size <= arg_journal_size_max) { | |
807 | size_t sz = 0; | |
808 | ||
809 | /* Store the coredump itself in the journal */ | |
810 | ||
811 | r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); | |
9a435388 FB |
812 | if (r >= 0) { |
813 | if (iovw_put(iovw, coredump_data, sz) >= 0) | |
814 | TAKE_PTR(coredump_data); | |
815 | } else | |
6e9ef603 ZJS |
816 | log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); |
817 | } else | |
5206a724 | 818 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
6e9ef603 | 819 | coredump_size, arg_journal_size_max); |
f5e04665 LP |
820 | } |
821 | ||
9a435388 | 822 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
3c171f0b LP |
823 | if (r < 0) |
824 | return log_error_errno(r, "Failed to log coredump: %m"); | |
825 | ||
826 | return 0; | |
827 | } | |
828 | ||
f46c706b FB |
829 | static int save_context(Context *context, const struct iovec_wrapper *iovw) { |
830 | unsigned n, i, count = 0; | |
831 | const char *unit; | |
832 | int r; | |
3c171f0b | 833 | |
3c171f0b | 834 | assert(context); |
f46c706b FB |
835 | assert(iovw); |
836 | assert(iovw->count >= _META_ARGV_MAX); | |
3c171f0b | 837 | |
f46c706b | 838 | /* The context does not allocate any memory on its own */ |
3c171f0b | 839 | |
f46c706b FB |
840 | for (n = 0; n < iovw->count; n++) { |
841 | struct iovec *iovec = iovw->iovec + n; | |
92e92d71 | 842 | |
f46c706b FB |
843 | for (i = 0; i < ELEMENTSOF(meta_field_names); i++) { |
844 | char *p; | |
845 | ||
846 | /* Note that these strings are NUL terminated, because we made sure that a | |
847 | * trailing NUL byte is in the buffer, though not included in the iov_len | |
848 | * count (see process_socket() and gather_pid_metadata_*()) */ | |
849 | assert(((char*) iovec->iov_base)[iovec->iov_len] == 0); | |
3c171f0b | 850 | |
f46c706b FB |
851 | p = startswith(iovec->iov_base, meta_field_names[i]); |
852 | if (p) { | |
853 | context->meta[i] = p; | |
854 | count++; | |
855 | break; | |
856 | } | |
857 | } | |
3c171f0b | 858 | } |
f46c706b FB |
859 | |
860 | if (!context->meta[META_ARGV_PID]) | |
861 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
862 | "Failed to find the PID of crashing process"); | |
863 | ||
864 | r = parse_pid(context->meta[META_ARGV_PID], &context->pid); | |
865 | if (r < 0) | |
866 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); | |
867 | ||
868 | unit = context->meta[META_UNIT]; | |
869 | context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); | |
870 | context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); | |
871 | ||
872 | return 0; | |
3c171f0b LP |
873 | } |
874 | ||
875 | static int process_socket(int fd) { | |
f8540bde | 876 | _cleanup_close_ int input_fd = -1; |
f46c706b | 877 | Context context = {}; |
9a435388 FB |
878 | struct iovec_wrapper iovw = {}; |
879 | struct iovec iovec; | |
f46c706b | 880 | int i, r; |
3c171f0b LP |
881 | |
882 | assert(fd >= 0); | |
883 | ||
6bf3c61c | 884 | log_setup_service(); |
3c171f0b | 885 | |
988e89ee ZJS |
886 | log_debug("Processing coredump received on stdin..."); |
887 | ||
3c171f0b LP |
888 | for (;;) { |
889 | union { | |
890 | struct cmsghdr cmsghdr; | |
891 | uint8_t buf[CMSG_SPACE(sizeof(int))]; | |
892 | } control = {}; | |
893 | struct msghdr mh = { | |
894 | .msg_control = &control, | |
895 | .msg_controllen = sizeof(control), | |
896 | .msg_iovlen = 1, | |
897 | }; | |
898 | ssize_t n; | |
fe1ef0f8 | 899 | ssize_t l; |
3c171f0b | 900 | |
fe1ef0f8 EV |
901 | l = next_datagram_size_fd(fd); |
902 | if (l < 0) { | |
903 | r = log_error_errno(l, "Failed to determine datagram size to read: %m"); | |
3c171f0b LP |
904 | goto finish; |
905 | } | |
906 | ||
9a435388 FB |
907 | iovec.iov_len = l; |
908 | iovec.iov_base = malloc(l + 1); | |
909 | if (!iovec.iov_base) { | |
3c171f0b LP |
910 | r = log_oom(); |
911 | goto finish; | |
912 | } | |
913 | ||
9a435388 | 914 | mh.msg_iov = &iovec; |
3c171f0b | 915 | |
a6887cc0 | 916 | n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC); |
3c171f0b | 917 | if (n < 0) { |
9a435388 | 918 | free(iovec.iov_base); |
3c171f0b LP |
919 | r = log_error_errno(errno, "Failed to receive datagram: %m"); |
920 | goto finish; | |
921 | } | |
922 | ||
9a435388 FB |
923 | /* The final zero-length datagram carries the file descriptor and tells us |
924 | * that we're done. */ | |
3c171f0b LP |
925 | if (n == 0) { |
926 | struct cmsghdr *cmsg, *found = NULL; | |
3c171f0b | 927 | |
9a435388 | 928 | free(iovec.iov_base); |
3c171f0b LP |
929 | |
930 | CMSG_FOREACH(cmsg, &mh) { | |
931 | if (cmsg->cmsg_level == SOL_SOCKET && | |
932 | cmsg->cmsg_type == SCM_RIGHTS && | |
933 | cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { | |
934 | assert(!found); | |
935 | found = cmsg; | |
936 | } | |
937 | } | |
938 | ||
939 | if (!found) { | |
940 | log_error("Coredump file descriptor missing."); | |
941 | r = -EBADMSG; | |
942 | goto finish; | |
943 | } | |
944 | ||
f8540bde FB |
945 | assert(input_fd < 0); |
946 | input_fd = *(int*) CMSG_DATA(found); | |
3c171f0b LP |
947 | break; |
948 | } | |
949 | ||
950 | /* Add trailing NUL byte, in case these are strings */ | |
9a435388 FB |
951 | ((char*) iovec.iov_base)[n] = 0; |
952 | iovec.iov_len = (size_t) n; | |
3c171f0b | 953 | |
9a435388 FB |
954 | r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len); |
955 | if (r < 0) | |
956 | goto finish; | |
3c171f0b | 957 | |
9a435388 | 958 | cmsg_close_all(&mh); |
34c10968 LP |
959 | } |
960 | ||
61233823 | 961 | /* Make sure we got all data we really need */ |
f8540bde | 962 | assert(input_fd >= 0); |
3c171f0b | 963 | |
f46c706b FB |
964 | r = save_context(&context, &iovw); |
965 | if (r < 0) | |
966 | goto finish; | |
967 | ||
968 | /* Make sure we received at least all fields we need. */ | |
969 | for (i = 0; i < _META_MANDATORY_MAX; i++) | |
970 | if (!context.meta[i]) { | |
971 | r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
972 | "A mandatory argument (%i) has not been sent, aborting.", | |
973 | i); | |
974 | goto finish; | |
975 | } | |
80002f66 | 976 | |
f46c706b | 977 | r = submit_coredump(&context, &iovw, input_fd); |
3c171f0b LP |
978 | |
979 | finish: | |
9a435388 | 980 | iovw_free_contents(&iovw, true); |
3c171f0b LP |
981 | return r; |
982 | } | |
983 | ||
9a435388 | 984 | static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { |
3c171f0b LP |
985 | |
986 | static const union sockaddr_union sa = { | |
987 | .un.sun_family = AF_UNIX, | |
988 | .un.sun_path = "/run/systemd/coredump", | |
989 | }; | |
990 | _cleanup_close_ int fd = -1; | |
991 | size_t i; | |
992 | int r; | |
993 | ||
9a435388 | 994 | assert(iovw); |
3c171f0b LP |
995 | assert(input_fd >= 0); |
996 | ||
997 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
998 | if (fd < 0) | |
999 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1000 | ||
fc2fffe7 | 1001 | if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) |
3c171f0b LP |
1002 | return log_error_errno(errno, "Failed to connect to coredump service: %m"); |
1003 | ||
9a435388 | 1004 | for (i = 0; i < iovw->count; i++) { |
fec603eb | 1005 | struct msghdr mh = { |
9a435388 | 1006 | .msg_iov = iovw->iovec + i, |
fec603eb LP |
1007 | .msg_iovlen = 1, |
1008 | }; | |
1009 | struct iovec copy[2]; | |
1010 | ||
1011 | for (;;) { | |
1012 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1013 | break; | |
1014 | ||
1015 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
f46c706b FB |
1016 | /* This field didn't fit? That's a pity. Given that this is |
1017 | * just metadata, let's truncate the field at half, and try | |
1018 | * again. We append three dots, in order to show that this is | |
1019 | * truncated. */ | |
fec603eb LP |
1020 | |
1021 | if (mh.msg_iov != copy) { | |
f46c706b FB |
1022 | /* We don't want to modify the caller's iovec, hence |
1023 | * let's create our own array, consisting of two new | |
1024 | * iovecs, where the first is a (truncated) copy of | |
1025 | * what we want to send, and the second one contains | |
1026 | * the trailing dots. */ | |
9a435388 | 1027 | copy[0] = iovw->iovec[i]; |
ed0cb346 | 1028 | copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3); |
fec603eb LP |
1029 | |
1030 | mh.msg_iov = copy; | |
1031 | mh.msg_iovlen = 2; | |
1032 | } | |
1033 | ||
1034 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1035 | continue; | |
1036 | } | |
3c171f0b | 1037 | |
3c171f0b | 1038 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); |
fec603eb | 1039 | } |
1eef15b1 ZJS |
1040 | } |
1041 | ||
3c171f0b LP |
1042 | r = send_one_fd(fd, input_fd, 0); |
1043 | if (r < 0) | |
1044 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1eef15b1 | 1045 | |
3c171f0b LP |
1046 | return 0; |
1047 | } | |
1eef15b1 | 1048 | |
f46c706b FB |
1049 | static int gather_pid_metadata_from_argv(struct iovec_wrapper *iovw, Context *context, |
1050 | int argc, char **argv) { | |
1051 | _cleanup_free_ char *free_timestamp = NULL; | |
1052 | int i, r, signo; | |
3c171f0b | 1053 | char *t; |
3c171f0b | 1054 | |
f46c706b FB |
1055 | /* We gather all metadata that were passed via argv[] into an array of iovecs that |
1056 | * we'll forward to the socket unit */ | |
3c171f0b | 1057 | |
f46c706b FB |
1058 | if (argc < _META_ARGV_MAX) |
1059 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1060 | "Not enough arguments passed by the kernel (%i, expected %i).", | |
1061 | argc, _META_ARGV_MAX); | |
3c171f0b | 1062 | |
f46c706b | 1063 | for (i = 0; i < _META_ARGV_MAX; i++) { |
3c171f0b | 1064 | |
f46c706b | 1065 | t = argv[i]; |
3c171f0b | 1066 | |
f46c706b FB |
1067 | switch (i) { |
1068 | case META_ARGV_TIMESTAMP: | |
1069 | /* The journal fields contain the timestamp padded with six | |
1070 | * zeroes, so that the kernel-supplied 1s granularity timestamps | |
1071 | * becomes 1µs granularity, i.e. the granularity systemd usually | |
1072 | * operates in. */ | |
1073 | t = free_timestamp = strjoin(argv[i], "000000"); | |
1074 | if (!t) | |
1075 | return log_oom(); | |
1076 | break; | |
1077 | case META_ARGV_SIGNAL: | |
1078 | /* For signal, record its pretty name too */ | |
1079 | if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo)) | |
2a3bebd0 FB |
1080 | (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG", |
1081 | signal_to_string(signo)); | |
f46c706b FB |
1082 | break; |
1083 | default: | |
1084 | break; | |
c8091d92 LP |
1085 | } |
1086 | ||
f46c706b FB |
1087 | r = iovw_put_string_field(iovw, meta_field_names[i], t); |
1088 | if (r < 0) | |
1089 | return r; | |
8c8549db | 1090 | } |
803a3464 | 1091 | |
f46c706b FB |
1092 | /* Cache some of the process metadata we collected so far and that we'll need to |
1093 | * access soon */ | |
1094 | return save_context(context, iovw); | |
1095 | } | |
3c171f0b | 1096 | |
f46c706b FB |
1097 | static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) { |
1098 | uid_t owner_uid; | |
1099 | pid_t pid; | |
1100 | char *t; | |
1101 | const char *p; | |
1102 | int r; | |
f5e04665 | 1103 | |
f46c706b FB |
1104 | /* Note that if we fail on oom later on, we do not roll-back changes to the iovec |
1105 | * structure. (It remains valid, with the first iovec fields initialized.) */ | |
f5e04665 | 1106 | |
f46c706b | 1107 | pid = context->pid; |
f5e04665 | 1108 | |
f46c706b FB |
1109 | /* The following is mandatory */ |
1110 | r = get_process_comm(pid, &t); | |
9a435388 | 1111 | if (r < 0) |
f46c706b | 1112 | return log_error_errno(r, "Failed to get COMM: %m"); |
f5e04665 | 1113 | |
f46c706b | 1114 | r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t); |
9a435388 FB |
1115 | if (r < 0) |
1116 | return r; | |
f45b8015 | 1117 | |
f46c706b | 1118 | /* The following are optional but we used them if present */ |
2a3bebd0 FB |
1119 | r = get_process_exe(pid, &t); |
1120 | if (r >= 0) | |
1121 | r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t); | |
1122 | if (r < 0) | |
f46c706b | 1123 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); |
bdfd7b2c | 1124 | |
f46c706b | 1125 | if (cg_pid_get_unit(pid, &t) >= 0) |
2a3bebd0 | 1126 | (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t); |
f5e04665 | 1127 | |
9a435388 | 1128 | /* The next are optional */ |
f46c706b | 1129 | if (cg_pid_get_user_unit(pid, &t) >= 0) |
2a3bebd0 | 1130 | (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t); |
f46c706b | 1131 | |
9aa82023 | 1132 | if (sd_pid_get_session(pid, &t) >= 0) |
9a435388 | 1133 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t); |
f5e04665 | 1134 | |
a035f819 | 1135 | if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) { |
9a435388 | 1136 | r = asprintf(&t, UID_FMT, owner_uid); |
7de80bfe | 1137 | if (r > 0) |
9a435388 | 1138 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t); |
f5e04665 LP |
1139 | } |
1140 | ||
9aa82023 | 1141 | if (sd_pid_get_slice(pid, &t) >= 0) |
2a3bebd0 | 1142 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t); |
f5e04665 | 1143 | |
09c1dcee | 1144 | if (get_process_cmdline(pid, SIZE_MAX, 0, &t) >= 0) |
2a3bebd0 | 1145 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t); |
a035f819 | 1146 | |
9aa82023 | 1147 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) |
2a3bebd0 | 1148 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t); |
a035f819 | 1149 | |
9aa82023 | 1150 | if (compose_open_fds(pid, &t) >= 0) |
2a3bebd0 | 1151 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t); |
3f132692 JF |
1152 | |
1153 | p = procfs_file_alloca(pid, "status"); | |
9aa82023 | 1154 | if (read_full_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1155 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t); |
3f132692 JF |
1156 | |
1157 | p = procfs_file_alloca(pid, "maps"); | |
9aa82023 | 1158 | if (read_full_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1159 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t); |
3f132692 JF |
1160 | |
1161 | p = procfs_file_alloca(pid, "limits"); | |
9aa82023 | 1162 | if (read_full_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1163 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t); |
3f132692 JF |
1164 | |
1165 | p = procfs_file_alloca(pid, "cgroup"); | |
9aa82023 | 1166 | if (read_full_file(p, &t, NULL) >=0) |
2a3bebd0 | 1167 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t); |
3f132692 | 1168 | |
d7032b1f | 1169 | p = procfs_file_alloca(pid, "mountinfo"); |
9aa82023 | 1170 | if (read_full_file(p, &t, NULL) >=0) |
2a3bebd0 | 1171 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t); |
d7032b1f | 1172 | |
9aa82023 | 1173 | if (get_process_cwd(pid, &t) >= 0) |
2a3bebd0 | 1174 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t); |
3f132692 JF |
1175 | |
1176 | if (get_process_root(pid, &t) >= 0) { | |
9aa82023 ZJS |
1177 | bool proc_self_root_is_slash; |
1178 | ||
1179 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
3f132692 | 1180 | |
2a3bebd0 | 1181 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t); |
7ed03ce6 JF |
1182 | |
1183 | /* If the process' root is "/", then there is a chance it has | |
1184 | * mounted own root and hence being containerized. */ | |
9aa82023 | 1185 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) |
2a3bebd0 | 1186 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t); |
3f132692 JF |
1187 | } |
1188 | ||
9aa82023 | 1189 | if (get_process_environ(pid, &t) >= 0) |
2a3bebd0 | 1190 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t); |
9aa82023 | 1191 | |
f46c706b FB |
1192 | /* we successfully acquired all metadata */ |
1193 | return save_context(context, iovw); | |
9aa82023 | 1194 | } |
3f132692 | 1195 | |
9aa82023 | 1196 | static int process_kernel(int argc, char* argv[]) { |
f46c706b | 1197 | Context context = {}; |
9a435388 | 1198 | struct iovec_wrapper *iovw; |
9aa82023 ZJS |
1199 | int r; |
1200 | ||
988e89ee ZJS |
1201 | log_debug("Processing coredump received from the kernel..."); |
1202 | ||
9a435388 FB |
1203 | iovw = iovw_new(); |
1204 | if (!iovw) | |
1205 | return log_oom(); | |
1206 | ||
2a3bebd0 FB |
1207 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); |
1208 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1209 | |
1210 | /* Collect all process metadata passed by the kernel through argv[] */ | |
1211 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1); | |
92e92d71 | 1212 | if (r < 0) |
86562420 | 1213 | goto finish; |
86562420 | 1214 | |
f46c706b FB |
1215 | /* Collect the rest of the process metadata retrieved from the runtime */ |
1216 | r = gather_pid_metadata(iovw, &context); | |
1217 | if (r < 0) | |
1218 | goto finish; | |
1219 | ||
1220 | if (!context.is_journald) { | |
1221 | /* OK, now we know it's not the journal, hence we can make use of it now. */ | |
1222 | log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); | |
1223 | log_open(); | |
1224 | } | |
1225 | ||
1226 | /* If this is PID 1 disable coredump collection, we'll unlikely be able to process | |
1227 | * it later on. | |
1228 | * | |
1229 | * FIXME: maybe we should disable coredumps generation from the beginning and | |
1230 | * re-enable it only when we know it's either safe (ie we're not running OOM) or | |
1231 | * it's not pid1 ? */ | |
1232 | if (context.is_pid1) { | |
1233 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1234 | disable_coredumps(); | |
1235 | } | |
34c10968 | 1236 | |
f46c706b FB |
1237 | if (context.is_journald || context.is_pid1) |
1238 | r = submit_coredump(&context, iovw, STDIN_FILENO); | |
92e92d71 | 1239 | else |
9a435388 | 1240 | r = send_iovec(iovw, STDIN_FILENO); |
9aa82023 ZJS |
1241 | |
1242 | finish: | |
9a435388 | 1243 | iovw = iovw_free_free(iovw); |
9aa82023 | 1244 | return r; |
3c171f0b | 1245 | } |
34c10968 | 1246 | |
988e89ee | 1247 | static int process_backtrace(int argc, char *argv[]) { |
f46c706b | 1248 | Context context = {}; |
9a435388 FB |
1249 | struct iovec_wrapper *iovw; |
1250 | char *message; | |
1251 | size_t i; | |
988e89ee | 1252 | int r; |
11e6d971 | 1253 | _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO); |
988e89ee ZJS |
1254 | |
1255 | log_debug("Processing backtrace on stdin..."); | |
1256 | ||
9a435388 FB |
1257 | iovw = iovw_new(); |
1258 | if (!iovw) | |
5b45a160 ZJS |
1259 | return log_oom(); |
1260 | ||
2a3bebd0 FB |
1261 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR); |
1262 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1263 | |
1264 | /* Collect all process metadata from argv[] by making sure to skip the | |
1265 | * '--backtrace' option */ | |
1266 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2); | |
988e89ee ZJS |
1267 | if (r < 0) |
1268 | goto finish; | |
aaeb2522 | 1269 | |
f46c706b FB |
1270 | /* Collect the rest of the process metadata retrieved from the runtime */ |
1271 | r = gather_pid_metadata(iovw, &context); | |
1272 | if (r < 0) | |
1273 | goto finish; | |
988e89ee | 1274 | |
86562420 | 1275 | for (;;) { |
5b45a160 ZJS |
1276 | r = journal_importer_process_data(&importer); |
1277 | if (r < 0) { | |
1278 | log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
1279 | goto finish; | |
1280 | } | |
d74dc4f2 ZJS |
1281 | if (r == 1 || /* complete entry */ |
1282 | journal_importer_eof(&importer)) /* end of data */ | |
5b45a160 | 1283 | break; |
988e89ee | 1284 | } |
988e89ee | 1285 | |
5b45a160 ZJS |
1286 | if (journal_importer_eof(&importer)) { |
1287 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
988e89ee | 1288 | |
f46c706b FB |
1289 | message = strjoina("Process ", context.meta[META_ARGV_PID], |
1290 | " (", context.meta[META_COMM], ")" | |
1291 | " of user ", context.meta[META_ARGV_UID], | |
1292 | " failed with ", context.meta[META_ARGV_SIGNAL]); | |
9a435388 FB |
1293 | |
1294 | r = iovw_put_string_field(iovw, "MESSAGE=", message); | |
1295 | if (r < 0) | |
1296 | return r; | |
5b45a160 | 1297 | } else { |
9a435388 FB |
1298 | /* The imported iovecs are not supposed to be freed by us so let's store |
1299 | * them at the end of the array so we can skip them while freeing the | |
1300 | * rest. */ | |
9a435388 FB |
1301 | for (i = 0; i < importer.iovw.count; i++) { |
1302 | struct iovec *iovec = importer.iovw.iovec + i; | |
988e89ee | 1303 | |
9a435388 FB |
1304 | iovw_put(iovw, iovec->iov_base, iovec->iov_len); |
1305 | } | |
1306 | } | |
988e89ee | 1307 | |
9a435388 | 1308 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
988e89ee ZJS |
1309 | if (r < 0) |
1310 | log_error_errno(r, "Failed to log backtrace: %m"); | |
1311 | ||
1312 | finish: | |
9a435388 FB |
1313 | iovw->count -= importer.iovw.count; |
1314 | iovw = iovw_free_free(iovw); | |
988e89ee ZJS |
1315 | return r; |
1316 | } | |
1317 | ||
4515a95e | 1318 | static int run(int argc, char *argv[]) { |
3c171f0b | 1319 | int r; |
fee80f69 | 1320 | |
9aa82023 ZJS |
1321 | /* First, log to a safe place, since we don't know what crashed and it might |
1322 | * be journald which we'd rather not log to then. */ | |
8d4e028f | 1323 | |
3c171f0b LP |
1324 | log_set_target(LOG_TARGET_KMSG); |
1325 | log_open(); | |
8d4e028f | 1326 | |
3c171f0b LP |
1327 | /* Make sure we never enter a loop */ |
1328 | (void) prctl(PR_SET_DUMPABLE, 0); | |
8d4e028f | 1329 | |
3c171f0b LP |
1330 | /* Ignore all parse errors */ |
1331 | (void) parse_config(); | |
fee80f69 | 1332 | |
3c171f0b LP |
1333 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); |
1334 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
fee80f69 | 1335 | |
3c171f0b | 1336 | r = sd_listen_fds(false); |
4515a95e ZJS |
1337 | if (r < 0) |
1338 | return log_error_errno(r, "Failed to determine the number of file descriptors: %m"); | |
fee80f69 | 1339 | |
9aa82023 ZJS |
1340 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we |
1341 | * are invoked from the kernel as coredump handler. */ | |
988e89ee ZJS |
1342 | if (r == 0) { |
1343 | if (streq_ptr(argv[1], "--backtrace")) | |
4515a95e | 1344 | return process_backtrace(argc, argv); |
988e89ee | 1345 | else |
4515a95e | 1346 | return process_kernel(argc, argv); |
988e89ee | 1347 | } else if (r == 1) |
4515a95e | 1348 | return process_socket(SD_LISTEN_FDS_START); |
f5e04665 | 1349 | |
baaa35ad ZJS |
1350 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
1351 | "Received unexpected number of file descriptors."); | |
f5e04665 | 1352 | } |
4515a95e ZJS |
1353 | |
1354 | DEFINE_MAIN_FUNCTION(run); |