]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
f5e04665 LP |
2 | |
3 | #include <errno.h> | |
803a3464 LP |
4 | #include <stdio.h> |
5 | #include <sys/prctl.h> | |
587f2a5e | 6 | #include <sys/statvfs.h> |
3e4d0f6c | 7 | #include <sys/auxv.h> |
cacd6403 | 8 | #include <sys/xattr.h> |
4f5dd394 | 9 | #include <unistd.h> |
f5e04665 | 10 | |
73a99163 | 11 | #include "sd-daemon.h" |
f11943c5 LP |
12 | #include "sd-journal.h" |
13 | #include "sd-login.h" | |
73a99163 | 14 | #include "sd-messages.h" |
4f5dd394 LP |
15 | |
16 | #include "acl-util.h" | |
b5efdb8a | 17 | #include "alloc-util.h" |
587f2a5e | 18 | #include "bus-error.h" |
430f0182 | 19 | #include "capability-util.h" |
ba1261bc | 20 | #include "cgroup-util.h" |
4f5dd394 | 21 | #include "compress.h" |
34c10968 LP |
22 | #include "conf-parser.h" |
23 | #include "copy.h" | |
c8715007 | 24 | #include "coredump-util.h" |
f11943c5 | 25 | #include "coredump-vacuum.h" |
a0956174 | 26 | #include "dirent-util.h" |
ea680f05 | 27 | #include "elf-util.h" |
4f5dd394 | 28 | #include "escape.h" |
3ffd4af2 | 29 | #include "fd-util.h" |
4f5dd394 | 30 | #include "fileio.h" |
f4f15635 | 31 | #include "fs-util.h" |
bd1ae178 | 32 | #include "iovec-util.h" |
b18453ed | 33 | #include "journal-importer.h" |
5edf875b | 34 | #include "journal-send.h" |
4f5dd394 LP |
35 | #include "log.h" |
36 | #include "macro.h" | |
5e332028 | 37 | #include "main-func.h" |
0a970718 | 38 | #include "memory-util.h" |
2485b7e2 | 39 | #include "memstream-util.h" |
35cd0ba5 | 40 | #include "mkdir-label.h" |
a108c43e | 41 | #include "namespace-util.h" |
6bedfcbb | 42 | #include "parse-util.h" |
a108c43e | 43 | #include "path-util.h" |
0b452006 | 44 | #include "process-util.h" |
d14bcb4e | 45 | #include "signal-util.h" |
3c171f0b | 46 | #include "socket-util.h" |
4f5dd394 | 47 | #include "special.h" |
587f2a5e | 48 | #include "stat-util.h" |
8b43440b | 49 | #include "string-table.h" |
07630cea | 50 | #include "string-util.h" |
4f5dd394 | 51 | #include "strv.h" |
bf819d3a | 52 | #include "sync-util.h" |
e4de7287 | 53 | #include "tmpfile-util.h" |
8e1ac16b | 54 | #include "uid-classification.h" |
b1d4f8e1 | 55 | #include "user-util.h" |
34727273 | 56 | |
/* Upper bound on the size of coredumps we are willing to process at all: 1G where pointers are 32 bits
 * wide, 32G where they are 64 bits wide. */
#if __SIZEOF_POINTER__ == 8
#define PROCESS_SIZE_MAX ((uint64_t) 32 << 30)
#elif __SIZEOF_POINTER__ == 4
#define PROCESS_SIZE_MAX ((uint64_t) 1 << 30)
#else
#error "Unexpected pointer size"
#endif

/* Upper bound on the size of coredumps we leave around on disk */
#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX

/* Upper bound on the size of coredumps we store in the journal */
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
/* oss-fuzz limits memory usage. */
#define JOURNAL_SIZE_MAX ((size_t) 10 << 20)
#else
#define JOURNAL_SIZE_MAX ((size_t) 767 << 20)
#endif

/* When checking for available memory and setting lower limits, never go below 4MB for writing core
 * files to storage. */
#define PROCESS_SIZE_MIN (4U << 20)
80 | ||
c4aa09b0 | 81 | /* Make sure to not make this larger than the maximum journal entry |
27f931d1 | 82 | * size. See DATA_SIZE_MAX in journal-importer.h. */ |
874bc134 | 83 | assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX); |
f5e04665 LP |
84 | |
enum {
        /* Slot numbers of the per-process metadata cache (see Context.meta[] below).
         *
         * The leading slots hold exactly the metadata the kernel passes to us via argv[], i.e. the
         * string array built according to the pattern we install in /proc/sys/kernel/core_pattern
         * (see man:core(5)). */

        META_ARGV_PID,          /* %P: as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of signal causing dump */
        META_ARGV_TIMESTAMP,    /* %t: time of dump, expressed as seconds since the Epoch (we expand this to μs granularity) */
        META_ARGV_RLIMIT,       /* %c: core file size soft resource limit */
        META_ARGV_HOSTNAME,     /* %h: hostname */
        _META_ARGV_MAX,

        /* The next slots cache a few special fields we need quick access to when naming coredump
         * files and attaching xattrs. In contrast to the argv[] slots they are collected from the
         * runtime environment. */

        META_COMM = _META_ARGV_MAX,
        _META_MANDATORY_MAX,

        /* Everything from here on is gathered the same way, but a missing value is not treated as
         * a failure. */

        META_EXE = _META_MANDATORY_MAX,
        META_UNIT,
        META_PROC_AUXV,
        _META_MAX
};

/* Journal field prefix (including the trailing '=') for each metadata slot. */
static const char * const meta_field_names[_META_MAX] = {
        [META_ARGV_PID]       = "COREDUMP_PID=",
        [META_ARGV_UID]       = "COREDUMP_UID=",
        [META_ARGV_GID]       = "COREDUMP_GID=",
        [META_ARGV_SIGNAL]    = "COREDUMP_SIGNAL=",
        [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
        [META_ARGV_RLIMIT]    = "COREDUMP_RLIMIT=",
        [META_ARGV_HOSTNAME]  = "COREDUMP_HOSTNAME=",
        [META_COMM]           = "COREDUMP_COMM=",
        [META_EXE]            = "COREDUMP_EXE=",
        [META_UNIT]           = "COREDUMP_UNIT=",
        [META_PROC_AUXV]      = "COREDUMP_PROC_AUXV=",
};

/* Everything we know about the crashed process while handling its coredump. */
typedef struct Context {
        const char *meta[_META_MAX];    /* metadata values, indexed by the META_* constants above */
        size_t meta_size[_META_MAX];    /* byte sizes belonging to meta[]; consulted for META_PROC_AUXV, which is binary */
        pid_t pid;
        uid_t uid;
        gid_t gid;
        bool is_pid1;
        bool is_journald;
} Context;
141 | ||
34c10968 LP |
142 | typedef enum CoredumpStorage { |
143 | COREDUMP_STORAGE_NONE, | |
144 | COREDUMP_STORAGE_EXTERNAL, | |
145 | COREDUMP_STORAGE_JOURNAL, | |
34c10968 | 146 | _COREDUMP_STORAGE_MAX, |
2d93c20e | 147 | _COREDUMP_STORAGE_INVALID = -EINVAL, |
34c10968 LP |
148 | } CoredumpStorage; |
149 | ||
34c10968 | 150 | static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = { |
510a1466 | 151 | [COREDUMP_STORAGE_NONE] = "none", |
34c10968 | 152 | [COREDUMP_STORAGE_EXTERNAL] = "external", |
510a1466 | 153 | [COREDUMP_STORAGE_JOURNAL] = "journal", |
34c10968 LP |
154 | }; |
155 | ||
156 | DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage); | |
8c9571d0 | 157 | static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting"); |
34727273 ZJS |
158 | |
159 | static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL; | |
8c9571d0 | 160 | static bool arg_compress = true; |
59f448cf LP |
161 | static uint64_t arg_process_size_max = PROCESS_SIZE_MAX; |
162 | static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX; | |
6e2b4a69 | 163 | static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX; |
f5fbe71d YW |
164 | static uint64_t arg_keep_free = UINT64_MAX; |
165 | static uint64_t arg_max_use = UINT64_MAX; | |
34c10968 LP |
166 | |
167 | static int parse_config(void) { | |
34c10968 | 168 | static const ConfigTableItem items[] = { |
510a1466 ZJS |
169 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, |
170 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
171 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, | |
172 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, | |
173 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, | |
174 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, | |
175 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
34c10968 LP |
176 | {} |
177 | }; | |
178 | ||
4a78074f LP |
179 | int r; |
180 | ||
6378f257 | 181 | r = config_parse_standard_file_with_dropins( |
e5abff37 | 182 | "systemd/coredump.conf", |
4a78074f LP |
183 | "Coredump\0", |
184 | config_item_table_lookup, | |
185 | items, | |
186 | CONFIG_PARSE_WARN, | |
187 | /* userdata= */ NULL); | |
188 | if (r < 0) | |
189 | return r; | |
190 | ||
191 | /* Let's make sure we fix up the maximum size we send to the journal here on the client side, for | |
192 | * efficiency reasons. journald wouldn't accept anything larger anyway. */ | |
193 | if (arg_journal_size_max > JOURNAL_SIZE_MAX) { | |
194 | log_warning("JournalSizeMax= set to larger value (%s) than journald would accept (%s), lowering automatically.", | |
195 | FORMAT_BYTES(arg_journal_size_max), FORMAT_BYTES(JOURNAL_SIZE_MAX)); | |
196 | arg_journal_size_max = JOURNAL_SIZE_MAX; | |
197 | } | |
198 | ||
199 | return 0; | |
34c10968 LP |
200 | } |
201 | ||
a1e92eee | 202 | static uint64_t storage_size_max(void) { |
ee0449fd ZJS |
203 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
204 | return arg_external_size_max; | |
205 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) | |
206 | return arg_journal_size_max; | |
207 | assert(arg_storage == COREDUMP_STORAGE_NONE); | |
208 | return 0; | |
73a99163 ZJS |
209 | } |
210 | ||
3e4d0f6c ZJS |
/* Grants the crashing user read access to the coredump via an ACL entry, when that is permitted.
 *
 * Does nothing (and returns 0) when built without ACL support, when allow_user is false, or when the
 * UID is a system, dynamic or "nobody" UID. Returns a negative error if the ACL cannot be adjusted. */
static int fix_acl(int fd, uid_t uid, bool allow_user) {
        assert(fd >= 0);
        assert(uid_is_valid(uid));

#if HAVE_ACL
        int q;

        /* Users must not read coredumps if the uid or capabilities were changed (allow_user is false
         * then), and non-regular users never get an ACL entry. */
        if (!allow_user ||
            uid_is_system(uid) ||
            uid_is_dynamic(uid) ||
            uid == UID_NOBODY)
                return 0;

        /* Regular users may read (but not write or delete) their own coredumps */
        q = fd_add_uid_acl_permission(fd, uid, ACL_READ);
        if (q < 0)
                return log_error_errno(q, "Failed to adjust ACL of the coredump: %m");
#endif

        return 0;
}
233 | ||
f46c706b FB |
234 | static int fix_xattr(int fd, const Context *context) { |
235 | ||
236 | static const char * const xattrs[_META_MAX] = { | |
510a1466 ZJS |
237 | [META_ARGV_PID] = "user.coredump.pid", |
238 | [META_ARGV_UID] = "user.coredump.uid", | |
239 | [META_ARGV_GID] = "user.coredump.gid", | |
240 | [META_ARGV_SIGNAL] = "user.coredump.signal", | |
241 | [META_ARGV_TIMESTAMP] = "user.coredump.timestamp", | |
242 | [META_ARGV_RLIMIT] = "user.coredump.rlimit", | |
243 | [META_ARGV_HOSTNAME] = "user.coredump.hostname", | |
244 | [META_COMM] = "user.coredump.comm", | |
245 | [META_EXE] = "user.coredump.exe", | |
0cd77f97 LP |
246 | }; |
247 | ||
34c10968 LP |
248 | int r = 0; |
249 | ||
b59233e6 LP |
250 | assert(fd >= 0); |
251 | ||
60ecc386 | 252 | /* Attach some metadata to coredumps via extended attributes. Just because we can. */ |
34c10968 | 253 | |
fe96c0f8 | 254 | for (unsigned i = 0; i < _META_MAX; i++) { |
1eef15b1 ZJS |
255 | int k; |
256 | ||
f46c706b | 257 | if (isempty(context->meta[i]) || !xattrs[i]) |
0cd77f97 | 258 | continue; |
34c10968 | 259 | |
60ecc386 ZJS |
260 | k = RET_NERRNO(fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE)); |
261 | RET_GATHER(r, k); | |
0cd77f97 | 262 | } |
34c10968 LP |
263 | |
264 | return r; | |
265 | } | |
266 | ||
b0b21dce | 267 | #define filename_escape(s) xescape((s), "./ ") |
34c10968 | 268 | |
/* Returns s, or a human-readable placeholder if s is NULL; meant for log messages only. */
static const char *coredump_tmpfile_name(const char *s) {
        if (s)
                return s;

        return "(unnamed temporary file)";
}
272 | ||
b59233e6 LP |
273 | static int fix_permissions( |
274 | int fd, | |
275 | const char *filename, | |
276 | const char *target, | |
f46c706b | 277 | const Context *context, |
3e4d0f6c | 278 | bool allow_user) { |
b59233e6 | 279 | |
03532f0a LP |
280 | int r; |
281 | ||
b59233e6 | 282 | assert(fd >= 0); |
b59233e6 | 283 | assert(target); |
3c171f0b | 284 | assert(context); |
cfd652ed ZJS |
285 | |
286 | /* Ignore errors on these */ | |
3c171f0b | 287 | (void) fchmod(fd, 0640); |
9764bca9 | 288 | (void) fix_acl(fd, context->uid, allow_user); |
3c171f0b | 289 | (void) fix_xattr(fd, context); |
cfd652ed | 290 | |
74402bf0 | 291 | r = link_tmpfile(fd, filename, target, LINK_TMPFILE_SYNC); |
03532f0a LP |
292 | if (r < 0) |
293 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
cfd652ed ZJS |
294 | |
295 | return 0; | |
296 | } | |
297 | ||
59f448cf | 298 | static int maybe_remove_external_coredump(const char *filename, uint64_t size) { |
cfd652ed | 299 | |
b59233e6 | 300 | /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ |
cfd652ed | 301 | |
fc6cec86 | 302 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && |
cfd652ed ZJS |
303 | size <= arg_external_size_max) |
304 | return 0; | |
305 | ||
306 | if (!filename) | |
307 | return 1; | |
308 | ||
4a62c710 MS |
309 | if (unlink(filename) < 0 && errno != ENOENT) |
310 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
cfd652ed ZJS |
311 | |
312 | return 1; | |
313 | } | |
314 | ||
f46c706b | 315 | static int make_filename(const Context *context, char **ret) { |
b59233e6 | 316 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; |
a7f7d1bd | 317 | sd_id128_t boot = {}; |
34c10968 LP |
318 | int r; |
319 | ||
3c171f0b | 320 | assert(context); |
34c10968 | 321 | |
f46c706b | 322 | c = filename_escape(context->meta[META_COMM]); |
34c10968 | 323 | if (!c) |
b59233e6 | 324 | return -ENOMEM; |
34c10968 | 325 | |
f46c706b | 326 | u = filename_escape(context->meta[META_ARGV_UID]); |
0dc5d23c | 327 | if (!u) |
b59233e6 | 328 | return -ENOMEM; |
34c10968 LP |
329 | |
330 | r = sd_id128_get_boot(&boot); | |
b59233e6 | 331 | if (r < 0) |
34c10968 | 332 | return r; |
34c10968 | 333 | |
f46c706b | 334 | p = filename_escape(context->meta[META_ARGV_PID]); |
b59233e6 LP |
335 | if (!p) |
336 | return -ENOMEM; | |
337 | ||
f46c706b | 338 | t = filename_escape(context->meta[META_ARGV_TIMESTAMP]); |
b59233e6 LP |
339 | if (!t) |
340 | return -ENOMEM; | |
341 | ||
342 | if (asprintf(ret, | |
64a5384f | 343 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s", |
34c10968 | 344 | c, |
0dc5d23c | 345 | u, |
34c10968 LP |
346 | SD_ID128_FORMAT_VAL(boot), |
347 | p, | |
b59233e6 LP |
348 | t) < 0) |
349 | return -ENOMEM; | |
350 | ||
351 | return 0; | |
352 | } | |
353 | ||
3e4d0f6c ZJS |
354 | static int grant_user_access(int core_fd, const Context *context) { |
355 | int at_secure = -1; | |
356 | uid_t uid = UID_INVALID, euid = UID_INVALID; | |
357 | uid_t gid = GID_INVALID, egid = GID_INVALID; | |
358 | int r; | |
359 | ||
360 | assert(core_fd >= 0); | |
361 | assert(context); | |
362 | ||
363 | if (!context->meta[META_PROC_AUXV]) | |
364 | return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions."); | |
365 | ||
366 | uint8_t elf[EI_NIDENT]; | |
367 | errno = 0; | |
368 | if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf)) | |
369 | return log_warning_errno(errno_or_else(EIO), | |
370 | "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno)); | |
371 | ||
372 | if (elf[EI_MAG0] != ELFMAG0 || | |
373 | elf[EI_MAG1] != ELFMAG1 || | |
374 | elf[EI_MAG2] != ELFMAG2 || | |
375 | elf[EI_MAG3] != ELFMAG3 || | |
376 | elf[EI_VERSION] != EV_CURRENT) | |
377 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
378 | "Core file does not have ELF header, not adjusting permissions."); | |
379 | if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) || | |
380 | !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB)) | |
381 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
382 | "Core file has strange ELF class, not adjusting permissions."); | |
383 | ||
384 | if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN)) | |
385 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
386 | "Core file has non-native endianness, not adjusting permissions."); | |
387 | ||
cb38fdbe ZJS |
388 | r = parse_auxv(LOG_WARNING, |
389 | /* elf_class= */ elf[EI_CLASS], | |
390 | context->meta[META_PROC_AUXV], | |
391 | context->meta_size[META_PROC_AUXV], | |
392 | &at_secure, &uid, &euid, &gid, &egid); | |
3e4d0f6c ZJS |
393 | if (r < 0) |
394 | return r; | |
395 | ||
396 | /* We allow access if we got all the data and at_secure is not set and | |
397 | * the uid/gid matches euid/egid. */ | |
398 | bool ret = | |
399 | at_secure == 0 && | |
400 | uid != UID_INVALID && euid != UID_INVALID && uid == euid && | |
401 | gid != GID_INVALID && egid != GID_INVALID && gid == egid; | |
402 | log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", | |
403 | ret ? "permit" : "restrict", | |
404 | uid, euid, gid, egid, yes_no(at_secure)); | |
405 | return ret; | |
406 | } | |
407 | ||
b59233e6 | 408 | static int save_external_coredump( |
f46c706b | 409 | const Context *context, |
3c171f0b | 410 | int input_fd, |
b59233e6 | 411 | char **ret_filename, |
5f3e0a74 HW |
412 | int *ret_node_fd, |
413 | int *ret_data_fd, | |
0cd4e913 | 414 | uint64_t *ret_size, |
587f2a5e | 415 | uint64_t *ret_compressed_size, |
cc4419ed | 416 | bool *ret_truncated) { |
b59233e6 | 417 | |
587f2a5e LB |
418 | _cleanup_(unlink_and_freep) char *tmp = NULL; |
419 | _cleanup_free_ char *fn = NULL; | |
254d1313 | 420 | _cleanup_close_ int fd = -EBADF; |
ee0449fd | 421 | uint64_t rlimit, process_limit, max_size; |
587f2a5e | 422 | bool truncated, storage_on_tmpfs; |
b59233e6 LP |
423 | struct stat st; |
424 | int r; | |
425 | ||
3c171f0b | 426 | assert(context); |
b59233e6 | 427 | assert(ret_filename); |
5f3e0a74 HW |
428 | assert(ret_node_fd); |
429 | assert(ret_data_fd); | |
b59233e6 | 430 | assert(ret_size); |
587f2a5e LB |
431 | assert(ret_compressed_size); |
432 | assert(ret_truncated); | |
b59233e6 | 433 | |
f46c706b | 434 | r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit); |
bdfd7b2c | 435 | if (r < 0) |
f46c706b FB |
436 | return log_error_errno(r, "Failed to parse resource limit '%s': %m", |
437 | context->meta[META_ARGV_RLIMIT]); | |
d7a0f1f4 | 438 | if (rlimit < page_size()) |
f46c706b | 439 | /* Is coredumping disabled? Then don't bother saving/processing the |
3a559f22 | 440 | * coredump. Anything below PAGE_SIZE cannot give a readable coredump |
f46c706b FB |
441 | * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but |
442 | * is usually the same as PAGE_SIZE. */ | |
baaa35ad ZJS |
443 | return log_info_errno(SYNTHETIC_ERRNO(EBADSLT), |
444 | "Resource limits disable core dumping for process %s (%s).", | |
f46c706b | 445 | context->meta[META_ARGV_PID], context->meta[META_COMM]); |
bdfd7b2c | 446 | |
ee0449fd | 447 | process_limit = MAX(arg_process_size_max, storage_size_max()); |
baaa35ad ZJS |
448 | if (process_limit == 0) |
449 | return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT), | |
450 | "Limits for coredump processing and storage are both 0, not dumping core."); | |
ee0449fd | 451 | |
bdfd7b2c | 452 | /* Never store more than the process configured, or than we actually shall keep or process */ |
ee0449fd | 453 | max_size = MIN(rlimit, process_limit); |
bdfd7b2c | 454 | |
3c171f0b | 455 | r = make_filename(context, &fn); |
23bbb0de MS |
456 | if (r < 0) |
457 | return log_error_errno(r, "Failed to determine coredump file name: %m"); | |
34c10968 | 458 | |
1fbe8d0c | 459 | (void) mkdir_parents_label(fn, 0755); |
803a3464 | 460 | |
03532f0a | 461 | fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp); |
4a62c710 | 462 | if (fd < 0) |
03532f0a | 463 | return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); |
803a3464 | 464 | |
587f2a5e LB |
465 | /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on |
466 | * the service or the slice it belongs to. This is common on low-resources systems, | |
467 | * to avoid crashing processes to take away too many system resources. | |
468 | * Check the cgroup settings, and set max_size to a bit less than half of the | |
469 | * available memory left to the process. | |
470 | * Then, attempt to write the core file uncompressed first - if the write gets | |
471 | * interrupted, we know we won't be able to write it all, so instead compress what | |
472 | * was written so far, delete the uncompressed truncated core, and then continue | |
473 | * compressing from STDIN. Given the compressed core cannot be larger than the | |
474 | * uncompressed one, and 1KB for metadata is accounted for in the calculation, we | |
475 | * should be able to at least store the full compressed core file. */ | |
476 | ||
477 | storage_on_tmpfs = fd_is_temporary_fs(fd) > 0; | |
478 | if (storage_on_tmpfs && arg_compress) { | |
479 | _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; | |
480 | uint64_t cgroup_limit = UINT64_MAX; | |
481 | struct statvfs sv; | |
482 | ||
483 | /* If we can't get the cgroup limit, just ignore it, but don't fail, | |
484 | * try anyway with the config settings. */ | |
485 | r = sd_bus_default_system(&bus); | |
486 | if (r < 0) | |
487 | log_info_errno(r, "Failed to connect to system bus, skipping MemoryAvailable check: %m"); | |
488 | else { | |
489 | _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; | |
490 | ||
491 | r = sd_bus_get_property_trivial( | |
492 | bus, | |
493 | "org.freedesktop.systemd1", | |
494 | "/org/freedesktop/systemd1/unit/self", | |
495 | "org.freedesktop.systemd1.Service", | |
496 | "MemoryAvailable", | |
497 | &error, | |
498 | 't', &cgroup_limit); | |
499 | if (r < 0) | |
500 | log_warning_errno(r, | |
501 | "Failed to query MemoryAvailable for current unit, " | |
502 | "falling back to static config settings: %s", | |
503 | bus_error_message(&error, r)); | |
504 | } | |
803a3464 | 505 | |
587f2a5e LB |
506 | max_size = MIN(cgroup_limit, max_size); |
507 | max_size = LESS_BY(max_size, 1024U) / 2; /* Account for 1KB metadata overhead for compressing */ | |
508 | max_size = MAX(PROCESS_SIZE_MIN, max_size); /* Impose a lower minimum */ | |
509 | ||
510 | /* tmpfs might get full quickly, so check the available space too. | |
511 | * But don't worry about errors here, failing to access the storage | |
512 | * location will be better logged when writing to it. */ | |
8facac5f | 513 | if (fstatvfs(fd, &sv) >= 0) |
587f2a5e | 514 | max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size); |
34c10968 | 515 | |
587f2a5e | 516 | log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup memory limits.", max_size); |
7849c2ac TA |
517 | } |
518 | ||
587f2a5e LB |
519 | r = copy_bytes(input_fd, fd, max_size, 0); |
520 | if (r < 0) | |
521 | return log_error_errno(r, "Cannot store coredump of %s (%s): %m", | |
522 | context->meta[META_ARGV_PID], context->meta[META_COMM]); | |
523 | truncated = r == 1; | |
cfd652ed | 524 | |
3e4d0f6c ZJS |
525 | bool allow_user = grant_user_access(fd, context) > 0; |
526 | ||
587f2a5e LB |
527 | #if HAVE_COMPRESSION |
528 | if (arg_compress) { | |
529 | _cleanup_(unlink_and_freep) char *tmp_compressed = NULL; | |
530 | _cleanup_free_ char *fn_compressed = NULL; | |
254d1313 | 531 | _cleanup_close_ int fd_compressed = -EBADF; |
587f2a5e LB |
532 | uint64_t uncompressed_size = 0; |
533 | ||
86cbbc6d | 534 | if (lseek(fd, 0, SEEK_SET) < 0) |
587f2a5e | 535 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); |
cfd652ed | 536 | |
ee00684c | 537 | fn_compressed = strjoin(fn, default_compression_extension()); |
587f2a5e LB |
538 | if (!fn_compressed) |
539 | return log_oom(); | |
cfd652ed | 540 | |
03532f0a | 541 | fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed); |
587f2a5e LB |
542 | if (fd_compressed < 0) |
543 | return log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed); | |
cfd652ed | 544 | |
587f2a5e LB |
545 | r = compress_stream(fd, fd_compressed, max_size, &uncompressed_size); |
546 | if (r < 0) | |
547 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
548 | ||
549 | if (truncated && storage_on_tmpfs) { | |
550 | uint64_t partial_uncompressed_size = 0; | |
551 | ||
552 | /* Uncompressed write was truncated and we are writing to tmpfs: delete | |
553 | * the uncompressed core, and compress the remaining part from STDIN. */ | |
554 | ||
555 | tmp = unlink_and_free(tmp); | |
556 | fd = safe_close(fd); | |
557 | ||
558 | r = compress_stream(input_fd, fd_compressed, max_size, &partial_uncompressed_size); | |
559 | if (r < 0) | |
560 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
561 | uncompressed_size += partial_uncompressed_size; | |
b59233e6 LP |
562 | } |
563 | ||
9764bca9 | 564 | r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, allow_user); |
cfd652ed | 565 | if (r < 0) |
587f2a5e | 566 | return r; |
b59233e6 | 567 | |
587f2a5e LB |
568 | if (fstat(fd_compressed, &st) < 0) |
569 | return log_error_errno(errno, | |
570 | "Failed to fstat core file %s: %m", | |
571 | coredump_tmpfile_name(tmp_compressed)); | |
cfd652ed | 572 | |
587f2a5e LB |
573 | *ret_filename = TAKE_PTR(fn_compressed); /* compressed */ |
574 | *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */ | |
575 | *ret_compressed_size = (uint64_t) st.st_size; /* compressed */ | |
576 | *ret_data_fd = TAKE_FD(fd); | |
577 | *ret_size = uncompressed_size; | |
578 | *ret_truncated = truncated; | |
579 | tmp_compressed = mfree(tmp_compressed); | |
cfd652ed | 580 | |
cfd652ed | 581 | return 0; |
34c10968 | 582 | } |
3b1a55e1 | 583 | #endif |
5f3e0a74 | 584 | |
587f2a5e LB |
585 | if (truncated) |
586 | log_struct(LOG_INFO, | |
08e86b15 DDM |
587 | LOG_MESSAGE("Core file was truncated to %"PRIu64" bytes.", max_size), |
588 | "SIZE_LIMIT=%"PRIu64, max_size, | |
587f2a5e LB |
589 | "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR); |
590 | ||
9764bca9 | 591 | r = fix_permissions(fd, tmp, fn, context, allow_user); |
cfd652ed | 592 | if (r < 0) |
587f2a5e LB |
593 | return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn); |
594 | ||
595 | if (fstat(fd, &st) < 0) | |
596 | return log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); | |
597 | ||
86cbbc6d | 598 | if (lseek(fd, 0, SEEK_SET) < 0) |
587f2a5e | 599 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); |
34c10968 | 600 | |
0cfb0971 | 601 | *ret_filename = TAKE_PTR(fn); |
1cc6c93a | 602 | *ret_data_fd = TAKE_FD(fd); |
59f448cf | 603 | *ret_size = (uint64_t) st.st_size; |
587f2a5e | 604 | *ret_truncated = truncated; |
34c10968 | 605 | |
34c10968 | 606 | return 0; |
34c10968 LP |
607 | } |
608 | ||
609 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
610 | _cleanup_free_ char *field = NULL; | |
611 | ssize_t n; | |
612 | ||
8d4e028f | 613 | assert(fd >= 0); |
34c10968 LP |
614 | assert(ret); |
615 | assert(ret_size); | |
616 | ||
86cbbc6d | 617 | if (lseek(fd, 0, SEEK_SET) < 0) |
4a62c710 | 618 | return log_warning_errno(errno, "Failed to seek: %m"); |
803a3464 | 619 | |
34c10968 | 620 | field = malloc(9 + size); |
a73c74db LP |
621 | if (!field) |
622 | return log_warning_errno(SYNTHETIC_ERRNO(ENOMEM), | |
623 | "Failed to allocate memory for coredump, coredump will not be stored."); | |
34c10968 LP |
624 | |
625 | memcpy(field, "COREDUMP=", 9); | |
626 | ||
a73c74db LP |
627 | /* NB: simple read() would fail for overly large coredumps, since read() on Linux can only deal with |
628 | * 0x7ffff000 bytes max. Hence call things in a loop. */ | |
629 | n = loop_read(fd, field + 9, size, /* do_poll= */ false); | |
23bbb0de MS |
630 | if (n < 0) |
631 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
baaa35ad ZJS |
632 | if ((size_t) n < size) |
633 | return log_error_errno(SYNTHETIC_ERRNO(EIO), | |
634 | "Core data too short."); | |
34c10968 | 635 | |
1cc6c93a | 636 | *ret = TAKE_PTR(field); |
34c10968 LP |
637 | *ret_size = size + 9; |
638 | ||
34c10968 LP |
639 | return 0; |
640 | } | |
803a3464 | 641 | |
3f132692 JF |
642 | /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines: |
643 | * 0:/dev/pts/23 | |
644 | * pos: 0 | |
645 | * flags: 0100002 | |
646 | * | |
647 | * 1:/dev/pts/23 | |
648 | * pos: 0 | |
649 | * flags: 0100002 | |
650 | * | |
651 | * 2:/dev/pts/23 | |
652 | * pos: 0 | |
653 | * flags: 0100002 | |
654 | * EOF | |
655 | */ | |
2485b7e2 YW |
656 | static int compose_open_fds(pid_t pid, char **ret) { |
657 | _cleanup_(memstream_done) MemStream m = {}; | |
4d84bc2f | 658 | _cleanup_closedir_ DIR *proc_fd_dir = NULL; |
254d1313 | 659 | _cleanup_close_ int proc_fdinfo_fd = -EBADF; |
59059b4a | 660 | const char *fddelim = "", *path; |
2485b7e2 | 661 | FILE *stream; |
7b26ea6f | 662 | int r; |
3f132692 JF |
663 | |
664 | assert(pid >= 0); | |
2485b7e2 | 665 | assert(ret); |
3f132692 | 666 | |
59059b4a | 667 | path = procfs_file_alloca(pid, "fd"); |
3f132692 | 668 | proc_fd_dir = opendir(path); |
59059b4a ZJS |
669 | if (!proc_fd_dir) |
670 | return -errno; | |
3f132692 | 671 | |
4d84bc2f | 672 | proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH); |
59059b4a ZJS |
673 | if (proc_fdinfo_fd < 0) |
674 | return -errno; | |
3f132692 | 675 | |
2485b7e2 | 676 | stream = memstream_init(&m); |
3f132692 JF |
677 | if (!stream) |
678 | return -ENOMEM; | |
679 | ||
af3b864d | 680 | FOREACH_DIRENT(de, proc_fd_dir, return -errno) { |
3f132692 | 681 | _cleanup_fclose_ FILE *fdinfo = NULL; |
4d84bc2f | 682 | _cleanup_free_ char *fdname = NULL; |
254d1313 | 683 | _cleanup_close_ int fd = -EBADF; |
3f132692 | 684 | |
af3b864d | 685 | r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname); |
3f132692 JF |
686 | if (r < 0) |
687 | return r; | |
688 | ||
af3b864d | 689 | fprintf(stream, "%s%s:%s\n", fddelim, de->d_name, fdname); |
3f132692 JF |
690 | fddelim = "\n"; |
691 | ||
692 | /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */ | |
af3b864d | 693 | fd = openat(proc_fdinfo_fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY); |
59059b4a | 694 | if (fd < 0) |
3f132692 JF |
695 | continue; |
696 | ||
b46c3e49 VC |
697 | fdinfo = take_fdopen(&fd, "r"); |
698 | if (!fdinfo) | |
3f132692 JF |
699 | continue; |
700 | ||
7b26ea6f LP |
701 | for (;;) { |
702 | _cleanup_free_ char *line = NULL; | |
703 | ||
704 | r = read_line(fdinfo, LONG_LINE_MAX, &line); | |
705 | if (r < 0) | |
706 | return r; | |
707 | if (r == 0) | |
708 | break; | |
709 | ||
0d536673 | 710 | fputs(line, stream); |
7b26ea6f | 711 | fputc('\n', stream); |
4d84bc2f | 712 | } |
3f132692 JF |
713 | } |
714 | ||
2485b7e2 | 715 | return memstream_finalize(&m, ret, NULL); |
3f132692 JF |
716 | } |
717 | ||
7ed03ce6 JF |
718 | /* Returns 1 if the parent was found. |
719 | * Returns 0 if there is not a process we can call the pid's | |
720 | * container parent (the pid's process isn't 'containerized'). | |
721 | * Returns a negative number on errors. | |
722 | */ | |
723 | static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { | |
7ed03ce6 JF |
724 | pid_t container_pid; |
725 | const char *proc_root_path; | |
726 | struct stat root_stat, proc_root_stat; | |
83844031 | 727 | int r; |
7ed03ce6 JF |
728 | |
729 | /* To compare inodes of / and /proc/[pid]/root */ | |
730 | if (stat("/", &root_stat) < 0) | |
731 | return -errno; | |
732 | ||
733 | proc_root_path = procfs_file_alloca(pid, "root"); | |
734 | if (stat(proc_root_path, &proc_root_stat) < 0) | |
735 | return -errno; | |
736 | ||
737 | /* The process uses system root. */ | |
c20c77ef | 738 | if (stat_inode_same(&proc_root_stat, &root_stat)) { |
7ed03ce6 JF |
739 | *cmdline = NULL; |
740 | return 0; | |
741 | } | |
742 | ||
ade39d9a | 743 | r = namespace_get_leader(pid, NAMESPACE_MOUNT, &container_pid); |
7ed03ce6 JF |
744 | if (r < 0) |
745 | return r; | |
746 | ||
a034620f | 747 | r = pid_get_cmdline(container_pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, cmdline); |
d3cba4ea EV |
748 | if (r < 0) |
749 | return r; | |
750 | ||
751 | return 1; | |
7ed03ce6 JF |
752 | } |
753 | ||
f46c706b | 754 | static int change_uid_gid(const Context *context) { |
9764bca9 NR |
755 | uid_t uid = context->uid; |
756 | gid_t gid = context->gid; | |
3c171f0b | 757 | int r; |
34c10968 | 758 | |
28add648 | 759 | if (uid_is_system(uid)) { |
888e378d LP |
760 | const char *user = "systemd-coredump"; |
761 | ||
fafff8f1 | 762 | r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); |
888e378d LP |
763 | if (r < 0) { |
764 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
765 | uid = gid = 0; | |
766 | } | |
888e378d | 767 | } |
3c171f0b LP |
768 | |
769 | return drop_privileges(uid, gid, 0); | |
770 | } | |
8c8549db | 771 | |
3c171f0b | 772 | static int submit_coredump( |
3e4d0f6c | 773 | const Context *context, |
9a435388 | 774 | struct iovec_wrapper *iovw, |
3c171f0b | 775 | int input_fd) { |
34c10968 | 776 | |
c546154a | 777 | _cleanup_(json_variant_unrefp) JsonVariant *json_metadata = NULL; |
254d1313 | 778 | _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF; |
9a435388 | 779 | _cleanup_free_ char *filename = NULL, *coredump_data = NULL; |
51d3783d | 780 | _cleanup_free_ char *stacktrace = NULL; |
c546154a | 781 | const char *module_name; |
587f2a5e | 782 | uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX; |
6fea39ba | 783 | bool truncated = false, written = false; |
c546154a | 784 | JsonVariant *module_json; |
3c171f0b | 785 | int r; |
83844031 | 786 | |
3c171f0b | 787 | assert(context); |
9a435388 | 788 | assert(iovw); |
3c171f0b | 789 | assert(input_fd >= 0); |
f5e04665 | 790 | |
3c171f0b LP |
791 | /* Vacuum before we write anything again */ |
792 | (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); | |
803a3464 | 793 | |
3c171f0b | 794 | /* Always stream the coredump to disk, if that's possible */ |
c8e94763 LP |
795 | written = save_external_coredump( |
796 | context, input_fd, | |
797 | &filename, &coredump_node_fd, &coredump_fd, | |
798 | &coredump_size, &coredump_compressed_size, &truncated) >= 0; | |
799 | if (written) { | |
800 | /* If we could write it to disk we can now process it. */ | |
801 | /* If we don't want to keep the coredump on disk, remove it now, as later on we | |
802 | * will lack the privileges for it. However, we keep the fd to it, so that we can | |
803 | * still process it and log it. */ | |
804 | r = maybe_remove_external_coredump(filename, coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size); | |
805 | if (r < 0) | |
806 | return r; | |
807 | if (r == 0) | |
808 | (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename); | |
809 | else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) | |
810 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", | |
811 | coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size, arg_external_size_max); | |
812 | ||
813 | /* Vacuum again, but exclude the coredump we just created */ | |
814 | (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); | |
815 | } | |
6fea39ba | 816 | |
c8e94763 LP |
817 | /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the |
818 | * coredump memory under the user's uid. This also ensures that the credentials journald will see are | |
819 | * the ones of the coredumping user, thus making sure the user gets access to the core dump. Let's | |
820 | * also get rid of all capabilities, if we run as root, we won't need them anymore. */ | |
3c171f0b LP |
821 | r = change_uid_gid(context); |
822 | if (r < 0) | |
823 | return log_error_errno(r, "Failed to drop privileges: %m"); | |
34c10968 | 824 | |
c8e94763 LP |
825 | if (written) { |
826 | /* Try to get a stack trace if we can */ | |
827 | if (coredump_size > arg_process_size_max) | |
828 | log_debug("Not generating stack trace: core size %"PRIu64" is greater " | |
829 | "than %"PRIu64" (the configured maximum)", | |
830 | coredump_size, arg_process_size_max); | |
831 | else if (coredump_fd >= 0) { | |
832 | bool skip = startswith(context->meta[META_COMM], "systemd-coredum"); /* COMM is 16 bytes usually */ | |
833 | ||
834 | (void) parse_elf_object(coredump_fd, | |
835 | context->meta[META_EXE], | |
836 | /* fork_disable_dump= */ skip, /* avoid loops */ | |
837 | &stacktrace, | |
838 | &json_metadata); | |
839 | } | |
c790632c | 840 | } |
51d3783d | 841 | |
6fea39ba | 842 | _cleanup_free_ char *core_message = NULL; |
6fea39ba LP |
843 | core_message = strjoin( |
844 | "Process ", context->meta[META_ARGV_PID], | |
845 | " (", context->meta[META_COMM], | |
846 | ") of user ", context->meta[META_ARGV_UID], | |
847 | written ? " dumped core." : " terminated abnormally without generating a coredump."); | |
848 | if (!core_message) | |
849 | return log_oom(); | |
850 | ||
851 | if (context->is_journald && filename) | |
852 | if (!strextend(&core_message, "\nCoredump diverted to ", filename)) | |
853 | return log_oom(); | |
51d3783d | 854 | |
6fea39ba LP |
855 | if (stacktrace) |
856 | if (!strextend(&core_message, "\n\n", stacktrace)) | |
857 | return log_oom(); | |
92e92d71 | 858 | |
5edf875b DDM |
859 | if (context->is_journald) |
860 | /* We might not be able to log to the journal, so let's always print the message to another | |
861 | * log target. The target was set previously to something safe. */ | |
9a435388 | 862 | log_dispatch(LOG_ERR, 0, core_message); |
92e92d71 | 863 | |
2a3bebd0 | 864 | (void) iovw_put_string_field(iovw, "MESSAGE=", core_message); |
3c171f0b | 865 | |
0cd4e913 | 866 | if (truncated) |
2a3bebd0 | 867 | (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1"); |
0cd4e913 | 868 | |
c546154a LB |
869 | /* If we managed to parse any ELF metadata (build-id, ELF package meta), |
870 | * attach it as journal metadata. */ | |
871 | if (json_metadata) { | |
872 | _cleanup_free_ char *formatted_json = NULL; | |
873 | ||
874 | r = json_variant_format(json_metadata, 0, &formatted_json); | |
875 | if (r < 0) | |
876 | return log_error_errno(r, "Failed to format JSON package metadata: %m"); | |
877 | ||
671769c9 | 878 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_JSON=", formatted_json); |
c546154a LB |
879 | } |
880 | ||
c790632c ZJS |
881 | /* In the unlikely scenario that context->meta[META_EXE] is not available, |
882 | * let's avoid guessing the module name and skip the loop. */ | |
883 | if (context->meta[META_EXE]) | |
884 | JSON_VARIANT_OBJECT_FOREACH(module_name, module_json, json_metadata) { | |
885 | JsonVariant *t; | |
c546154a | 886 | |
c790632c ZJS |
887 | /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */ |
888 | if (!path_equal_filename(module_name, context->meta[META_EXE])) | |
889 | continue; | |
c546154a | 890 | |
c790632c ZJS |
891 | t = json_variant_by_key(module_json, "name"); |
892 | if (t) | |
893 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_NAME=", json_variant_string(t)); | |
1f2abb79 | 894 | |
c790632c ZJS |
895 | t = json_variant_by_key(module_json, "version"); |
896 | if (t) | |
897 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_VERSION=", json_variant_string(t)); | |
898 | } | |
c546154a | 899 | |
3c171f0b | 900 | /* Optionally store the entire coredump in the journal */ |
587f2a5e | 901 | if (arg_storage == COREDUMP_STORAGE_JOURNAL && coredump_fd >= 0) { |
6e9ef603 ZJS |
902 | if (coredump_size <= arg_journal_size_max) { |
903 | size_t sz = 0; | |
904 | ||
905 | /* Store the coredump itself in the journal */ | |
906 | ||
907 | r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); | |
9a435388 FB |
908 | if (r >= 0) { |
909 | if (iovw_put(iovw, coredump_data, sz) >= 0) | |
910 | TAKE_PTR(coredump_data); | |
911 | } else | |
6e9ef603 ZJS |
912 | log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); |
913 | } else | |
5206a724 | 914 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
6e9ef603 | 915 | coredump_size, arg_journal_size_max); |
f5e04665 LP |
916 | } |
917 | ||
5edf875b DDM |
918 | /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the |
919 | * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write | |
920 | * the coredump to the socket. */ | |
921 | ||
922 | if (context->is_journald) { | |
923 | r = journal_fd_nonblock(true); | |
924 | if (r < 0) | |
925 | return log_error_errno(r, "Failed to make journal socket non-blocking: %m"); | |
926 | } | |
927 | ||
9a435388 | 928 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
5edf875b DDM |
929 | |
930 | if (context->is_journald) { | |
931 | int k; | |
932 | ||
933 | k = journal_fd_nonblock(false); | |
934 | if (k < 0) | |
935 | return log_error_errno(k, "Failed to make journal socket blocking: %m"); | |
936 | } | |
937 | ||
938 | if (r == -EAGAIN && context->is_journald) | |
939 | log_warning_errno(r, "Failed to log journal coredump, ignoring: %m"); | |
940 | else if (r < 0) | |
3c171f0b LP |
941 | return log_error_errno(r, "Failed to log coredump: %m"); |
942 | ||
943 | return 0; | |
944 | } | |
945 | ||
f46c706b | 946 | static int save_context(Context *context, const struct iovec_wrapper *iovw) { |
f46c706b FB |
947 | const char *unit; |
948 | int r; | |
3c171f0b | 949 | |
3c171f0b | 950 | assert(context); |
f46c706b FB |
951 | assert(iovw); |
952 | assert(iovw->count >= _META_ARGV_MAX); | |
3c171f0b | 953 | |
f46c706b | 954 | /* The context does not allocate any memory on its own */ |
3c171f0b | 955 | |
fe96c0f8 | 956 | for (size_t n = 0; n < iovw->count; n++) { |
f46c706b | 957 | struct iovec *iovec = iovw->iovec + n; |
92e92d71 | 958 | |
fe96c0f8 | 959 | for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) { |
f46c706b FB |
960 | /* Note that these strings are NUL terminated, because we made sure that a |
961 | * trailing NUL byte is in the buffer, though not included in the iov_len | |
962 | * count (see process_socket() and gather_pid_metadata_*()) */ | |
963 | assert(((char*) iovec->iov_base)[iovec->iov_len] == 0); | |
3c171f0b | 964 | |
3e4d0f6c | 965 | const char *p = startswith(iovec->iov_base, meta_field_names[i]); |
f46c706b FB |
966 | if (p) { |
967 | context->meta[i] = p; | |
3e4d0f6c | 968 | context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]); |
f46c706b FB |
969 | break; |
970 | } | |
971 | } | |
3c171f0b | 972 | } |
f46c706b FB |
973 | |
974 | if (!context->meta[META_ARGV_PID]) | |
975 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
976 | "Failed to find the PID of crashing process"); | |
977 | ||
978 | r = parse_pid(context->meta[META_ARGV_PID], &context->pid); | |
979 | if (r < 0) | |
980 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); | |
981 | ||
9764bca9 NR |
982 | r = parse_uid(context->meta[META_ARGV_UID], &context->uid); |
983 | if (r < 0) | |
984 | return log_error_errno(r, "Failed to parse UID \"%s\": %m", context->meta[META_ARGV_UID]); | |
985 | ||
986 | r = parse_gid(context->meta[META_ARGV_GID], &context->gid); | |
987 | if (r < 0) | |
988 | return log_error_errno(r, "Failed to parse GID \"%s\": %m", context->meta[META_ARGV_GID]); | |
989 | ||
f46c706b FB |
990 | unit = context->meta[META_UNIT]; |
991 | context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); | |
992 | context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); | |
993 | ||
994 | return 0; | |
3c171f0b LP |
995 | } |
996 | ||
997 | static int process_socket(int fd) { | |
254d1313 | 998 | _cleanup_close_ int input_fd = -EBADF; |
f46c706b | 999 | Context context = {}; |
9a435388 FB |
1000 | struct iovec_wrapper iovw = {}; |
1001 | struct iovec iovec; | |
fe96c0f8 | 1002 | int r; |
3c171f0b LP |
1003 | |
1004 | assert(fd >= 0); | |
1005 | ||
d2acb93d | 1006 | log_setup(); |
3c171f0b | 1007 | |
988e89ee ZJS |
1008 | log_debug("Processing coredump received on stdin..."); |
1009 | ||
3c171f0b | 1010 | for (;;) { |
fb29cdbe | 1011 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control; |
3c171f0b LP |
1012 | struct msghdr mh = { |
1013 | .msg_control = &control, | |
1014 | .msg_controllen = sizeof(control), | |
1015 | .msg_iovlen = 1, | |
1016 | }; | |
1017 | ssize_t n; | |
fe1ef0f8 | 1018 | ssize_t l; |
3c171f0b | 1019 | |
fe1ef0f8 EV |
1020 | l = next_datagram_size_fd(fd); |
1021 | if (l < 0) { | |
1022 | r = log_error_errno(l, "Failed to determine datagram size to read: %m"); | |
3c171f0b LP |
1023 | goto finish; |
1024 | } | |
1025 | ||
9a435388 FB |
1026 | iovec.iov_len = l; |
1027 | iovec.iov_base = malloc(l + 1); | |
1028 | if (!iovec.iov_base) { | |
3c171f0b LP |
1029 | r = log_oom(); |
1030 | goto finish; | |
1031 | } | |
1032 | ||
9a435388 | 1033 | mh.msg_iov = &iovec; |
3c171f0b | 1034 | |
3691bcf3 | 1035 | n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC); |
3c171f0b | 1036 | if (n < 0) { |
9a435388 | 1037 | free(iovec.iov_base); |
3691bcf3 | 1038 | r = log_error_errno(n, "Failed to receive datagram: %m"); |
3c171f0b LP |
1039 | goto finish; |
1040 | } | |
1041 | ||
9a435388 FB |
1042 | /* The final zero-length datagram carries the file descriptor and tells us |
1043 | * that we're done. */ | |
3c171f0b | 1044 | if (n == 0) { |
dac556fa | 1045 | struct cmsghdr *found; |
3c171f0b | 1046 | |
9a435388 | 1047 | free(iovec.iov_base); |
3c171f0b | 1048 | |
dac556fa | 1049 | found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); |
3c171f0b | 1050 | if (!found) { |
3691bcf3 LP |
1051 | cmsg_close_all(&mh); |
1052 | r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG), | |
1053 | "Coredump file descriptor missing."); | |
3c171f0b LP |
1054 | goto finish; |
1055 | } | |
1056 | ||
f8540bde | 1057 | assert(input_fd < 0); |
b1d02191 | 1058 | input_fd = *CMSG_TYPED_DATA(found, int); |
3c171f0b | 1059 | break; |
3691bcf3 LP |
1060 | } else |
1061 | cmsg_close_all(&mh); | |
3c171f0b LP |
1062 | |
1063 | /* Add trailing NUL byte, in case these are strings */ | |
9a435388 FB |
1064 | ((char*) iovec.iov_base)[n] = 0; |
1065 | iovec.iov_len = (size_t) n; | |
3c171f0b | 1066 | |
9a435388 FB |
1067 | r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len); |
1068 | if (r < 0) | |
1069 | goto finish; | |
34c10968 LP |
1070 | } |
1071 | ||
61233823 | 1072 | /* Make sure we got all data we really need */ |
f8540bde | 1073 | assert(input_fd >= 0); |
3c171f0b | 1074 | |
f46c706b FB |
1075 | r = save_context(&context, &iovw); |
1076 | if (r < 0) | |
1077 | goto finish; | |
1078 | ||
1079 | /* Make sure we received at least all fields we need. */ | |
fe96c0f8 | 1080 | for (int i = 0; i < _META_MANDATORY_MAX; i++) |
f46c706b FB |
1081 | if (!context.meta[i]) { |
1082 | r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1083 | "A mandatory argument (%i) has not been sent, aborting.", | |
1084 | i); | |
1085 | goto finish; | |
1086 | } | |
80002f66 | 1087 | |
f46c706b | 1088 | r = submit_coredump(&context, &iovw, input_fd); |
3c171f0b LP |
1089 | |
1090 | finish: | |
9a435388 | 1091 | iovw_free_contents(&iovw, true); |
3c171f0b LP |
1092 | return r; |
1093 | } | |
1094 | ||
9a435388 | 1095 | static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { |
254d1313 | 1096 | _cleanup_close_ int fd = -EBADF; |
3c171f0b LP |
1097 | int r; |
1098 | ||
9a435388 | 1099 | assert(iovw); |
3c171f0b LP |
1100 | assert(input_fd >= 0); |
1101 | ||
1102 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
1103 | if (fd < 0) | |
1104 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1105 | ||
1861986a LP |
1106 | r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/coredump"); |
1107 | if (r < 0) | |
1108 | return log_error_errno(r, "Failed to connect to coredump service: %m"); | |
3c171f0b | 1109 | |
fe96c0f8 | 1110 | for (size_t i = 0; i < iovw->count; i++) { |
fec603eb | 1111 | struct msghdr mh = { |
9a435388 | 1112 | .msg_iov = iovw->iovec + i, |
fec603eb LP |
1113 | .msg_iovlen = 1, |
1114 | }; | |
1115 | struct iovec copy[2]; | |
1116 | ||
1117 | for (;;) { | |
1118 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1119 | break; | |
1120 | ||
1121 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
f46c706b FB |
1122 | /* This field didn't fit? That's a pity. Given that this is |
1123 | * just metadata, let's truncate the field at half, and try | |
1124 | * again. We append three dots, in order to show that this is | |
1125 | * truncated. */ | |
fec603eb LP |
1126 | |
1127 | if (mh.msg_iov != copy) { | |
f46c706b FB |
1128 | /* We don't want to modify the caller's iovec, hence |
1129 | * let's create our own array, consisting of two new | |
1130 | * iovecs, where the first is a (truncated) copy of | |
1131 | * what we want to send, and the second one contains | |
1132 | * the trailing dots. */ | |
9a435388 | 1133 | copy[0] = iovw->iovec[i]; |
ed0cb346 | 1134 | copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3); |
fec603eb LP |
1135 | |
1136 | mh.msg_iov = copy; | |
1137 | mh.msg_iovlen = 2; | |
1138 | } | |
1139 | ||
1140 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1141 | continue; | |
1142 | } | |
3c171f0b | 1143 | |
3c171f0b | 1144 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); |
fec603eb | 1145 | } |
1eef15b1 ZJS |
1146 | } |
1147 | ||
3c171f0b LP |
1148 | r = send_one_fd(fd, input_fd, 0); |
1149 | if (r < 0) | |
1150 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1eef15b1 | 1151 | |
3c171f0b LP |
1152 | return 0; |
1153 | } | |
1eef15b1 | 1154 | |
64a5384f LP |
1155 | static int gather_pid_metadata_from_argv( |
1156 | struct iovec_wrapper *iovw, | |
1157 | Context *context, | |
1158 | int argc, char **argv) { | |
1159 | ||
f46c706b | 1160 | _cleanup_free_ char *free_timestamp = NULL; |
fe96c0f8 | 1161 | int r, signo; |
3c171f0b | 1162 | char *t; |
3c171f0b | 1163 | |
e6aa443f LP |
1164 | assert(iovw); |
1165 | assert(context); | |
1166 | ||
f46c706b FB |
1167 | /* We gather all metadata that were passed via argv[] into an array of iovecs that |
1168 | * we'll forward to the socket unit */ | |
3c171f0b | 1169 | |
f46c706b FB |
1170 | if (argc < _META_ARGV_MAX) |
1171 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1172 | "Not enough arguments passed by the kernel (%i, expected %i).", | |
1173 | argc, _META_ARGV_MAX); | |
3c171f0b | 1174 | |
fe96c0f8 | 1175 | for (int i = 0; i < _META_ARGV_MAX; i++) { |
3c171f0b | 1176 | |
f46c706b | 1177 | t = argv[i]; |
3c171f0b | 1178 | |
f46c706b | 1179 | switch (i) { |
64a5384f | 1180 | |
f46c706b FB |
1181 | case META_ARGV_TIMESTAMP: |
1182 | /* The journal fields contain the timestamp padded with six | |
1183 | * zeroes, so that the kernel-supplied 1s granularity timestamps | |
e503019b | 1184 | * becomes 1μs granularity, i.e. the granularity systemd usually |
f46c706b FB |
1185 | * operates in. */ |
1186 | t = free_timestamp = strjoin(argv[i], "000000"); | |
1187 | if (!t) | |
1188 | return log_oom(); | |
1189 | break; | |
64a5384f | 1190 | |
f46c706b FB |
1191 | case META_ARGV_SIGNAL: |
1192 | /* For signal, record its pretty name too */ | |
1193 | if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo)) | |
2a3bebd0 FB |
1194 | (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG", |
1195 | signal_to_string(signo)); | |
f46c706b | 1196 | break; |
64a5384f | 1197 | |
f46c706b FB |
1198 | default: |
1199 | break; | |
c8091d92 LP |
1200 | } |
1201 | ||
f46c706b FB |
1202 | r = iovw_put_string_field(iovw, meta_field_names[i], t); |
1203 | if (r < 0) | |
1204 | return r; | |
8c8549db | 1205 | } |
803a3464 | 1206 | |
f46c706b FB |
1207 | /* Cache some of the process metadata we collected so far and that we'll need to |
1208 | * access soon */ | |
1209 | return save_context(context, iovw); | |
1210 | } | |
3c171f0b | 1211 | |
db9ac801 | 1212 | static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *context) { |
f46c706b FB |
1213 | uid_t owner_uid; |
1214 | pid_t pid; | |
1215 | char *t; | |
3e4d0f6c | 1216 | size_t size; |
f46c706b FB |
1217 | const char *p; |
1218 | int r; | |
f5e04665 | 1219 | |
e6aa443f LP |
1220 | assert(iovw); |
1221 | assert(context); | |
1222 | ||
f46c706b FB |
1223 | /* Note that if we fail on oom later on, we do not roll-back changes to the iovec |
1224 | * structure. (It remains valid, with the first iovec fields initialized.) */ | |
f5e04665 | 1225 | |
f46c706b | 1226 | pid = context->pid; |
f5e04665 | 1227 | |
f46c706b | 1228 | /* The following is mandatory */ |
d7d74854 | 1229 | r = pid_get_comm(pid, &t); |
9a435388 | 1230 | if (r < 0) |
f46c706b | 1231 | return log_error_errno(r, "Failed to get COMM: %m"); |
f5e04665 | 1232 | |
f46c706b | 1233 | r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t); |
9a435388 FB |
1234 | if (r < 0) |
1235 | return r; | |
f45b8015 | 1236 | |
c790632c | 1237 | /* The following are optional, but we use them if present. */ |
2a3bebd0 FB |
1238 | r = get_process_exe(pid, &t); |
1239 | if (r >= 0) | |
1240 | r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t); | |
1241 | if (r < 0) | |
f46c706b | 1242 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); |
bdfd7b2c | 1243 | |
f46c706b | 1244 | if (cg_pid_get_unit(pid, &t) >= 0) |
2a3bebd0 | 1245 | (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t); |
f5e04665 | 1246 | |
f46c706b | 1247 | if (cg_pid_get_user_unit(pid, &t) >= 0) |
2a3bebd0 | 1248 | (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t); |
f46c706b | 1249 | |
9aa82023 | 1250 | if (sd_pid_get_session(pid, &t) >= 0) |
9a435388 | 1251 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t); |
f5e04665 | 1252 | |
a035f819 | 1253 | if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) { |
9a435388 | 1254 | r = asprintf(&t, UID_FMT, owner_uid); |
7de80bfe | 1255 | if (r > 0) |
9a435388 | 1256 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t); |
f5e04665 LP |
1257 | } |
1258 | ||
9aa82023 | 1259 | if (sd_pid_get_slice(pid, &t) >= 0) |
2a3bebd0 | 1260 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t); |
f5e04665 | 1261 | |
a034620f | 1262 | if (pid_get_cmdline(pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, &t) >= 0) |
2a3bebd0 | 1263 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t); |
a035f819 | 1264 | |
9aa82023 | 1265 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) |
2a3bebd0 | 1266 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t); |
a035f819 | 1267 | |
9aa82023 | 1268 | if (compose_open_fds(pid, &t) >= 0) |
2a3bebd0 | 1269 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t); |
3f132692 JF |
1270 | |
1271 | p = procfs_file_alloca(pid, "status"); | |
627055ce | 1272 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1273 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t); |
3f132692 JF |
1274 | |
1275 | p = procfs_file_alloca(pid, "maps"); | |
627055ce | 1276 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1277 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t); |
3f132692 JF |
1278 | |
1279 | p = procfs_file_alloca(pid, "limits"); | |
627055ce | 1280 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1281 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t); |
3f132692 JF |
1282 | |
1283 | p = procfs_file_alloca(pid, "cgroup"); | |
3e4d0f6c | 1284 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1285 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t); |
3f132692 | 1286 | |
d7032b1f | 1287 | p = procfs_file_alloca(pid, "mountinfo"); |
3e4d0f6c | 1288 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1289 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t); |
d7032b1f | 1290 | |
3e4d0f6c ZJS |
1291 | /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */ |
1292 | p = procfs_file_alloca(pid, "auxv"); | |
1293 | if (read_full_virtual_file(p, &t, &size) >= 0) { | |
1294 | char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1); | |
1295 | if (buf) { | |
1296 | /* Add a dummy terminator to make save_context() happy. */ | |
1297 | *((uint8_t*) mempcpy(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size)) = '\0'; | |
1298 | (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV=")); | |
1299 | } | |
1300 | ||
1301 | free(t); | |
1302 | } | |
1303 | ||
9aa82023 | 1304 | if (get_process_cwd(pid, &t) >= 0) |
2a3bebd0 | 1305 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t); |
3f132692 JF |
1306 | |
1307 | if (get_process_root(pid, &t) >= 0) { | |
9aa82023 ZJS |
1308 | bool proc_self_root_is_slash; |
1309 | ||
1310 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
3f132692 | 1311 | |
2a3bebd0 | 1312 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t); |
7ed03ce6 JF |
1313 | |
1314 | /* If the process' root is "/", then there is a chance it has | |
1315 | * mounted own root and hence being containerized. */ | |
9aa82023 | 1316 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) |
2a3bebd0 | 1317 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t); |
3f132692 JF |
1318 | } |
1319 | ||
9aa82023 | 1320 | if (get_process_environ(pid, &t) >= 0) |
2a3bebd0 | 1321 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t); |
9aa82023 | 1322 | |
f46c706b FB |
1323 | /* we successfully acquired all metadata */ |
1324 | return save_context(context, iovw); | |
9aa82023 | 1325 | } |
3f132692 | 1326 | |
a108c43e NR |
1327 | static int send_ucred(int transport_fd, struct ucred *ucred) { |
1328 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {}; | |
1329 | struct msghdr mh = { | |
1330 | .msg_control = &control, | |
1331 | .msg_controllen = sizeof(control), | |
1332 | }; | |
1333 | struct cmsghdr *cmsg; | |
1334 | ||
1335 | assert(transport_fd >= 0); | |
1336 | ||
1337 | cmsg = CMSG_FIRSTHDR(&mh); | |
1338 | *cmsg = (struct cmsghdr) { | |
1339 | .cmsg_level = SOL_SOCKET, | |
1340 | .cmsg_type = SCM_CREDENTIALS, | |
1341 | .cmsg_len = CMSG_LEN(sizeof(struct ucred)), | |
1342 | }; | |
1343 | memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred)); | |
1344 | ||
1345 | return RET_NERRNO(sendmsg(transport_fd, &mh, MSG_NOSIGNAL)); | |
1346 | } | |
1347 | ||
1348 | static int receive_ucred(int transport_fd, struct ucred *ret_ucred) { | |
1349 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {}; | |
1350 | struct msghdr mh = { | |
1351 | .msg_control = &control, | |
1352 | .msg_controllen = sizeof(control), | |
1353 | }; | |
1354 | struct cmsghdr *cmsg = NULL; | |
1355 | struct ucred *ucred = NULL; | |
1356 | ssize_t n; | |
1357 | ||
1358 | assert(ret_ucred); | |
1359 | ||
1360 | n = recvmsg_safe(transport_fd, &mh, 0); | |
1361 | if (n < 0) | |
1362 | return n; | |
1363 | ||
1364 | CMSG_FOREACH(cmsg, &mh) | |
1365 | if (cmsg->cmsg_level == SOL_SOCKET && | |
1366 | cmsg->cmsg_type == SCM_CREDENTIALS && | |
1367 | cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) { | |
1368 | ||
1369 | assert(!ucred); | |
1370 | ucred = CMSG_TYPED_DATA(cmsg, struct ucred); | |
1371 | } | |
1372 | ||
1373 | if (!ucred) | |
1374 | return -EIO; | |
1375 | ||
1376 | *ret_ucred = *ucred; | |
1377 | ||
1378 | return 0; | |
1379 | } | |
1380 | ||
1381 | static int can_forward_coredump(pid_t pid) { | |
1382 | _cleanup_free_ char *cgroup = NULL, *path = NULL, *unit = NULL; | |
1383 | int r; | |
1384 | ||
1385 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
1386 | if (r < 0) | |
1387 | return r; | |
1388 | ||
1389 | r = path_extract_directory(cgroup, &path); | |
1390 | if (r < 0) | |
1391 | return r; | |
1392 | ||
1393 | r = cg_path_get_unit_path(path, &unit); | |
1394 | if (r == -ENOMEM) | |
1395 | return log_oom(); | |
1396 | if (r == -ENXIO) | |
1397 | /* No valid units in this path. */ | |
1398 | return false; | |
1399 | if (r < 0) | |
1400 | return r; | |
1401 | ||
1402 | /* We require that this process belongs to a delegated cgroup | |
1403 | * (i.e. Delegate=yes), with CoredumpReceive=yes also. */ | |
1404 | r = cg_is_delegated(unit); | |
1405 | if (r <= 0) | |
1406 | return r; | |
1407 | ||
1408 | return cg_has_coredump_receive(unit); | |
1409 | } | |
1410 | ||
1411 | static int forward_coredump_to_container(Context *context) { | |
1412 | _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF; | |
71136404 | 1413 | _cleanup_close_pair_ int pair[2] = EBADF_PAIR; |
a108c43e NR |
1414 | pid_t pid, child; |
1415 | struct ucred ucred = { | |
1416 | .pid = context->pid, | |
1417 | .uid = context->uid, | |
1418 | .gid = context->gid, | |
1419 | }; | |
1420 | int r; | |
1421 | ||
1422 | r = namespace_get_leader(context->pid, NAMESPACE_PID, &pid); | |
1423 | if (r < 0) | |
1424 | return log_debug_errno(r, "Failed to get namespace leader: %m"); | |
1425 | ||
1426 | r = can_forward_coredump(pid); | |
1427 | if (r < 0) | |
1428 | return log_debug_errno(r, "Failed to check if coredump can be forwarded: %m"); | |
1429 | if (r == 0) | |
1430 | return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), | |
1431 | "Coredump will not be forwarded because no target cgroup was found."); | |
1432 | ||
1433 | r = RET_NERRNO(socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair)); | |
1434 | if (r < 0) | |
1435 | return log_debug_errno(r, "Failed to create socket pair: %m"); | |
1436 | ||
1437 | r = setsockopt_int(pair[1], SOL_SOCKET, SO_PASSCRED, true); | |
1438 | if (r < 0) | |
1439 | return log_debug_errno(r, "Failed to set SO_PASSCRED: %m"); | |
1440 | ||
1441 | r = namespace_open(pid, &pidnsfd, &mntnsfd, &netnsfd, &usernsfd, &rootfd); | |
1442 | if (r < 0) | |
1443 | return log_debug_errno(r, "Failed to join namespaces of PID " PID_FMT ": %m", pid); | |
1444 | ||
1445 | r = namespace_fork("(sd-coredumpns)", "(sd-coredump)", NULL, 0, | |
e9ccae31 | 1446 | FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM, |
a108c43e NR |
1447 | pidnsfd, mntnsfd, netnsfd, usernsfd, rootfd, &child); |
1448 | if (r < 0) | |
1449 | return log_debug_errno(r, "Failed to fork into namespaces of PID " PID_FMT ": %m", pid); | |
1450 | if (r == 0) { | |
1451 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; | |
1452 | Context child_context = {}; | |
1453 | ||
1454 | pair[0] = safe_close(pair[0]); | |
1455 | ||
1456 | if (laccess("/run/systemd/coredump", W_OK) < 0) { | |
1457 | log_debug_errno(errno, "Cannot find coredump socket, exiting: %m"); | |
1458 | _exit(EXIT_FAILURE); | |
1459 | } | |
1460 | ||
1461 | r = receive_ucred(pair[1], &ucred); | |
1462 | if (r < 0) { | |
1463 | log_debug_errno(r, "Failed to receive ucred and fd: %m"); | |
1464 | _exit(EXIT_FAILURE); | |
1465 | } | |
1466 | ||
1467 | iovw = iovw_new(); | |
1468 | if (!iovw) { | |
1469 | log_oom(); | |
1470 | _exit(EXIT_FAILURE); | |
1471 | } | |
1472 | ||
1473 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); | |
1474 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
1475 | (void) iovw_put_string_field(iovw, "COREDUMP_FORWARDED=", "1"); | |
1476 | ||
1477 | for (int i = 0; i < _META_ARGV_MAX; i++) { | |
1478 | int signo; | |
1479 | char buf[DECIMAL_STR_MAX(pid_t)]; | |
1480 | const char *t = context->meta[i]; | |
1481 | ||
1d03d970 | 1482 | switch (i) { |
a108c43e NR |
1483 | |
1484 | case META_ARGV_PID: | |
1485 | xsprintf(buf, PID_FMT, ucred.pid); | |
1486 | t = buf; | |
1487 | ||
1488 | break; | |
1489 | ||
1490 | case META_ARGV_UID: | |
1491 | xsprintf(buf, UID_FMT, ucred.uid); | |
1492 | t = buf; | |
1493 | break; | |
1494 | ||
1495 | case META_ARGV_GID: | |
1496 | xsprintf(buf, GID_FMT, ucred.gid); | |
1497 | t = buf; | |
1498 | break; | |
1499 | ||
1500 | case META_ARGV_SIGNAL: | |
1501 | if (safe_atoi(t, &signo) >= 0 && SIGNAL_VALID(signo)) | |
1502 | (void) iovw_put_string_field(iovw, | |
1503 | "COREDUMP_SIGNAL_NAME=SIG", | |
1504 | signal_to_string(signo)); | |
1505 | break; | |
1506 | ||
1507 | default: | |
1508 | break; | |
1509 | } | |
1510 | ||
1511 | r = iovw_put_string_field(iovw, meta_field_names[i], t); | |
1512 | if (r < 0) { | |
1513 | log_debug_errno(r, "Failed to construct iovec: %m"); | |
1514 | _exit(EXIT_FAILURE); | |
1515 | } | |
1516 | } | |
1517 | ||
1518 | r = save_context(&child_context, iovw); | |
1519 | if (r < 0) { | |
1520 | log_debug_errno(r, "Failed to save context: %m"); | |
1521 | _exit(EXIT_FAILURE); | |
1522 | } | |
1523 | ||
1524 | r = gather_pid_metadata_from_procfs(iovw, &child_context); | |
1525 | if (r < 0) { | |
1526 | log_debug_errno(r, "Failed to gather metadata from procfs: %m"); | |
1527 | _exit(EXIT_FAILURE); | |
1528 | } | |
1529 | ||
1530 | r = send_iovec(iovw, STDIN_FILENO); | |
1531 | if (r < 0) { | |
1532 | log_debug_errno(r, "Failed to send iovec to coredump socket: %m"); | |
1533 | _exit(EXIT_FAILURE); | |
1534 | } | |
1535 | ||
1536 | _exit(EXIT_SUCCESS); | |
1537 | } | |
1538 | ||
1539 | pair[1] = safe_close(pair[1]); | |
1540 | ||
1541 | /* We need to translate the PID, UID, and GID of the crashing process | |
1542 | * to the container's namespaces. Do this by sending an SCM_CREDENTIALS | |
1543 | * message on a socket pair, and read the result when we join the | |
1544 | * container. The kernel will perform the translation for us. */ | |
1545 | r = send_ucred(pair[0], &ucred); | |
1546 | if (r < 0) | |
1547 | return log_debug_errno(r, "Failed to send metadata to container: %m"); | |
1548 | ||
1549 | r = wait_for_terminate_and_check("(sd-coredumpns)", child, 0); | |
1550 | if (r < 0) | |
1551 | return log_debug_errno(r, "Failed to wait for child to terminate: %m"); | |
1552 | if (r != EXIT_SUCCESS) | |
1553 | return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to process coredump in container: %m"); | |
1554 | ||
1555 | return 0; | |
1556 | } | |
1557 | ||
9aa82023 | 1558 | static int process_kernel(int argc, char* argv[]) { |
6257e2fb | 1559 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; |
f46c706b | 1560 | Context context = {}; |
2a9b1a76 | 1561 | int r, signo; |
9aa82023 | 1562 | |
1f9d2a81 DDM |
1563 | /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds |
1564 | * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to | |
1565 | * /dev/null. */ | |
5bb1d7fb | 1566 | r = rearrange_stdio(STDIN_FILENO, -EBADF, -EBADF); |
1f9d2a81 DDM |
1567 | if (r < 0) |
1568 | return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m"); | |
1569 | ||
988e89ee ZJS |
1570 | log_debug("Processing coredump received from the kernel..."); |
1571 | ||
9a435388 FB |
1572 | iovw = iovw_new(); |
1573 | if (!iovw) | |
1574 | return log_oom(); | |
1575 | ||
f46c706b FB |
1576 | /* Collect all process metadata passed by the kernel through argv[] */ |
1577 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1); | |
92e92d71 | 1578 | if (r < 0) |
6257e2fb | 1579 | return r; |
86562420 | 1580 | |
f46c706b | 1581 | /* Collect the rest of the process metadata retrieved from the runtime */ |
db9ac801 | 1582 | r = gather_pid_metadata_from_procfs(iovw, &context); |
f46c706b | 1583 | if (r < 0) |
6257e2fb | 1584 | return r; |
f46c706b | 1585 | |
1e344c1d | 1586 | if (!context.is_journald) |
f46c706b | 1587 | /* OK, now we know it's not the journal, hence we can make use of it now. */ |
1e344c1d | 1588 | log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG); |
f46c706b | 1589 | |
2a9b1a76 HB |
1590 | /* Log minimal metadata now, so it is not lost if the system is about to shut down. */ |
1591 | log_info("Process %s (%s) of user %s terminated abnormally with signal %s/%s, processing...", | |
1592 | context.meta[META_ARGV_PID], context.meta[META_COMM], | |
1593 | context.meta[META_ARGV_UID], context.meta[META_ARGV_SIGNAL], | |
1594 | strna(safe_atoi(context.meta[META_ARGV_SIGNAL], &signo) >= 0 ? signal_to_string(signo) : NULL)); | |
1595 | ||
a108c43e NR |
1596 | r = in_same_namespace(getpid_cached(), context.pid, NAMESPACE_PID); |
1597 | if (r < 0) | |
1598 | log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m"); | |
1599 | if (r == 0) { | |
1600 | /* If this fails, fallback to the old behavior so that | |
1601 | * there is still some record of the crash. */ | |
1602 | r = forward_coredump_to_container(&context); | |
1603 | if (r >= 0) | |
1604 | return 0; | |
1605 | } | |
1606 | ||
f46c706b FB |
1607 | /* If this is PID 1 disable coredump collection, we'll unlikely be able to process |
1608 | * it later on. | |
1609 | * | |
1610 | * FIXME: maybe we should disable coredumps generation from the beginning and | |
1611 | * re-enable it only when we know it's either safe (ie we're not running OOM) or | |
1612 | * it's not pid1 ? */ | |
1613 | if (context.is_pid1) { | |
1614 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1615 | disable_coredumps(); | |
1616 | } | |
34c10968 | 1617 | |
a108c43e NR |
1618 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); |
1619 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
1620 | ||
f46c706b | 1621 | if (context.is_journald || context.is_pid1) |
6257e2fb | 1622 | return submit_coredump(&context, iovw, STDIN_FILENO); |
9aa82023 | 1623 | |
6257e2fb | 1624 | return send_iovec(iovw, STDIN_FILENO); |
3c171f0b | 1625 | } |
34c10968 | 1626 | |
988e89ee | 1627 | static int process_backtrace(int argc, char *argv[]) { |
3a19fe46 YW |
1628 | _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO); |
1629 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; | |
f46c706b | 1630 | Context context = {}; |
9a435388 | 1631 | char *message; |
988e89ee ZJS |
1632 | int r; |
1633 | ||
1634 | log_debug("Processing backtrace on stdin..."); | |
1635 | ||
9a435388 FB |
1636 | iovw = iovw_new(); |
1637 | if (!iovw) | |
5b45a160 ZJS |
1638 | return log_oom(); |
1639 | ||
2a3bebd0 FB |
1640 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR); |
1641 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1642 | |
1643 | /* Collect all process metadata from argv[] by making sure to skip the | |
1644 | * '--backtrace' option */ | |
1645 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2); | |
988e89ee | 1646 | if (r < 0) |
3a19fe46 | 1647 | return r; |
aaeb2522 | 1648 | |
f46c706b | 1649 | /* Collect the rest of the process metadata retrieved from the runtime */ |
db9ac801 | 1650 | r = gather_pid_metadata_from_procfs(iovw, &context); |
f46c706b | 1651 | if (r < 0) |
3a19fe46 | 1652 | return r; |
988e89ee | 1653 | |
86562420 | 1654 | for (;;) { |
5b45a160 | 1655 | r = journal_importer_process_data(&importer); |
3a19fe46 YW |
1656 | if (r < 0) |
1657 | return log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
d74dc4f2 ZJS |
1658 | if (r == 1 || /* complete entry */ |
1659 | journal_importer_eof(&importer)) /* end of data */ | |
5b45a160 | 1660 | break; |
988e89ee | 1661 | } |
988e89ee | 1662 | |
5b45a160 ZJS |
1663 | if (journal_importer_eof(&importer)) { |
1664 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
988e89ee | 1665 | |
f46c706b FB |
1666 | message = strjoina("Process ", context.meta[META_ARGV_PID], |
1667 | " (", context.meta[META_COMM], ")" | |
1668 | " of user ", context.meta[META_ARGV_UID], | |
1669 | " failed with ", context.meta[META_ARGV_SIGNAL]); | |
9a435388 FB |
1670 | |
1671 | r = iovw_put_string_field(iovw, "MESSAGE=", message); | |
1672 | if (r < 0) | |
3a19fe46 | 1673 | return r; |
5b45a160 | 1674 | } else { |
3a19fe46 YW |
1675 | /* The imported iovecs are not supposed to be freed by us so let's copy and merge them at the |
1676 | * end of the array. */ | |
1677 | r = iovw_append(iovw, &importer.iovw); | |
1678 | if (r < 0) | |
1679 | return r; | |
9a435388 | 1680 | } |
988e89ee | 1681 | |
9a435388 | 1682 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
988e89ee | 1683 | if (r < 0) |
3a19fe46 | 1684 | return log_error_errno(r, "Failed to log backtrace: %m"); |
988e89ee | 1685 | |
3a19fe46 | 1686 | return 0; |
988e89ee ZJS |
1687 | } |
1688 | ||
4515a95e | 1689 | static int run(int argc, char *argv[]) { |
3c171f0b | 1690 | int r; |
fee80f69 | 1691 | |
9aa82023 ZJS |
1692 | /* First, log to a safe place, since we don't know what crashed and it might |
1693 | * be journald which we'd rather not log to then. */ | |
8d4e028f | 1694 | |
1e344c1d | 1695 | log_set_target_and_open(LOG_TARGET_KMSG); |
8d4e028f | 1696 | |
3c171f0b LP |
1697 | /* Make sure we never enter a loop */ |
1698 | (void) prctl(PR_SET_DUMPABLE, 0); | |
8d4e028f | 1699 | |
3c171f0b LP |
1700 | /* Ignore all parse errors */ |
1701 | (void) parse_config(); | |
fee80f69 | 1702 | |
3c171f0b LP |
1703 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); |
1704 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
fee80f69 | 1705 | |
3c171f0b | 1706 | r = sd_listen_fds(false); |
4515a95e ZJS |
1707 | if (r < 0) |
1708 | return log_error_errno(r, "Failed to determine the number of file descriptors: %m"); | |
fee80f69 | 1709 | |
9aa82023 ZJS |
1710 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we |
1711 | * are invoked from the kernel as coredump handler. */ | |
988e89ee ZJS |
1712 | if (r == 0) { |
1713 | if (streq_ptr(argv[1], "--backtrace")) | |
4515a95e | 1714 | return process_backtrace(argc, argv); |
988e89ee | 1715 | else |
4515a95e | 1716 | return process_kernel(argc, argv); |
988e89ee | 1717 | } else if (r == 1) |
4515a95e | 1718 | return process_socket(SD_LISTEN_FDS_START); |
f5e04665 | 1719 | |
baaa35ad ZJS |
1720 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
1721 | "Received unexpected number of file descriptors."); | |
f5e04665 | 1722 | } |
4515a95e ZJS |
1723 | |
/* Program entry point. NOTE(review): DEFINE_MAIN_FUNCTION presumably comes from "main-func.h" and
 * expands to a main() that calls run() — confirm against that header. */
DEFINE_MAIN_FUNCTION(run);