]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
f5e04665 LP |
2 | |
3 | #include <errno.h> | |
803a3464 LP |
4 | #include <stdio.h> |
5 | #include <sys/prctl.h> | |
587f2a5e | 6 | #include <sys/statvfs.h> |
3e4d0f6c | 7 | #include <sys/auxv.h> |
cacd6403 | 8 | #include <sys/xattr.h> |
4f5dd394 | 9 | #include <unistd.h> |
f5e04665 | 10 | |
73a99163 | 11 | #include "sd-daemon.h" |
f11943c5 LP |
12 | #include "sd-journal.h" |
13 | #include "sd-login.h" | |
73a99163 | 14 | #include "sd-messages.h" |
4f5dd394 LP |
15 | |
16 | #include "acl-util.h" | |
b5efdb8a | 17 | #include "alloc-util.h" |
587f2a5e | 18 | #include "bus-error.h" |
430f0182 | 19 | #include "capability-util.h" |
ba1261bc | 20 | #include "cgroup-util.h" |
4f5dd394 | 21 | #include "compress.h" |
34c10968 LP |
22 | #include "conf-parser.h" |
23 | #include "copy.h" | |
c8715007 | 24 | #include "coredump-util.h" |
f11943c5 | 25 | #include "coredump-vacuum.h" |
a0956174 | 26 | #include "dirent-util.h" |
ea680f05 | 27 | #include "elf-util.h" |
4f5dd394 | 28 | #include "escape.h" |
3ffd4af2 | 29 | #include "fd-util.h" |
4f5dd394 | 30 | #include "fileio.h" |
f4f15635 | 31 | #include "fs-util.h" |
afc5dbf3 | 32 | #include "io-util.h" |
b18453ed | 33 | #include "journal-importer.h" |
5edf875b | 34 | #include "journal-send.h" |
4f5dd394 LP |
35 | #include "log.h" |
36 | #include "macro.h" | |
5e332028 | 37 | #include "main-func.h" |
0a970718 | 38 | #include "memory-util.h" |
2485b7e2 | 39 | #include "memstream-util.h" |
35cd0ba5 | 40 | #include "mkdir-label.h" |
6bedfcbb | 41 | #include "parse-util.h" |
0b452006 | 42 | #include "process-util.h" |
d14bcb4e | 43 | #include "signal-util.h" |
3c171f0b | 44 | #include "socket-util.h" |
4f5dd394 | 45 | #include "special.h" |
587f2a5e | 46 | #include "stat-util.h" |
8b43440b | 47 | #include "string-table.h" |
07630cea | 48 | #include "string-util.h" |
4f5dd394 | 49 | #include "strv.h" |
bf819d3a | 50 | #include "sync-util.h" |
e4de7287 | 51 | #include "tmpfile-util.h" |
b085d224 | 52 | #include "uid-alloc-range.h" |
b1d4f8e1 | 53 | #include "user-util.h" |
34727273 | 54 | |
/* Upper bound on the size of coredumps we are willing to process: 1G where pointers are 32 bits wide,
 * 32G where they are 64 bits wide. */
#if __SIZEOF_POINTER__ == 8
#define PROCESS_SIZE_MAX ((uint64_t) (32LLU*1024LLU*1024LLU*1024LLU))
#elif __SIZEOF_POINTER__ == 4
#define PROCESS_SIZE_MAX ((uint64_t) (1LLU*1024LLU*1024LLU*1024LLU))
#else
#error "Unexpected pointer size"
#endif

/* Upper bound on the size of coredumps we leave around on disk */
#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX

/* Upper bound on the size of coredumps we store in the journal */
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
/* oss-fuzz limits memory usage. */
#define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
#else
#define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
#endif

/* Floor used when the limit is lowered because of available memory: never go below 4MB when writing
 * core files to storage. */
#define PROCESS_SIZE_MIN (4U*1024U*1024U)
78 | ||
c4aa09b0 | 79 | /* Make sure to not make this larger than the maximum journal entry |
27f931d1 | 80 | * size. See DATA_SIZE_MAX in journal-importer.h. */ |
874bc134 | 81 | assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX); |
f5e04665 LP |
82 | |
enum {
        /* Indexes into the per-process metadata cache.
         *
         * The leading entries hold the same metadata, in the same order, as the kernel passes to us
         * via argv[], i.e. the expansion of the pattern we configured in
         * /proc/sys/kernel/core_pattern (see man:core(5)). */

        META_ARGV_PID,          /* %P: PID as seen in the initial pid namespace */
        META_ARGV_UID,          /* %u: UID as seen in the initial user namespace */
        META_ARGV_GID,          /* %g: GID as seen in the initial user namespace */
        META_ARGV_SIGNAL,       /* %s: number of the signal that caused the dump */
        META_ARGV_TIMESTAMP,    /* %t: time of the dump in seconds since the Epoch (we expand this to usec granularity) */
        META_ARGV_RLIMIT,       /* %c: soft resource limit on the core file size */
        META_ARGV_HOSTNAME,     /* %h: hostname */
        _META_ARGV_MAX,

        /* Next come a few special fields that are cached because we need them quickly when naming
         * coredump files and attaching xattrs. Unlike the entries above these are collected from the
         * runtime environment rather than from argv[]. */

        META_COMM = _META_ARGV_MAX,
        _META_MANDATORY_MAX,

        /* The remaining entries are collected the same way, but a missing one is not treated as a
         * failure. */

        META_EXE = _META_MANDATORY_MAX,
        META_UNIT,
        META_PROC_AUXV,
        _META_MAX
};
115 | ||
f46c706b | 116 | static const char * const meta_field_names[_META_MAX] = { |
510a1466 ZJS |
117 | [META_ARGV_PID] = "COREDUMP_PID=", |
118 | [META_ARGV_UID] = "COREDUMP_UID=", | |
119 | [META_ARGV_GID] = "COREDUMP_GID=", | |
120 | [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=", | |
121 | [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=", | |
122 | [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=", | |
123 | [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=", | |
124 | [META_COMM] = "COREDUMP_COMM=", | |
125 | [META_EXE] = "COREDUMP_EXE=", | |
126 | [META_UNIT] = "COREDUMP_UNIT=", | |
3e4d0f6c | 127 | [META_PROC_AUXV] = "COREDUMP_PROC_AUXV=", |
f46c706b FB |
128 | }; |
129 | ||
130 | typedef struct Context { | |
131 | const char *meta[_META_MAX]; | |
3e4d0f6c | 132 | size_t meta_size[_META_MAX]; |
f46c706b | 133 | pid_t pid; |
9764bca9 NR |
134 | uid_t uid; |
135 | gid_t gid; | |
f46c706b FB |
136 | bool is_pid1; |
137 | bool is_journald; | |
138 | } Context; | |
139 | ||
34c10968 LP |
/* Where coredumps get stored, as selected by the Storage= setting. */
typedef enum CoredumpStorage {
        COREDUMP_STORAGE_NONE,          /* "none" */
        COREDUMP_STORAGE_EXTERNAL,      /* "external" */
        COREDUMP_STORAGE_JOURNAL,       /* "journal" */
        _COREDUMP_STORAGE_MAX,          /* number of valid values, used to size the string table */
        _COREDUMP_STORAGE_INVALID = -EINVAL,
} CoredumpStorage;
147 | ||
34c10968 | 148 | static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = { |
510a1466 | 149 | [COREDUMP_STORAGE_NONE] = "none", |
34c10968 | 150 | [COREDUMP_STORAGE_EXTERNAL] = "external", |
510a1466 | 151 | [COREDUMP_STORAGE_JOURNAL] = "journal", |
34c10968 LP |
152 | }; |
153 | ||
154 | DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage); | |
8c9571d0 | 155 | static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting"); |
34727273 ZJS |
156 | |
157 | static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL; | |
8c9571d0 | 158 | static bool arg_compress = true; |
59f448cf LP |
159 | static uint64_t arg_process_size_max = PROCESS_SIZE_MAX; |
160 | static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX; | |
6e2b4a69 | 161 | static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX; |
f5fbe71d YW |
162 | static uint64_t arg_keep_free = UINT64_MAX; |
163 | static uint64_t arg_max_use = UINT64_MAX; | |
34c10968 LP |
164 | |
165 | static int parse_config(void) { | |
34c10968 | 166 | static const ConfigTableItem items[] = { |
510a1466 ZJS |
167 | { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, |
168 | { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, | |
169 | { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, | |
170 | { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, | |
171 | { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, | |
172 | { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, | |
173 | { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, | |
34c10968 LP |
174 | {} |
175 | }; | |
176 | ||
4a78074f LP |
177 | int r; |
178 | ||
179 | r = config_parse_config_file( | |
180 | "coredump.conf", | |
181 | "Coredump\0", | |
182 | config_item_table_lookup, | |
183 | items, | |
184 | CONFIG_PARSE_WARN, | |
185 | /* userdata= */ NULL); | |
186 | if (r < 0) | |
187 | return r; | |
188 | ||
189 | /* Let's make sure we fix up the maximum size we send to the journal here on the client side, for | |
190 | * efficiency reasons. journald wouldn't accept anything larger anyway. */ | |
191 | if (arg_journal_size_max > JOURNAL_SIZE_MAX) { | |
192 | log_warning("JournalSizeMax= set to larger value (%s) than journald would accept (%s), lowering automatically.", | |
193 | FORMAT_BYTES(arg_journal_size_max), FORMAT_BYTES(JOURNAL_SIZE_MAX)); | |
194 | arg_journal_size_max = JOURNAL_SIZE_MAX; | |
195 | } | |
196 | ||
197 | return 0; | |
34c10968 LP |
198 | } |
199 | ||
a1e92eee | 200 | static uint64_t storage_size_max(void) { |
ee0449fd ZJS |
201 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
202 | return arg_external_size_max; | |
203 | if (arg_storage == COREDUMP_STORAGE_JOURNAL) | |
204 | return arg_journal_size_max; | |
205 | assert(arg_storage == COREDUMP_STORAGE_NONE); | |
206 | return 0; | |
73a99163 ZJS |
207 | } |
208 | ||
3e4d0f6c ZJS |
/* Adds an ACL entry granting 'uid' read access to the coredump open as 'fd'.
 *
 * Nothing is changed if 'allow_user' is false, if 'uid' is a system or dynamic user or the nobody
 * user, or if we are built without ACL support. Returns 0 on success or if there was nothing to do,
 * and a negative error (already logged) if the ACL could not be adjusted. */
static int fix_acl(int fd, uid_t uid, bool allow_user) {
        assert(fd >= 0);
        assert(uid_is_valid(uid));

#if HAVE_ACL
        int r;

        /* Users are not allowed to read coredumps if the uid or capabilities were changed, and
         * system, dynamic and nobody users never get an entry of their own. */
        if (!allow_user || uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
                return 0;

        /* Make sure normal users can read (but not write or delete) their own coredumps */
        r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
        if (r < 0)
                return log_error_errno(r, "Failed to adjust ACL of the coredump: %m");
#endif

        return 0;
}
231 | ||
f46c706b FB |
232 | static int fix_xattr(int fd, const Context *context) { |
233 | ||
234 | static const char * const xattrs[_META_MAX] = { | |
510a1466 ZJS |
235 | [META_ARGV_PID] = "user.coredump.pid", |
236 | [META_ARGV_UID] = "user.coredump.uid", | |
237 | [META_ARGV_GID] = "user.coredump.gid", | |
238 | [META_ARGV_SIGNAL] = "user.coredump.signal", | |
239 | [META_ARGV_TIMESTAMP] = "user.coredump.timestamp", | |
240 | [META_ARGV_RLIMIT] = "user.coredump.rlimit", | |
241 | [META_ARGV_HOSTNAME] = "user.coredump.hostname", | |
242 | [META_COMM] = "user.coredump.comm", | |
243 | [META_EXE] = "user.coredump.exe", | |
0cd77f97 LP |
244 | }; |
245 | ||
34c10968 LP |
246 | int r = 0; |
247 | ||
b59233e6 LP |
248 | assert(fd >= 0); |
249 | ||
60ecc386 | 250 | /* Attach some metadata to coredumps via extended attributes. Just because we can. */ |
34c10968 | 251 | |
fe96c0f8 | 252 | for (unsigned i = 0; i < _META_MAX; i++) { |
1eef15b1 ZJS |
253 | int k; |
254 | ||
f46c706b | 255 | if (isempty(context->meta[i]) || !xattrs[i]) |
0cd77f97 | 256 | continue; |
34c10968 | 257 | |
60ecc386 ZJS |
258 | k = RET_NERRNO(fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE)); |
259 | RET_GATHER(r, k); | |
0cd77f97 | 260 | } |
34c10968 LP |
261 | |
262 | return r; | |
263 | } | |
264 | ||
/* Escapes a metadata string via xescape() before it is used as a component of the coredump file
 * name. NOTE(review): presumably the second argument lists the extra characters to escape ('.', '/'
 * and ' '); confirm against xescape() in escape.h. */
#define filename_escape(s) xescape((s), "./ ")
34c10968 | 266 | |
/* Returns 's' if it is set, and a fixed placeholder string suitable for log messages otherwise. */
static const char *coredump_tmpfile_name(const char *s) {
        if (s)
                return s;

        return "(unnamed temporary file)";
}
270 | ||
b59233e6 LP |
271 | static int fix_permissions( |
272 | int fd, | |
273 | const char *filename, | |
274 | const char *target, | |
f46c706b | 275 | const Context *context, |
3e4d0f6c | 276 | bool allow_user) { |
b59233e6 | 277 | |
03532f0a LP |
278 | int r; |
279 | ||
b59233e6 | 280 | assert(fd >= 0); |
b59233e6 | 281 | assert(target); |
3c171f0b | 282 | assert(context); |
cfd652ed ZJS |
283 | |
284 | /* Ignore errors on these */ | |
3c171f0b | 285 | (void) fchmod(fd, 0640); |
9764bca9 | 286 | (void) fix_acl(fd, context->uid, allow_user); |
3c171f0b | 287 | (void) fix_xattr(fd, context); |
cfd652ed | 288 | |
74402bf0 | 289 | r = link_tmpfile(fd, filename, target, LINK_TMPFILE_SYNC); |
03532f0a LP |
290 | if (r < 0) |
291 | return log_error_errno(r, "Failed to move coredump %s into place: %m", target); | |
cfd652ed ZJS |
292 | |
293 | return 0; | |
294 | } | |
295 | ||
59f448cf | 296 | static int maybe_remove_external_coredump(const char *filename, uint64_t size) { |
cfd652ed | 297 | |
b59233e6 | 298 | /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ |
cfd652ed | 299 | |
fc6cec86 | 300 | if (arg_storage == COREDUMP_STORAGE_EXTERNAL && |
cfd652ed ZJS |
301 | size <= arg_external_size_max) |
302 | return 0; | |
303 | ||
304 | if (!filename) | |
305 | return 1; | |
306 | ||
4a62c710 MS |
307 | if (unlink(filename) < 0 && errno != ENOENT) |
308 | return log_error_errno(errno, "Failed to unlink %s: %m", filename); | |
cfd652ed ZJS |
309 | |
310 | return 1; | |
311 | } | |
312 | ||
f46c706b | 313 | static int make_filename(const Context *context, char **ret) { |
b59233e6 | 314 | _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL; |
a7f7d1bd | 315 | sd_id128_t boot = {}; |
34c10968 LP |
316 | int r; |
317 | ||
3c171f0b | 318 | assert(context); |
34c10968 | 319 | |
f46c706b | 320 | c = filename_escape(context->meta[META_COMM]); |
34c10968 | 321 | if (!c) |
b59233e6 | 322 | return -ENOMEM; |
34c10968 | 323 | |
f46c706b | 324 | u = filename_escape(context->meta[META_ARGV_UID]); |
0dc5d23c | 325 | if (!u) |
b59233e6 | 326 | return -ENOMEM; |
34c10968 LP |
327 | |
328 | r = sd_id128_get_boot(&boot); | |
b59233e6 | 329 | if (r < 0) |
34c10968 | 330 | return r; |
34c10968 | 331 | |
f46c706b | 332 | p = filename_escape(context->meta[META_ARGV_PID]); |
b59233e6 LP |
333 | if (!p) |
334 | return -ENOMEM; | |
335 | ||
f46c706b | 336 | t = filename_escape(context->meta[META_ARGV_TIMESTAMP]); |
b59233e6 LP |
337 | if (!t) |
338 | return -ENOMEM; | |
339 | ||
340 | if (asprintf(ret, | |
64a5384f | 341 | "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s", |
34c10968 | 342 | c, |
0dc5d23c | 343 | u, |
34c10968 LP |
344 | SD_ID128_FORMAT_VAL(boot), |
345 | p, | |
b59233e6 LP |
346 | t) < 0) |
347 | return -ENOMEM; | |
348 | ||
349 | return 0; | |
350 | } | |
351 | ||
3e4d0f6c ZJS |
352 | static int grant_user_access(int core_fd, const Context *context) { |
353 | int at_secure = -1; | |
354 | uid_t uid = UID_INVALID, euid = UID_INVALID; | |
355 | uid_t gid = GID_INVALID, egid = GID_INVALID; | |
356 | int r; | |
357 | ||
358 | assert(core_fd >= 0); | |
359 | assert(context); | |
360 | ||
361 | if (!context->meta[META_PROC_AUXV]) | |
362 | return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions."); | |
363 | ||
364 | uint8_t elf[EI_NIDENT]; | |
365 | errno = 0; | |
366 | if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf)) | |
367 | return log_warning_errno(errno_or_else(EIO), | |
368 | "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno)); | |
369 | ||
370 | if (elf[EI_MAG0] != ELFMAG0 || | |
371 | elf[EI_MAG1] != ELFMAG1 || | |
372 | elf[EI_MAG2] != ELFMAG2 || | |
373 | elf[EI_MAG3] != ELFMAG3 || | |
374 | elf[EI_VERSION] != EV_CURRENT) | |
375 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
376 | "Core file does not have ELF header, not adjusting permissions."); | |
377 | if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) || | |
378 | !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB)) | |
379 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
380 | "Core file has strange ELF class, not adjusting permissions."); | |
381 | ||
382 | if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN)) | |
383 | return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), | |
384 | "Core file has non-native endianness, not adjusting permissions."); | |
385 | ||
cb38fdbe ZJS |
386 | r = parse_auxv(LOG_WARNING, |
387 | /* elf_class= */ elf[EI_CLASS], | |
388 | context->meta[META_PROC_AUXV], | |
389 | context->meta_size[META_PROC_AUXV], | |
390 | &at_secure, &uid, &euid, &gid, &egid); | |
3e4d0f6c ZJS |
391 | if (r < 0) |
392 | return r; | |
393 | ||
394 | /* We allow access if we got all the data and at_secure is not set and | |
395 | * the uid/gid matches euid/egid. */ | |
396 | bool ret = | |
397 | at_secure == 0 && | |
398 | uid != UID_INVALID && euid != UID_INVALID && uid == euid && | |
399 | gid != GID_INVALID && egid != GID_INVALID && gid == egid; | |
400 | log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", | |
401 | ret ? "permit" : "restrict", | |
402 | uid, euid, gid, egid, yes_no(at_secure)); | |
403 | return ret; | |
404 | } | |
405 | ||
b59233e6 | 406 | static int save_external_coredump( |
f46c706b | 407 | const Context *context, |
3c171f0b | 408 | int input_fd, |
b59233e6 | 409 | char **ret_filename, |
5f3e0a74 HW |
410 | int *ret_node_fd, |
411 | int *ret_data_fd, | |
0cd4e913 | 412 | uint64_t *ret_size, |
587f2a5e | 413 | uint64_t *ret_compressed_size, |
cc4419ed | 414 | bool *ret_truncated) { |
b59233e6 | 415 | |
587f2a5e LB |
416 | _cleanup_(unlink_and_freep) char *tmp = NULL; |
417 | _cleanup_free_ char *fn = NULL; | |
254d1313 | 418 | _cleanup_close_ int fd = -EBADF; |
ee0449fd | 419 | uint64_t rlimit, process_limit, max_size; |
587f2a5e | 420 | bool truncated, storage_on_tmpfs; |
b59233e6 LP |
421 | struct stat st; |
422 | int r; | |
423 | ||
3c171f0b | 424 | assert(context); |
b59233e6 | 425 | assert(ret_filename); |
5f3e0a74 HW |
426 | assert(ret_node_fd); |
427 | assert(ret_data_fd); | |
b59233e6 | 428 | assert(ret_size); |
587f2a5e LB |
429 | assert(ret_compressed_size); |
430 | assert(ret_truncated); | |
b59233e6 | 431 | |
f46c706b | 432 | r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit); |
bdfd7b2c | 433 | if (r < 0) |
f46c706b FB |
434 | return log_error_errno(r, "Failed to parse resource limit '%s': %m", |
435 | context->meta[META_ARGV_RLIMIT]); | |
d7a0f1f4 | 436 | if (rlimit < page_size()) |
f46c706b | 437 | /* Is coredumping disabled? Then don't bother saving/processing the |
3a559f22 | 438 | * coredump. Anything below PAGE_SIZE cannot give a readable coredump |
f46c706b FB |
439 | * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but |
440 | * is usually the same as PAGE_SIZE. */ | |
baaa35ad ZJS |
441 | return log_info_errno(SYNTHETIC_ERRNO(EBADSLT), |
442 | "Resource limits disable core dumping for process %s (%s).", | |
f46c706b | 443 | context->meta[META_ARGV_PID], context->meta[META_COMM]); |
bdfd7b2c | 444 | |
ee0449fd | 445 | process_limit = MAX(arg_process_size_max, storage_size_max()); |
baaa35ad ZJS |
446 | if (process_limit == 0) |
447 | return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT), | |
448 | "Limits for coredump processing and storage are both 0, not dumping core."); | |
ee0449fd | 449 | |
bdfd7b2c | 450 | /* Never store more than the process configured, or than we actually shall keep or process */ |
ee0449fd | 451 | max_size = MIN(rlimit, process_limit); |
bdfd7b2c | 452 | |
3c171f0b | 453 | r = make_filename(context, &fn); |
23bbb0de MS |
454 | if (r < 0) |
455 | return log_error_errno(r, "Failed to determine coredump file name: %m"); | |
34c10968 | 456 | |
1fbe8d0c | 457 | (void) mkdir_parents_label(fn, 0755); |
803a3464 | 458 | |
03532f0a | 459 | fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp); |
4a62c710 | 460 | if (fd < 0) |
03532f0a | 461 | return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); |
803a3464 | 462 | |
587f2a5e LB |
463 | /* If storage is on tmpfs, the kernel oomd might kill us if there's MemoryMax set on |
464 | * the service or the slice it belongs to. This is common on low-resources systems, | |
465 | * to avoid crashing processes to take away too many system resources. | |
466 | * Check the cgroup settings, and set max_size to a bit less than half of the | |
467 | * available memory left to the process. | |
468 | * Then, attempt to write the core file uncompressed first - if the write gets | |
469 | * interrupted, we know we won't be able to write it all, so instead compress what | |
470 | * was written so far, delete the uncompressed truncated core, and then continue | |
471 | * compressing from STDIN. Given the compressed core cannot be larger than the | |
472 | * uncompressed one, and 1KB for metadata is accounted for in the calculation, we | |
473 | * should be able to at least store the full compressed core file. */ | |
474 | ||
475 | storage_on_tmpfs = fd_is_temporary_fs(fd) > 0; | |
476 | if (storage_on_tmpfs && arg_compress) { | |
477 | _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; | |
478 | uint64_t cgroup_limit = UINT64_MAX; | |
479 | struct statvfs sv; | |
480 | ||
481 | /* If we can't get the cgroup limit, just ignore it, but don't fail, | |
482 | * try anyway with the config settings. */ | |
483 | r = sd_bus_default_system(&bus); | |
484 | if (r < 0) | |
485 | log_info_errno(r, "Failed to connect to system bus, skipping MemoryAvailable check: %m"); | |
486 | else { | |
487 | _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; | |
488 | ||
489 | r = sd_bus_get_property_trivial( | |
490 | bus, | |
491 | "org.freedesktop.systemd1", | |
492 | "/org/freedesktop/systemd1/unit/self", | |
493 | "org.freedesktop.systemd1.Service", | |
494 | "MemoryAvailable", | |
495 | &error, | |
496 | 't', &cgroup_limit); | |
497 | if (r < 0) | |
498 | log_warning_errno(r, | |
499 | "Failed to query MemoryAvailable for current unit, " | |
500 | "falling back to static config settings: %s", | |
501 | bus_error_message(&error, r)); | |
502 | } | |
803a3464 | 503 | |
587f2a5e LB |
504 | max_size = MIN(cgroup_limit, max_size); |
505 | max_size = LESS_BY(max_size, 1024U) / 2; /* Account for 1KB metadata overhead for compressing */ | |
506 | max_size = MAX(PROCESS_SIZE_MIN, max_size); /* Impose a lower minimum */ | |
507 | ||
508 | /* tmpfs might get full quickly, so check the available space too. | |
509 | * But don't worry about errors here, failing to access the storage | |
510 | * location will be better logged when writing to it. */ | |
8facac5f | 511 | if (fstatvfs(fd, &sv) >= 0) |
587f2a5e | 512 | max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size); |
34c10968 | 513 | |
587f2a5e | 514 | log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup memory limits.", max_size); |
7849c2ac TA |
515 | } |
516 | ||
587f2a5e LB |
517 | r = copy_bytes(input_fd, fd, max_size, 0); |
518 | if (r < 0) | |
519 | return log_error_errno(r, "Cannot store coredump of %s (%s): %m", | |
520 | context->meta[META_ARGV_PID], context->meta[META_COMM]); | |
521 | truncated = r == 1; | |
cfd652ed | 522 | |
3e4d0f6c ZJS |
523 | bool allow_user = grant_user_access(fd, context) > 0; |
524 | ||
587f2a5e LB |
525 | #if HAVE_COMPRESSION |
526 | if (arg_compress) { | |
527 | _cleanup_(unlink_and_freep) char *tmp_compressed = NULL; | |
528 | _cleanup_free_ char *fn_compressed = NULL; | |
254d1313 | 529 | _cleanup_close_ int fd_compressed = -EBADF; |
587f2a5e LB |
530 | uint64_t uncompressed_size = 0; |
531 | ||
532 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) | |
533 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); | |
cfd652ed | 534 | |
ee00684c | 535 | fn_compressed = strjoin(fn, default_compression_extension()); |
587f2a5e LB |
536 | if (!fn_compressed) |
537 | return log_oom(); | |
cfd652ed | 538 | |
03532f0a | 539 | fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed); |
587f2a5e LB |
540 | if (fd_compressed < 0) |
541 | return log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed); | |
cfd652ed | 542 | |
587f2a5e LB |
543 | r = compress_stream(fd, fd_compressed, max_size, &uncompressed_size); |
544 | if (r < 0) | |
545 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
546 | ||
547 | if (truncated && storage_on_tmpfs) { | |
548 | uint64_t partial_uncompressed_size = 0; | |
549 | ||
550 | /* Uncompressed write was truncated and we are writing to tmpfs: delete | |
551 | * the uncompressed core, and compress the remaining part from STDIN. */ | |
552 | ||
553 | tmp = unlink_and_free(tmp); | |
554 | fd = safe_close(fd); | |
555 | ||
556 | r = compress_stream(input_fd, fd_compressed, max_size, &partial_uncompressed_size); | |
557 | if (r < 0) | |
558 | return log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed)); | |
559 | uncompressed_size += partial_uncompressed_size; | |
b59233e6 LP |
560 | } |
561 | ||
9764bca9 | 562 | r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, allow_user); |
cfd652ed | 563 | if (r < 0) |
587f2a5e | 564 | return r; |
b59233e6 | 565 | |
587f2a5e LB |
566 | if (fstat(fd_compressed, &st) < 0) |
567 | return log_error_errno(errno, | |
568 | "Failed to fstat core file %s: %m", | |
569 | coredump_tmpfile_name(tmp_compressed)); | |
cfd652ed | 570 | |
587f2a5e LB |
571 | *ret_filename = TAKE_PTR(fn_compressed); /* compressed */ |
572 | *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */ | |
573 | *ret_compressed_size = (uint64_t) st.st_size; /* compressed */ | |
574 | *ret_data_fd = TAKE_FD(fd); | |
575 | *ret_size = uncompressed_size; | |
576 | *ret_truncated = truncated; | |
577 | tmp_compressed = mfree(tmp_compressed); | |
cfd652ed | 578 | |
cfd652ed | 579 | return 0; |
34c10968 | 580 | } |
3b1a55e1 | 581 | #endif |
5f3e0a74 | 582 | |
587f2a5e LB |
583 | if (truncated) |
584 | log_struct(LOG_INFO, | |
08e86b15 DDM |
585 | LOG_MESSAGE("Core file was truncated to %"PRIu64" bytes.", max_size), |
586 | "SIZE_LIMIT=%"PRIu64, max_size, | |
587f2a5e LB |
587 | "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR); |
588 | ||
9764bca9 | 589 | r = fix_permissions(fd, tmp, fn, context, allow_user); |
cfd652ed | 590 | if (r < 0) |
587f2a5e LB |
591 | return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn); |
592 | ||
593 | if (fstat(fd, &st) < 0) | |
594 | return log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); | |
595 | ||
596 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) | |
597 | return log_error_errno(errno, "Failed to seek on coredump %s: %m", fn); | |
34c10968 | 598 | |
0cfb0971 | 599 | *ret_filename = TAKE_PTR(fn); |
1cc6c93a | 600 | *ret_data_fd = TAKE_FD(fd); |
59f448cf | 601 | *ret_size = (uint64_t) st.st_size; |
587f2a5e | 602 | *ret_truncated = truncated; |
34c10968 | 603 | |
34c10968 | 604 | return 0; |
34c10968 LP |
605 | } |
606 | ||
607 | static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) { | |
608 | _cleanup_free_ char *field = NULL; | |
609 | ssize_t n; | |
610 | ||
8d4e028f | 611 | assert(fd >= 0); |
34c10968 LP |
612 | assert(ret); |
613 | assert(ret_size); | |
614 | ||
4a62c710 MS |
615 | if (lseek(fd, 0, SEEK_SET) == (off_t) -1) |
616 | return log_warning_errno(errno, "Failed to seek: %m"); | |
803a3464 | 617 | |
34c10968 | 618 | field = malloc(9 + size); |
a73c74db LP |
619 | if (!field) |
620 | return log_warning_errno(SYNTHETIC_ERRNO(ENOMEM), | |
621 | "Failed to allocate memory for coredump, coredump will not be stored."); | |
34c10968 LP |
622 | |
623 | memcpy(field, "COREDUMP=", 9); | |
624 | ||
a73c74db LP |
625 | /* NB: simple read() would fail for overly large coredumps, since read() on Linux can only deal with |
626 | * 0x7ffff000 bytes max. Hence call things in a loop. */ | |
627 | n = loop_read(fd, field + 9, size, /* do_poll= */ false); | |
23bbb0de MS |
628 | if (n < 0) |
629 | return log_error_errno((int) n, "Failed to read core data: %m"); | |
baaa35ad ZJS |
630 | if ((size_t) n < size) |
631 | return log_error_errno(SYNTHETIC_ERRNO(EIO), | |
632 | "Core data too short."); | |
34c10968 | 633 | |
1cc6c93a | 634 | *ret = TAKE_PTR(field); |
34c10968 LP |
635 | *ret_size = size + 9; |
636 | ||
34c10968 LP |
637 | return 0; |
638 | } | |
803a3464 | 639 | |
3f132692 JF |
640 | /* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines: |
641 | * 0:/dev/pts/23 | |
642 | * pos: 0 | |
643 | * flags: 0100002 | |
644 | * | |
645 | * 1:/dev/pts/23 | |
646 | * pos: 0 | |
647 | * flags: 0100002 | |
648 | * | |
649 | * 2:/dev/pts/23 | |
650 | * pos: 0 | |
651 | * flags: 0100002 | |
652 | * EOF | |
653 | */ | |
2485b7e2 YW |
654 | static int compose_open_fds(pid_t pid, char **ret) { |
655 | _cleanup_(memstream_done) MemStream m = {}; | |
4d84bc2f | 656 | _cleanup_closedir_ DIR *proc_fd_dir = NULL; |
254d1313 | 657 | _cleanup_close_ int proc_fdinfo_fd = -EBADF; |
59059b4a | 658 | const char *fddelim = "", *path; |
2485b7e2 | 659 | FILE *stream; |
7b26ea6f | 660 | int r; |
3f132692 JF |
661 | |
662 | assert(pid >= 0); | |
2485b7e2 | 663 | assert(ret); |
3f132692 | 664 | |
59059b4a | 665 | path = procfs_file_alloca(pid, "fd"); |
3f132692 | 666 | proc_fd_dir = opendir(path); |
59059b4a ZJS |
667 | if (!proc_fd_dir) |
668 | return -errno; | |
3f132692 | 669 | |
4d84bc2f | 670 | proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH); |
59059b4a ZJS |
671 | if (proc_fdinfo_fd < 0) |
672 | return -errno; | |
3f132692 | 673 | |
2485b7e2 | 674 | stream = memstream_init(&m); |
3f132692 JF |
675 | if (!stream) |
676 | return -ENOMEM; | |
677 | ||
af3b864d | 678 | FOREACH_DIRENT(de, proc_fd_dir, return -errno) { |
3f132692 | 679 | _cleanup_fclose_ FILE *fdinfo = NULL; |
4d84bc2f | 680 | _cleanup_free_ char *fdname = NULL; |
254d1313 | 681 | _cleanup_close_ int fd = -EBADF; |
3f132692 | 682 | |
af3b864d | 683 | r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname); |
3f132692 JF |
684 | if (r < 0) |
685 | return r; | |
686 | ||
af3b864d | 687 | fprintf(stream, "%s%s:%s\n", fddelim, de->d_name, fdname); |
3f132692 JF |
688 | fddelim = "\n"; |
689 | ||
690 | /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */ | |
af3b864d | 691 | fd = openat(proc_fdinfo_fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY); |
59059b4a | 692 | if (fd < 0) |
3f132692 JF |
693 | continue; |
694 | ||
b46c3e49 VC |
695 | fdinfo = take_fdopen(&fd, "r"); |
696 | if (!fdinfo) | |
3f132692 JF |
697 | continue; |
698 | ||
7b26ea6f LP |
699 | for (;;) { |
700 | _cleanup_free_ char *line = NULL; | |
701 | ||
702 | r = read_line(fdinfo, LONG_LINE_MAX, &line); | |
703 | if (r < 0) | |
704 | return r; | |
705 | if (r == 0) | |
706 | break; | |
707 | ||
0d536673 | 708 | fputs(line, stream); |
7b26ea6f | 709 | fputc('\n', stream); |
4d84bc2f | 710 | } |
3f132692 JF |
711 | } |
712 | ||
2485b7e2 | 713 | return memstream_finalize(&m, ret, NULL); |
3f132692 JF |
714 | } |
715 | ||
7ed03ce6 JF |
716 | static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) { |
717 | const char *p; | |
718 | struct stat stbuf; | |
254d1313 | 719 | _cleanup_close_ int proc_ns_dir_fd = -EBADF; |
7ed03ce6 JF |
720 | |
721 | p = procfs_file_alloca(pid, "ns"); | |
722 | ||
723 | proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY); | |
724 | if (proc_ns_dir_fd < 0) | |
725 | return -errno; | |
726 | ||
727 | if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0) | |
728 | return -errno; | |
729 | ||
730 | *ns = stbuf.st_ino; | |
731 | return 0; | |
732 | } | |
733 | ||
/* Walks up the chain of parent processes starting at 'pid' until an ancestor is found whose "mnt"
 * namespace inode differs from the one of 'pid' itself, and stores that ancestor's PID in *ret.
 *
 * Returns 0 on success, -ENOENT if the top of the process tree is reached without finding such an
 * ancestor, and other negative errno-style values on failure. */
static int get_mount_namespace_leader(pid_t pid, pid_t *ret) {
        ino_t own_mntns;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        for (pid_t cur = pid;;) {
                ino_t ancestor_mntns;
                pid_t parent;

                r = get_process_ppid(cur, &parent);
                if (r == -EADDRNOTAVAIL)
                        /* Reached the top (i.e. typically PID 1, but could also be a process whose
                         * parent is not in our pidns) */
                        return -ENOENT;
                if (r < 0)
                        return r;

                r = get_process_ns(parent, "mnt", &ancestor_mntns);
                if (r < 0)
                        return r;

                if (ancestor_mntns == own_mntns) {
                        /* Still the same mount namespace, keep climbing */
                        cur = parent;
                        continue;
                }

                *ret = parent;
                return 0;
        }
}
765 | ||
766 | /* Returns 1 if the parent was found. | |
767 | * Returns 0 if there is not a process we can call the pid's | |
768 | * container parent (the pid's process isn't 'containerized'). | |
769 | * Returns a negative number on errors. | |
770 | */ | |
771 | static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { | |
7ed03ce6 JF |
772 | pid_t container_pid; |
773 | const char *proc_root_path; | |
774 | struct stat root_stat, proc_root_stat; | |
83844031 | 775 | int r; |
7ed03ce6 JF |
776 | |
777 | /* To compare inodes of / and /proc/[pid]/root */ | |
778 | if (stat("/", &root_stat) < 0) | |
779 | return -errno; | |
780 | ||
781 | proc_root_path = procfs_file_alloca(pid, "root"); | |
782 | if (stat(proc_root_path, &proc_root_stat) < 0) | |
783 | return -errno; | |
784 | ||
785 | /* The process uses system root. */ | |
c20c77ef | 786 | if (stat_inode_same(&proc_root_stat, &root_stat)) { |
7ed03ce6 JF |
787 | *cmdline = NULL; |
788 | return 0; | |
789 | } | |
790 | ||
791 | r = get_mount_namespace_leader(pid, &container_pid); | |
792 | if (r < 0) | |
793 | return r; | |
794 | ||
5dd55303 | 795 | r = get_process_cmdline(container_pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, cmdline); |
d3cba4ea EV |
796 | if (r < 0) |
797 | return r; | |
798 | ||
799 | return 1; | |
7ed03ce6 JF |
800 | } |
801 | ||
f46c706b | 802 | static int change_uid_gid(const Context *context) { |
9764bca9 NR |
803 | uid_t uid = context->uid; |
804 | gid_t gid = context->gid; | |
3c171f0b | 805 | int r; |
34c10968 | 806 | |
28add648 | 807 | if (uid_is_system(uid)) { |
888e378d LP |
808 | const char *user = "systemd-coredump"; |
809 | ||
fafff8f1 | 810 | r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0); |
888e378d LP |
811 | if (r < 0) { |
812 | log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user); | |
813 | uid = gid = 0; | |
814 | } | |
888e378d | 815 | } |
3c171f0b LP |
816 | |
817 | return drop_privileges(uid, gid, 0); | |
818 | } | |
8c8549db | 819 | |
3c171f0b | 820 | static int submit_coredump( |
3e4d0f6c | 821 | const Context *context, |
9a435388 | 822 | struct iovec_wrapper *iovw, |
3c171f0b | 823 | int input_fd) { |
34c10968 | 824 | |
c546154a | 825 | _cleanup_(json_variant_unrefp) JsonVariant *json_metadata = NULL; |
254d1313 | 826 | _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF; |
9a435388 | 827 | _cleanup_free_ char *filename = NULL, *coredump_data = NULL; |
51d3783d | 828 | _cleanup_free_ char *stacktrace = NULL; |
9a435388 | 829 | char *core_message; |
c546154a | 830 | const char *module_name; |
587f2a5e | 831 | uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX; |
f46c706b | 832 | bool truncated = false; |
c546154a | 833 | JsonVariant *module_json; |
3c171f0b | 834 | int r; |
83844031 | 835 | |
3c171f0b | 836 | assert(context); |
9a435388 | 837 | assert(iovw); |
3c171f0b | 838 | assert(input_fd >= 0); |
f5e04665 | 839 | |
3c171f0b LP |
840 | /* Vacuum before we write anything again */ |
841 | (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); | |
803a3464 | 842 | |
3c171f0b | 843 | /* Always stream the coredump to disk, if that's possible */ |
0cd4e913 | 844 | r = save_external_coredump(context, input_fd, |
587f2a5e LB |
845 | &filename, &coredump_node_fd, &coredump_fd, |
846 | &coredump_size, &coredump_compressed_size, &truncated); | |
3c171f0b LP |
847 | if (r < 0) |
848 | /* Skip whole core dumping part */ | |
849 | goto log; | |
850 | ||
51d3783d FB |
851 | /* If we don't want to keep the coredump on disk, remove it now, as later on we |
852 | * will lack the privileges for it. However, we keep the fd to it, so that we can | |
853 | * still process it and log it. */ | |
587f2a5e | 854 | r = maybe_remove_external_coredump(filename, coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size); |
3c171f0b LP |
855 | if (r < 0) |
856 | return r; | |
633c3e8a | 857 | if (r == 0) |
2a3bebd0 | 858 | (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename); |
633c3e8a | 859 | else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) |
5206a724 | 860 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
587f2a5e | 861 | coredump_node_fd >= 0 ? coredump_compressed_size : coredump_size, arg_external_size_max); |
f5e04665 | 862 | |
3c171f0b LP |
863 | /* Vacuum again, but exclude the coredump we just created */ |
864 | (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); | |
8c9571d0 | 865 | |
51d3783d FB |
866 | /* Now, let's drop privileges to become the user who owns the segfaulted process |
867 | * and allocate the coredump memory under the user's uid. This also ensures that | |
868 | * the credentials journald will see are the ones of the coredumping user, thus | |
869 | * making sure the user gets access to the core dump. Let's also get rid of all | |
3c171f0b LP |
870 | * capabilities, if we run as root, we won't need them anymore. */ |
871 | r = change_uid_gid(context); | |
872 | if (r < 0) | |
873 | return log_error_errno(r, "Failed to drop privileges: %m"); | |
34c10968 | 874 | |
5238e957 | 875 | /* Try to get a stack trace if we can */ |
c790632c | 876 | if (coredump_size > arg_process_size_max) |
51d3783d FB |
877 | log_debug("Not generating stack trace: core size %"PRIu64" is greater " |
878 | "than %"PRIu64" (the configured maximum)", | |
6e9ef603 | 879 | coredump_size, arg_process_size_max); |
c790632c ZJS |
880 | else if (coredump_fd >= 0) { |
881 | bool skip = startswith(context->meta[META_COMM], "systemd-coredum"); /* COMM is 16 bytes usually */ | |
882 | ||
61aea456 LB |
883 | (void) parse_elf_object(coredump_fd, |
884 | context->meta[META_EXE], | |
c790632c | 885 | /* fork_disable_dump= */ skip, /* avoid loops */ |
61aea456 LB |
886 | &stacktrace, |
887 | &json_metadata); | |
c790632c | 888 | } |
51d3783d | 889 | |
3c171f0b | 890 | log: |
f46c706b FB |
891 | core_message = strjoina("Process ", context->meta[META_ARGV_PID], |
892 | " (", context->meta[META_COMM], ") of user ", | |
893 | context->meta[META_ARGV_UID], " dumped core.", | |
894 | context->is_journald && filename ? "\nCoredump diverted to " : NULL, | |
895 | context->is_journald && filename ? filename : NULL); | |
51d3783d | 896 | |
9a435388 | 897 | core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace); |
92e92d71 | 898 | |
5edf875b DDM |
899 | if (context->is_journald) |
900 | /* We might not be able to log to the journal, so let's always print the message to another | |
901 | * log target. The target was set previously to something safe. */ | |
9a435388 | 902 | log_dispatch(LOG_ERR, 0, core_message); |
92e92d71 | 903 | |
2a3bebd0 | 904 | (void) iovw_put_string_field(iovw, "MESSAGE=", core_message); |
3c171f0b | 905 | |
0cd4e913 | 906 | if (truncated) |
2a3bebd0 | 907 | (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1"); |
0cd4e913 | 908 | |
c546154a LB |
909 | /* If we managed to parse any ELF metadata (build-id, ELF package meta), |
910 | * attach it as journal metadata. */ | |
911 | if (json_metadata) { | |
912 | _cleanup_free_ char *formatted_json = NULL; | |
913 | ||
914 | r = json_variant_format(json_metadata, 0, &formatted_json); | |
915 | if (r < 0) | |
916 | return log_error_errno(r, "Failed to format JSON package metadata: %m"); | |
917 | ||
671769c9 | 918 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_JSON=", formatted_json); |
c546154a LB |
919 | } |
920 | ||
c790632c ZJS |
921 | /* In the unlikely scenario that context->meta[META_EXE] is not available, |
922 | * let's avoid guessing the module name and skip the loop. */ | |
923 | if (context->meta[META_EXE]) | |
924 | JSON_VARIANT_OBJECT_FOREACH(module_name, module_json, json_metadata) { | |
925 | JsonVariant *t; | |
c546154a | 926 | |
c790632c ZJS |
927 | /* We only add structured fields for the 'main' ELF module, and only if we can identify it. */ |
928 | if (!path_equal_filename(module_name, context->meta[META_EXE])) | |
929 | continue; | |
c546154a | 930 | |
c790632c ZJS |
931 | t = json_variant_by_key(module_json, "name"); |
932 | if (t) | |
933 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_NAME=", json_variant_string(t)); | |
1f2abb79 | 934 | |
c790632c ZJS |
935 | t = json_variant_by_key(module_json, "version"); |
936 | if (t) | |
937 | (void) iovw_put_string_field(iovw, "COREDUMP_PACKAGE_VERSION=", json_variant_string(t)); | |
938 | } | |
c546154a | 939 | |
3c171f0b | 940 | /* Optionally store the entire coredump in the journal */ |
587f2a5e | 941 | if (arg_storage == COREDUMP_STORAGE_JOURNAL && coredump_fd >= 0) { |
6e9ef603 ZJS |
942 | if (coredump_size <= arg_journal_size_max) { |
943 | size_t sz = 0; | |
944 | ||
945 | /* Store the coredump itself in the journal */ | |
946 | ||
947 | r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); | |
9a435388 FB |
948 | if (r >= 0) { |
949 | if (iovw_put(iovw, coredump_data, sz) >= 0) | |
950 | TAKE_PTR(coredump_data); | |
951 | } else | |
6e9ef603 ZJS |
952 | log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); |
953 | } else | |
5206a724 | 954 | log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)", |
6e9ef603 | 955 | coredump_size, arg_journal_size_max); |
f5e04665 LP |
956 | } |
957 | ||
5edf875b DDM |
958 | /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the |
959 | * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write | |
960 | * the coredump to the socket. */ | |
961 | ||
962 | if (context->is_journald) { | |
963 | r = journal_fd_nonblock(true); | |
964 | if (r < 0) | |
965 | return log_error_errno(r, "Failed to make journal socket non-blocking: %m"); | |
966 | } | |
967 | ||
9a435388 | 968 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
5edf875b DDM |
969 | |
970 | if (context->is_journald) { | |
971 | int k; | |
972 | ||
973 | k = journal_fd_nonblock(false); | |
974 | if (k < 0) | |
975 | return log_error_errno(k, "Failed to make journal socket blocking: %m"); | |
976 | } | |
977 | ||
978 | if (r == -EAGAIN && context->is_journald) | |
979 | log_warning_errno(r, "Failed to log journal coredump, ignoring: %m"); | |
980 | else if (r < 0) | |
3c171f0b LP |
981 | return log_error_errno(r, "Failed to log coredump: %m"); |
982 | ||
983 | return 0; | |
984 | } | |
985 | ||
f46c706b | 986 | static int save_context(Context *context, const struct iovec_wrapper *iovw) { |
f46c706b FB |
987 | const char *unit; |
988 | int r; | |
3c171f0b | 989 | |
3c171f0b | 990 | assert(context); |
f46c706b FB |
991 | assert(iovw); |
992 | assert(iovw->count >= _META_ARGV_MAX); | |
3c171f0b | 993 | |
f46c706b | 994 | /* The context does not allocate any memory on its own */ |
3c171f0b | 995 | |
fe96c0f8 | 996 | for (size_t n = 0; n < iovw->count; n++) { |
f46c706b | 997 | struct iovec *iovec = iovw->iovec + n; |
92e92d71 | 998 | |
fe96c0f8 | 999 | for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) { |
f46c706b FB |
1000 | /* Note that these strings are NUL terminated, because we made sure that a |
1001 | * trailing NUL byte is in the buffer, though not included in the iov_len | |
1002 | * count (see process_socket() and gather_pid_metadata_*()) */ | |
1003 | assert(((char*) iovec->iov_base)[iovec->iov_len] == 0); | |
3c171f0b | 1004 | |
3e4d0f6c | 1005 | const char *p = startswith(iovec->iov_base, meta_field_names[i]); |
f46c706b FB |
1006 | if (p) { |
1007 | context->meta[i] = p; | |
3e4d0f6c | 1008 | context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]); |
f46c706b FB |
1009 | break; |
1010 | } | |
1011 | } | |
3c171f0b | 1012 | } |
f46c706b FB |
1013 | |
1014 | if (!context->meta[META_ARGV_PID]) | |
1015 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1016 | "Failed to find the PID of crashing process"); | |
1017 | ||
1018 | r = parse_pid(context->meta[META_ARGV_PID], &context->pid); | |
1019 | if (r < 0) | |
1020 | return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); | |
1021 | ||
9764bca9 NR |
1022 | r = parse_uid(context->meta[META_ARGV_UID], &context->uid); |
1023 | if (r < 0) | |
1024 | return log_error_errno(r, "Failed to parse UID \"%s\": %m", context->meta[META_ARGV_UID]); | |
1025 | ||
1026 | r = parse_gid(context->meta[META_ARGV_GID], &context->gid); | |
1027 | if (r < 0) | |
1028 | return log_error_errno(r, "Failed to parse GID \"%s\": %m", context->meta[META_ARGV_GID]); | |
1029 | ||
f46c706b FB |
1030 | unit = context->meta[META_UNIT]; |
1031 | context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); | |
1032 | context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); | |
1033 | ||
1034 | return 0; | |
3c171f0b LP |
1035 | } |
1036 | ||
1037 | static int process_socket(int fd) { | |
254d1313 | 1038 | _cleanup_close_ int input_fd = -EBADF; |
f46c706b | 1039 | Context context = {}; |
9a435388 FB |
1040 | struct iovec_wrapper iovw = {}; |
1041 | struct iovec iovec; | |
fe96c0f8 | 1042 | int r; |
3c171f0b LP |
1043 | |
1044 | assert(fd >= 0); | |
1045 | ||
d2acb93d | 1046 | log_setup(); |
3c171f0b | 1047 | |
988e89ee ZJS |
1048 | log_debug("Processing coredump received on stdin..."); |
1049 | ||
3c171f0b | 1050 | for (;;) { |
fb29cdbe | 1051 | CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control; |
3c171f0b LP |
1052 | struct msghdr mh = { |
1053 | .msg_control = &control, | |
1054 | .msg_controllen = sizeof(control), | |
1055 | .msg_iovlen = 1, | |
1056 | }; | |
1057 | ssize_t n; | |
fe1ef0f8 | 1058 | ssize_t l; |
3c171f0b | 1059 | |
fe1ef0f8 EV |
1060 | l = next_datagram_size_fd(fd); |
1061 | if (l < 0) { | |
1062 | r = log_error_errno(l, "Failed to determine datagram size to read: %m"); | |
3c171f0b LP |
1063 | goto finish; |
1064 | } | |
1065 | ||
9a435388 FB |
1066 | iovec.iov_len = l; |
1067 | iovec.iov_base = malloc(l + 1); | |
1068 | if (!iovec.iov_base) { | |
3c171f0b LP |
1069 | r = log_oom(); |
1070 | goto finish; | |
1071 | } | |
1072 | ||
9a435388 | 1073 | mh.msg_iov = &iovec; |
3c171f0b | 1074 | |
3691bcf3 | 1075 | n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC); |
3c171f0b | 1076 | if (n < 0) { |
9a435388 | 1077 | free(iovec.iov_base); |
3691bcf3 | 1078 | r = log_error_errno(n, "Failed to receive datagram: %m"); |
3c171f0b LP |
1079 | goto finish; |
1080 | } | |
1081 | ||
9a435388 FB |
1082 | /* The final zero-length datagram carries the file descriptor and tells us |
1083 | * that we're done. */ | |
3c171f0b | 1084 | if (n == 0) { |
dac556fa | 1085 | struct cmsghdr *found; |
3c171f0b | 1086 | |
9a435388 | 1087 | free(iovec.iov_base); |
3c171f0b | 1088 | |
dac556fa | 1089 | found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); |
3c171f0b | 1090 | if (!found) { |
3691bcf3 LP |
1091 | cmsg_close_all(&mh); |
1092 | r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG), | |
1093 | "Coredump file descriptor missing."); | |
3c171f0b LP |
1094 | goto finish; |
1095 | } | |
1096 | ||
f8540bde | 1097 | assert(input_fd < 0); |
b1d02191 | 1098 | input_fd = *CMSG_TYPED_DATA(found, int); |
3c171f0b | 1099 | break; |
3691bcf3 LP |
1100 | } else |
1101 | cmsg_close_all(&mh); | |
3c171f0b LP |
1102 | |
1103 | /* Add trailing NUL byte, in case these are strings */ | |
9a435388 FB |
1104 | ((char*) iovec.iov_base)[n] = 0; |
1105 | iovec.iov_len = (size_t) n; | |
3c171f0b | 1106 | |
9a435388 FB |
1107 | r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len); |
1108 | if (r < 0) | |
1109 | goto finish; | |
34c10968 LP |
1110 | } |
1111 | ||
61233823 | 1112 | /* Make sure we got all data we really need */ |
f8540bde | 1113 | assert(input_fd >= 0); |
3c171f0b | 1114 | |
f46c706b FB |
1115 | r = save_context(&context, &iovw); |
1116 | if (r < 0) | |
1117 | goto finish; | |
1118 | ||
1119 | /* Make sure we received at least all fields we need. */ | |
fe96c0f8 | 1120 | for (int i = 0; i < _META_MANDATORY_MAX; i++) |
f46c706b FB |
1121 | if (!context.meta[i]) { |
1122 | r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1123 | "A mandatory argument (%i) has not been sent, aborting.", | |
1124 | i); | |
1125 | goto finish; | |
1126 | } | |
80002f66 | 1127 | |
f46c706b | 1128 | r = submit_coredump(&context, &iovw, input_fd); |
3c171f0b LP |
1129 | |
1130 | finish: | |
9a435388 | 1131 | iovw_free_contents(&iovw, true); |
3c171f0b LP |
1132 | return r; |
1133 | } | |
1134 | ||
9a435388 | 1135 | static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { |
254d1313 | 1136 | _cleanup_close_ int fd = -EBADF; |
3c171f0b LP |
1137 | int r; |
1138 | ||
9a435388 | 1139 | assert(iovw); |
3c171f0b LP |
1140 | assert(input_fd >= 0); |
1141 | ||
1142 | fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0); | |
1143 | if (fd < 0) | |
1144 | return log_error_errno(errno, "Failed to create coredump socket: %m"); | |
1145 | ||
1861986a LP |
1146 | r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/coredump"); |
1147 | if (r < 0) | |
1148 | return log_error_errno(r, "Failed to connect to coredump service: %m"); | |
3c171f0b | 1149 | |
fe96c0f8 | 1150 | for (size_t i = 0; i < iovw->count; i++) { |
fec603eb | 1151 | struct msghdr mh = { |
9a435388 | 1152 | .msg_iov = iovw->iovec + i, |
fec603eb LP |
1153 | .msg_iovlen = 1, |
1154 | }; | |
1155 | struct iovec copy[2]; | |
1156 | ||
1157 | for (;;) { | |
1158 | if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) | |
1159 | break; | |
1160 | ||
1161 | if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { | |
f46c706b FB |
1162 | /* This field didn't fit? That's a pity. Given that this is |
1163 | * just metadata, let's truncate the field at half, and try | |
1164 | * again. We append three dots, in order to show that this is | |
1165 | * truncated. */ | |
fec603eb LP |
1166 | |
1167 | if (mh.msg_iov != copy) { | |
f46c706b FB |
1168 | /* We don't want to modify the caller's iovec, hence |
1169 | * let's create our own array, consisting of two new | |
1170 | * iovecs, where the first is a (truncated) copy of | |
1171 | * what we want to send, and the second one contains | |
1172 | * the trailing dots. */ | |
9a435388 | 1173 | copy[0] = iovw->iovec[i]; |
ed0cb346 | 1174 | copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3); |
fec603eb LP |
1175 | |
1176 | mh.msg_iov = copy; | |
1177 | mh.msg_iovlen = 2; | |
1178 | } | |
1179 | ||
1180 | copy[0].iov_len /= 2; /* halve it, and try again */ | |
1181 | continue; | |
1182 | } | |
3c171f0b | 1183 | |
3c171f0b | 1184 | return log_error_errno(errno, "Failed to send coredump datagram: %m"); |
fec603eb | 1185 | } |
1eef15b1 ZJS |
1186 | } |
1187 | ||
3c171f0b LP |
1188 | r = send_one_fd(fd, input_fd, 0); |
1189 | if (r < 0) | |
1190 | return log_error_errno(r, "Failed to send coredump fd: %m"); | |
1eef15b1 | 1191 | |
3c171f0b LP |
1192 | return 0; |
1193 | } | |
1eef15b1 | 1194 | |
64a5384f LP |
1195 | static int gather_pid_metadata_from_argv( |
1196 | struct iovec_wrapper *iovw, | |
1197 | Context *context, | |
1198 | int argc, char **argv) { | |
1199 | ||
f46c706b | 1200 | _cleanup_free_ char *free_timestamp = NULL; |
fe96c0f8 | 1201 | int r, signo; |
3c171f0b | 1202 | char *t; |
3c171f0b | 1203 | |
e6aa443f LP |
1204 | assert(iovw); |
1205 | assert(context); | |
1206 | ||
f46c706b FB |
1207 | /* We gather all metadata that were passed via argv[] into an array of iovecs that |
1208 | * we'll forward to the socket unit */ | |
3c171f0b | 1209 | |
f46c706b FB |
1210 | if (argc < _META_ARGV_MAX) |
1211 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
1212 | "Not enough arguments passed by the kernel (%i, expected %i).", | |
1213 | argc, _META_ARGV_MAX); | |
3c171f0b | 1214 | |
fe96c0f8 | 1215 | for (int i = 0; i < _META_ARGV_MAX; i++) { |
3c171f0b | 1216 | |
f46c706b | 1217 | t = argv[i]; |
3c171f0b | 1218 | |
f46c706b | 1219 | switch (i) { |
64a5384f | 1220 | |
f46c706b FB |
1221 | case META_ARGV_TIMESTAMP: |
1222 | /* The journal fields contain the timestamp padded with six | |
1223 | * zeroes, so that the kernel-supplied 1s granularity timestamps | |
e503019b | 1224 | * becomes 1μs granularity, i.e. the granularity systemd usually |
f46c706b FB |
1225 | * operates in. */ |
1226 | t = free_timestamp = strjoin(argv[i], "000000"); | |
1227 | if (!t) | |
1228 | return log_oom(); | |
1229 | break; | |
64a5384f | 1230 | |
f46c706b FB |
1231 | case META_ARGV_SIGNAL: |
1232 | /* For signal, record its pretty name too */ | |
1233 | if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo)) | |
2a3bebd0 FB |
1234 | (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG", |
1235 | signal_to_string(signo)); | |
f46c706b | 1236 | break; |
64a5384f | 1237 | |
f46c706b FB |
1238 | default: |
1239 | break; | |
c8091d92 LP |
1240 | } |
1241 | ||
f46c706b FB |
1242 | r = iovw_put_string_field(iovw, meta_field_names[i], t); |
1243 | if (r < 0) | |
1244 | return r; | |
8c8549db | 1245 | } |
803a3464 | 1246 | |
f46c706b FB |
1247 | /* Cache some of the process metadata we collected so far and that we'll need to |
1248 | * access soon */ | |
1249 | return save_context(context, iovw); | |
1250 | } | |
3c171f0b | 1251 | |
db9ac801 | 1252 | static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *context) { |
f46c706b FB |
1253 | uid_t owner_uid; |
1254 | pid_t pid; | |
1255 | char *t; | |
3e4d0f6c | 1256 | size_t size; |
f46c706b FB |
1257 | const char *p; |
1258 | int r; | |
f5e04665 | 1259 | |
e6aa443f LP |
1260 | assert(iovw); |
1261 | assert(context); | |
1262 | ||
f46c706b FB |
1263 | /* Note that if we fail on oom later on, we do not roll-back changes to the iovec |
1264 | * structure. (It remains valid, with the first iovec fields initialized.) */ | |
f5e04665 | 1265 | |
f46c706b | 1266 | pid = context->pid; |
f5e04665 | 1267 | |
f46c706b FB |
1268 | /* The following is mandatory */ |
1269 | r = get_process_comm(pid, &t); | |
9a435388 | 1270 | if (r < 0) |
f46c706b | 1271 | return log_error_errno(r, "Failed to get COMM: %m"); |
f5e04665 | 1272 | |
f46c706b | 1273 | r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t); |
9a435388 FB |
1274 | if (r < 0) |
1275 | return r; | |
f45b8015 | 1276 | |
c790632c | 1277 | /* The following are optional, but we use them if present. */ |
2a3bebd0 FB |
1278 | r = get_process_exe(pid, &t); |
1279 | if (r >= 0) | |
1280 | r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t); | |
1281 | if (r < 0) | |
f46c706b | 1282 | log_warning_errno(r, "Failed to get EXE, ignoring: %m"); |
bdfd7b2c | 1283 | |
f46c706b | 1284 | if (cg_pid_get_unit(pid, &t) >= 0) |
2a3bebd0 | 1285 | (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t); |
f5e04665 | 1286 | |
f46c706b | 1287 | if (cg_pid_get_user_unit(pid, &t) >= 0) |
2a3bebd0 | 1288 | (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t); |
f46c706b | 1289 | |
9aa82023 | 1290 | if (sd_pid_get_session(pid, &t) >= 0) |
9a435388 | 1291 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t); |
f5e04665 | 1292 | |
a035f819 | 1293 | if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) { |
9a435388 | 1294 | r = asprintf(&t, UID_FMT, owner_uid); |
7de80bfe | 1295 | if (r > 0) |
9a435388 | 1296 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t); |
f5e04665 LP |
1297 | } |
1298 | ||
9aa82023 | 1299 | if (sd_pid_get_slice(pid, &t) >= 0) |
2a3bebd0 | 1300 | (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t); |
f5e04665 | 1301 | |
5dd55303 | 1302 | if (get_process_cmdline(pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE_POSIX, &t) >= 0) |
2a3bebd0 | 1303 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t); |
a035f819 | 1304 | |
9aa82023 | 1305 | if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) |
2a3bebd0 | 1306 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t); |
a035f819 | 1307 | |
9aa82023 | 1308 | if (compose_open_fds(pid, &t) >= 0) |
2a3bebd0 | 1309 | (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t); |
3f132692 JF |
1310 | |
1311 | p = procfs_file_alloca(pid, "status"); | |
627055ce | 1312 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1313 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t); |
3f132692 JF |
1314 | |
1315 | p = procfs_file_alloca(pid, "maps"); | |
627055ce | 1316 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1317 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t); |
3f132692 JF |
1318 | |
1319 | p = procfs_file_alloca(pid, "limits"); | |
627055ce | 1320 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1321 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t); |
3f132692 JF |
1322 | |
1323 | p = procfs_file_alloca(pid, "cgroup"); | |
3e4d0f6c | 1324 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1325 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t); |
3f132692 | 1326 | |
d7032b1f | 1327 | p = procfs_file_alloca(pid, "mountinfo"); |
3e4d0f6c | 1328 | if (read_full_virtual_file(p, &t, NULL) >= 0) |
2a3bebd0 | 1329 | (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t); |
d7032b1f | 1330 | |
3e4d0f6c ZJS |
1331 | /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */ |
1332 | p = procfs_file_alloca(pid, "auxv"); | |
1333 | if (read_full_virtual_file(p, &t, &size) >= 0) { | |
1334 | char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1); | |
1335 | if (buf) { | |
1336 | /* Add a dummy terminator to make save_context() happy. */ | |
1337 | *((uint8_t*) mempcpy(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size)) = '\0'; | |
1338 | (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV=")); | |
1339 | } | |
1340 | ||
1341 | free(t); | |
1342 | } | |
1343 | ||
9aa82023 | 1344 | if (get_process_cwd(pid, &t) >= 0) |
2a3bebd0 | 1345 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t); |
3f132692 JF |
1346 | |
1347 | if (get_process_root(pid, &t) >= 0) { | |
9aa82023 ZJS |
1348 | bool proc_self_root_is_slash; |
1349 | ||
1350 | proc_self_root_is_slash = strcmp(t, "/") == 0; | |
3f132692 | 1351 | |
2a3bebd0 | 1352 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t); |
7ed03ce6 JF |
1353 | |
1354 | /* If the process' root is "/", then there is a chance it has | |
1355 | * mounted own root and hence being containerized. */ | |
9aa82023 | 1356 | if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) |
2a3bebd0 | 1357 | (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t); |
3f132692 JF |
1358 | } |
1359 | ||
9aa82023 | 1360 | if (get_process_environ(pid, &t) >= 0) |
2a3bebd0 | 1361 | (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t); |
9aa82023 | 1362 | |
f46c706b FB |
1363 | /* we successfully acquired all metadata */ |
1364 | return save_context(context, iovw); | |
9aa82023 | 1365 | } |
3f132692 | 1366 | |
9aa82023 | 1367 | static int process_kernel(int argc, char* argv[]) { |
6257e2fb | 1368 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; |
f46c706b | 1369 | Context context = {}; |
9aa82023 ZJS |
1370 | int r; |
1371 | ||
1f9d2a81 DDM |
1372 | /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds |
1373 | * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to | |
1374 | * /dev/null. */ | |
5bb1d7fb | 1375 | r = rearrange_stdio(STDIN_FILENO, -EBADF, -EBADF); |
1f9d2a81 DDM |
1376 | if (r < 0) |
1377 | return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m"); | |
1378 | ||
988e89ee ZJS |
1379 | log_debug("Processing coredump received from the kernel..."); |
1380 | ||
9a435388 FB |
1381 | iovw = iovw_new(); |
1382 | if (!iovw) | |
1383 | return log_oom(); | |
1384 | ||
2a3bebd0 FB |
1385 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR); |
1386 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1387 | |
1388 | /* Collect all process metadata passed by the kernel through argv[] */ | |
1389 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1); | |
92e92d71 | 1390 | if (r < 0) |
6257e2fb | 1391 | return r; |
86562420 | 1392 | |
f46c706b | 1393 | /* Collect the rest of the process metadata retrieved from the runtime */ |
db9ac801 | 1394 | r = gather_pid_metadata_from_procfs(iovw, &context); |
f46c706b | 1395 | if (r < 0) |
6257e2fb | 1396 | return r; |
f46c706b | 1397 | |
1e344c1d | 1398 | if (!context.is_journald) |
f46c706b | 1399 | /* OK, now we know it's not the journal, hence we can make use of it now. */ |
1e344c1d | 1400 | log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG); |
f46c706b FB |
1401 | |
1402 | /* If this is PID 1 disable coredump collection, we'll unlikely be able to process | |
1403 | * it later on. | |
1404 | * | |
1405 | * FIXME: maybe we should disable coredumps generation from the beginning and | |
1406 | * re-enable it only when we know it's either safe (ie we're not running OOM) or | |
1407 | * it's not pid1 ? */ | |
1408 | if (context.is_pid1) { | |
1409 | log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); | |
1410 | disable_coredumps(); | |
1411 | } | |
34c10968 | 1412 | |
f46c706b | 1413 | if (context.is_journald || context.is_pid1) |
6257e2fb | 1414 | return submit_coredump(&context, iovw, STDIN_FILENO); |
9aa82023 | 1415 | |
6257e2fb | 1416 | return send_iovec(iovw, STDIN_FILENO); |
3c171f0b | 1417 | } |
34c10968 | 1418 | |
988e89ee | 1419 | static int process_backtrace(int argc, char *argv[]) { |
3a19fe46 YW |
1420 | _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO); |
1421 | _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; | |
f46c706b | 1422 | Context context = {}; |
9a435388 | 1423 | char *message; |
988e89ee ZJS |
1424 | int r; |
1425 | ||
1426 | log_debug("Processing backtrace on stdin..."); | |
1427 | ||
9a435388 FB |
1428 | iovw = iovw_new(); |
1429 | if (!iovw) | |
5b45a160 ZJS |
1430 | return log_oom(); |
1431 | ||
2a3bebd0 FB |
1432 | (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR); |
1433 | (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); | |
f46c706b FB |
1434 | |
1435 | /* Collect all process metadata from argv[] by making sure to skip the | |
1436 | * '--backtrace' option */ | |
1437 | r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2); | |
988e89ee | 1438 | if (r < 0) |
3a19fe46 | 1439 | return r; |
aaeb2522 | 1440 | |
f46c706b | 1441 | /* Collect the rest of the process metadata retrieved from the runtime */ |
db9ac801 | 1442 | r = gather_pid_metadata_from_procfs(iovw, &context); |
f46c706b | 1443 | if (r < 0) |
3a19fe46 | 1444 | return r; |
988e89ee | 1445 | |
86562420 | 1446 | for (;;) { |
5b45a160 | 1447 | r = journal_importer_process_data(&importer); |
3a19fe46 YW |
1448 | if (r < 0) |
1449 | return log_error_errno(r, "Failed to parse journal entry on stdin: %m"); | |
d74dc4f2 ZJS |
1450 | if (r == 1 || /* complete entry */ |
1451 | journal_importer_eof(&importer)) /* end of data */ | |
5b45a160 | 1452 | break; |
988e89ee | 1453 | } |
988e89ee | 1454 | |
5b45a160 ZJS |
1455 | if (journal_importer_eof(&importer)) { |
1456 | log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter"); | |
988e89ee | 1457 | |
f46c706b FB |
1458 | message = strjoina("Process ", context.meta[META_ARGV_PID], |
1459 | " (", context.meta[META_COMM], ")" | |
1460 | " of user ", context.meta[META_ARGV_UID], | |
1461 | " failed with ", context.meta[META_ARGV_SIGNAL]); | |
9a435388 FB |
1462 | |
1463 | r = iovw_put_string_field(iovw, "MESSAGE=", message); | |
1464 | if (r < 0) | |
3a19fe46 | 1465 | return r; |
5b45a160 | 1466 | } else { |
3a19fe46 YW |
1467 | /* The imported iovecs are not supposed to be freed by us so let's copy and merge them at the |
1468 | * end of the array. */ | |
1469 | r = iovw_append(iovw, &importer.iovw); | |
1470 | if (r < 0) | |
1471 | return r; | |
9a435388 | 1472 | } |
988e89ee | 1473 | |
9a435388 | 1474 | r = sd_journal_sendv(iovw->iovec, iovw->count); |
988e89ee | 1475 | if (r < 0) |
3a19fe46 | 1476 | return log_error_errno(r, "Failed to log backtrace: %m"); |
988e89ee | 1477 | |
3a19fe46 | 1478 | return 0; |
988e89ee ZJS |
1479 | } |
1480 | ||
4515a95e | 1481 | static int run(int argc, char *argv[]) { |
3c171f0b | 1482 | int r; |
fee80f69 | 1483 | |
9aa82023 ZJS |
1484 | /* First, log to a safe place, since we don't know what crashed and it might |
1485 | * be journald which we'd rather not log to then. */ | |
8d4e028f | 1486 | |
1e344c1d | 1487 | log_set_target_and_open(LOG_TARGET_KMSG); |
8d4e028f | 1488 | |
3c171f0b LP |
1489 | /* Make sure we never enter a loop */ |
1490 | (void) prctl(PR_SET_DUMPABLE, 0); | |
8d4e028f | 1491 | |
3c171f0b LP |
1492 | /* Ignore all parse errors */ |
1493 | (void) parse_config(); | |
fee80f69 | 1494 | |
3c171f0b LP |
1495 | log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage)); |
1496 | log_debug("Selected compression %s.", yes_no(arg_compress)); | |
fee80f69 | 1497 | |
3c171f0b | 1498 | r = sd_listen_fds(false); |
4515a95e ZJS |
1499 | if (r < 0) |
1500 | return log_error_errno(r, "Failed to determine the number of file descriptors: %m"); | |
fee80f69 | 1501 | |
9aa82023 ZJS |
1502 | /* If we got an fd passed, we are running in coredumpd mode. Otherwise we |
1503 | * are invoked from the kernel as coredump handler. */ | |
988e89ee ZJS |
1504 | if (r == 0) { |
1505 | if (streq_ptr(argv[1], "--backtrace")) | |
4515a95e | 1506 | return process_backtrace(argc, argv); |
988e89ee | 1507 | else |
4515a95e | 1508 | return process_kernel(argc, argv); |
988e89ee | 1509 | } else if (r == 1) |
4515a95e | 1510 | return process_socket(SD_LISTEN_FDS_START); |
f5e04665 | 1511 | |
baaa35ad ZJS |
1512 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
1513 | "Received unexpected number of file descriptors."); | |
f5e04665 | 1514 | } |
4515a95e ZJS |
1515 | |
1516 | DEFINE_MAIN_FUNCTION(run); |