From: Michal Sekletar Date: Tue, 9 Jul 2024 13:21:34 +0000 (+0200) Subject: coredump: generate properly symbolized stacktrace for containerized processes X-Git-Tag: v257-rc1~726 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68511cebe58977ea68ae4f57c6462e979efd1cff;p=thirdparty%2Fsystemd.git coredump: generate properly symbolized stacktrace for containerized processes --- diff --git a/man/coredump.conf.xml b/man/coredump.conf.xml index 2eedf942c02..65772d84d7e 100644 --- a/man/coredump.conf.xml +++ b/man/coredump.conf.xml @@ -109,6 +109,21 @@ + + AccessContainer= + + Controls whether systemd-coredump will attempt to use the mount tree of + a process that crashed within a container. Access to the container's filesystem might be necessary to generate + a fully symbolized backtrace. If set to yes, then systemd-coredump will + obtain the mount tree from the corresponding mount namespace and will try to generate the stack trace using the + binary and libraries from the mount namespace. Note that the coredump of the containerized process might + still be saved in /var/lib/systemd/coredump/ even if AccessContainer= + is set to no. Defaults to no. 
+ + + + + ExternalSizeMax= JournalSizeMax= diff --git a/meson.build b/meson.build index c0eb6ca437e..09e7c81c597 100644 --- a/meson.build +++ b/meson.build @@ -1389,6 +1389,10 @@ conf.set10('HAVE_ELFUTILS', libdw.found()) conf.set10('HAVE_DWELF_ELF_E_MACHINE_STRING', libdw.found() and cc.has_function('dwelf_elf_e_machine_string', dependencies : libdw)) +# New in elfutils 0.192 +conf.set10('HAVE_DWFL_SET_SYSROOT', + libdw.found() and cc.has_function('dwfl_set_sysroot', dependencies : libdw)) + libz = dependency('zlib', required : get_option('zlib')) conf.set10('HAVE_ZLIB', libz.found()) diff --git a/src/analyze/analyze-inspect-elf.c b/src/analyze/analyze-inspect-elf.c index e949c9049a4..1ae73041630 100644 --- a/src/analyze/analyze-inspect-elf.c +++ b/src/analyze/analyze-inspect-elf.c @@ -19,20 +19,25 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) { STRV_FOREACH(filename, filenames) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *package_metadata = NULL; _cleanup_(table_unrefp) Table *t = NULL; - _cleanup_free_ char *abspath = NULL; + _cleanup_free_ char *abspath = NULL, *path = NULL, *stacktrace = NULL; _cleanup_close_ int fd = -EBADF; + bool coredump = false; r = path_make_absolute_cwd(*filename, &abspath); if (r < 0) return log_error_errno(r, "Could not make an absolute path out of \"%s\": %m", *filename); - path_simplify(abspath); + path = path_join(empty_to_root(arg_root), abspath); + if (!path) + return log_oom(); + + path_simplify(path); - fd = RET_NERRNO(open(abspath, O_RDONLY|O_CLOEXEC)); + fd = RET_NERRNO(open(path, O_RDONLY|O_CLOEXEC)); if (fd < 0) - return log_error_errno(fd, "Could not open \"%s\": %m", abspath); + return log_error_errno(fd, "Could not open \"%s\": %m", path); - r = parse_elf_object(fd, abspath, /* fork_disable_dump= */false, NULL, &package_metadata); + r = parse_elf_object(fd, abspath, arg_root, /* fork_disable_dump= */false, &stacktrace, &package_metadata); if (r < 0) return log_error_errno(r, 
"Parsing \"%s\" as ELF object failed: %m", abspath); @@ -60,6 +65,9 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) { * metadata is parsed recursively in core files, so there might be * multiple modules. */ if (STR_IN_SET(module_name, "elfType", "elfArchitecture")) { + if (streq(module_name, "elfType") && streq("coredump", sd_json_variant_string(module_json))) + coredump = true; + r = table_add_many( t, TABLE_FIELD, module_name, @@ -100,6 +108,16 @@ static int analyze_elf(char **filenames, sd_json_format_flags_t json_flags) { } } } + + if (coredump) { + r = table_add_many(t, + TABLE_EMPTY, TABLE_EMPTY, + TABLE_FIELD, "stacktrace", + TABLE_STRING, stacktrace); + if (r < 0) + return table_log_add_error(r); + } + if (json_flags & SD_JSON_FORMAT_OFF) { r = table_print(t, NULL); if (r < 0) diff --git a/src/analyze/analyze.c b/src/analyze/analyze.c index a7acc35f4f9..ea62fc0d99b 100644 --- a/src/analyze/analyze.c +++ b/src/analyze/analyze.c @@ -609,7 +609,7 @@ static int parse_argv(int argc, char *argv[]) { return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --security-policy= is only supported for security."); - if ((arg_root || arg_image) && (!STRPTR_IN_SET(argv[optind], "cat-config", "verify", "condition")) && + if ((arg_root || arg_image) && (!STRPTR_IN_SET(argv[optind], "cat-config", "verify", "condition", "inspect-elf")) && (!(streq_ptr(argv[optind], "security") && arg_offline))) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Options --root= and --image= are only supported for cat-config, verify, condition and security when used with --offline= right now."); diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 843a500d8c0..e751b67417b 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -39,6 +39,8 @@ #include "main-func.h" #include "memory-util.h" #include "memstream-util.h" +#include "missing_mount.h" +#include "missing_syscall.h" #include "mkdir-label.h" #include "namespace-util.h" #include 
"parse-util.h" @@ -165,16 +167,22 @@ static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX; static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX; static uint64_t arg_keep_free = UINT64_MAX; static uint64_t arg_max_use = UINT64_MAX; +static bool arg_access_container = false; static int parse_config(void) { static const ConfigTableItem items[] = { - { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, - { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, - { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, - { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, - { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, - { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, - { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, + { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage }, + { "Coredump", "Compress", config_parse_bool, 0, &arg_compress }, + { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max }, + { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max }, + { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max }, + { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free }, + { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use }, +#if HAVE_DWFL_SET_SYSROOT + { "Coredump", "AccessContainer", config_parse_bool, 0, &arg_access_container }, +#else + { "Coredump", "AccessContainer", config_parse_warn_compat, DISABLED_CONFIGURATION, 0 }, +#endif {} }; @@ -774,15 +782,44 @@ static int change_uid_gid(const Context *context) { return drop_privileges(uid, gid, 0); } +static int setup_container_mount_tree(int mount_tree_fd, char **container_root) { + _cleanup_free_ char *root = NULL; + int r; + + assert(mount_tree_fd >= 0); + assert(container_root); + + r = 
unshare(CLONE_NEWNS); + if (r < 0) + return log_warning_errno(errno, "Failed to unshare mount namespace: %m"); + + r = mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (r < 0) + return log_warning_errno(errno, "Failed to disable mount propagation: %m"); + + r = mkdtemp_malloc("/tmp/systemd-coredump-root-XXXXXX", &root); + if (r < 0) + return log_warning_errno(r, "Failed to create temporary directory: %m"); + + r = move_mount(mount_tree_fd, "", -EBADF, root, MOVE_MOUNT_F_EMPTY_PATH); + if (r < 0) + return log_warning_errno(errno, "Failed to move mount tree: %m"); + + *container_root = TAKE_PTR(root); + return 0; +} + static int submit_coredump( const Context *context, struct iovec_wrapper *iovw, - int input_fd) { + int input_fd, + int mount_tree_fd) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *json_metadata = NULL; _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF; _cleanup_free_ char *filename = NULL, *coredump_data = NULL; _cleanup_free_ char *stacktrace = NULL; + _cleanup_free_ char *root = NULL; const char *module_name; uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX; bool truncated = false, written = false; @@ -819,6 +856,12 @@ static int submit_coredump( (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); } + if (mount_tree_fd >= 0 && arg_access_container) { + r = setup_container_mount_tree(mount_tree_fd, &root); + if (r < 0) + log_warning_errno(r, "Failed to setup container mount tree, ignoring: %m"); + } + /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the * coredump memory under the user's uid. This also ensures that the credentials journald will see are * the ones of the coredumping user, thus making sure the user gets access to the core dump. 
Let's @@ -826,7 +869,6 @@ static int submit_coredump( r = change_uid_gid(context); if (r < 0) return log_error_errno(r, "Failed to drop privileges: %m"); - if (written) { /* Try to get a stack trace if we can */ if (coredump_size > arg_process_size_max) @@ -838,6 +880,7 @@ static int submit_coredump( (void) parse_elf_object(coredump_fd, context->meta[META_EXE], + root, /* fork_disable_dump= */ skip, /* avoid loops */ &stacktrace, &json_metadata); @@ -1000,10 +1043,11 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) { } static int process_socket(int fd) { - _cleanup_close_ int input_fd = -EBADF; + _cleanup_close_ int input_fd = -EBADF, mount_tree_fd = -EBADF; Context context = {}; struct iovec_wrapper iovw = {}; struct iovec iovec; + bool first = true; int r; assert(fd >= 0); @@ -1051,16 +1095,34 @@ static int process_socket(int fd) { free(iovec.iov_base); - found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); - if (!found) { - cmsg_close_all(&mh); - r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG), - "Coredump file descriptor missing."); - goto finish; + found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int) * 2)); + if (found) { + int fds[2] = EBADF_PAIR; + + memcpy(fds, CMSG_TYPED_DATA(found, int), sizeof(int) * 2); + + assert(mount_tree_fd < 0); + + /* Maybe we already got coredump FD in previous iteration? */ + safe_close(input_fd); + + input_fd = fds[0]; + mount_tree_fd = fds[1]; + + /* We have all FDs we need let's take a shortcut here. */ + break; + } else { + found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); + if (found) + input_fd = *CMSG_TYPED_DATA(found, int); + } + + /* This is the first message that carries file descriptors, maybe there will be one more that actually contains array of descriptors. 
*/ + if (first) { + first = false; + continue; } - assert(input_fd < 0); - input_fd = *CMSG_TYPED_DATA(found, int); break; } else cmsg_close_all(&mh); @@ -1090,14 +1152,14 @@ static int process_socket(int fd) { goto finish; } - r = submit_coredump(&context, &iovw, input_fd); + r = submit_coredump(&context, &iovw, input_fd, mount_tree_fd); finish: iovw_free_contents(&iovw, true); return r; } -static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { +static int send_iovec(const struct iovec_wrapper *iovw, int input_fd, int mounts_fd) { _cleanup_close_ int fd = -EBADF; int r; @@ -1154,6 +1216,12 @@ static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) { if (r < 0) return log_error_errno(r, "Failed to send coredump fd: %m"); + if (mounts_fd >= 0) { + r = send_many_fds(fd, (int[]) { input_fd, mounts_fd }, 2, 0); + if (r < 0) + return log_error_errno(r, "Failed to send coredump fds: %m"); + } + return 0; } @@ -1532,7 +1600,7 @@ static int forward_coredump_to_container(Context *context) { _exit(EXIT_FAILURE); } - r = send_iovec(iovw, STDIN_FILENO); + r = send_iovec(iovw, STDIN_FILENO, -EBADF); if (r < 0) { log_debug_errno(r, "Failed to send iovec to coredump socket: %m"); _exit(EXIT_FAILURE); @@ -1560,8 +1628,68 @@ static int forward_coredump_to_container(Context *context) { return 0; } +static int gather_pid_mount_tree_fd(const Context *context) { + _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF; + _cleanup_close_pair_ int pair[2] = EBADF_PAIR; + int fd = -EBADF, r; + pid_t child; + + assert(context); + + /* Don't bother preparing environment if we can't pass it to libdwfl. 
*/ +#if !HAVE_DWFL_SET_SYSROOT + return -EBADF; +#endif + + if (!arg_access_container) + return -EBADF; + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair) < 0) + return log_error_errno(errno, "Failed to create socket pair: %m"); + + r = namespace_open(context->pid, NULL, &mntns_fd, NULL, NULL, &root_fd); + if (r < 0) + return log_error_errno(r, "Failed to open mount namespace of crashing process: %m"); + + r = namespace_fork("(sd-mount-tree-ns)", "(sd-mount-tree)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL, -1, mntns_fd, -1, -1, root_fd, &child); + if (r < 0) + return log_error_errno(r, "Failed to fork(): %m"); + if (r == 0) { + pair[0] = safe_close(pair[0]); + + r = open_tree(-EBADF, "/", AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); + if (r < 0) { + log_error_errno(errno, "Failed to clone mount tree: %m"); + _exit(EXIT_FAILURE); + } + + r = send_one_fd(pair[1], r, 0); + if (r < 0) { + log_error_errno(r, "Failed to send mount tree to parent: %m"); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-mount-tree-ns)", child, 0); + if (r < 0) + return log_error_errno(r, "Failed to wait for child: %m"); + if (r != EXIT_SUCCESS) + return log_error_errno(SYNTHETIC_ERRNO(ECHILD), "Child died abnormally."); + + fd = receive_one_fd(pair[0], MSG_DONTWAIT); + if (fd < 0) + return log_error_errno(fd, "Failed to receive mount tree: %m"); + + return fd; +} + static int process_kernel(int argc, char* argv[]) { _cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL; + _cleanup_close_ int mount_tree_fd = -EBADF; Context context = {}; int r, signo; @@ -1607,6 +1735,12 @@ static int process_kernel(int argc, char* argv[]) { r = forward_coredump_to_container(&context); if (r >= 0) return 0; + + r = gather_pid_mount_tree_fd(&context); + if (r < 0 && r != -EBADF) + log_warning_errno(r, "Failed to access the mount tree of a container, 
ignoring: %m"); + else + mount_tree_fd = r; } /* If this is PID 1 disable coredump collection, we'll unlikely be able to process @@ -1624,9 +1758,9 @@ static int process_kernel(int argc, char* argv[]) { (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT)); if (context.is_journald || context.is_pid1) - return submit_coredump(&context, iovw, STDIN_FILENO); + return submit_coredump(&context, iovw, STDIN_FILENO, mount_tree_fd); - return send_iovec(iovw, STDIN_FILENO); + return send_iovec(iovw, STDIN_FILENO, mount_tree_fd); } static int process_backtrace(int argc, char *argv[]) { diff --git a/src/coredump/coredump.conf b/src/coredump/coredump.conf index ae341e40d72..2790bf1be64 100644 --- a/src/coredump/coredump.conf +++ b/src/coredump/coredump.conf @@ -25,3 +25,4 @@ #JournalSizeMax=767M #MaxUse= #KeepFree= +#AccessContainer=no diff --git a/src/shared/elf-util.c b/src/shared/elf-util.c index 49492330e2f..69439b61ef2 100644 --- a/src/shared/elf-util.c +++ b/src/shared/elf-util.c @@ -23,6 +23,7 @@ #include "io-util.h" #include "macro.h" #include "memstream-util.h" +#include "path-util.h" #include "process-util.h" #include "rlimit-util.h" #include "string-util.h" @@ -54,6 +55,9 @@ static DLSYM_PROTOTYPE(dwfl_begin) = NULL; static DLSYM_PROTOTYPE(dwfl_build_id_find_elf) = NULL; static DLSYM_PROTOTYPE(dwfl_core_file_attach) = NULL; static DLSYM_PROTOTYPE(dwfl_core_file_report) = NULL; +#if HAVE_DWFL_SET_SYSROOT +static DLSYM_PROTOTYPE(dwfl_set_sysroot) = NULL; +#endif static DLSYM_PROTOTYPE(dwfl_end) = NULL; static DLSYM_PROTOTYPE(dwfl_errmsg) = NULL; static DLSYM_PROTOTYPE(dwfl_errno) = NULL; @@ -114,6 +118,9 @@ int dlopen_dw(void) { DLSYM_ARG(dwfl_module_getelf), DLSYM_ARG(dwfl_begin), DLSYM_ARG(dwfl_core_file_report), +#if HAVE_DWFL_SET_SYSROOT + DLSYM_ARG(dwfl_set_sysroot), +#endif DLSYM_ARG(dwfl_report_end), DLSYM_ARG(dwfl_getmodules), DLSYM_ARG(dwfl_core_file_attach), @@ -580,7 +587,7 @@ static int module_callback(Dwfl_Module *mod, void **userdata, 
const char *name, return DWARF_CB_OK; } -static int parse_core(int fd, const char *executable, char **ret, sd_json_variant **ret_package_metadata) { +static int parse_core(int fd, const char *root, char **ret, sd_json_variant **ret_package_metadata) { const Dwfl_Callbacks callbacks = { .find_elf = sym_dwfl_build_id_find_elf, @@ -614,7 +621,17 @@ static int parse_core(int fd, const char *executable, char **ret, sd_json_varian if (!c.dwfl) return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not parse core file, dwfl_begin() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno())); - if (sym_dwfl_core_file_report(c.dwfl, c.elf, executable) < 0) + if (empty_or_root(root)) + root = NULL; +#if HAVE_DWFL_SET_SYSROOT + if (root && sym_dwfl_set_sysroot(c.dwfl, root) < 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not set root directory, dwfl_set_sysroot() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno())); +#else + if (root) + log_warning("Compiled without dwfl_set_sysroot() support, ignoring provided root directory."); +#endif + + if (sym_dwfl_core_file_report(c.dwfl, c.elf, NULL) < 0) return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Could not parse core file, dwfl_core_file_report() failed: %s", sym_dwfl_errmsg(sym_dwfl_errno())); if (sym_dwfl_report_end(c.dwfl, NULL, NULL) != 0) @@ -641,7 +658,7 @@ static int parse_core(int fd, const char *executable, char **ret, sd_json_varian return 0; } -static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant **ret_package_metadata) { +static int parse_elf(int fd, const char *executable, const char *root, char **ret, sd_json_variant **ret_package_metadata) { _cleanup_(sd_json_variant_unrefp) sd_json_variant *package_metadata = NULL, *elf_metadata = NULL; _cleanup_set_free_ Set *modules = NULL; _cleanup_(stack_context_done) StackContext c = { @@ -672,7 +689,7 @@ static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant if (elf_header.e_type == ET_CORE) { _cleanup_free_ char 
*out = NULL; - r = parse_core(fd, executable, ret ? &out : NULL, &package_metadata); + r = parse_core(fd, root, ret ? &out : NULL, &package_metadata); if (r < 0) return log_warning_errno(r, "Failed to inspect core file: %m"); @@ -743,7 +760,7 @@ static int parse_elf(int fd, const char *executable, char **ret, sd_json_variant return 0; } -int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) { +int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) { _cleanup_close_pair_ int error_pipe[2] = EBADF_PAIR, return_pipe[2] = EBADF_PAIR, json_pipe[2] = EBADF_PAIR; @@ -813,7 +830,7 @@ int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, cha goto child_fail; } - r = parse_elf(fd, executable, ret ? &buf : NULL, ret_package_metadata ? &package_metadata : NULL); + r = parse_elf(fd, executable, root, ret ? &buf : NULL, ret_package_metadata ? &package_metadata : NULL); if (r < 0) goto child_fail; diff --git a/src/shared/elf-util.h b/src/shared/elf-util.h index 13f531df9fd..1d6af581510 100644 --- a/src/shared/elf-util.h +++ b/src/shared/elf-util.h @@ -10,9 +10,9 @@ int dlopen_elf(void); /* Parse an ELF object in a forked process, so that errors while iterating over * untrusted and potentially malicious data do not propagate to the main caller's process. * If fork_disable_dump, the child process will not dump core if it crashes. 
*/ -int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata); +int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata); #else -static inline int parse_elf_object(int fd, const char *executable, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) { +static inline int parse_elf_object(int fd, const char *executable, const char *root, bool fork_disable_dump, char **ret, sd_json_variant **ret_package_metadata) { return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "elfutils disabled, parsing ELF objects not supported"); } #endif