#include <stdio.h>
#include <sys/prctl.h>
#include <sys/statvfs.h>
+#include <sys/auxv.h>
#include <sys/xattr.h>
#include <unistd.h>
#include "compress.h"
#include "conf-parser.h"
#include "copy.h"
+#include "coredump-util.h"
#include "coredump-vacuum.h"
#include "dirent-util.h"
#include "elf-util.h"
#include "fs-util.h"
#include "io-util.h"
#include "journal-importer.h"
+#include "journal-send.h"
#include "log.h"
#include "macro.h"
#include "main-func.h"
#include "memory-util.h"
+#include "memstream-util.h"
#include "mkdir-label.h"
#include "parse-util.h"
#include "process-util.h"
META_EXE = _META_MANDATORY_MAX,
META_UNIT,
+ META_PROC_AUXV,
_META_MAX
};
static const char * const meta_field_names[_META_MAX] = {
- [META_ARGV_PID] = "COREDUMP_PID=",
- [META_ARGV_UID] = "COREDUMP_UID=",
- [META_ARGV_GID] = "COREDUMP_GID=",
- [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=",
- [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
- [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
- [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
- [META_COMM] = "COREDUMP_COMM=",
- [META_EXE] = "COREDUMP_EXE=",
- [META_UNIT] = "COREDUMP_UNIT=",
+ [META_ARGV_PID] = "COREDUMP_PID=",
+ [META_ARGV_UID] = "COREDUMP_UID=",
+ [META_ARGV_GID] = "COREDUMP_GID=",
+ [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=",
+ [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
+ [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
+ [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
+ [META_COMM] = "COREDUMP_COMM=",
+ [META_EXE] = "COREDUMP_EXE=",
+ [META_UNIT] = "COREDUMP_UNIT=",
+ [META_PROC_AUXV] = "COREDUMP_PROC_AUXV=",
};
typedef struct Context {
const char *meta[_META_MAX];
+ size_t meta_size[_META_MAX];
pid_t pid;
bool is_pid1;
bool is_journald;
} CoredumpStorage;
static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
- [COREDUMP_STORAGE_NONE] = "none",
+ [COREDUMP_STORAGE_NONE] = "none",
[COREDUMP_STORAGE_EXTERNAL] = "external",
- [COREDUMP_STORAGE_JOURNAL] = "journal",
+ [COREDUMP_STORAGE_JOURNAL] = "journal",
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
static int parse_config(void) {
static const ConfigTableItem items[] = {
- { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
- { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
- { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
- { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
- { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
- { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
- { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
+ { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
+ { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
+ { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
+ { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
+ { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
+ { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
+ { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
{}
};
- return config_parse_many_nulstr(
- PKGSYSCONFDIR "/coredump.conf",
- CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
- "Coredump\0",
- config_item_table_lookup, items,
- CONFIG_PARSE_WARN,
- NULL,
- NULL);
+ return config_parse_config_file("coredump.conf", "Coredump\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
}
static uint64_t storage_size_max(void) {
return 0;
}
-static int fix_acl(int fd, uid_t uid) {
+static int fix_acl(int fd, uid_t uid, bool allow_user) {
+ assert(fd >= 0);
+ assert(uid_is_valid(uid));
#if HAVE_ACL
int r;
- assert(fd >= 0);
- assert(uid_is_valid(uid));
+ /* We don't allow users to read coredumps if the uid or capabilities were changed. */
+ if (!allow_user)
+ return 0;
if (uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
return 0;
static int fix_xattr(int fd, const Context *context) {
static const char * const xattrs[_META_MAX] = {
- [META_ARGV_PID] = "user.coredump.pid",
- [META_ARGV_UID] = "user.coredump.uid",
- [META_ARGV_GID] = "user.coredump.gid",
- [META_ARGV_SIGNAL] = "user.coredump.signal",
- [META_ARGV_TIMESTAMP] = "user.coredump.timestamp",
- [META_ARGV_RLIMIT] = "user.coredump.rlimit",
- [META_ARGV_HOSTNAME] = "user.coredump.hostname",
- [META_COMM] = "user.coredump.comm",
- [META_EXE] = "user.coredump.exe",
+ [META_ARGV_PID] = "user.coredump.pid",
+ [META_ARGV_UID] = "user.coredump.uid",
+ [META_ARGV_GID] = "user.coredump.gid",
+ [META_ARGV_SIGNAL] = "user.coredump.signal",
+ [META_ARGV_TIMESTAMP] = "user.coredump.timestamp",
+ [META_ARGV_RLIMIT] = "user.coredump.rlimit",
+ [META_ARGV_HOSTNAME] = "user.coredump.hostname",
+ [META_COMM] = "user.coredump.comm",
+ [META_EXE] = "user.coredump.exe",
};
int r = 0;
#define filename_escape(s) xescape((s), "./ ")
static const char *coredump_tmpfile_name(const char *s) {
- return s ? s : "(unnamed temporary file)";
+ return s ?: "(unnamed temporary file)";
}
static int fix_permissions(
const char *filename,
const char *target,
const Context *context,
- uid_t uid) {
+ uid_t uid,
+ bool allow_user) {
int r;
/* Ignore errors on these */
(void) fchmod(fd, 0640);
- (void) fix_acl(fd, uid);
+ (void) fix_acl(fd, uid, allow_user);
(void) fix_xattr(fd, context);
r = fsync_full(fd);
if (r < 0)
return log_error_errno(r, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename));
- r = link_tmpfile(fd, filename, target);
+ r = link_tmpfile(fd, filename, target, /* replace= */ false);
if (r < 0)
return log_error_errno(r, "Failed to move coredump %s into place: %m", target);
return 0;
}
+static int grant_user_access(int core_fd, const Context *context) {
+ int at_secure = -1;
+ uid_t uid = UID_INVALID, euid = UID_INVALID;
+ uid_t gid = GID_INVALID, egid = GID_INVALID;
+ int r;
+
+ assert(core_fd >= 0);
+ assert(context);
+
+ if (!context->meta[META_PROC_AUXV])
+ return log_warning_errno(SYNTHETIC_ERRNO(ENODATA), "No auxv data, not adjusting permissions.");
+
+ uint8_t elf[EI_NIDENT];
+ errno = 0;
+ if (pread(core_fd, &elf, sizeof(elf), 0) != sizeof(elf))
+ return log_warning_errno(errno_or_else(EIO),
+ "Failed to pread from coredump fd: %s", STRERROR_OR_EOF(errno));
+
+ if (elf[EI_MAG0] != ELFMAG0 ||
+ elf[EI_MAG1] != ELFMAG1 ||
+ elf[EI_MAG2] != ELFMAG2 ||
+ elf[EI_MAG3] != ELFMAG3 ||
+ elf[EI_VERSION] != EV_CURRENT)
+ return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Core file does not have ELF header, not adjusting permissions.");
+ if (!IN_SET(elf[EI_CLASS], ELFCLASS32, ELFCLASS64) ||
+ !IN_SET(elf[EI_DATA], ELFDATA2LSB, ELFDATA2MSB))
+ return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Core file has strange ELF class, not adjusting permissions.");
+
+ if ((elf[EI_DATA] == ELFDATA2LSB) != (__BYTE_ORDER == __LITTLE_ENDIAN))
+ return log_info_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Core file has non-native endianness, not adjusting permissions.");
+
+ r = parse_auxv(LOG_WARNING,
+ /* elf_class= */ elf[EI_CLASS],
+ context->meta[META_PROC_AUXV],
+ context->meta_size[META_PROC_AUXV],
+ &at_secure, &uid, &euid, &gid, &egid);
+ if (r < 0)
+ return r;
+
+ /* We allow access if we got all the data and at_secure is not set and
+ * the uid/gid matches euid/egid. */
+ bool ret =
+ at_secure == 0 &&
+ uid != UID_INVALID && euid != UID_INVALID && uid == euid &&
+ gid != GID_INVALID && egid != GID_INVALID && gid == egid;
+ log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)",
+ ret ? "permit" : "restrict",
+ uid, euid, gid, egid, yes_no(at_secure));
+ return ret;
+}
+
static int save_external_coredump(
const Context *context,
int input_fd,
_cleanup_(unlink_and_freep) char *tmp = NULL;
_cleanup_free_ char *fn = NULL;
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
uint64_t rlimit, process_limit, max_size;
bool truncated, storage_on_tmpfs;
struct stat st;
/* tmpfs might get full quickly, so check the available space too.
* But don't worry about errors here, failing to access the storage
* location will be better logged when writing to it. */
- if (statvfs("/var/lib/systemd/coredump/", &sv) >= 0)
+ if (fstatvfs(fd, &sv) >= 0)
max_size = MIN((uint64_t)sv.f_frsize * (uint64_t)sv.f_bfree, max_size);
log_debug("Limiting core file size to %" PRIu64 " bytes due to cgroup memory limits.", max_size);
context->meta[META_ARGV_PID], context->meta[META_COMM]);
truncated = r == 1;
+ bool allow_user = grant_user_access(fd, context) > 0;
+
#if HAVE_COMPRESSION
if (arg_compress) {
_cleanup_(unlink_and_freep) char *tmp_compressed = NULL;
_cleanup_free_ char *fn_compressed = NULL;
- _cleanup_close_ int fd_compressed = -1;
+ _cleanup_close_ int fd_compressed = -EBADF;
uint64_t uncompressed_size = 0;
if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
uncompressed_size += partial_uncompressed_size;
}
- r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
+ r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid, allow_user);
if (r < 0)
return r;
"SIZE_LIMIT=%"PRIu64, max_size,
"MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
- r = fix_permissions(fd, tmp, fn, context, uid);
+ r = fix_permissions(fd, tmp, fn, context, uid, allow_user);
if (r < 0)
return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn);
* flags: 0100002
* EOF
*/
-static int compose_open_fds(pid_t pid, char **open_fds) {
+static int compose_open_fds(pid_t pid, char **ret) {
+ _cleanup_(memstream_done) MemStream m = {};
_cleanup_closedir_ DIR *proc_fd_dir = NULL;
- _cleanup_close_ int proc_fdinfo_fd = -1;
- _cleanup_free_ char *buffer = NULL;
- _cleanup_fclose_ FILE *stream = NULL;
+ _cleanup_close_ int proc_fdinfo_fd = -EBADF;
const char *fddelim = "", *path;
- size_t size = 0;
+ FILE *stream;
int r;
assert(pid >= 0);
- assert(open_fds != NULL);
+ assert(ret);
path = procfs_file_alloca(pid, "fd");
proc_fd_dir = opendir(path);
if (proc_fdinfo_fd < 0)
return -errno;
- stream = open_memstream_unlocked(&buffer, &size);
+ stream = memstream_init(&m);
if (!stream)
return -ENOMEM;
FOREACH_DIRENT(de, proc_fd_dir, return -errno) {
_cleanup_fclose_ FILE *fdinfo = NULL;
_cleanup_free_ char *fdname = NULL;
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
r = readlinkat_malloc(dirfd(proc_fd_dir), de->d_name, &fdname);
if (r < 0)
}
}
- errno = 0;
- stream = safe_fclose(stream);
-
- if (errno > 0)
- return -errno;
-
- *open_fds = TAKE_PTR(buffer);
-
- return 0;
+ return memstream_finalize(&m, ret, NULL);
}
static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) {
const char *p;
struct stat stbuf;
- _cleanup_close_ int proc_ns_dir_fd = -1;
+ _cleanup_close_ int proc_ns_dir_fd = -EBADF;
p = procfs_file_alloca(pid, "ns");
}
static int submit_coredump(
- Context *context,
+ const Context *context,
struct iovec_wrapper *iovw,
int input_fd) {
_cleanup_(json_variant_unrefp) JsonVariant *json_metadata = NULL;
- _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
+ _cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF;
_cleanup_free_ char *filename = NULL, *coredump_data = NULL;
_cleanup_free_ char *stacktrace = NULL;
char *core_message;
core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace);
- if (context->is_journald) {
- /* We cannot log to the journal, so just print the message.
- * The target was set previously to something safe. */
+ if (context->is_journald)
+ /* We might not be able to log to the journal, so let's always print the message to another
+ * log target. The target was set previously to something safe. */
log_dispatch(LOG_ERR, 0, core_message);
- return 0;
- }
(void) iovw_put_string_field(iovw, "MESSAGE=", core_message);
coredump_size, arg_journal_size_max);
}
+ /* If journald is coredumping, we have to be careful that we don't deadlock when trying to write the
+ * coredump to the journal, so we put the journal socket in nonblocking mode before trying to write
+ * the coredump to the socket. */
+
+ if (context->is_journald) {
+ r = journal_fd_nonblock(true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make journal socket non-blocking: %m");
+ }
+
r = sd_journal_sendv(iovw->iovec, iovw->count);
- if (r < 0)
+
+ if (context->is_journald) {
+ int k;
+
+ k = journal_fd_nonblock(false);
+ if (k < 0)
+ return log_error_errno(k, "Failed to make journal socket blocking: %m");
+ }
+
+ if (r == -EAGAIN && context->is_journald)
+ log_warning_errno(r, "Failed to log journal coredump, ignoring: %m");
+ else if (r < 0)
return log_error_errno(r, "Failed to log coredump: %m");
return 0;
}
static int save_context(Context *context, const struct iovec_wrapper *iovw) {
- unsigned count = 0;
const char *unit;
int r;
struct iovec *iovec = iovw->iovec + n;
for (size_t i = 0; i < ELEMENTSOF(meta_field_names); i++) {
- char *p;
-
/* Note that these strings are NUL terminated, because we made sure that a
* trailing NUL byte is in the buffer, though not included in the iov_len
* count (see process_socket() and gather_pid_metadata_*()) */
assert(((char*) iovec->iov_base)[iovec->iov_len] == 0);
- p = startswith(iovec->iov_base, meta_field_names[i]);
+ const char *p = startswith(iovec->iov_base, meta_field_names[i]);
if (p) {
context->meta[i] = p;
- count++;
+ context->meta_size[i] = iovec->iov_len - strlen(meta_field_names[i]);
break;
}
}
}
static int process_socket(int fd) {
- _cleanup_close_ int input_fd = -1;
+ _cleanup_close_ int input_fd = -EBADF;
Context context = {};
struct iovec_wrapper iovw = {};
struct iovec iovec;
}
assert(input_fd < 0);
- input_fd = *(int*) CMSG_DATA(found);
+ input_fd = *CMSG_TYPED_DATA(found, int);
break;
} else
cmsg_close_all(&mh);
}
static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
- _cleanup_close_ int fd = -1;
+ _cleanup_close_ int fd = -EBADF;
int r;
assert(iovw);
uid_t owner_uid;
pid_t pid;
char *t;
+ size_t size;
const char *p;
int r;
(void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t);
p = procfs_file_alloca(pid, "cgroup");
- if (read_full_virtual_file(p, &t, NULL) >=0)
+ if (read_full_virtual_file(p, &t, NULL) >= 0)
(void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t);
p = procfs_file_alloca(pid, "mountinfo");
- if (read_full_virtual_file(p, &t, NULL) >=0)
+ if (read_full_virtual_file(p, &t, NULL) >= 0)
(void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t);
+ /* We attach /proc/auxv here. ELF coredumps also contain a note for this (NT_AUXV), see elf(5). */
+ p = procfs_file_alloca(pid, "auxv");
+ if (read_full_virtual_file(p, &t, &size) >= 0) {
+ char *buf = malloc(strlen("COREDUMP_PROC_AUXV=") + size + 1);
+ if (buf) {
+ /* Add a dummy terminator to make save_context() happy. */
+ *((uint8_t*) mempcpy(stpcpy(buf, "COREDUMP_PROC_AUXV="), t, size)) = '\0';
+ (void) iovw_consume(iovw, buf, size + strlen("COREDUMP_PROC_AUXV="));
+ }
+
+ free(t);
+ }
+
if (get_process_cwd(pid, &t) >= 0)
(void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t);
struct iovec_wrapper *iovw;
int r;
+ /* When we're invoked by the kernel, stdout/stderr are closed which is dangerous because the fds
+ * could get reallocated. To avoid hard to debug issues, let's instead bind stdout/stderr to
+ * /dev/null. */
+ r = rearrange_stdio(STDIN_FILENO, -EBADF, -EBADF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect stdout/stderr to /dev/null: %m");
+
log_debug("Processing coredump received from the kernel...");
iovw = iovw_new();
if (r < 0)
goto finish;
- if (!context.is_journald) {
+ if (!context.is_journald)
/* OK, now we know it's not the journal, hence we can make use of it now. */
- log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
- log_open();
- }
+ log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG);
/* If this is PID 1 disable coredump collection, we'll unlikely be able to process
* it later on.
/* First, log to a safe place, since we don't know what crashed and it might
* be journald which we'd rather not log to then. */
- log_set_target(LOG_TARGET_KMSG);
- log_open();
+ log_set_target_and_open(LOG_TARGET_KMSG);
/* Make sure we never enter a loop */
(void) prctl(PR_SET_DUMPABLE, 0);