From 8f350e637c0e8001398602a6b2f536de3905787d Mon Sep 17 00:00:00 2001
From: Christian Brauner
Date: Mon, 18 Feb 2019 13:32:26 +0100
Subject: [PATCH] rexec: handle legacy kernels

Signed-off-by: Christian Brauner
---
Reviewer notes (this area is ignored by git-am and is not part of the
commit message):

  * lxc_make_tmpfile() now creates the file with mkostemp(..., O_CLOEXEC).
  * fd_to_fd(from, to) is new: it reads from `from` until EOF and writes
    every chunk completely to `to`. Returns 0 on success, -1 on a read or
    write error.
  * close_prot_errno_disarm(fd) closes fd when it is >= 0, preserves errno
    across the close() and resets fd to -EBADF. It expands to a bare
    `if { }` block, so only use it as a standalone statement (never as the
    unbraced body of an if/else).
  * is_memfd(): when F_GET_SEALS fails, fall back to fstat() and report
    whether st_nlink == 0.
  * lxc_rexec_as_memfd(): when memfd_create() fails, create a temporary
    file below P_tmpdir (mode 0700), copy /proc/self/exe into it with
    fd_to_fd(), reopen it through /proc/self/fd/<n> with O_PATH and
    fexecve() that descriptor. On the memfd path, sendfile is now looped
    until st_size bytes have been copied before the seals are applied.
  * This copy differs from the original commit in one added line of
    is_memfd(): the leftover debug statement
    `fprintf(stderr, "AAAAA: %ld\n", ...)` was replaced by a comment.
    Hunk line counts are unchanged.
  * The two bare `#include` context lines in the memory_utils.h hunk lost
    their header names before this copy was made; restore them from the
    target tree before applying.

 src/lxc/file_utils.c   | 32 +++++++++++++++++-
 src/lxc/file_utils.h   |  1 +
 src/lxc/memory_utils.h | 16 ++++++---
 src/lxc/rexec.c        | 77 ++++++++++++++++++++++++++++++++++++------
 4 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c
index 930fd738a..c057cb5d2 100644
--- a/src/lxc/file_utils.c
+++ b/src/lxc/file_utils.c
@@ -222,7 +222,7 @@ int lxc_make_tmpfile(char *template, bool rm)
 	mode_t msk;
 
 	msk = umask(0022);
-	fd = mkstemp(template);
+	fd = mkostemp(template, O_CLOEXEC);
 	umask(msk);
 	if (fd < 0)
 		return -1;
@@ -366,3 +366,33 @@ on_error:
 
 	return NULL;
 }
+
+int fd_to_fd(int from, int to)
+{
+	for (;;) {
+		uint8_t buf[PATH_MAX];
+		uint8_t *p = buf;
+		ssize_t bytes_to_write;
+		ssize_t bytes_read;
+
+		bytes_read = lxc_read_nointr(from, buf, sizeof buf);
+		if (bytes_read < 0)
+			return -1;
+		if (bytes_read == 0)
+			break;
+
+		bytes_to_write = (size_t)bytes_read;
+		do {
+			ssize_t bytes_written;
+
+			bytes_written = lxc_write_nointr(to, p, bytes_to_write);
+			if (bytes_written < 0)
+				return -1;
+
+			bytes_to_write -= bytes_written;
+			p += bytes_written;
+		} while (bytes_to_write > 0);
+	}
+
+	return 0;
+}
diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h
index fc2b7d8c1..cc8f69e18 100644
--- a/src/lxc/file_utils.h
+++ b/src/lxc/file_utils.h
@@ -57,5 +57,6 @@ extern FILE *fopen_cloexec(const char *path, const char *mode);
 extern ssize_t lxc_sendfile_nointr(int out_fd, int in_fd, off_t *offset,
 				   size_t count);
 extern char *file_to_buf(char *path, size_t *length);
+extern int fd_to_fd(int from, int to);
 
 #endif /* __LXC_FILE_UTILS_H */
diff --git a/src/lxc/memory_utils.h b/src/lxc/memory_utils.h
index fdcfb5530..c1dafb441 100644
--- a/src/lxc/memory_utils.h
+++ b/src/lxc/memory_utils.h
@@ -28,6 +28,8 @@
 #include
 #include
 
+#include "macro.h"
+
 static inline void __auto_free__(void *p)
 {
 	free(*(void **)p);
@@ -45,13 +47,17 @@ static inline void __auto_closedir__(DIR **d)
 		closedir(*d);
 }
 
+#define close_prot_errno_disarm(fd) \
+	if (fd >= 0) {              \
+		int _e_ = errno;    \
+		close(fd);          \
+		errno = _e_;        \
+		fd = -EBADF;        \
+	}
+
 static inline void __auto_close__(int *fd)
 {
-	if (*fd >= 0) {
-		int e = errno;
-		close(*fd);
-		errno = e;
-	}
+	close_prot_errno_disarm(*fd);
 }
 
 #define __do_close_prot_errno __attribute__((__cleanup__(__auto_close__)))
diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c
index 0589b4a78..3b714a1fb 100644
--- a/src/lxc/rexec.c
+++ b/src/lxc/rexec.c
@@ -84,42 +84,97 @@ static int parse_argv(char ***argv)
 static int is_memfd(void)
 {
 	__do_close_prot_errno int fd = -EBADF;
-	int saved_errno, seals;
+	int seals;
 
 	fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
 	if (fd < 0)
 		return -ENOTRECOVERABLE;
 
 	seals = fcntl(fd, F_GET_SEALS);
-	if (seals < 0)
+	if (seals < 0) {
+		struct stat s = {0};
+
+		if (fstat(fd, &s) == 0) {
+			/* No seals: an unlinked exe has nlink == 0. */
+			return (s.st_nlink == 0);
+		}
+
 		return -EINVAL;
+	}
 
 	return seals == LXC_MEMFD_REXEC_SEALS;
 }
 
 static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name)
 {
-	__do_close_prot_errno int fd = -EBADF, memfd = -EBADF;
-	int saved_errno;
-	ssize_t bytes_sent;
+	__do_close_prot_errno int fd = -EBADF, memfd = -EBADF, tmpfd = -EBADF;
+	int ret;
 
 	memfd = memfd_create(memfd_name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
-	if (memfd < 0)
-		return;
+	if (memfd < 0) {
+		char template[PATH_MAX];
+
+		ret = snprintf(template, sizeof(template),
+			       P_tmpdir "/.%s_XXXXXX", memfd_name);
+		if (ret < 0 || (size_t)ret >= sizeof(template))
+			return;
+
+		tmpfd = lxc_make_tmpfile(template, true);
+		if (tmpfd < 0)
+			return;
+
+		ret = fchmod(tmpfd, 0700);
+		if (ret)
+			return;
+	}
 
 	fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
 	if (fd < 0)
 		return;
 
 	/* sendfile() handles up to 2GB. */
-	bytes_sent = lxc_sendfile_nointr(memfd, fd, NULL, LXC_SENDFILE_MAX);
-	if (bytes_sent < 0)
+	if (memfd >= 0) {
+		ssize_t bytes_sent = 0;
+		struct stat st = {0};
+
+		ret = fstat(fd, &st);
+		if (ret)
+			return;
+
+		while (bytes_sent < st.st_size) {
+			ssize_t sent;
+			sent = lxc_sendfile_nointr(memfd, fd, NULL,
+						   st.st_size - bytes_sent);
+			if (sent < 0)
+				return;
+			bytes_sent += sent;
+		}
+	} else if (fd_to_fd(fd, tmpfd)) {
 		return;
+	}
 
-	if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS))
+	close_prot_errno_disarm(fd);
+
+	if (memfd >= 0 && fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS))
 		return;
 
-	fexecve(memfd, argv, envp);
+	if (memfd >= 0) {
+		fexecve(memfd, argv, envp);
+	} else {
+		__do_close_prot_errno int execfd = -EBADF;
+		char procfd[LXC_PROC_PID_FD_LEN];
+
+		ret = snprintf(procfd, sizeof(procfd), "/proc/self/fd/%d", tmpfd);
+		if (ret < 0 || (size_t)ret >= sizeof(procfd))
+			return;
+
+		execfd = open(procfd, O_PATH | O_CLOEXEC);
+		close_prot_errno_disarm(tmpfd);
+		if (execfd < 0)
+			return;
+
+		fexecve(execfd, argv, envp);
+	}
 }
 
 /*
-- 
2.47.2