From: Karel Zak Date: Wed, 28 Aug 2019 13:47:16 +0000 (+0200) Subject: libmount: improve mountinfo reliability X-Git-Tag: v2.35-rc1~256 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e4925f591c1bfb83719418b56b952830d15b77eb;p=thirdparty%2Futil-linux.git libmount: improve mountinfo reliability The standard way how we read mount table is not reliable because during the read() syscalls the table may be modified by some another process. The changes in the table is possible to detect by poll() event, and in this case it seems better to lseek to the begin of the file and read it again. It's expensive, but better than races... This patch does not modify mountinfo parser, but it reads all file to memory (by read()+poll()) and than it creates memory stream from the buffer and use it rather than a regular file stream. It means the parser is still possible to use for normal files (e.g. fstab) as well as for mountinfo and it's also portable to systems where for some reason is no fmemopen(). Note that re-read after poll() event is limited to 5 attempts (but successful read() without event zeroize the counter). It's because we do not want to wait for consistent mountinfo for ever. It seems better to use old (less reliable) way than hang up in read()+poll() loop. Addresses: https://github.com/systemd/systemd/issues/10872 Reported-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Karel Zak --- diff --git a/configure.ac b/configure.ac index 3bec9769b6..d4cf46fea0 100644 --- a/configure.ac +++ b/configure.ac @@ -466,6 +466,7 @@ AC_CHECK_FUNCS([ \ err \ errx \ explicit_bzero \ + fmemopen \ fsync \ utimensat \ getdomainname \ diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h index 11fc283155..6f283965c0 100644 --- a/libmount/src/mountP.h +++ b/libmount/src/mountP.h @@ -97,6 +97,7 @@ extern int mnt_valid_tagname(const char *tagname); extern int append_string(char **a, const char *b); extern const char *mnt_statfs_get_fstype(struct statfs *vfs); +extern int is_procfs_fd(int fd); extern int is_file_empty(const char *name); extern int mnt_is_readonly(const char *path) @@ -121,6 +122,7 @@ extern void mnt_free_filesystems(char **filesystems); extern char *mnt_get_kernel_cmdline_option(const char *name); extern int mnt_stat_mountpoint(const char *target, struct stat *st); +extern FILE *mnt_get_procfs_memstream(int fd, char **membuf); /* tab.c */ extern int is_mountinfo(struct libmnt_table *tb); diff --git a/libmount/src/tab_parse.c b/libmount/src/tab_parse.c index 2dfa6cbb4b..c118a59d7c 100644 --- a/libmount/src/tab_parse.c +++ b/libmount/src/tab_parse.c @@ -698,15 +698,7 @@ static int kernel_fs_postparse(struct libmnt_table *tb, return rc; } -/** - * mnt_table_parse_stream: - * @tb: tab pointer - * @f: file stream - * @filename: filename used for debug and error messages - * - * Returns: 0 on success, negative number in case of error. - */ -int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename) +static int __table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename) { int rc = -1; int flags = 0; @@ -780,6 +772,40 @@ err: return rc; } +/** + * mnt_table_parse_stream: + * @tb: tab pointer + * @f: file stream + * @filename: filename used for debug and error messages + * + * Returns: 0 on success, negative number in case of error. + */ +int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename) +{ + int fd, rc; + FILE *memf = NULL; + char *membuf = NULL; + + /* + * For /proc/#/{mountinfo,mount} we read all file to memory and use it + * as memory stream. For more details see mnt_read_procfs_file(). + */ + if ((fd = fileno(f)) >= 0 + && (tb->fmt == MNT_FMT_GUESS || + tb->fmt == MNT_FMT_MOUNTINFO || + tb->fmt == MNT_FMT_MTAB) + && is_procfs_fd(fd) + && (memf = mnt_get_procfs_memstream(fd, &membuf))) { + + rc = __table_parse_stream(tb, memf, filename); + fclose(memf); + free(membuf); + } else + rc = __table_parse_stream(tb, f, filename); + + return rc; +} + /** * mnt_table_parse_file: * @tb: tab pointer @@ -795,18 +821,49 @@ err: int mnt_table_parse_file(struct libmnt_table *tb, const char *filename) { FILE *f; - int rc; + int rc, fd = -1; if (!filename || !tb) return -EINVAL; - f = fopen(filename, "r" UL_CLOEXECSTR); + /* + * Try to use read()+poll() to realiably read all + * /proc/#/{mount,mountinfo} file to memory + */ + if (tb->fmt != MNT_FMT_SWAPS + && strncmp(filename, "/proc/", 6) == 0) { + + FILE *memf; + char *membuf = NULL; + + fd = open(filename, O_RDONLY|O_CLOEXEC); + if (fd < 0) { + rc = -errno; + goto done; + } + memf = mnt_get_procfs_memstream(fd, &membuf); + if (memf) { + rc = __table_parse_stream(tb, memf, filename); + + fclose(memf); + free(membuf); + close(fd); + goto done; + } + /* else fallback to fopen/fdopen() */ + } + + if (fd >= 0) + f = fdopen(fd, "r" UL_CLOEXECSTR); + else + f = fopen(filename, "r" UL_CLOEXECSTR); + if (f) { - rc = mnt_table_parse_stream(tb, f, filename); + rc = __table_parse_stream(tb, f, filename); fclose(f); } else rc = -errno; - +done: DBG(TAB, ul_debugobj(tb, "parsing done [filename=%s, rc=%d]", filename, rc)); return rc; } @@ -863,7 +920,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname) f = fopen_at(dd, d->d_name, O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR); if (f) { - mnt_table_parse_stream(tb, f, d->d_name); + __table_parse_stream(tb, f, d->d_name); fclose(f); } } @@ -904,7 +961,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname) f = fopen_at(dirfd(dir), d->d_name, O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR); if (f) { - mnt_table_parse_stream(tb, f, d->d_name); + __table_parse_stream(tb, f, d->d_name); fclose(f); } } diff --git a/libmount/src/utils.c b/libmount/src/utils.c index 09403de880..5b30f2ac07 100644 --- a/libmount/src/utils.c +++ b/libmount/src/utils.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "strutils.h" @@ -424,6 +425,12 @@ const char *mnt_statfs_get_fstype(struct statfs *vfs) return NULL; } +int is_procfs_fd(int fd) +{ + struct statfs sfs; + + return fstatfs(fd, &sfs) == 0 && sfs.f_type == STATFS_PROC_MAGIC; +} /** * mnt_match_fstype: @@ -1140,8 +1147,155 @@ done: return 1; } +#if defined(HAVE_FMEMOPEN) || defined(TEST_PROGRAM) + +/* + * This function tries to minimize possible races when we read + * /proc/#/{mountinfo,mount} files. + * + * The idea is to minimize number of read()s and check by poll() that during + * the read the mount table has not been modified. If yes, than re-read it + * (with some limitations to avoid never ending loop). + * + * Returns: <0 error, 0 success, 1 too many attempts + */ +static int read_procfs_file(int fd, char **buf, size_t *bufsiz) +{ + size_t bufmax = 0; + int rc = 0, tries = 0; + char *bufptr; + + assert(buf); + assert(bufsiz); + + *bufsiz = 0; + + do { + ssize_t ret; + + if (bufmax == *bufsiz) { + char *tmp; + + bufmax = bufmax ? bufmax * 2 : (16 * 1024); + + tmp = realloc(*buf, bufmax); + if (!tmp) + break; + *buf = tmp; + bufptr = tmp + *bufsiz; + } + + errno = 0; + ret = read(fd, bufptr, bufmax); + + if (ret < 0) { + /* error */ + if (errno == EAGAIN || errno == EINTR) { + xusleep(250000); + tries++; + continue; + } + break; + + } else if (ret > 0) { + /* success -- verify no event during read */ + struct pollfd fds[] = { + { .fd = fd, .events = POLLPRI } + }; + + rc = poll(fds, 1, 0); + if (rc < 0) + break; /* poll() error */ + if (rc > 0) { + /* event -- read all again */ + if (lseek(fd, 0, SEEK_SET) != 0) + break; + *bufsiz = 0; + bufptr = *buf; + tries++; + continue; + } + + /* successful read() without active poll() */ + *bufsiz += (size_t) ret; + bufptr += ret; + tries = 0; + } else { + /* end-of-file */ + goto success; + } + } while (tries <= 5); + + rc = errno ? -errno : 1; + free(*buf); + return rc; + +success: + return 0; +} + +/* + * Create FILE stream for data from read_procfs_file() + */ +FILE *mnt_get_procfs_memstream(int fd, char **membuf) +{ + FILE *memf; + size_t sz = 0; + off_t cur; + + /* in case of error, rewind to the original position */ + cur = lseek(fd, 0, SEEK_CUR); + + if (read_procfs_file(fd, membuf, &sz) == 0 + && sz > 0 + && (memf = fmemopen(*membuf, sz, "r"))) + return memf; + + /* error */ + lseek(fd, cur, SEEK_SET); + return NULL; +} +#else +FILE *mnt_get_procfs_memstream(int fd __attribute((__unused__)), + char **membuf __attribute((__unused__))) +{ + return NULL; +} +#endif /* HAVE_FMEMOPEN */ + #ifdef TEST_PROGRAM +static int test_proc_read(struct libmnt_test *ts, int argc, char *argv[]) +{ + char *buf = NULL; + char *filename = argv[1]; + size_t bufsiz = 0; + int rc = 0, fd = open(filename, O_RDONLY); + + if (fd <= 0) { + warn("%s: cannot open", filename); + return -errno; + } + + rc = read_procfs_file(fd, &buf, &bufsiz); + close(fd); + + switch (rc) { + case 0: + fwrite(buf, 1, bufsiz, stdout); + free(buf); + break; + case 1: + warnx("too many attempts"); + break; + default: + warn("%s: cannot read", filename); + break; + } + + return rc; +} + static int test_match_fstype(struct libmnt_test *ts, int argc, char *argv[]) { char *type = argv[1]; @@ -1323,6 +1477,7 @@ int main(int argc, char *argv[]) { "--guess-root", test_guess_root, "[]" }, { "--mkdir", test_mkdir, "" }, { "--statfs-type", test_statfs_type, "" }, + { "--read-procfs", test_proc_read, "" }, { NULL } };