From a99ab266110795ed94a9cb4d2765ddad9c4310da Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Sep 2019 11:59:45 -0400 Subject: [PATCH] ls: use statx instead of stat when available statx allows ls to indicate interest in only certain inode metadata. This is potentially a win on networked/clustered/distributed file systems. In cases where we'd have to do a full, heavyweight stat() call we can now do a much lighter statx() call. As a real-world example, consider a file system like CephFS where one client is actively writing to a file and another client does an ls --color in the same directory. --color means that we need to fetch the mode of the file. Doing that with a stat() call means that we have to fetch the size and mtime in addition to the mode. The MDS in that situation will have to revoke caps in order to ensure that it has up-to-date values to report, which disrupts the writer. This has a measurable affect on performance. I ran a fio sequential write test on one cephfs client and had a second client do "ls --color" in a tight loop on the directory that held the file: Baseline -- no activity on the second client: WRITE: bw=76.7MiB/s (80.4MB/s), 76.7MiB/s-76.7MiB/s (80.4MB/s-80.4MB/s), io=4600MiB (4824MB), run=60016-60016msec Without this patch series, we see a noticable performance hit: WRITE: bw=70.4MiB/s (73.9MB/s), 70.4MiB/s-70.4MiB/s (73.9MB/s-73.9MB/s), io=4228MiB (4433MB), run=60012-60012msec With this patch series, we gain most of that ground back: WRITE: bw=75.9MiB/s (79.6MB/s), 75.9MiB/s-75.9MiB/s (79.6MB/s-79.6MB/s), io=4555MiB (4776MB), run=60019-60019msec * src/stat.c: move statx to stat struct conversion to new header... * src/statx.h: ...here. * src/ls.c: Add wrapper functions for stat/lstat/fstat calls, and add variants for when we are only interested in specific info. Add statx-enabled functions and set the request mask based on the output format and what values are needed. * NEWS: Mention the Improvement. --- NEWS | 10 ++-- src/local.mk | 1 + src/ls.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/stat.c | 32 +----------- src/statx.h | 52 ++++++++++++++++++ 5 files changed, 199 insertions(+), 41 deletions(-) create mode 100644 src/statx.h diff --git a/NEWS b/NEWS index 19c097f308..fe38e80d49 100644 --- a/NEWS +++ b/NEWS @@ -55,10 +55,14 @@ GNU coreutils NEWS -*- outline -*- od --skip-bytes now can use lseek even if the input is not a regular file, greatly improving performance in some cases. - stat(1) now uses the statx() system call where available, which can + stat(1) supports a new --cached= option, used on systems with statx(2) + to control cache coherency of file system attributes, + useful on network file systems. + +** Improvements + + stat and ls now use the statx() system call where available, which can operate more efficiently by only retrieving requested attributes. - stat(1) also supports a new --cached= option to control cache - coherency of file system attributes, useful on network file systems. * Noteworthy changes in release 8.31 (2019-03-10) [stable] diff --git a/src/local.mk b/src/local.mk index 763c8a01c6..48dad1da9a 100644 --- a/src/local.mk +++ b/src/local.mk @@ -58,6 +58,7 @@ noinst_HEADERS = \ src/prog-fprintf.h \ src/remove.h \ src/set-fields.h \ + src/statx.h \ src/system.h \ src/uname.h diff --git a/src/ls.c b/src/ls.c index 120ce153e3..034087f264 100644 --- a/src/ls.c +++ b/src/ls.c @@ -114,6 +114,7 @@ #include "xgethostname.h" #include "c-ctype.h" #include "canonicalize.h" +#include "statx.h" /* Include last to avoid a clash of include guards with some premature versions of libcap. @@ -1063,6 +1064,136 @@ dired_dump_obstack (const char *prefix, struct obstack *os) } } +#if HAVE_STATX && defined STATX_INO +static unsigned int _GL_ATTRIBUTE_PURE +time_type_to_statx (void) +{ + switch (time_type) + { + case time_ctime: + return STATX_CTIME; + case time_mtime: + return STATX_MTIME; + case time_atime: + return STATX_ATIME; + default: + abort (); + } + return 0; +} + +static unsigned int _GL_ATTRIBUTE_PURE +calc_req_mask (void) +{ + unsigned int mask = STATX_MODE; + + if (print_inode) + mask |= STATX_INO; + + if (print_block_size) + mask |= STATX_BLOCKS; + + if (format == long_format) { + mask |= STATX_NLINK | STATX_SIZE | time_type_to_statx (); + if (print_owner || print_author) + mask |= STATX_UID; + if (print_group) + mask |= STATX_GID; + } + + switch (sort_type) + { + case sort_none: + case sort_name: + case sort_version: + case sort_extension: + break; + case sort_time: + mask |= time_type_to_statx (); + break; + case sort_size: + mask |= STATX_SIZE; + break; + default: + abort (); + } + + return mask; +} + +static int +do_statx (int fd, const char *name, struct stat *st, int flags, + unsigned int mask) +{ + struct statx stx; + int ret = statx (fd, name, flags, mask, &stx); + if (ret >= 0) + statx_to_stat (&stx, st); + return ret; +} + +static inline int +do_stat (const char *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, calc_req_mask ()); +} + +static inline int +do_lstat (const char *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, AT_SYMLINK_NOFOLLOW, calc_req_mask ()); +} + +static inline int +stat_for_mode (const char *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, STATX_MODE); +} + +/* dev+ino should be static, so no need to sync with backing store */ +static inline int +stat_for_ino (const char *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, STATX_INO); +} + +static inline int +fstat_for_ino (int fd, struct stat *st) +{ + return do_statx (fd, "", st, AT_EMPTY_PATH, STATX_INO); +} +#else +static inline int +do_stat (const char *name, struct stat *st) +{ + return stat (name, st); +} + +static inline int +do_lstat (const char *name, struct stat *st) +{ + return lstat (name, st); +} + +static inline int +stat_for_mode (const char *name, struct stat *st) +{ + return stat (name, st); +} + +static inline int +stat_for_ino (const char *name, struct stat *st) +{ + return stat (name, st); +} + +static inline int +fstat_for_ino (int fd, struct stat *st) +{ + return fstat (fd, st); +} +#endif + /* Return the address of the first plain %b spec in FMT, or NULL if there is no such spec. %5b etc. do not match, so that user widths/flags are honored. */ @@ -2737,10 +2868,10 @@ print_dir (char const *name, char const *realname, bool command_line_arg) struct stat dir_stat; int fd = dirfd (dirp); - /* If dirfd failed, endure the overhead of using stat. */ + /* If dirfd failed, endure the overhead of stat'ing by path */ if ((0 <= fd - ? fstat (fd, &dir_stat) - : stat (name, &dir_stat)) < 0) + ? fstat_for_ino (fd, &dir_stat) + : stat_for_ino (name, &dir_stat)) < 0) { file_failure (command_line_arg, _("cannot determine device and inode of %s"), name); @@ -3202,7 +3333,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, switch (dereference) { case DEREF_ALWAYS: - err = stat (full_name, &f->stat); + err = do_stat (full_name, &f->stat); do_deref = true; break; @@ -3211,7 +3342,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, if (command_line_arg) { bool need_lstat; - err = stat (full_name, &f->stat); + err = do_stat (full_name, &f->stat); do_deref = true; if (dereference == DEREF_COMMAND_LINE_ARGUMENTS) @@ -3231,7 +3362,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, FALLTHROUGH; default: /* DEREF_NEVER */ - err = lstat (full_name, &f->stat); + err = do_lstat (full_name, &f->stat); do_deref = false; break; } @@ -3320,7 +3451,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, they won't be traced and when no indicator is needed. */ if (linkname && (file_type <= indicator_style || check_symlink_mode) - && stat (linkname, &linkstats) == 0) + && stat_for_mode (linkname, &linkstats) == 0) { f->linkok = true; f->linkmode = linkstats.st_mode; diff --git a/src/stat.c b/src/stat.c index ee68f1682b..f2bf0dcb79 100644 --- a/src/stat.c +++ b/src/stat.c @@ -73,6 +73,7 @@ #include "strftime.h" #include "find-mount-point.h" #include "xvasprintf.h" +#include "statx.h" #if HAVE_STATX && defined STATX_INO # define USE_STATX 1 @@ -1245,37 +1246,6 @@ static bool dont_sync; static bool force_sync; #if USE_STATX -/* Much of the format printing requires a struct stat or timespec */ -static struct timespec -statx_timestamp_to_timespec (struct statx_timestamp tsx) -{ - struct timespec ts; - - ts.tv_sec = tsx.tv_sec; - ts.tv_nsec = tsx.tv_nsec; - return ts; -} - -static void -statx_to_stat (struct statx *stx, struct stat *stat) -{ - stat->st_dev = makedev (stx->stx_dev_major, stx->stx_dev_minor); - stat->st_ino = stx->stx_ino; - stat->st_mode = stx->stx_mode; - stat->st_nlink = stx->stx_nlink; - stat->st_uid = stx->stx_uid; - stat->st_gid = stx->stx_gid; - stat->st_rdev = makedev (stx->stx_rdev_major, stx->stx_rdev_minor); - stat->st_size = stx->stx_size; - stat->st_blksize = stx->stx_blksize; -/* define to avoid sc_prohibit_stat_st_blocks. */ -# define SC_ST_BLOCKS st_blocks - stat->SC_ST_BLOCKS = stx->stx_blocks; - stat->st_atim = statx_timestamp_to_timespec (stx->stx_atime); - stat->st_mtim = statx_timestamp_to_timespec (stx->stx_mtime); - stat->st_ctim = statx_timestamp_to_timespec (stx->stx_ctime); -} - static unsigned int fmt_to_mask (char fmt) { diff --git a/src/statx.h b/src/statx.h new file mode 100644 index 0000000000..19f3e18486 --- /dev/null +++ b/src/statx.h @@ -0,0 +1,52 @@ +/* statx -> stat conversion functions for coreutils + Copyright (C) 2019 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef COREUTILS_STATX_H +# define COREUTILS_STATX_H + +# if HAVE_STATX && defined STATX_INO +/* Much of the format printing requires a struct stat or timespec */ +static inline struct timespec +statx_timestamp_to_timespec (struct statx_timestamp tsx) +{ + struct timespec ts; + + ts.tv_sec = tsx.tv_sec; + ts.tv_nsec = tsx.tv_nsec; + return ts; +} + +static inline void +statx_to_stat (struct statx *stx, struct stat *stat) +{ + stat->st_dev = makedev (stx->stx_dev_major, stx->stx_dev_minor); + stat->st_ino = stx->stx_ino; + stat->st_mode = stx->stx_mode; + stat->st_nlink = stx->stx_nlink; + stat->st_uid = stx->stx_uid; + stat->st_gid = stx->stx_gid; + stat->st_rdev = makedev (stx->stx_rdev_major, stx->stx_rdev_minor); + stat->st_size = stx->stx_size; + stat->st_blksize = stx->stx_blksize; +/* define to avoid sc_prohibit_stat_st_blocks. */ +# define SC_ST_BLOCKS st_blocks + stat->SC_ST_BLOCKS = stx->stx_blocks; + stat->st_atim = statx_timestamp_to_timespec (stx->stx_atime); + stat->st_mtim = statx_timestamp_to_timespec (stx->stx_mtime); + stat->st_ctim = statx_timestamp_to_timespec (stx->stx_ctime); +} +# endif /* HAVE_STATX && defined STATX_INO */ +#endif /* COREUTILS_STATX_H */ -- 2.47.2