From: Matteo Croce Date: Fri, 23 May 2025 01:59:36 +0000 (+0200) Subject: add option to force cachestat() usage X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=36965bbfb36328e80e3511a11b85b63e8a41d998;p=thirdparty%2Futil-linux.git add option to force cachestat() usage When cachestat() is not available, mmap() and mincore() are used to get cache statistics. This fallback returns less data than cachestat, and potentially hides read errors by reporting that the file is all mapped in memory: $ fincore /usr/bin/java fincore: failed to do cachestat: /usr/bin/java: Operation not permitted RES PAGES SIZE FILE 208K 13 198,1K /usr/bin/java $ echo $? 0 $ sudo fincore /usr/bin/java RES PAGES SIZE FILE 0B 0 198,1K /usr/bin/java Add a --cachestat argument which forces cachestat usage and fails if it's not available: $ fincore -C /usr/bin/java fincore: failed to do cachestat: /usr/bin/java: Operation not permitted $ echo $? 1 Also, on huge files the fallback is very slow and memory consuming, so this flag can be used to avoid doing it by mistake. --- diff --git a/bash-completion/fincore b/bash-completion/fincore index 5f073b4ec..34007e04d 100644 --- a/bash-completion/fincore +++ b/bash-completion/fincore @@ -33,6 +33,7 @@ _fincore_module() --output-all --raw --recursive + --cachestat --help --version " diff --git a/misc-utils/fincore.1.adoc b/misc-utils/fincore.1.adoc index 8e480777f..8403482d9 100644 --- a/misc-utils/fincore.1.adoc +++ b/misc-utils/fincore.1.adoc @@ -23,6 +23,9 @@ fincore - count pages of file contents in core == DESCRIPTION *fincore* counts pages of file contents being resident in memory (in core), and reports the numbers. If an error occurs during counting, then an error message is printed to the stderr and *fincore* continues processing the rest of files listed in a command line. +*fincore* uses the *cachestat*(2) syscall to count resident pages. 
If the *cachestat* syscall is not available and *cachestat* usage is not forced with the *--cachestat* option, then *fincore* uses the *mincore*(2) syscall as a fallback. +The *cachestat* syscall is more efficient than *mincore* because it does not require a page table lock to walk page tables, and also reports more information than *mincore*, like the number of cached pages, dirty pages, pages marked for writeback, evicted pages, and recently evicted pages. +Another difference between the two syscalls is that if write permissions are not granted to the file, then *cachestat* returns an error, while *mincore*, for security reasons, returns fake data as if all pages were resident in memory (cf. link:https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=134fca9063ad4851de767d1768180e5dede9a881[kernel commit "make mincore() more conservative"]). The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using *--output* _columns-list_ in environments where a stable output is required. @@ -37,6 +40,9 @@ include::man-common/in-bytes.adoc[] *-c*, *--total*:: produce a grand total. +*-C*, *--cachestat*:: +force usage of the cachestat() syscall instead of mincore(), and fail if it's not available. + *-o*, *--output* _list_:: Define output columns. See the *--help* output to get a list of the currently supported columns. The default list of columns may be extended if _list_ is specified in the format _{plus}list_. //TRANSLATORS: Keep {plus} untranslated. 
diff --git a/misc-utils/fincore.c b/misc-utils/fincore.c index 86c1373cb..2f878abb6 100644 --- a/misc-utils/fincore.c +++ b/misc-utils/fincore.c @@ -131,7 +131,8 @@ struct fincore_control { raw : 1, json : 1, recursive : 1, - total : 1; + total : 1, + cachestat : 1; }; @@ -367,9 +368,12 @@ static int fincore_fd (struct fincore_control *ctl, return 0; } - if (errno != ENOSYS) + if (errno != ENOSYS || ctl->cachestat) warn(_("failed to do cachestat: %s"), st->name); + if (ctl->cachestat) + return -errno; + return mincore_fd(ctl, fd, st); } @@ -442,6 +446,7 @@ static void __attribute__((__noreturn__)) usage(void) fputs(_(" --output-all output all columns\n"), out); fputs(_(" -r, --raw use raw output format\n"), out); fputs(_(" -R, --recursive recursively check all files in directories\n"), out); + fputs(_(" -C, --cachestat force useage of cachestat syscall\n"), out); fputs(USAGE_SEPARATOR, out); fprintf(out, USAGE_HELP_OPTIONS(23)); @@ -481,6 +486,7 @@ int main(int argc, char ** argv) { "json", no_argument, NULL, 'J' }, { "raw", no_argument, NULL, 'r' }, { "recursive", no_argument, NULL, 'R' }, + { "cachestat", no_argument, NULL, 'C' }, { NULL, 0, NULL, 0 }, }; @@ -489,7 +495,7 @@ int main(int argc, char ** argv) textdomain(PACKAGE); close_stdout_atexit(); - while ((c = getopt_long (argc, argv, "bcno:JrRVh", longopts, NULL)) != -1) { + while ((c = getopt_long (argc, argv, "bcCno:JrRVh", longopts, NULL)) != -1) { switch (c) { case 'b': ctl.bytes = 1; @@ -497,6 +503,12 @@ int main(int argc, char ** argv) case 'c': ctl.total = 1; break; + case 'C': +#ifndef HAVE_CACHESTAT + errx(EXIT_FAILURE, _("cachestat option is not supported")); +#endif + ctl.cachestat = 1; + break; case 'n': ctl.noheadings = 1; break;