From 40a824567f01873d31df16a28997ddc0fc45a328 Mon Sep 17 00:00:00 2001 From: Daniele Pizzolli Date: Mon, 11 Oct 2021 09:30:45 +0200 Subject: [PATCH] hardlink: add new option: -S/--maximum-size Support limiting comparison by maximum file size. [kzak@redhat.com: - resolve conflicts] Signed-off-by: Daniele Pizzolli Signed-off-by: Karel Zak --- misc-utils/hardlink.1.adoc | 3 + misc-utils/hardlink.c | 17 +++++- .../hardlink/options-maximum-size-8191 | 59 +++++++++++++++++++ .../hardlink/options-maximum-size-8192 | 33 +++++++++++ tests/ts/hardlink/options | 17 ++++++ 5 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 tests/expected/hardlink/options-maximum-size-8191 create mode 100644 tests/expected/hardlink/options-maximum-size-8192 diff --git a/misc-utils/hardlink.1.adoc b/misc-utils/hardlink.1.adoc index 36f8bfdbf7..f45b468358 100644 --- a/misc-utils/hardlink.1.adoc +++ b/misc-utils/hardlink.1.adoc @@ -87,6 +87,9 @@ A regular expression to include files. If the option *--exclude* has been given, *-s*, *--minimum-size* _size_:: The minimum size to consider. By default this is 1, so empty files will not be linked. The _size_ argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB"). +*-S*, *--maximum-size* _size_:: +The maximum size to consider. By default this is 0 and 0 has the special meaning of unlimited. The _size_ argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB"). + *-b*, *--io-size* _size_:: The size of the read() or sendfile() buffer used when comparing file contents. 
The _size_ argument may be followed by the multiplicative suffixes KiB, MiB, diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c index 20998ffeaf..ed49b0a7f5 100644 --- a/misc-utils/hardlink.c +++ b/misc-utils/hardlink.c @@ -138,6 +138,7 @@ struct hdl_regex { * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE) * @dry_run: Specifies whether hardlink should not link files (default = FALSE) * @min_size: Minimum size of files to consider. (default = 1 byte) + * @max_size: Maximum size of files to consider, 0 means unlimited. (default = 0 byte) */ static struct options { struct hdl_regex *include; @@ -155,6 +156,7 @@ static struct options { unsigned int keep_oldest:1; unsigned int dry_run:1; uintmax_t min_size; + uintmax_t max_size; size_t io_size; size_t cache_size; } opts = { @@ -723,6 +725,12 @@ static int inserter(const char *fpath, const struct stat *sb, jlog(JLOG_VERBOSE2, " %5zu: [%ld/%ld/%ld] %s", stats.files, sb->st_dev, sb->st_ino, sb->st_nlink, fpath); + if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) { + jlog(JLOG_VERBOSE1, + _("Skipped %s (greater than configured size)"), fpath); + return 0; + } + pathlen = strlen(fpath) + 1; fil = xcalloc(1, sizeof(*fil)); @@ -915,6 +923,7 @@ static void __attribute__((__noreturn__)) usage(void) fputs(_(" -x, --exclude regular expression to exclude files\n"), out); fputs(_(" -i, --include regular expression to include files/dirs\n"), out); fputs(_(" -s, --minimum-size minimum size for files.\n"), out); + fputs(_(" -S, --maximum-size maximum size for files.\n"), out); fputs(_(" -b, --io-size I/O buffer size for file reading (speedup, using more RAM)\n"), out); fputs(_(" -r, --cache-size memory limit for cached file content data\n"), out); fputs(_(" -c, --content compare only file contents, same as -pot\n"), out); @@ -933,7 +942,7 @@ static void __attribute__((__noreturn__)) usage(void) */ static int parse_options(int argc, char *argv[]) { - static const char 
optstr[] = "VhvnfpotXcmMOx:y:i:r:s:b:q"; + static const char optstr[] = "VhvnfpotXcmMOx:y:i:r:S:s:b:q"; static const struct option long_options[] = { {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, @@ -951,6 +960,7 @@ static int parse_options(int argc, char *argv[]) {"include", required_argument, NULL, 'i'}, {"method", required_argument, NULL, 'y' }, {"minimum-size", required_argument, NULL, 's'}, + {"maximum-size", required_argument, NULL, 'S'}, {"io-size", required_argument, NULL, 'b'}, {"content", no_argument, NULL, 'c'}, {"quiet", no_argument, NULL, 'q'}, @@ -1019,7 +1029,10 @@ static int parse_options(int argc, char *argv[]) register_regex(&opts.include, optarg); break; case 's': - opts.min_size = strtosize_or_err(optarg, _("failed to parse size")); + opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size")); + break; + case 'S': + opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size")); break; case 'r': opts.cache_size = strtosize_or_err(optarg, _("failed to cache size")); diff --git a/tests/expected/hardlink/options-maximum-size-8191 b/tests/expected/hardlink/options-maximum-size-8191 new file mode 100644 index 0000000000..cc718d4513 --- /dev/null +++ b/tests/expected/hardlink/options-maximum-size-8191 @@ -0,0 +1,59 @@ +Skipped [Redacted]/hardlink/testdir1/file-a-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-c-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-a-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-c-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-b-1 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-c-1 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-b-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-c-3 (greater than 
configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-b-3 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-a-3 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-1/file-a-1 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-1/sdir-2/file-a-1-abcdefghijklmnopqrstxyz-"§$%&()=?*+ (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-b-4 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-b-1 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-a-5 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-a-4 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-2/sdir-2/file-a-5 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-2/sdir-2/file-b-5 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/dir-2/sdir-3/file-b-4 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-c-1 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-b-5 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-b-2 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-c-3 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-b-3 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-a-3 (greater than configured size) +Skipped [Redacted]/hardlink/testdir1/file-a-1 (greater than configured size) +Mode: real +Files: 26 +Linked: 0 files +Compared: 0 xattrs +Compared: 0 files +Saved: 0 B +Duration: [Redacted] +dir-1/sdir-1/file-a-1 10 8192 1540236423 644 +dir-1/sdir-1/file-a-2 10 8192 1540236423 644 +dir-1/sdir-1/file-a-3 10 8192 1540236423 644 +dir-1/sdir-1/file-b-1 10 8192 1540236430 644 +dir-1/sdir-1/file-b-2 10 8192 1540236430 644 +dir-1/sdir-1/file-b-3 10 8192 1540236430 644 +dir-1/sdir-1/file-c-1 6 8192 1540236548 644 
+dir-1/sdir-1/file-c-2 6 8192 1540236548 644 +dir-1/sdir-1/file-c-3 6 8192 1540236548 644 +dir-1/sdir-2/file-a-1-abcdefghijklmnopqrstxyz-"§$%&()=?*+ 10 8192 1540236423 644 +dir-2/sdir-2/file-a-5 10 8192 1540236423 644 +dir-2/sdir-2/file-b-5 10 8192 1540236430 644 +dir-2/sdir-3/file-b-4 10 8192 1540236430 644 +file-a-1 10 8192 1540236423 644 +file-a-2 10 8192 1540236423 644 +file-a-3 10 8192 1540236423 644 +file-a-4 10 8192 1540236423 644 +file-a-5 10 8192 1540236423 644 +file-b-1 10 8192 1540236430 644 +file-b-2 10 8192 1540236430 644 +file-b-3 10 8192 1540236430 644 +file-b-4 10 8192 1540236430 644 +file-b-5 10 8192 1540236430 644 +file-c-1 6 8192 1540236548 644 +file-c-2 6 8192 1540236548 644 +file-c-3 6 8192 1540236548 644 diff --git a/tests/expected/hardlink/options-maximum-size-8192 b/tests/expected/hardlink/options-maximum-size-8192 new file mode 100644 index 0000000000..7308553d2b --- /dev/null +++ b/tests/expected/hardlink/options-maximum-size-8192 @@ -0,0 +1,33 @@ +Mode: real +Files: 26 +Linked: 0 files +Compared: 0 xattrs +Compared: 0 files +Saved: 0 B +Duration: [Redacted] +dir-1/sdir-1/file-a-1 10 8192 1540236423 644 +dir-1/sdir-1/file-a-2 10 8192 1540236423 644 +dir-1/sdir-1/file-a-3 10 8192 1540236423 644 +dir-1/sdir-1/file-b-1 10 8192 1540236430 644 +dir-1/sdir-1/file-b-2 10 8192 1540236430 644 +dir-1/sdir-1/file-b-3 10 8192 1540236430 644 +dir-1/sdir-1/file-c-1 6 8192 1540236548 644 +dir-1/sdir-1/file-c-2 6 8192 1540236548 644 +dir-1/sdir-1/file-c-3 6 8192 1540236548 644 +dir-1/sdir-2/file-a-1-abcdefghijklmnopqrstxyz-"§$%&()=?*+ 10 8192 1540236423 644 +dir-2/sdir-2/file-a-5 10 8192 1540236423 644 +dir-2/sdir-2/file-b-5 10 8192 1540236430 644 +dir-2/sdir-3/file-b-4 10 8192 1540236430 644 +file-a-1 10 8192 1540236423 644 +file-a-2 10 8192 1540236423 644 +file-a-3 10 8192 1540236423 644 +file-a-4 10 8192 1540236423 644 +file-a-5 10 8192 1540236423 644 +file-b-1 10 8192 1540236430 644 +file-b-2 10 8192 1540236430 644 +file-b-3 10 8192 1540236430 644 
+file-b-4 10 8192 1540236430 644 +file-b-5 10 8192 1540236430 644 +file-c-1 6 8192 1540236548 644 +file-c-2 6 8192 1540236548 644 +file-c-3 6 8192 1540236548 644 diff --git a/tests/ts/hardlink/options b/tests/ts/hardlink/options index e293921a1f..d86acf395f 100755 --- a/tests/ts/hardlink/options +++ b/tests/ts/hardlink/options @@ -76,5 +76,22 @@ $TS_CMD_HARDLINK --quiet --content "$SRCDIR" >> $TS_OUTPUT 2>> $TS_ERRLOG show_srcdir | sed 's/\(1540236\).*/\1xxx\tperm/' >> $TS_OUTPUT 2>> $TS_ERRLOG ts_finalize_subtest +ts_init_subtest "maximum-size-8191" +# Redact path and duration for reproducibility +$TS_CMD_HARDLINK -vv --maximum-size 8191 "$SRCDIR" | \ + sed 's:^Skipped .*/tests/output/\(.*\):Skipped [Redacted]/\1:' | \ + sed 's/^Duration: .*/Duration: [Redacted]/' >> $TS_OUTPUT 2>> $TS_ERRLOG +show_srcdir >> $TS_OUTPUT 2>> $TS_ERRLOG +ts_finalize_subtest + +ts_init_subtest "maximum-size-8192" +# Redact path and duration for reproducibility +$TS_CMD_HARDLINK -vv --maximum-size 8192 "$SRCDIR" | \ + sed 's:^Skipped .*/tests/output/\(.*\):Skipped [Redacted]/\1:' | \ + sed 's/^Duration:.*/Duration: [Redacted]/' >> $TS_OUTPUT 2>> $TS_ERRLOG +show_srcdir >> $TS_OUTPUT 2>> $TS_ERRLOG +ts_finalize_subtest + + rm -rf "$SRCDIR" ts_finalize -- 2.47.3