]> git.ipfire.org Git - thirdparty/util-linux.git/blame - misc-utils/hardlink.c
tests: update hardlink output
[thirdparty/util-linux.git] / misc-utils / hardlink.c
CommitLineData
2180ecc8 1/* hardlink.c - Link multiple identical files together
55c000e1 2 *
2180ecc8 3 * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
cd6b8d39 4 * Copyright (C) 2021 Karel Zak <kzak@redhat.com>
55c000e1 5 *
2180ecc8 6 * SPDX-License-Identifier: MIT
0b05aab4 7 *
2180ecc8
KZ
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
55c000e1 14 *
2180ecc8
KZ
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
55c000e1 17 *
2180ecc8
KZ
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
55c000e1 25 */
5c7cac85
KZ
26#define _POSIX_C_SOURCE 200112L /* POSIX functions */
27#define _XOPEN_SOURCE 600 /* nftw() */
28
29#include <sys/types.h> /* stat */
30#include <sys/stat.h> /* stat */
31#include <sys/time.h> /* getrlimit, getrusage */
32#include <sys/resource.h> /* getrlimit, getrusage */
33#include <fcntl.h> /* posix_fadvise */
34#include <ftw.h> /* ftw */
35#include <search.h> /* tsearch() and friends */
cd6b8d39
KZ
36#include <signal.h> /* SIG*, sigaction */
37#include <getopt.h> /* getopt_long() */
5c7cac85 38#include <ctype.h> /* tolower() */
66a38e97
KZ
39#include <sys/ioctl.h>
40
41#if defined(HAVE_LINUX_FIEMAP_H)
42# include <linux/fs.h>
43# include <linux/fiemap.h>
44# ifdef FICLONE
45# define USE_REFLINK 1
46# endif
47#endif
2180ecc8 48
cd6b8d39
KZ
49#include "nls.h"
50#include "c.h"
0361f744 51#include "xalloc.h"
631e6865 52#include "strutils.h"
06d8fe89 53#include "monotonic.h"
4c467ebc 54#include "optutils.h"
259bed15 55#include "fileeq.h"
66a38e97 56#include "statfs_magic.h"
2180ecc8 57
abaf378c 58#include <regex.h> /* regcomp(), regexec() */
3807e71a 59
3854515c
KZ
60#if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR)
61# include <sys/xattr.h>
62# define USE_XATTR 1
2180ecc8 63#endif
0ec20db8 64
4c467ebc
KZ
65static int quiet; /* don't print anything */
66
66a38e97
KZ
67#ifdef USE_REFLINK
68enum {
69 REFLINK_NEVER = 0,
70 REFLINK_AUTO,
71 REFLINK_ALWAYS
72};
73static int reflink_mode = REFLINK_NEVER;
74static int reflinks_skip;
75#endif
76
259bed15
KZ
77static struct ul_fileeq fileeq;
78
2180ecc8
KZ
79/**
80 * struct file - Information about a file
81 * @st: The stat buffer associated with the file
82 * @next: Next file with the same size
83 * @basename: The offset off the basename in the filename
84 * @path: The path of the file
85 *
86 * This contains all information we need about a file.
87 */
88struct file {
5c7cac85 89 struct stat st;
259bed15
KZ
90 struct ul_fileeq_data data;
91
5c7cac85
KZ
92 struct file *next;
93 struct link {
94 struct link *next;
95 int basename;
2180ecc8 96#if __STDC_VERSION__ >= 199901L
5c7cac85 97 char path[];
2180ecc8 98#elif __GNUC__
5c7cac85 99 char path[0];
2180ecc8 100#else
5c7cac85 101 char path[1];
2180ecc8 102#endif
5c7cac85 103 } *links;
3807e71a 104};
0ec20db8 105
2180ecc8
KZ
/**
 * enum log_level - Logging levels
 * @JLOG_SUMMARY: Default log level
 * @JLOG_INFO: Verbose logging (verbose == 1)
 * @JLOG_VERBOSE1: Verbosity 2
 * @JLOG_VERBOSE2: Verbosity 3
 *
 * The values are ordered; a message is printed when its level does not
 * exceed the configured verbosity.
 */
enum log_level {
	JLOG_SUMMARY = 0,
	JLOG_INFO,
	JLOG_VERBOSE1,
	JLOG_VERBOSE2
};
0ec20db8 119
2180ecc8
KZ
/**
 * struct statistics - Statistics about the run
 * @started: Whether we are post command-line processing
 * @files: The number of files worked on
 * @linked: The number of files replaced by a hardlink to a master
 * @xattr_comparisons: The number of extended attribute comparisons
 * @comparisons: The number of comparisons
 * @ignored_reflinks: The number of files skipped because they already are
 *                    reflinks of the master
 * @saved: The (exaggerated) amount of space saved
 * @start_time: The time we started at
 */
static struct statistics {
	int started;

	/* counters */
	size_t files;
	size_t linked;
	size_t xattr_comparisons;
	size_t comparisons;
	size_t ignored_reflinks;

	/* totals */
	double saved;
	struct timeval start_time;
} stats;
3807e71a 140
8ff8b027
KZ
141
/**
 * struct hdl_regex - Element of a singly linked list of regular expressions
 * @re: POSIX compatible regex handler
 * @next: Next element of the list, or NULL
 */
struct hdl_regex {
	regex_t re;
	struct hdl_regex *next;
};
147
2180ecc8
KZ
148/**
149 * struct options - Processed command-line options
150 * @include: A linked list of regular expressions for the --include option
151 * @exclude: A linked list of regular expressions for the --exclude option
152 * @verbosity: The verbosity. Should be one of #enum log_level
153 * @respect_mode: Whether to respect file modes (default = TRUE)
154 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
155 * @respect_name: Whether to respect file names (default = FALSE)
156 * @respect_time: Whether to respect file modification times (default = TRUE)
157 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
158 * @maximise: Chose the file with the highest link count as master
159 * @minimise: Chose the file with the lowest link count as master
160 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
161 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
162 * @min_size: Minimum size of files to consider. (default = 1 byte)
40a82456 163 * @max_size: Maximum size of files to consider, 0 means umlimited. (default = 0 byte)
2180ecc8
KZ
164 */
165static struct options {
8ff8b027
KZ
166 struct hdl_regex *include;
167 struct hdl_regex *exclude;
5c7cac85 168
259bed15 169 const char *method;
5c7cac85
KZ
170 signed int verbosity;
171 unsigned int respect_mode:1;
172 unsigned int respect_owner:1;
173 unsigned int respect_name:1;
174 unsigned int respect_time:1;
175 unsigned int respect_xattrs:1;
176 unsigned int maximise:1;
177 unsigned int minimise:1;
178 unsigned int keep_oldest:1;
179 unsigned int dry_run:1;
180 uintmax_t min_size;
40a82456 181 uintmax_t max_size;
64c8db3c 182 size_t io_size;
f3212b91 183 size_t cache_size;
9e7235e7 184} opts = {
5c7cac85 185 /* default setting */
259bed15 186 .method = "sha256",
5c7cac85
KZ
187 .respect_mode = TRUE,
188 .respect_owner = TRUE,
189 .respect_time = TRUE,
190 .respect_xattrs = FALSE,
191 .keep_oldest = FALSE,
722762b6 192 .min_size = 1,
f3212b91 193 .cache_size = 10*1024*1024
9e7235e7 194};
2180ecc8
KZ
195
196/*
197 * files
198 *
199 * A binary tree of files, managed using tsearch(). To see which nodes
200 * are considered equal, see compare_nodes()
201 */
202static void *files;
203static void *files_by_ino;
204
205/*
206 * last_signal
207 *
208 * The last signal we received. We store the signal here in order to be able
209 * to break out of loops gracefully and to return from our nftw() handler.
210 */
211static int last_signal;
0ec20db8 212
4939964c
KZ
213
214#define is_log_enabled(_level) (quiet == 0 && (_level) <= (unsigned int)opts.verbosity)
215
2180ecc8
KZ
216/**
217 * jlog - Logging for hardlink
218 * @level: The log level
219 * @format: A format string for printf()
220 */
5c7cac85 221__attribute__((format(printf, 2, 3)))
2180ecc8 222static void jlog(enum log_level level, const char *format, ...)
0ec20db8 223{
5c7cac85 224 va_list args;
2180ecc8 225
4939964c 226 if (!is_log_enabled(level))
5c7cac85 227 return;
423e80c1 228
5c7cac85
KZ
229 va_start(args, format);
230 vfprintf(stdout, format, args);
231 va_end(args);
232 fputc('\n', stdout);
0ec20db8
DJ
233}
234
2180ecc8
KZ
/**
 * CMP - Three-way comparison of two numerical values
 * @a: First value
 * @b: Second value
 *
 * Evaluates to 1 when @a is greater than @b, to -1 when it is smaller and
 * to 0 otherwise. No subtraction of the operands takes place, so values of
 * any integer size can be compared without overflow. Both arguments are
 * evaluated twice.
 */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
243
2180ecc8 244/**
8ff8b027
KZ
245 * register_regex - Compile and insert a regular expression into list
246 * @pregs: Pointer to a linked list of regular expressions
247 * @regex: String containing the regular expression to be compiled
248 */
249static void register_regex(struct hdl_regex **pregs, const char *regex)
250{
251 struct hdl_regex *link;
252 int err;
253
254 link = xmalloc(sizeof(*link));
255
256 if ((err = regcomp(&link->re, regex, REG_NOSUB | REG_EXTENDED)) != 0) {
257 size_t size = regerror(err, &link->re, NULL, 0);
258 char *buf = xmalloc(size + 1);
259
260 regerror(err, &link->re, buf, size);
261
262 errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"),
263 regex, buf);
264 }
265 link->next = *pregs; *pregs = link;
266}
267
268/**
269 * match_any_regex - Match against multiple regular expressions
2180ecc8
KZ
270 * @pregs: A linked list of regular expressions
271 * @what: The string to match against
272 *
273 * Checks whether any of the regular expressions in the list matches the
274 * string.
275 */
8ff8b027 276static int match_any_regex(struct hdl_regex *pregs, const char *what)
0ec20db8 277{
5c7cac85 278 for (; pregs != NULL; pregs = pregs->next) {
8ff8b027 279 if (regexec(&pregs->re, what, 0, NULL, 0) == 0)
5c7cac85
KZ
280 return TRUE;
281 }
282 return FALSE;
0ec20db8
DJ
283}
284
2180ecc8
KZ
285/**
286 * compare_nodes - Node comparison function
287 * @_a: The first node (a #struct file)
288 * @_b: The second node (a #struct file)
289 *
290 * Compare the two nodes for the binary tree.
291 */
292static int compare_nodes(const void *_a, const void *_b)
94b040b0 293{
5c7cac85
KZ
294 const struct file *a = _a;
295 const struct file *b = _b;
296 int diff = 0;
bd7722af 297
5c7cac85
KZ
298 if (diff == 0)
299 diff = CMP(a->st.st_dev, b->st.st_dev);
300 if (diff == 0)
301 diff = CMP(a->st.st_size, b->st.st_size);
2180ecc8 302
5c7cac85 303 return diff;
94b040b0
JN
304}
305
2180ecc8
KZ
306/**
307 * compare_nodes_ino - Node comparison function
308 * @_a: The first node (a #struct file)
309 * @_b: The second node (a #struct file)
310 *
311 * Compare the two nodes for the binary tree.
312 */
313static int compare_nodes_ino(const void *_a, const void *_b)
94b040b0 314{
5c7cac85
KZ
315 const struct file *a = _a;
316 const struct file *b = _b;
317 int diff = 0;
318
319 if (diff == 0)
320 diff = CMP(a->st.st_dev, b->st.st_dev);
321 if (diff == 0)
322 diff = CMP(a->st.st_ino, b->st.st_ino);
323
324 /* If opts.respect_name is used, we will restrict a struct file to
325 * contain only links with the same basename to keep the rest simple.
326 */
327 if (diff == 0 && opts.respect_name)
328 diff = strcmp(a->links->path + a->links->basename,
329 b->links->path + b->links->basename);
330
331 return diff;
94b040b0
JN
332}
333
2180ecc8
KZ
334/**
335 * print_stats - Print statistics to stdout
336 */
337static void print_stats(void)
94b040b0 338{
5c7cac85
KZ
339 struct timeval end = { 0, 0 }, delta = { 0, 0 };
340 char *ssz;
06d8fe89 341
5c7cac85
KZ
342 gettime_monotonic(&end);
343 timersub(&end, &stats.start_time, &delta);
344
66a38e97 345 jlog(JLOG_SUMMARY, "%-25s %s", _("Mode:"),
5c7cac85 346 opts.dry_run ? _("dry-run") : _("real"));
66a38e97
KZ
347 jlog(JLOG_SUMMARY, "%-25s %s", _("Method:"), opts.method);
348 jlog(JLOG_SUMMARY, "%-25s %zu", _("Files:"), stats.files);
349 jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Linked:"), stats.linked);
06d8fe89 350
3854515c 351#ifdef USE_XATTR
66a38e97 352 jlog(JLOG_SUMMARY, _("%-25s %zu xattrs"), _("Compared:"),
5c7cac85 353 stats.xattr_comparisons);
2180ecc8 354#endif
66a38e97 355 jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Compared:"),
5c7cac85 356 stats.comparisons);
66a38e97
KZ
357#ifdef USE_REFLINK
358 if (reflinks_skip)
359 jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Skipped reflinks:"),
360 stats.ignored_reflinks);
361#endif
5c7cac85
KZ
362 ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
363 SIZE_SUFFIX_SPACE |
364 SIZE_DECIMAL_2DIGITS, stats.saved);
423e80c1 365
66a38e97 366 jlog(JLOG_SUMMARY, "%-25s %s", _("Saved:"), ssz);
5c7cac85 367 free(ssz);
423e80c1 368
66a38e97 369 jlog(JLOG_SUMMARY, _("%-25s %"PRId64".%06"PRId64" seconds"), _("Duration:"),
63463630 370 (int64_t)delta.tv_sec, (int64_t)delta.tv_usec);
94b040b0 371}
55c000e1 372
2180ecc8
KZ
373/**
374 * handle_interrupt - Handle a signal
375 *
376 * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
377 */
cd6b8d39 378static int handle_interrupt(void)
0ec20db8 379{
5c7cac85
KZ
380 switch (last_signal) {
381 case SIGINT:
382 case SIGTERM:
383 return TRUE;
384 case SIGUSR1:
385 print_stats();
386 putchar('\n');
387 break;
388 }
389
390 last_signal = 0;
391 return FALSE;
0ec20db8
DJ
392}
393
3854515c 394#ifdef USE_XATTR
2180ecc8 395
2180ecc8
KZ
/**
 * llistxattr_or_die - Wrapper for llistxattr()
 * @path: The file to query
 * @list: Buffer receiving the names (may be NULL when @size is 0)
 * @size: Size of @list
 *
 * Behaves like llistxattr(), but terminates the program on every error
 * except ENOTSUP; in the ENOTSUP case the negative result is returned to
 * the caller.
 */
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
{
	ssize_t nbytes = llistxattr(path, list, size);
	int fatal = nbytes < 0 && errno != ENOTSUP;

	if (fatal)
		err(EXIT_FAILURE, _("cannot get xattr names for %s"), path);

	return nbytes;
}
411
/**
 * lgetxattr_or_die - Wrapper for lgetxattr()
 * @path: The file to query
 * @name: Name of the extended attribute
 * @value: Buffer receiving the value (may be NULL when @size is 0)
 * @size: Size of @value
 *
 * Behaves like lgetxattr(), but terminates the program when the call
 * reports an error.
 */
static ssize_t lgetxattr_or_die(const char *path,
				const char *name, void *value, size_t size)
{
	ssize_t nbytes = lgetxattr(path, name, value, size);

	if (nbytes >= 0)
		return nbytes;

	err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"),
	    name, path);

	return nbytes;
}
428
/**
 * get_xattr_name_count - Count the number of xattr names
 * @names: a non-empty table of concatenated, null-terminated xattr names
 * @len: the total length of the table
 *
 * Walks the table name by name, skipping each name together with its
 * terminating null byte.
 *
 * @Returns the number of xattr names
 */
static int get_xattr_name_count(const char *const names, ssize_t len)
{
	const char *const end = names + len;
	const char *cur = names;
	int total = 0;

	while (cur < end) {
		total++;
		cur += strlen(cur) + 1;
	}

	return total;
}
446
/**
 * cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing
 *                       the names they point to.
 * @ptr1: Address of the first name pointer
 * @ptr2: Address of the second name pointer
 *
 * Has the signature expected by qsort(); the result is that of strcmp().
 */
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
	const char *lhs = *(char *const *)ptr1;
	const char *rhs = *(char *const *)ptr2;

	return strcmp(lhs, rhs);
}
455
456/**
457 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
458 * @names - table of concatenated, null-terminated xattr names
459 * @n - the number of names
460 *
461 * @Returns allocated table of pointers to the names, sorted alphabetically
462 */
463static const char **get_sorted_xattr_name_table(const char *names, int n)
464{
5c7cac85
KZ
465 const char **table = xmalloc(n * sizeof(char *));
466 int i;
2180ecc8 467
5c7cac85
KZ
468 for (i = 0; i < n; i++) {
469 table[i] = names;
470 names += strlen(names) + 1;
471 }
2180ecc8 472
5c7cac85 473 qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
2180ecc8 474
5c7cac85 475 return table;
2180ecc8
KZ
476}
477
478/**
479 * file_xattrs_equal - Compare the extended attributes of two files
480 * @a: The first file
481 * @b: The second file
482 *
483 * @Returns: %TRUE if and only if extended attributes are equal
484 */
cd6b8d39 485static int file_xattrs_equal(const struct file *a, const struct file *b)
2180ecc8 486{
5c7cac85
KZ
487 ssize_t len_a;
488 ssize_t len_b;
489 char *names_a = NULL;
490 char *names_b = NULL;
491 int n_a;
492 int n_b;
493 const char **name_ptrs_a = NULL;
494 const char **name_ptrs_b = NULL;
495 void *value_a = NULL;
496 void *value_b = NULL;
497 int ret = FALSE;
498 int i;
2180ecc8 499
5c7cac85
KZ
500 assert(a->links != NULL);
501 assert(b->links != NULL);
2180ecc8 502
5c7cac85
KZ
503 jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path,
504 b->links->path);
2180ecc8 505
5c7cac85 506 stats.xattr_comparisons++;
2180ecc8 507
5c7cac85
KZ
508 len_a = llistxattr_or_die(a->links->path, NULL, 0);
509 len_b = llistxattr_or_die(b->links->path, NULL, 0);
2180ecc8 510
5c7cac85
KZ
511 if (len_a <= 0 && len_b <= 0)
512 return TRUE; // xattrs not supported or neither file has any
2180ecc8 513
5c7cac85
KZ
514 if (len_a != len_b)
515 return FALSE; // total lengths of xattr names differ
2180ecc8 516
5c7cac85
KZ
517 names_a = xmalloc(len_a);
518 names_b = xmalloc(len_b);
2180ecc8 519
5c7cac85
KZ
520 len_a = llistxattr_or_die(a->links->path, names_a, len_a);
521 len_b = llistxattr_or_die(b->links->path, names_b, len_b);
522 assert((len_a > 0) && (len_a == len_b));
2180ecc8 523
5c7cac85
KZ
524 n_a = get_xattr_name_count(names_a, len_a);
525 n_b = get_xattr_name_count(names_b, len_b);
2180ecc8 526
5c7cac85
KZ
527 if (n_a != n_b)
528 goto exit; // numbers of xattrs differ
2180ecc8 529
5c7cac85
KZ
530 name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
531 name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
2180ecc8 532
5c7cac85 533 // We now have two sorted tables of xattr names.
2180ecc8 534
5c7cac85
KZ
535 for (i = 0; i < n_a; i++) {
536 if (handle_interrupt())
537 goto exit; // user wants to quit
2180ecc8 538
5c7cac85
KZ
539 if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
540 goto exit; // names at same slot differ
2180ecc8 541
5c7cac85
KZ
542 len_a =
543 lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
544 len_b =
545 lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
2180ecc8 546
5c7cac85
KZ
547 if (len_a != len_b)
548 goto exit; // xattrs with same name, different value lengths
2180ecc8 549
5c7cac85
KZ
550 value_a = xmalloc(len_a);
551 value_b = xmalloc(len_b);
2180ecc8 552
5c7cac85
KZ
553 len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
554 value_a, len_a);
555 len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
556 value_b, len_b);
557 assert((len_a >= 0) && (len_a == len_b));
2180ecc8 558
5c7cac85
KZ
559 if (memcmp(value_a, value_b, len_a) != 0)
560 goto exit; // xattrs with same name, different values
2180ecc8 561
5c7cac85
KZ
562 free(value_a);
563 free(value_b);
564 value_a = NULL;
565 value_b = NULL;
566 }
2180ecc8 567
5c7cac85 568 ret = TRUE;
2180ecc8 569
5c7cac85
KZ
570 exit:
571 free(names_a);
572 free(names_b);
573 free(name_ptrs_a);
574 free(name_ptrs_b);
575 free(value_a);
576 free(value_b);
577 return ret;
2180ecc8 578}
3854515c 579#else /* !USE_XATTR */
cd6b8d39 580static int file_xattrs_equal(const struct file *a, const struct file *b)
2180ecc8 581{
5c7cac85 582 return TRUE;
2180ecc8 583}
3854515c 584#endif /* USE_XATTR */
2180ecc8 585
2180ecc8
KZ
586/**
587 * file_may_link_to - Check whether a file may replace another one
588 * @a: The first file
589 * @b: The second file
590 *
259bed15
KZ
591 * Check whether the two files are considered equal attributes and can be
592 * linked. This function does not compare content od the files!
2180ecc8 593 */
cd6b8d39 594static int file_may_link_to(const struct file *a, const struct file *b)
2180ecc8 595{
5c7cac85
KZ
596 return (a->st.st_size != 0 &&
597 a->st.st_size == b->st.st_size &&
598 a->links != NULL && b->links != NULL &&
599 a->st.st_dev == b->st.st_dev &&
600 a->st.st_ino != b->st.st_ino &&
601 (!opts.respect_mode || a->st.st_mode == b->st.st_mode) &&
602 (!opts.respect_owner || a->st.st_uid == b->st.st_uid) &&
603 (!opts.respect_owner || a->st.st_gid == b->st.st_gid) &&
604 (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
605 (!opts.respect_name
606 || strcmp(a->links->path + a->links->basename,
607 b->links->path + b->links->basename) == 0) &&
259bed15 608 (!opts.respect_xattrs || file_xattrs_equal(a, b)));
2180ecc8
KZ
609}
610
611/**
612 * file_compare - Compare two files to decide which should be master
613 * @a: The first file
614 * @b: The second file
615 *
616 * Check which of the files should be considered greater and thus serve
617 * as the master when linking (the master is the file that all equal files
618 * will be replaced with).
619 */
620static int file_compare(const struct file *a, const struct file *b)
621{
5c7cac85
KZ
622 int res = 0;
623 if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
624 return 0;
625
626 if (res == 0 && opts.maximise)
627 res = CMP(a->st.st_nlink, b->st.st_nlink);
628 if (res == 0 && opts.minimise)
629 res = CMP(b->st.st_nlink, a->st.st_nlink);
630 if (res == 0)
631 res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime)
632 : CMP(a->st.st_mtime, b->st.st_mtime);
633 if (res == 0)
634 res = CMP(b->st.st_ino, a->st.st_ino);
635
636 return res;
2180ecc8
KZ
637}
638
66a38e97
KZ
639#ifdef USE_REFLINK
640static inline int do_link(struct file *a, struct file *b,
641 const char *new_name, int reflink)
642{
643 if (reflink) {
644 int dest = -1, src = -1;
645
646 dest = open(new_name, O_CREAT|O_WRONLY|O_TRUNC);
647 if (dest < 0)
648 goto fallback;
649 if (fchmod(dest, b->st.st_mode) != 0)
650 goto fallback;
651 if (fchown(dest, b->st.st_uid, b->st.st_gid) != 0)
652 goto fallback;
653 src = open(a->links->path, O_RDONLY);
654 if (src < 0)
655 goto fallback;
656 if (ioctl(dest, FICLONE, src) != 0)
657 goto fallback;
658 close(dest);
659 close(src);
660 return 0;
661fallback:
662 if (dest >= 0) {
663 close(dest);
664 unlink(new_name);
665 }
666 if (src >= 0)
667 close(src);
668
669 if (reflink_mode == REFLINK_ALWAYS)
670 return -errno;
671 jlog(JLOG_VERBOSE2,_("Reflinking failed, fallback to hardlinking"));
672 }
673
674 return link(a->links->path, new_name);
675}
676#else
677static inline int do_link(struct file *a,
678 struct file *b __attribute__((__unused__)),
679 const char *new_name,
680 int reflink __attribute__((__unused__)))
681{
682 return link(a->links->path, new_name);
683}
684#endif /* USE_REFLINK */
685
2180ecc8
KZ
686/**
687 * file_link - Replace b with a link to a
688 * @a: The first file
689 * @b: The second file
690 *
691 * Link the file, replacing @b with the current one. The file is first
692 * linked to a temporary name, and then renamed to the name of @b, making
693 * the replace atomic (@b will always exist).
694 */
66a38e97 695static int file_link(struct file *a, struct file *b, int reflink)
2180ecc8 696{
5c7cac85
KZ
697
698 file_link:
699 assert(a->links != NULL);
700 assert(b->links != NULL);
701
4939964c
KZ
702 if (is_log_enabled(JLOG_INFO)) {
703 char *ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
5c7cac85
KZ
704 SIZE_SUFFIX_SPACE |
705 SIZE_DECIMAL_2DIGITS, a->st.st_size);
66a38e97
KZ
706 jlog(JLOG_INFO, _("%s%sLinking %s to %s (-%s)"),
707 opts.dry_run ? _("[DryRun] ") : "",
708 reflink ? "Ref" : "",
709 a->links->path, b->links->path,
4939964c
KZ
710 ssz);
711 free(ssz);
712 }
5c7cac85
KZ
713
714 if (!opts.dry_run) {
fd1e57a7
KZ
715 char *new_path;
716 int failed = 1;
5c7cac85 717
fd1e57a7
KZ
718 xasprintf(&new_path, "%s.hardlink-temporary", b->links->path);
719
66a38e97 720 if (do_link(a, b, new_path, reflink) != 0)
fd1e57a7
KZ
721 warn(_("cannot link %s to %s"), a->links->path, new_path);
722
723 else if (rename(new_path, b->links->path) != 0) {
724 warn(_("cannot rename %s to %s"), a->links->path, new_path);
725 unlink(new_path);
726 } else
727 failed = 0;
5c7cac85 728
5c7cac85 729 free(new_path);
fd1e57a7
KZ
730 if (failed)
731 return FALSE;
5c7cac85
KZ
732 }
733
734 /* Update statistics */
735 stats.linked++;
736
737 /* Increase the link count of this file, and set stat() of other file */
738 a->st.st_nlink++;
739 b->st.st_nlink--;
740
741 if (b->st.st_nlink == 0)
742 stats.saved += a->st.st_size;
743
744 /* Move the link from file b to a */
745 {
746 struct link *new_link = b->links;
747
748 b->links = b->links->next;
749 new_link->next = a->links->next;
750 a->links->next = new_link;
751 }
752
753 /* Do it again */
754 if (b->links)
755 goto file_link;
756
757 return TRUE;
2180ecc8
KZ
758}
759
760/**
761 * inserter - Callback function for nftw()
762 * @fpath: The path of the file being visited
763 * @sb: The stat information of the file
764 * @typeflag: The type flag
765 * @ftwbuf: Contains current level of nesting and offset of basename
766 *
767 * Called by nftw() for the files. See the manual page for nftw() for
768 * further information.
769 */
5c7cac85
KZ
770static int inserter(const char *fpath, const struct stat *sb,
771 int typeflag, struct FTW *ftwbuf)
2180ecc8 772{
5c7cac85
KZ
773 struct file *fil;
774 struct file **node;
775 size_t pathlen;
776 int included;
777 int excluded;
2180ecc8 778
5c7cac85
KZ
779 if (handle_interrupt())
780 return 1;
781 if (typeflag == FTW_DNR || typeflag == FTW_NS)
782 warn(_("cannot read %s"), fpath);
783 if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
784 return 0;
2180ecc8 785
8ff8b027
KZ
786 included = match_any_regex(opts.include, fpath);
787 excluded = match_any_regex(opts.exclude, fpath);
2180ecc8 788
5c7cac85
KZ
789 if ((opts.exclude && excluded && !included) ||
790 (!opts.exclude && opts.include && !included))
791 return 0;
2180ecc8 792
5c7cac85 793 stats.files++;
2180ecc8 794
5c7cac85
KZ
795 if ((uintmax_t) sb->st_size < opts.min_size) {
796 jlog(JLOG_VERBOSE1,
797 _("Skipped %s (smaller than configured size)"), fpath);
798 return 0;
799 }
2180ecc8 800
57d9fd2b
KZ
801 jlog(JLOG_VERBOSE2, " %5zu: [%ld/%ld/%zu] %s",
802 stats.files, sb->st_dev, sb->st_ino,
803 (size_t) sb->st_nlink, fpath);
2180ecc8 804
40a82456
DP
805 if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) {
806 jlog(JLOG_VERBOSE1,
807 _("Skipped %s (greater than configured size)"), fpath);
808 return 0;
809 }
2180ecc8 810
5c7cac85 811 pathlen = strlen(fpath) + 1;
2180ecc8 812
5c7cac85
KZ
813 fil = xcalloc(1, sizeof(*fil));
814 fil->links = xcalloc(1, sizeof(struct link) + pathlen);
2180ecc8 815
5c7cac85
KZ
816 fil->st = *sb;
817 fil->links->basename = ftwbuf->base;
818 fil->links->next = NULL;
2180ecc8 819
5c7cac85 820 memcpy(fil->links->path, fpath, pathlen);
2180ecc8 821
5c7cac85 822 node = tsearch(fil, &files_by_ino, compare_nodes_ino);
2180ecc8 823
5c7cac85
KZ
824 if (node == NULL)
825 goto fail;
2180ecc8 826
5c7cac85
KZ
827 if (*node != fil) {
828 /* Already known inode, add link to inode information */
829 assert((*node)->st.st_dev == sb->st_dev);
830 assert((*node)->st.st_ino == sb->st_ino);
2180ecc8 831
5c7cac85
KZ
832 fil->links->next = (*node)->links;
833 (*node)->links = fil->links;
2180ecc8 834
5c7cac85
KZ
835 free(fil);
836 } else {
837 /* New inode, insert into by-size table */
838 node = tsearch(fil, &files, compare_nodes);
2180ecc8 839
5c7cac85
KZ
840 if (node == NULL)
841 goto fail;
2180ecc8 842
5c7cac85
KZ
843 if (*node != fil) {
844 struct file *l;
2180ecc8 845
5c7cac85
KZ
846 if (file_compare(fil, *node) >= 0) {
847 fil->next = *node;
848 *node = fil;
849 } else {
850 for (l = *node; l != NULL; l = l->next) {
851 if (l->next != NULL
852 && file_compare(fil, l->next) < 0)
853 continue;
2180ecc8 854
5c7cac85
KZ
855 fil->next = l->next;
856 l->next = fil;
2180ecc8 857
5c7cac85
KZ
858 break;
859 }
860 }
861 }
862 }
2180ecc8 863
5c7cac85 864 return 0;
3c52b1c8 865
5c7cac85
KZ
866 fail:
867 warn(_("cannot continue")); /* probably ENOMEM */
868 return 0;
2180ecc8
KZ
869}
870
66a38e97
KZ
871#ifdef USE_REFLINK
872static int is_reflink_compatible(dev_t devno, const char *filename)
873{
874 static dev_t last_dev = 0;
875 static int last_status = 0;
876
877 if (last_dev != devno) {
878 struct statfs vfs;
879
880 if (statfs(filename, &vfs) != 0)
881 return 0;
882
883 last_dev = devno;
884 switch (vfs.f_type) {
885 case STATFS_BTRFS_MAGIC:
886 case STATFS_XFS_MAGIC:
887 last_status = 1;
888 break;
889 default:
890 last_status = 0;
891 break;
892 }
893 }
894
895 return last_status;
896}
897
898static int is_reflink(struct file *xa, struct file *xb)
899{
900 int last = 0, rc = 0;
901 char abuf[BUFSIZ] = { 0 },
902 bbuf[BUFSIZ] = { 0 };
903
904 struct fiemap *amap = (struct fiemap *) abuf,
905 *bmap = (struct fiemap *) bbuf;
906
907 int af = open(xa->links->path, O_RDONLY),
908 bf = open(xb->links->path, O_RDONLY);
909
910 do {
911 size_t i;
912
913 amap->fm_length = ~0ULL;
914 amap->fm_flags = FIEMAP_FLAG_SYNC;
915 amap->fm_extent_count = (sizeof(abuf) - sizeof(*amap)) / sizeof(struct fiemap_extent);
916
917 bmap->fm_length = ~0ULL;
918 bmap->fm_flags = FIEMAP_FLAG_SYNC;
919 bmap->fm_extent_count = (sizeof(bbuf) - sizeof(*bmap)) / sizeof(struct fiemap_extent);
920
921 if (ioctl(af, FS_IOC_FIEMAP, (unsigned long) amap) < 0)
922 goto done;
923 if (ioctl(bf, FS_IOC_FIEMAP, (unsigned long) bmap) < 0)
924 goto done;
925
926 if (amap->fm_mapped_extents != bmap->fm_mapped_extents)
927 goto done;
928
929 for (i = 0; i < amap->fm_mapped_extents; i++) {
930 struct fiemap_extent *a = &amap->fm_extents[i];
931 struct fiemap_extent *b = &bmap->fm_extents[i];
932
933 if (a->fe_logical != b->fe_logical ||
934 a->fe_length != b->fe_length ||
935 a->fe_physical != b->fe_physical)
936 goto done;
937 if (!(a->fe_flags & FIEMAP_EXTENT_SHARED) ||
938 !(b->fe_flags & FIEMAP_EXTENT_SHARED))
939 goto done;
940 if (a->fe_flags & FIEMAP_EXTENT_LAST)
941 last = 1;
942 }
943
944 bmap->fm_start = amap->fm_start =
945 amap->fm_extents[amap->fm_mapped_extents - 1].fe_logical +
946 amap->fm_extents[amap->fm_mapped_extents - 1].fe_length;
947 } while (last == 0);
948
949 rc = 1;
950done:
951 close(af);
952 close(bf);
953
954 return rc;
955}
956#endif /* USE_REFLINK */
957
259bed15
KZ
958static inline size_t count_nodes(struct file *x)
959{
960 size_t ct = 0;
961
962 for ( ; x != NULL; x = x->next)
963 ct++;
964
965 return ct;
966}
967
2180ecc8
KZ
968/**
969 * visitor - Callback for twalk()
970 * @nodep: Pointer to a pointer to a #struct file
971 * @which: At which point this visit is (preorder, postorder, endorder)
972 * @depth: The depth of the node in the tree
973 *
974 * Visit the nodes in the binary tree. For each node, call hardlinker()
975 * on each #struct file in the linked list of #struct file instances located
976 * at that node.
977 */
978static void visitor(const void *nodep, const VISIT which, const int depth)
979{
5c7cac85 980 struct file *master = *(struct file **)nodep;
259bed15 981 struct file *begin = master;
5c7cac85 982 struct file *other;
2180ecc8 983
5c7cac85 984 (void)depth;
2180ecc8 985
5c7cac85
KZ
986 if (which != leaf && which != endorder)
987 return;
2180ecc8 988
5c7cac85 989 for (; master != NULL; master = master->next) {
259bed15 990 size_t nnodes, memsiz;
66a38e97 991 int may_reflink = 0;
ee4c3249 992
5c7cac85
KZ
993 if (handle_interrupt())
994 exit(EXIT_FAILURE);
995 if (master->links == NULL)
996 continue;
2180ecc8 997
259bed15
KZ
998 /* calculate per file max memory use */
999 nnodes = count_nodes(master);
1000 if (!nnodes)
1001 continue;
f3212b91
KZ
1002
1003 /* per-file cache size */
1004 memsiz = opts.cache_size / nnodes;
64c8db3c
KZ
1005 /* filesiz, readsiz, memsiz */
1006 ul_fileeq_set_size(&fileeq, master->st.st_size, opts.io_size, memsiz);
259bed15 1007
66a38e97
KZ
1008#ifdef USE_REFLINK
1009 if (reflink_mode || reflinks_skip) {
1010 may_reflink =
1011 reflink_mode == REFLINK_ALWAYS ? 1 :
1012 is_reflink_compatible(master->st.st_dev,
1013 master->links->path);
1014 }
1015#endif
5c7cac85 1016 for (other = master->next; other != NULL; other = other->next) {
259bed15
KZ
1017 int eq;
1018
5c7cac85
KZ
1019 if (handle_interrupt())
1020 exit(EXIT_FAILURE);
2180ecc8 1021
5c7cac85
KZ
1022 assert(other != other->next);
1023 assert(other->st.st_size == master->st.st_size);
2180ecc8 1024
af5f0078
KZ
1025 if (!other->links)
1026 continue;
1027
259bed15 1028 /* check file attributes, etc. */
af5f0078
KZ
1029 if (!file_may_link_to(master, other)) {
1030 jlog(JLOG_VERBOSE2,
1031 _("Skipped (attributes mismatch) %s"), other->links->path);
5c7cac85 1032 continue;
af5f0078 1033 }
66a38e97
KZ
1034#ifdef USE_REFLINK
1035 if (may_reflink && reflinks_skip && is_reflink(master, other)) {
1036 jlog(JLOG_VERBOSE2,
1037 _("Skipped (already reflink) %s"), other->links->path);
1038 stats.ignored_reflinks++;
1039 continue;
1040 }
1041#endif
259bed15
KZ
1042 /* initialize content comparison */
1043 if (!ul_fileeq_data_associated(&master->data))
1044 ul_fileeq_data_set_file(&master->data, master->links->path);
1045 if (!ul_fileeq_data_associated(&other->data))
1046 ul_fileeq_data_set_file(&other->data, other->links->path);
1047
1048 /* compare files */
1049 eq = ul_fileeq(&fileeq, &master->data, &other->data);
1050
1051 /* reduce number of open files, keep only master open */
1052 ul_fileeq_data_close_file(&other->data);
1053
1054 stats.comparisons++;
1055
af5f0078
KZ
1056 if (!eq) {
1057 jlog(JLOG_VERBOSE2,
1058 _("Skipped (content mismatch) %s"), other->links->path);
259bed15 1059 continue;
af5f0078 1060 }
259bed15
KZ
1061
1062 /* link files */
66a38e97 1063 if (!file_link(master, other, may_reflink) && errno == EMLINK) {
259bed15 1064 ul_fileeq_data_deinit(&master->data);
5c7cac85 1065 master = other;
259bed15 1066 }
5c7cac85 1067 }
259bed15
KZ
1068
1069 /* don't keep master data in memory */
1070 ul_fileeq_data_deinit(&master->data);
1071 }
1072
1073 /* final cleanup */
1074 for (other = begin; other != NULL; other = other->next) {
1075 if (ul_fileeq_data_associated(&other->data))
1076 ul_fileeq_data_deinit(&other->data);
5c7cac85 1077 }
2180ecc8
KZ
1078}
1079
1080/**
d2c3c5a6 1081 * usage - Print the program help and exit
2180ecc8 1082 */
d2c3c5a6 1083static void __attribute__((__noreturn__)) usage(void)
2180ecc8 1084{
5c7cac85
KZ
1085 FILE *out = stdout;
1086
1087 fputs(USAGE_HEADER, out);
1088 fprintf(out, _(" %s [options] <directory>|<file> ...\n"),
1089 program_invocation_short_name);
1090
1091 fputs(USAGE_SEPARATOR, out);
1092 fputs(_("Consolidate duplicate files using hardlinks.\n"), out);
1093
1094 fputs(USAGE_OPTIONS, out);
1095 fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out);
1096 fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out);
1097 fputs(_(" -n, --dry-run don't actually link anything\n"), out);
259bed15
KZ
1098 fputs(_(" -y, --method <name> file content comparison method\n"), out);
1099
5c7cac85
KZ
1100 fputs(_(" -f, --respect-name filenames have to be identical\n"), out);
1101 fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out);
1102 fputs(_(" -o, --ignore-owner ignore owner changes\n"), out);
1103 fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out);
3854515c 1104#ifdef USE_XATTR
5c7cac85 1105 fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out);
66a38e97
KZ
1106#endif
1107#ifdef USE_REFLINK
1108 fputs(_(" --reflink[=<when>] create clone/CoW copies (auto, always, never)\n"), out);
1109 fputs(_(" --skip-reflinks skip already cloned files (enabled on --reflink)\n"), out);
04ae85a7 1110#endif
5c7cac85
KZ
1111 fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
1112 " lowest hardlink count\n"), out);
1113 fputs(_(" -M, --minimize reverse the meaning of -m\n"), out);
1114 fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
1115 " (lower precedence than minimize/maximize)\n"), out);
1116 fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out);
1117 fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out);
1118 fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out);
40a82456 1119 fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out);
64c8db3c 1120 fputs(_(" -b, --io-size <size> I/O buffer size for file reading (speedup, using more RAM)\n"), out);
f3212b91 1121 fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out);
5c7cac85
KZ
1122 fputs(_(" -c, --content compare only file contents, same as -pot\n"), out);
1123
1124 fputs(USAGE_SEPARATOR, out);
1125 printf(USAGE_HELP_OPTIONS(28));
1126 printf(USAGE_MAN_TAIL("hardlink(1)"));
1127
1128 exit(EXIT_SUCCESS);
2180ecc8
KZ
1129}
1130
2180ecc8
KZ
1131/**
1132 * parse_options - Parse the command line options
1133 * @argc: Number of options
1134 * @argv: Array of options
1135 */
1136static int parse_options(int argc, char *argv[])
1137{
66a38e97
KZ
1138 enum {
1139 OPT_REFLINK = CHAR_MAX + 1,
1140 OPT_SKIP_RELINKS
1141 };
40a82456 1142 static const char optstr[] = "VhvnfpotXcmMOx:y:i:r:S:s:b:q";
5c7cac85
KZ
1143 static const struct option long_options[] = {
1144 {"version", no_argument, NULL, 'V'},
1145 {"help", no_argument, NULL, 'h'},
1146 {"verbose", no_argument, NULL, 'v'},
1147 {"dry-run", no_argument, NULL, 'n'},
1148 {"respect-name", no_argument, NULL, 'f'},
1149 {"ignore-mode", no_argument, NULL, 'p'},
1150 {"ignore-owner", no_argument, NULL, 'o'},
1151 {"ignore-time", no_argument, NULL, 't'},
1152 {"respect-xattrs", no_argument, NULL, 'X'},
1153 {"maximize", no_argument, NULL, 'm'},
1154 {"minimize", no_argument, NULL, 'M'},
1155 {"keep-oldest", no_argument, NULL, 'O'},
1156 {"exclude", required_argument, NULL, 'x'},
1157 {"include", required_argument, NULL, 'i'},
259bed15 1158 {"method", required_argument, NULL, 'y' },
5c7cac85 1159 {"minimum-size", required_argument, NULL, 's'},
40a82456 1160 {"maximum-size", required_argument, NULL, 'S'},
66a38e97
KZ
1161#ifdef USE_REFLINK
1162 {"reflink", optional_argument, NULL, OPT_REFLINK },
1163 {"skip-reflinks", no_argument, NULL, OPT_SKIP_RELINKS },
1164#endif
64c8db3c 1165 {"io-size", required_argument, NULL, 'b'},
5c7cac85
KZ
1166 {"content", no_argument, NULL, 'c'},
1167 {"quiet", no_argument, NULL, 'q'},
f3212b91 1168 {"cache-size", required_argument, NULL, 'r'},
5c7cac85
KZ
1169 {NULL, 0, NULL, 0}
1170 };
1171 static const ul_excl_t excl[] = {
1172 {'q', 'v'},
1173 {0}
1174 };
1175 int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
1176 int c;
1177
1178 while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
1179
1180 err_exclusive_options(c, long_options, excl, excl_st);
1181
1182 switch (c) {
1183 case 'p':
1184 opts.respect_mode = FALSE;
1185 break;
1186 case 'o':
1187 opts.respect_owner = FALSE;
1188 break;
1189 case 't':
1190 opts.respect_time = FALSE;
1191 break;
1192 case 'X':
1193 opts.respect_xattrs = TRUE;
1194 break;
1195 case 'm':
1196 opts.maximise = TRUE;
1197 break;
1198 case 'M':
1199 opts.minimise = TRUE;
1200 break;
1201 case 'O':
1202 opts.keep_oldest = TRUE;
1203 break;
1204 case 'f':
1205 opts.respect_name = TRUE;
1206 break;
1207 case 'v':
1208 opts.verbosity++;
1209 break;
1210 case 'q':
1211 quiet = TRUE;
1212 break;
1213 case 'c':
1214 opts.respect_mode = FALSE;
1215 opts.respect_name = FALSE;
1216 opts.respect_owner = FALSE;
1217 opts.respect_time = FALSE;
1218 opts.respect_xattrs = FALSE;
1219 break;
1220 case 'n':
1221 opts.dry_run = 1;
1222 break;
1223 case 'x':
1224 register_regex(&opts.exclude, optarg);
1225 break;
259bed15
KZ
1226 case 'y':
1227 opts.method = optarg;
1228 break;
5c7cac85
KZ
1229 case 'i':
1230 register_regex(&opts.include, optarg);
1231 break;
1232 case 's':
40a82456 1233 opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size"));
5c7cac85 1234 break;
722762b6 1235 case 'S':
40a82456 1236 opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size"));
5c7cac85 1237 break;
f3212b91
KZ
1238 case 'r':
1239 opts.cache_size = strtosize_or_err(optarg, _("failed to cache size"));
1240 break;
64c8db3c
KZ
1241 case 'b':
1242 opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size"));
722762b6 1243 break;
66a38e97
KZ
1244#ifdef USE_REFLINK
1245 case OPT_REFLINK:
1246 reflink_mode = REFLINK_AUTO;
1247 if (optarg) {
1248 if (strcmp(optarg, "auto") == 0)
1249 reflink_mode = REFLINK_AUTO;
1250 else if (strcmp(optarg, "always") == 0)
1251 reflink_mode = REFLINK_ALWAYS;
1252 else if (strcmp(optarg, "never") == 0)
1253 reflink_mode = REFLINK_NEVER;
1254 else
1255 errx(EXIT_FAILURE, _("unsupported reflink mode; %s"), optarg);
1256 }
1257 if (reflink_mode != REFLINK_NEVER)
1258 reflinks_skip = 1;
1259 break;
1260 case OPT_SKIP_RELINKS:
1261 reflinks_skip = 1;
1262 break;
1263#endif
5c7cac85
KZ
1264 case 'h':
1265 usage();
1266 case 'V':
1267 print_version(EXIT_SUCCESS);
1268 default:
1269 errtryhelp(EXIT_FAILURE);}
1270 }
1271
1272 return 0;
2180ecc8
KZ
1273}
1274
1275/**
5c7cac85
KZ
1276* to_be_called_atexit - Cleanup handler, also prints statistics.
1277*/
2180ecc8
KZ
1278static void to_be_called_atexit(void)
1279{
5c7cac85
KZ
1280 if (stats.started)
1281 print_stats();
2180ecc8
KZ
1282}
1283
1284/**
5c7cac85
KZ
1285* sighandler - Signal handler, sets the global last_signal variable
1286* @i: The signal number
1287*/
2180ecc8
KZ
1288static void sighandler(int i)
1289{
5c7cac85
KZ
1290 if (last_signal != SIGINT)
1291 last_signal = i;
1292 if (i == SIGINT)
1293 putchar('\n');
2180ecc8
KZ
1294}
1295
1296int main(int argc, char *argv[])
1297{
5c7cac85 1298 struct sigaction sa;
259bed15 1299 int rc;
2180ecc8 1300
5c7cac85
KZ
1301 sa.sa_handler = sighandler;
1302 sa.sa_flags = SA_RESTART;
1303 sigfillset(&sa.sa_mask);
2180ecc8 1304
5c7cac85
KZ
1305 /* If we receive a SIGINT, end the processing */
1306 sigaction(SIGINT, &sa, NULL);
1307 sigaction(SIGUSR1, &sa, NULL);
2180ecc8 1308
5c7cac85
KZ
1309 /* Pretty print numeric output */
1310 setlocale(LC_NUMERIC, "");
2180ecc8 1311
5c7cac85
KZ
1312 if (atexit(to_be_called_atexit) != 0)
1313 err(EXIT_FAILURE, _("cannot register exit handler"));
2180ecc8 1314
5c7cac85 1315 parse_options(argc, argv);
2180ecc8 1316
5c7cac85 1317 if (optind == argc)
01480c61 1318 errx(EXIT_FAILURE, _("no directory or file specified"));
2180ecc8 1319
5c7cac85 1320 gettime_monotonic(&stats.start_time);
722762b6 1321
259bed15
KZ
1322 rc = ul_fileeq_init(&fileeq, opts.method);
1323 if (rc != 0 && strcmp(opts.method, "memcmp") != 0) {
1324 warnx(_("cannot initialize %s method, use 'memcmp' fallback"), opts.method);
1325 opts.method = "memcmp";
1326 rc = ul_fileeq_init(&fileeq, opts.method);
1327 }
1328 if (rc < 0)
1329 err(EXIT_FAILURE, _("failed to initialize files comparior"));
722762b6 1330
64c8db3c
KZ
1331 /* defautl I/O size */
1332 if (!opts.io_size) {
1333 if (strcmp(opts.method, "memcmp") == 0)
1334 opts.io_size = 8*1024;
1335 else
1336 opts.io_size = 1024*1024;
1337 }
722762b6 1338
5c7cac85 1339 stats.started = TRUE;
2180ecc8 1340
ee4c3249 1341 jlog(JLOG_VERBOSE2, _("Scanning [device/inode/links]:"));
5c7cac85
KZ
1342 for (; optind < argc; optind++) {
1343 if (nftw(argv[optind], inserter, 20, FTW_PHYS) == -1)
1344 warn(_("cannot process %s"), argv[optind]);
1345 }
2180ecc8 1346
5c7cac85 1347 twalk(files, visitor);
722762b6 1348
259bed15 1349 ul_fileeq_deinit(&fileeq);
5c7cac85 1350 return 0;
0ec20db8 1351}