1 /* hardlink.c - Link multiple identical files together
3 * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
4 * Copyright (C) 2021 Karel Zak <kzak@redhat.com>
6 * SPDX-License-Identifier: MIT
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #define _POSIX_C_SOURCE 200112L /* POSIX functions */
27 #define _XOPEN_SOURCE 600 /* nftw() */
29 #include <sys/types.h> /* stat */
30 #include <sys/stat.h> /* stat */
31 #include <sys/time.h> /* getrlimit, getrusage */
32 #include <sys/resource.h> /* getrlimit, getrusage */
33 #include <fcntl.h> /* posix_fadvise */
34 #include <ftw.h> /* ftw */
35 #include <search.h> /* tsearch() and friends */
36 #include <signal.h> /* SIG*, sigaction */
37 #include <getopt.h> /* getopt_long() */
38 #include <ctype.h> /* tolower() */
44 #include "monotonic.h"
46 /* Use libpcre2posix if it's available */
47 #ifdef HAVE_PCRE2_POSIX
48 # include <pcre2posix.h>
50 # define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
52 # include <regex.h> /* regcomp(), regsearch() */
55 #ifdef HAVE_SYS_XATTR_H
56 # include <sys/xattr.h> /* listxattr, getxattr */
60 * struct file - Information about a file
61 * @st: The stat buffer associated with the file
62 * @next: Next file with the same size
63 * @basename: The offset off the basename in the filename
64 * @path: The path of the file
66 * This contains all information we need about a file.
74 #if __STDC_VERSION__ >= 199901L
85 * enum log_level - Logging levels
86 * @JLOG_SUMMARY: Default log level
87 * @JLOG_INFO: Verbose logging (verbose == 1)
88 * @JLOG_DEBUG1: Verbosity 2
89 * @JLOG_DEBUG2: Verbosity 3
99 * struct statistic - Statistics about the file
100 * @started: Whether we are post command-line processing
101 * @files: The number of files worked on
102 * @linked: The number of files replaced by a hardlink to a master
103 * @xattr_comparisons: The number of extended attribute comparisons
104 * @comparisons: The number of comparisons
105 * @saved: The (exaggerated) amount of space saved
106 * @start_time: The time we started at
108 static struct statistics
{
112 size_t xattr_comparisons
;
115 struct timeval start_time
;
119 * struct options - Processed command-line options
120 * @include: A linked list of regular expressions for the --include option
121 * @exclude: A linked list of regular expressions for the --exclude option
122 * @verbosity: The verbosity. Should be one of #enum log_level
123 * @respect_mode: Whether to respect file modes (default = TRUE)
124 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
125 * @respect_name: Whether to respect file names (default = FALSE)
126 * @respect_time: Whether to respect file modification times (default = TRUE)
127 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
128 * @maximise: Chose the file with the highest link count as master
129 * @minimise: Chose the file with the lowest link count as master
130 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
131 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
132 * @min_size: Minimum size of files to consider. (default = 1 byte)
134 static struct options
{
137 struct regex_link
*next
;
138 } *include
, *exclude
;
139 signed int verbosity
;
140 unsigned int respect_mode
:1;
141 unsigned int respect_owner
:1;
142 unsigned int respect_name
:1;
143 unsigned int respect_time
:1;
144 unsigned int respect_xattrs
:1;
145 unsigned int maximise
:1;
146 unsigned int minimise
:1;
147 unsigned int keep_oldest
:1;
148 unsigned int dry_run
:1;
151 /* default setting */
152 .respect_mode
= TRUE
,
153 .respect_owner
= TRUE
,
154 .respect_time
= TRUE
,
155 .respect_xattrs
= FALSE
,
156 .keep_oldest
= FALSE
,
163 * A binary tree of files, managed using tsearch(). To see which nodes
164 * are considered equal, see compare_nodes()
167 static void *files_by_ino
;
172 * The last signal we received. We store the signal here in order to be able
173 * to break out of loops gracefully and to return from our nftw() handler.
175 static int last_signal
;
178 * jlog - Logging for hardlink
179 * @level: The log level
180 * @format: A format string for printf()
182 __attribute__ ((format(printf
, 2, 3)))
183 static void jlog(enum log_level level
, const char *format
, ...)
187 if (level
> (unsigned int) opts
.verbosity
)
190 va_start(args
, format
);
191 vfprintf(stdout
, format
, args
);
197 * CMP - Compare two numerical values, return 1, 0, or -1
201 * Used to compare two integers of any size while avoiding overflow.
203 #define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0))
206 * regexec_any - Match against multiple regular expressions
207 * @pregs: A linked list of regular expressions
208 * @what: The string to match against
210 * Checks whether any of the regular expressions in the list matches the
213 static int regexec_any(struct regex_link
*pregs
, const char *what
)
215 for (; pregs
!= NULL
; pregs
= pregs
->next
)
216 if (regexec(&pregs
->preg
, what
, 0, NULL
, 0) == 0)
222 * compare_nodes - Node comparison function
223 * @_a: The first node (a #struct file)
224 * @_b: The second node (a #struct file)
226 * Compare the two nodes for the binary tree.
228 static int compare_nodes(const void *_a
, const void *_b
)
230 const struct file
*a
= _a
;
231 const struct file
*b
= _b
;
235 diff
= CMP(a
->st
.st_dev
, b
->st
.st_dev
);
237 diff
= CMP(a
->st
.st_size
, b
->st
.st_size
);
243 * compare_nodes_ino - Node comparison function
244 * @_a: The first node (a #struct file)
245 * @_b: The second node (a #struct file)
247 * Compare the two nodes for the binary tree.
249 static int compare_nodes_ino(const void *_a
, const void *_b
)
251 const struct file
*a
= _a
;
252 const struct file
*b
= _b
;
256 diff
= CMP(a
->st
.st_dev
, b
->st
.st_dev
);
258 diff
= CMP(a
->st
.st_ino
, b
->st
.st_ino
);
260 /* If opts.respect_name is used, we will restrict a struct file to
261 * contain only links with the same basename to keep the rest simple.
263 if (diff
== 0 && opts
.respect_name
)
264 diff
= strcmp(a
->links
->path
+ a
->links
->basename
,
265 b
->links
->path
+ b
->links
->basename
);
271 * print_stats - Print statistics to stdout
273 static void print_stats(void)
275 struct timeval end
= { 0, 0 }, delta
= { 0, 0 };
278 gettime_monotonic(&end
);
279 timersub(&end
, &stats
.start_time
, &delta
);
281 jlog(JLOG_SUMMARY
, "%-15s %s", _("Mode:"),
282 opts
.dry_run
? _("dry-run") : _("real"));
283 jlog(JLOG_SUMMARY
, "%-15s %zu", _("Files:"), stats
.files
);
284 jlog(JLOG_SUMMARY
, _("%-15s %zu files"), _("Linked:"), stats
.linked
);
285 #ifdef HAVE_SYS_XATTR_H
286 jlog(JLOG_SUMMARY
, _("%-15s %zu xattrs"), _("Compared:"), stats
.xattr_comparisons
);
288 jlog(JLOG_SUMMARY
, _("%-15s %zu files"), _("Compared:"), stats
.comparisons
);
290 ssz
= size_to_human_string(SIZE_SUFFIX_3LETTER
|
292 SIZE_DECIMAL_2DIGITS
, stats
.saved
);
294 jlog(JLOG_SUMMARY
, "%-15s %s", _("Saved:"), ssz
);
297 jlog(JLOG_SUMMARY
, _("%-15s %ld.%06ld seconds"), _("Duration:"),
298 (long)delta
.tv_sec
, (long)delta
.tv_usec
);
302 * handle_interrupt - Handle a signal
304 * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
306 static int handle_interrupt(void)
308 switch (last_signal
) {
322 #ifdef HAVE_SYS_XATTR_H
325 * llistxattr_or_die - Wrapper for llistxattr()
327 * This does the same thing as llistxattr() except that it aborts if any error
328 * other than "not supported" is detected.
330 static ssize_t
llistxattr_or_die(const char *path
, char *list
, size_t size
)
332 ssize_t len
= llistxattr(path
, list
, size
);
334 if (len
< 0 && errno
!= ENOTSUP
)
335 err(EXIT_FAILURE
, _("cannot get xattr names for %s"), path
);
341 * lgetxattr_or_die - Wrapper for lgetxattr()
343 * This does the same thing as lgetxattr() except that it aborts upon error.
345 static ssize_t
lgetxattr_or_die(const char *path
, const char *name
, void *value
,
348 ssize_t len
= lgetxattr(path
, name
, value
, size
);
351 err(EXIT_FAILURE
, _("cannot get xattr value of %s for %s"), name
, path
);
357 * get_xattr_name_count - Count the number of xattr names
358 * @names: a non-empty table of concatenated, null-terminated xattr names
359 * @len: the total length of the table
361 * @Returns the number of xattr names
363 static int get_xattr_name_count(const char *const names
, ssize_t len
)
368 for (name
= names
; name
< (names
+ len
); name
+= strlen(name
) + 1)
375 * cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing
376 * the names they point to.
378 static int cmp_xattr_name_ptrs(const void *ptr1
, const void *ptr2
)
380 return strcmp(*(char *const *) ptr1
, *(char *const *) ptr2
);
384 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
385 * @names - table of concatenated, null-terminated xattr names
386 * @n - the number of names
388 * @Returns allocated table of pointers to the names, sorted alphabetically
390 static const char **get_sorted_xattr_name_table(const char *names
, int n
)
392 const char **table
= xmalloc(n
* sizeof(char *));
395 for (i
= 0; i
< n
; i
++) {
397 names
+= strlen(names
) + 1;
400 qsort(table
, n
, sizeof(char *), cmp_xattr_name_ptrs
);
406 * file_xattrs_equal - Compare the extended attributes of two files
408 * @b: The second file
410 * @Returns: %TRUE if and only if extended attributes are equal
412 static int file_xattrs_equal(const struct file
*a
, const struct file
*b
)
416 char *names_a
= NULL
;
417 char *names_b
= NULL
;
420 const char **name_ptrs_a
= NULL
;
421 const char **name_ptrs_b
= NULL
;
422 void *value_a
= NULL
;
423 void *value_b
= NULL
;
427 assert(a
->links
!= NULL
);
428 assert(b
->links
!= NULL
);
430 jlog(JLOG_DEBUG1
, "Comparing xattrs of %s to %s", a
->links
->path
,
433 stats
.xattr_comparisons
++;
435 len_a
= llistxattr_or_die(a
->links
->path
, NULL
, 0);
436 len_b
= llistxattr_or_die(b
->links
->path
, NULL
, 0);
438 if (len_a
<= 0 && len_b
<= 0)
439 return TRUE
; // xattrs not supported or neither file has any
442 return FALSE
; // total lengths of xattr names differ
444 names_a
= xmalloc(len_a
);
445 names_b
= xmalloc(len_b
);
447 len_a
= llistxattr_or_die(a
->links
->path
, names_a
, len_a
);
448 len_b
= llistxattr_or_die(b
->links
->path
, names_b
, len_b
);
449 assert((len_a
> 0) && (len_a
== len_b
));
451 n_a
= get_xattr_name_count(names_a
, len_a
);
452 n_b
= get_xattr_name_count(names_b
, len_b
);
455 goto exit
; // numbers of xattrs differ
457 name_ptrs_a
= get_sorted_xattr_name_table(names_a
, n_a
);
458 name_ptrs_b
= get_sorted_xattr_name_table(names_b
, n_b
);
460 // We now have two sorted tables of xattr names.
462 for (i
= 0; i
< n_a
; i
++) {
463 if (handle_interrupt())
464 goto exit
; // user wants to quit
466 if (strcmp(name_ptrs_a
[i
], name_ptrs_b
[i
]) != 0)
467 goto exit
; // names at same slot differ
469 len_a
= lgetxattr_or_die(a
->links
->path
, name_ptrs_a
[i
], NULL
, 0);
470 len_b
= lgetxattr_or_die(b
->links
->path
, name_ptrs_b
[i
], NULL
, 0);
473 goto exit
; // xattrs with same name, different value lengths
475 value_a
= xmalloc(len_a
);
476 value_b
= xmalloc(len_b
);
478 len_a
= lgetxattr_or_die(a
->links
->path
, name_ptrs_a
[i
],
480 len_b
= lgetxattr_or_die(b
->links
->path
, name_ptrs_b
[i
],
482 assert((len_a
>= 0) && (len_a
== len_b
));
484 if (memcmp(value_a
, value_b
, len_a
) != 0)
485 goto exit
; // xattrs with same name, different values
505 static int file_xattrs_equal(const struct file
*a
, const struct file
*b
)
512 * file_contents_equal - Compare contents of two files for equality
514 * @b: The second file
516 * Compare the contents of the files for equality
518 static int file_contents_equal(const struct file
*a
, const struct file
*b
)
524 int cmp
= 0; /* zero => equal */
525 off_t off
= 0; /* current offset */
527 assert(a
->links
!= NULL
);
528 assert(b
->links
!= NULL
);
530 jlog(JLOG_DEBUG1
, "Comparing %s to %s", a
->links
->path
, b
->links
->path
);
534 if ((fa
= fopen(a
->links
->path
, "rb")) == NULL
)
536 if ((fb
= fopen(b
->links
->path
, "rb")) == NULL
)
539 #if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
540 posix_fadvise(fileno(fa
), 0, 0, POSIX_FADV_SEQUENTIAL
);
541 posix_fadvise(fileno(fb
), 0, 0, POSIX_FADV_SEQUENTIAL
);
544 while (!handle_interrupt() && cmp
== 0) {
548 ca
= fread(buf_a
, 1, sizeof(buf_a
), fa
);
549 if (ca
< sizeof(buf_a
) && ferror(fa
))
552 cb
= fread(buf_b
, 1, sizeof(buf_b
), fb
);
553 if (cb
< sizeof(buf_b
) && ferror(fb
))
558 if ((ca
!= cb
|| ca
== 0)) {
562 cmp
= memcmp(buf_a
, buf_b
, ca
);
569 return !handle_interrupt() && cmp
== 0;
571 if (fa
== NULL
|| fb
== NULL
)
572 warn(_("cannot open %s"), fa
? b
->links
->path
: a
->links
->path
);
574 warn(_("cannot read %s"), ferror(fa
) ? a
->links
->path
: b
->links
->path
);
580 * file_may_link_to - Check whether a file may replace another one
582 * @b: The second file
584 * Check whether the two fies are considered equal and can be linked
585 * together. If the two files are identical, the result will be FALSE,
586 * as replacing a link with an identical one is stupid.
588 static int file_may_link_to(const struct file
*a
, const struct file
*b
)
590 return (a
->st
.st_size
!= 0 &&
591 a
->st
.st_size
== b
->st
.st_size
&&
592 a
->links
!= NULL
&& b
->links
!= NULL
&&
593 a
->st
.st_dev
== b
->st
.st_dev
&&
594 a
->st
.st_ino
!= b
->st
.st_ino
&&
595 (!opts
.respect_mode
|| a
->st
.st_mode
== b
->st
.st_mode
) &&
596 (!opts
.respect_owner
|| a
->st
.st_uid
== b
->st
.st_uid
) &&
597 (!opts
.respect_owner
|| a
->st
.st_gid
== b
->st
.st_gid
) &&
598 (!opts
.respect_time
|| a
->st
.st_mtime
== b
->st
.st_mtime
) &&
600 || strcmp(a
->links
->path
+ a
->links
->basename
,
601 b
->links
->path
+ b
->links
->basename
) == 0) &&
602 (!opts
.respect_xattrs
|| file_xattrs_equal(a
, b
)) &&
603 file_contents_equal(a
, b
));
607 * file_compare - Compare two files to decide which should be master
609 * @b: The second file
611 * Check which of the files should be considered greater and thus serve
612 * as the master when linking (the master is the file that all equal files
613 * will be replaced with).
615 static int file_compare(const struct file
*a
, const struct file
*b
)
618 if (a
->st
.st_dev
== b
->st
.st_dev
&& a
->st
.st_ino
== b
->st
.st_ino
)
621 if (res
== 0 && opts
.maximise
)
622 res
= CMP(a
->st
.st_nlink
, b
->st
.st_nlink
);
623 if (res
== 0 && opts
.minimise
)
624 res
= CMP(b
->st
.st_nlink
, a
->st
.st_nlink
);
626 res
= opts
.keep_oldest
? CMP(b
->st
.st_mtime
, a
->st
.st_mtime
)
627 : CMP(a
->st
.st_mtime
, b
->st
.st_mtime
);
629 res
= CMP(b
->st
.st_ino
, a
->st
.st_ino
);
635 * file_link - Replace b with a link to a
637 * @b: The second file
639 * Link the file, replacing @b with the current one. The file is first
640 * linked to a temporary name, and then renamed to the name of @b, making
641 * the replace atomic (@b will always exist).
643 static int file_link(struct file
*a
, struct file
*b
)
648 assert(a
->links
!= NULL
);
649 assert(b
->links
!= NULL
);
651 ssz
= size_to_human_string(SIZE_SUFFIX_3LETTER
|
653 SIZE_DECIMAL_2DIGITS
, a
->st
.st_size
);
654 jlog(JLOG_INFO
, "%sLinking %s to %s (-%s)",
655 opts
.dry_run
? "[DryRun] " : "", a
->links
->path
, b
->links
->path
, ssz
);
659 size_t len
= strlen(b
->links
->path
) + strlen(".hardlink-temporary") + 1;
660 char *new_path
= xmalloc(len
);
662 snprintf(new_path
, len
, "%s.hardlink-temporary", b
->links
->path
);
664 if (link(a
->links
->path
, new_path
) != 0) {
665 warn(_("cannot link %s to %s"), a
->links
->path
, new_path
);
668 } else if (rename(new_path
, b
->links
->path
) != 0) {
669 warn(_("cannot rename %s to %s"), a
->links
->path
, new_path
);
670 unlink(new_path
); /* cleanup failed rename */
677 /* Update statistics */
680 /* Increase the link count of this file, and set stat() of other file */
684 if (b
->st
.st_nlink
== 0)
685 stats
.saved
+= a
->st
.st_size
;
687 /* Move the link from file b to a */
689 struct link
*new_link
= b
->links
;
691 b
->links
= b
->links
->next
;
692 new_link
->next
= a
->links
->next
;
693 a
->links
->next
= new_link
;
704 * inserter - Callback function for nftw()
705 * @fpath: The path of the file being visited
706 * @sb: The stat information of the file
707 * @typeflag: The type flag
708 * @ftwbuf: Contains current level of nesting and offset of basename
710 * Called by nftw() for the files. See the manual page for nftw() for
711 * further information.
713 static int inserter(const char *fpath
, const struct stat
*sb
, int typeflag
,
722 if (handle_interrupt())
724 if (typeflag
== FTW_DNR
|| typeflag
== FTW_NS
)
725 warn(_("cannot read %s"), fpath
);
726 if (typeflag
!= FTW_F
|| !S_ISREG(sb
->st_mode
))
729 included
= regexec_any(opts
.include
, fpath
);
730 excluded
= regexec_any(opts
.exclude
, fpath
);
732 if ((opts
.exclude
&& excluded
&& !included
) ||
733 (!opts
.exclude
&& opts
.include
&& !included
))
738 if ((uintmax_t) sb
->st_size
< opts
.min_size
) {
739 jlog(JLOG_DEBUG1
, "Skipped %s (smaller than configured size)", fpath
);
743 jlog(JLOG_DEBUG2
, "Visiting %s (file %zu)", fpath
, stats
.files
);
745 pathlen
= strlen(fpath
) + 1;
747 fil
= xcalloc(1, sizeof(*fil
));
748 fil
->links
= xcalloc(1, sizeof(struct link
) + pathlen
);
751 fil
->links
->basename
= ftwbuf
->base
;
752 fil
->links
->next
= NULL
;
754 memcpy(fil
->links
->path
, fpath
, pathlen
);
756 node
= tsearch(fil
, &files_by_ino
, compare_nodes_ino
);
762 /* Already known inode, add link to inode information */
763 assert((*node
)->st
.st_dev
== sb
->st_dev
);
764 assert((*node
)->st
.st_ino
== sb
->st_ino
);
766 fil
->links
->next
= (*node
)->links
;
767 (*node
)->links
= fil
->links
;
771 /* New inode, insert into by-size table */
772 node
= tsearch(fil
, &files
, compare_nodes
);
780 if (file_compare(fil
, *node
) >= 0) {
784 for (l
= *node
; l
!= NULL
; l
= l
->next
) {
785 if (l
->next
!= NULL
&& file_compare(fil
, l
->next
) < 0)
800 warn(_("cannot continue")); /* probably ENOMEM */
805 * visitor - Callback for twalk()
806 * @nodep: Pointer to a pointer to a #struct file
807 * @which: At which point this visit is (preorder, postorder, endorder)
808 * @depth: The depth of the node in the tree
810 * Visit the nodes in the binary tree. For each node, call hardlinker()
811 * on each #struct file in the linked list of #struct file instances located
814 static void visitor(const void *nodep
, const VISIT which
, const int depth
)
816 struct file
*master
= *(struct file
**) nodep
;
821 if (which
!= leaf
&& which
!= endorder
)
824 for (; master
!= NULL
; master
= master
->next
) {
825 if (handle_interrupt())
827 if (master
->links
== NULL
)
830 for (other
= master
->next
; other
!= NULL
; other
= other
->next
) {
831 if (handle_interrupt())
834 assert(other
!= other
->next
);
835 assert(other
->st
.st_size
== master
->st
.st_size
);
837 if (other
->links
== NULL
|| !file_may_link_to(master
, other
))
840 if (!file_link(master
, other
) && errno
== EMLINK
)
847 * usage - Print the program help and exit
849 static void __attribute__((__noreturn__
)) usage(void)
853 fputs(USAGE_HEADER
, out
);
854 fprintf(out
, _(" %s [options] <directory>|<file> ...\n"), program_invocation_short_name
);
856 fputs(USAGE_SEPARATOR
, out
);
857 fputs(_("Consolidate duplicate files using hardlinks.\n"), out
);
859 fputs(USAGE_OPTIONS
, out
);
860 fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out
);
861 fputs(_(" -n, --dry-run don't actually link anything\n"), out
);
862 fputs(_(" -f, --respect-name filenames have to be identical\n"), out
);
863 fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out
);
864 fputs(_(" -o, --ignore-owner ignore owner changes\n"), out
);
865 fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out
);
866 #ifdef HAVE_SYS_XATTR_H
867 fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out
);
869 fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
870 " lowest hardlink count\n"), out
);
871 fputs(_(" -M, --minimize reverse the meaning of -m\n"), out
);
872 fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
873 " (lower precedence than minimize/maximize)\n"), out
);
874 fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out
);
875 fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out
);
876 fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out
);
877 fputs(_(" -c, --content compare only file contents, same as -pot\n"), out
);
879 fputs(USAGE_SEPARATOR
, out
);
880 printf(USAGE_HELP_OPTIONS(28));
882 printf(USAGE_MAN_TAIL("hardlink(1)"));
888 * register_regex - Compile and insert a regular expression into list
889 * @pregs: Pointer to a linked list of regular expressions
890 * @regex: String containing the regular expression to be compiled
892 static void register_regex(struct regex_link
**pregs
, const char *regex
)
894 struct regex_link
*link
= xmalloc(sizeof(*link
));
897 if ((err
= regcomp(&link
->preg
, regex
, REG_NOSUB
| REG_EXTENDED
)) != 0) {
898 size_t size
= regerror(err
, &link
->preg
, NULL
, 0);
899 char *buf
= xmalloc(size
+ 1);
901 regerror(err
, &link
->preg
, buf
, size
);
903 errx(EXIT_FAILURE
, _("could not compile regular expression %s: %s"),
912 * parse_options - Parse the command line options
913 * @argc: Number of options
914 * @argv: Array of options
916 static int parse_options(int argc
, char *argv
[])
918 static const char optstr
[] = "VhvnfpotXcmMOx:i:s:";
919 static const struct option long_options
[] = {
920 {"version", no_argument
, NULL
, 'V'},
921 {"help", no_argument
, NULL
, 'h'},
922 {"verbose", no_argument
, NULL
, 'v'},
923 {"dry-run", no_argument
, NULL
, 'n'},
924 {"respect-name", no_argument
, NULL
, 'f'},
925 {"ignore-mode", no_argument
, NULL
, 'p'},
926 {"ignore-owner", no_argument
, NULL
, 'o'},
927 {"ignore-time", no_argument
, NULL
, 't'},
928 {"respect-xattrs", no_argument
, NULL
, 'X'},
929 {"maximize", no_argument
, NULL
, 'm'},
930 {"minimize", no_argument
, NULL
, 'M'},
931 {"keep-oldest", no_argument
, NULL
, 'O'},
932 {"exclude", required_argument
, NULL
, 'x'},
933 {"include", required_argument
, NULL
, 'i'},
934 {"minimum-size", required_argument
, NULL
, 's'},
935 {"content", no_argument
, NULL
, 'c'},
940 while ((c
= getopt_long(argc
, argv
, optstr
, long_options
, NULL
)) != -1) {
943 opts
.respect_mode
= FALSE
;
946 opts
.respect_owner
= FALSE
;
949 opts
.respect_time
= FALSE
;
952 opts
.respect_xattrs
= TRUE
;
955 opts
.maximise
= TRUE
;
958 opts
.minimise
= TRUE
;
961 opts
.keep_oldest
= TRUE
;
964 opts
.respect_name
= TRUE
;
970 opts
.respect_mode
= FALSE
;
971 opts
.respect_name
= FALSE
;
972 opts
.respect_owner
= FALSE
;
973 opts
.respect_time
= FALSE
;
974 opts
.respect_xattrs
= FALSE
;
980 register_regex(&opts
.exclude
, optarg
);
983 register_regex(&opts
.include
, optarg
);
986 opts
.min_size
= strtosize_or_err(optarg
, _("failed to parse size"));
992 print_version(EXIT_SUCCESS
);
994 errtryhelp(EXIT_FAILURE
);
1001 * to_be_called_atexit - Cleanup handler, also prints statistics.
1003 static void to_be_called_atexit(void)
1010 * sighandler - Signal handler, sets the global last_signal variable
1011 * @i: The signal number
1013 static void sighandler(int i
)
1015 if (last_signal
!= SIGINT
)
1021 int main(int argc
, char *argv
[])
1023 struct sigaction sa
;
1025 sa
.sa_handler
= sighandler
;
1026 sa
.sa_flags
= SA_RESTART
;
1027 sigfillset(&sa
.sa_mask
);
1029 /* If we receive a SIGINT, end the processing */
1030 sigaction(SIGINT
, &sa
, NULL
);
1031 sigaction(SIGUSR1
, &sa
, NULL
);
1033 /* Pretty print numeric output */
1034 setlocale(LC_NUMERIC
, "");
1036 if (atexit(to_be_called_atexit
) != 0)
1037 err(EXIT_FAILURE
, _("cannot register exit handler"));
1039 parse_options(argc
, argv
);
1042 errx(EXIT_FAILURE
, _("no directory of dile specified"));
1044 gettime_monotonic(&stats
.start_time
);
1045 stats
.started
= TRUE
;
1047 for (; optind
< argc
; optind
++)
1048 if (nftw(argv
[optind
], inserter
, 20, FTW_PHYS
) == -1)
1049 warn(_("cannot process %s"), argv
[optind
]);
1051 twalk(files
, visitor
);