1 /* hardlink.c - Link multiple identical files together
3 * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
4 * Copyright (C) 2021 Karel Zak <kzak@redhat.com>
6 * SPDX-License-Identifier: MIT
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #define _POSIX_C_SOURCE 200112L /* POSIX functions */
27 #define _XOPEN_SOURCE 600 /* nftw() */
29 #include <sys/types.h> /* stat */
30 #include <sys/stat.h> /* stat */
31 #include <sys/time.h> /* getrlimit, getrusage */
32 #include <sys/resource.h> /* getrlimit, getrusage */
33 #include <fcntl.h> /* posix_fadvise */
34 #include <ftw.h> /* ftw */
35 #include <search.h> /* tsearch() and friends */
36 #include <signal.h> /* SIG*, sigaction */
37 #include <getopt.h> /* getopt_long() */
38 #include <ctype.h> /* tolower() */
44 #include "monotonic.h"
48 #include <regex.h> /* regcomp(), regexec() */
50 #if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR)
51 # include <sys/xattr.h>
55 static int quiet
; /* don't print anything */
57 static struct ul_fileeq fileeq
;
60 * struct file - Information about a file
61 * @st: The stat buffer associated with the file
62 * @next: Next file with the same size
63 * @basename: The offset of the basename in the filename
64 * @path: The path of the file
66 * This contains all information we need about a file.
70 struct ul_fileeq_data data
;
76 #if __STDC_VERSION__ >= 199901L
87 * enum log_level - Logging levels
88 * @JLOG_SUMMARY: Default log level
89 * @JLOG_INFO: Verbose logging (verbose == 1)
90 * @JLOG_VERBOSE1: Verbosity 2
91 * @JLOG_VERBOSE2: Verbosity 3
101 * struct statistic - Statistics about the file
102 * @started: Whether we are post command-line processing
103 * @files: The number of files worked on
104 * @linked: The number of files replaced by a hardlink to a master
105 * @xattr_comparisons: The number of extended attribute comparisons
106 * @comparisons: The number of comparisons
107 * @saved: The (exaggerated) amount of space saved
108 * @start_time: The time we started at
110 static struct statistics
{
114 size_t xattr_comparisons
;
117 struct timeval start_time
;
122 regex_t re
; /* POSIX compatible regex handler */
124 struct hdl_regex
*next
;
128 * struct options - Processed command-line options
129 * @include: A linked list of regular expressions for the --include option
130 * @exclude: A linked list of regular expressions for the --exclude option
131 * @verbosity: The verbosity. Should be one of #enum log_level
132 * @respect_mode: Whether to respect file modes (default = TRUE)
133 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
134 * @respect_name: Whether to respect file names (default = FALSE)
135 * @respect_time: Whether to respect file modification times (default = TRUE)
136 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
137 * @maximise: Choose the file with the highest link count as master
138 * @minimise: Choose the file with the lowest link count as master
139 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
140 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
141 * @min_size: Minimum size of files to consider. (default = 1 byte)
142 * @max_size: Maximum size of files to consider, 0 means unlimited. (default = 0 byte)
144 static struct options
{
145 struct hdl_regex
*include
;
146 struct hdl_regex
*exclude
;
149 signed int verbosity
;
150 unsigned int respect_mode
:1;
151 unsigned int respect_owner
:1;
152 unsigned int respect_name
:1;
153 unsigned int respect_time
:1;
154 unsigned int respect_xattrs
:1;
155 unsigned int maximise
:1;
156 unsigned int minimise
:1;
157 unsigned int keep_oldest
:1;
158 unsigned int dry_run
:1;
164 /* default setting */
166 .respect_mode
= TRUE
,
167 .respect_owner
= TRUE
,
168 .respect_time
= TRUE
,
169 .respect_xattrs
= FALSE
,
170 .keep_oldest
= FALSE
,
172 .cache_size
= 10*1024*1024
178 * A binary tree of files, managed using tsearch(). To see which nodes
179 * are considered equal, see compare_nodes()
182 static void *files_by_ino
;
187 * The last signal we received. We store the signal here in order to be able
188 * to break out of loops gracefully and to return from our nftw() handler.
190 static int last_signal
;
193 * jlog - Logging for hardlink
194 * @level: The log level
195 * @format: A format string for printf()
197 __attribute__((format(printf
, 2, 3)))
198 static void jlog(enum log_level level
, const char *format
, ...)
202 if (quiet
|| level
> (unsigned int)opts
.verbosity
)
205 va_start(args
, format
);
206 vfprintf(stdout
, format
, args
);
212 * CMP - Compare two numerical values, return 1, 0, or -1
216 * Used to compare two integers of any size while avoiding overflow.
218 #define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0))
221 * register_regex - Compile and insert a regular expression into list
222 * @pregs: Pointer to a linked list of regular expressions
223 * @regex: String containing the regular expression to be compiled
225 static void register_regex(struct hdl_regex
**pregs
, const char *regex
)
227 struct hdl_regex
*link
;
230 link
= xmalloc(sizeof(*link
));
232 if ((err
= regcomp(&link
->re
, regex
, REG_NOSUB
| REG_EXTENDED
)) != 0) {
233 size_t size
= regerror(err
, &link
->re
, NULL
, 0);
234 char *buf
= xmalloc(size
+ 1);
236 regerror(err
, &link
->re
, buf
, size
);
238 errx(EXIT_FAILURE
, _("could not compile regular expression %s: %s"),
241 link
->next
= *pregs
; *pregs
= link
;
245 * match_any_regex - Match against multiple regular expressions
246 * @pregs: A linked list of regular expressions
247 * @what: The string to match against
249 * Checks whether any of the regular expressions in the list matches the
252 static int match_any_regex(struct hdl_regex
*pregs
, const char *what
)
254 for (; pregs
!= NULL
; pregs
= pregs
->next
) {
255 if (regexec(&pregs
->re
, what
, 0, NULL
, 0) == 0)
262 * compare_nodes - Node comparison function
263 * @_a: The first node (a #struct file)
264 * @_b: The second node (a #struct file)
266 * Compare the two nodes for the binary tree.
268 static int compare_nodes(const void *_a
, const void *_b
)
270 const struct file
*a
= _a
;
271 const struct file
*b
= _b
;
275 diff
= CMP(a
->st
.st_dev
, b
->st
.st_dev
);
277 diff
= CMP(a
->st
.st_size
, b
->st
.st_size
);
283 * compare_nodes_ino - Node comparison function
284 * @_a: The first node (a #struct file)
285 * @_b: The second node (a #struct file)
287 * Compare the two nodes for the binary tree.
289 static int compare_nodes_ino(const void *_a
, const void *_b
)
291 const struct file
*a
= _a
;
292 const struct file
*b
= _b
;
296 diff
= CMP(a
->st
.st_dev
, b
->st
.st_dev
);
298 diff
= CMP(a
->st
.st_ino
, b
->st
.st_ino
);
300 /* If opts.respect_name is used, we will restrict a struct file to
301 * contain only links with the same basename to keep the rest simple.
303 if (diff
== 0 && opts
.respect_name
)
304 diff
= strcmp(a
->links
->path
+ a
->links
->basename
,
305 b
->links
->path
+ b
->links
->basename
);
311 * print_stats - Print statistics to stdout
313 static void print_stats(void)
315 struct timeval end
= { 0, 0 }, delta
= { 0, 0 };
318 gettime_monotonic(&end
);
319 timersub(&end
, &stats
.start_time
, &delta
);
321 jlog(JLOG_SUMMARY
, "%-15s %s", _("Mode:"),
322 opts
.dry_run
? _("dry-run") : _("real"));
323 jlog(JLOG_SUMMARY
, "%-15s %s", _("Method:"), opts
.method
);
324 jlog(JLOG_SUMMARY
, "%-15s %zu", _("Files:"), stats
.files
);
325 jlog(JLOG_SUMMARY
, _("%-15s %zu files"), _("Linked:"), stats
.linked
);
328 jlog(JLOG_SUMMARY
, _("%-15s %zu xattrs"), _("Compared:"),
329 stats
.xattr_comparisons
);
331 jlog(JLOG_SUMMARY
, _("%-15s %zu files"), _("Compared:"),
334 ssz
= size_to_human_string(SIZE_SUFFIX_3LETTER
|
336 SIZE_DECIMAL_2DIGITS
, stats
.saved
);
338 jlog(JLOG_SUMMARY
, "%-15s %s", _("Saved:"), ssz
);
341 jlog(JLOG_SUMMARY
, _("%-15s %"PRId64
".%06"PRId64
" seconds"), _("Duration:"),
342 (int64_t)delta
.tv_sec
, (int64_t)delta
.tv_usec
);
346 * handle_interrupt - Handle a signal
348 * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
350 static int handle_interrupt(void)
352 switch (last_signal
) {
369 * llistxattr_or_die - Wrapper for llistxattr()
371 * This does the same thing as llistxattr() except that it aborts if any error
372 * other than "not supported" is detected.
374 static ssize_t
llistxattr_or_die(const char *path
, char *list
, size_t size
)
376 ssize_t len
= llistxattr(path
, list
, size
);
378 if (len
< 0 && errno
!= ENOTSUP
)
379 err(EXIT_FAILURE
, _("cannot get xattr names for %s"), path
);
385 * lgetxattr_or_die - Wrapper for lgetxattr()
387 * This does the same thing as lgetxattr() except that it aborts upon error.
389 static ssize_t
lgetxattr_or_die(const char *path
,
390 const char *name
, void *value
, size_t size
)
392 ssize_t len
= lgetxattr(path
, name
, value
, size
);
395 err(EXIT_FAILURE
, _("cannot get xattr value of %s for %s"),
402 * get_xattr_name_count - Count the number of xattr names
403 * @names: a non-empty table of concatenated, null-terminated xattr names
404 * @len: the total length of the table
406 * @Returns the number of xattr names
408 static int get_xattr_name_count(const char *const names
, ssize_t len
)
413 for (name
= names
; name
< (names
+ len
); name
+= strlen(name
) + 1)
420 * cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing
421 * the names they point to.
423 static int cmp_xattr_name_ptrs(const void *ptr1
, const void *ptr2
)
425 return strcmp(*(char *const *)ptr1
, *(char *const *)ptr2
);
429 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
430 * @names - table of concatenated, null-terminated xattr names
431 * @n - the number of names
433 * @Returns allocated table of pointers to the names, sorted alphabetically
435 static const char **get_sorted_xattr_name_table(const char *names
, int n
)
437 const char **table
= xmalloc(n
* sizeof(char *));
440 for (i
= 0; i
< n
; i
++) {
442 names
+= strlen(names
) + 1;
445 qsort(table
, n
, sizeof(char *), cmp_xattr_name_ptrs
);
451 * file_xattrs_equal - Compare the extended attributes of two files
453 * @b: The second file
455 * @Returns: %TRUE if and only if extended attributes are equal
457 static int file_xattrs_equal(const struct file
*a
, const struct file
*b
)
461 char *names_a
= NULL
;
462 char *names_b
= NULL
;
465 const char **name_ptrs_a
= NULL
;
466 const char **name_ptrs_b
= NULL
;
467 void *value_a
= NULL
;
468 void *value_b
= NULL
;
472 assert(a
->links
!= NULL
);
473 assert(b
->links
!= NULL
);
475 jlog(JLOG_VERBOSE1
, _("Comparing xattrs of %s to %s"), a
->links
->path
,
478 stats
.xattr_comparisons
++;
480 len_a
= llistxattr_or_die(a
->links
->path
, NULL
, 0);
481 len_b
= llistxattr_or_die(b
->links
->path
, NULL
, 0);
483 if (len_a
<= 0 && len_b
<= 0)
484 return TRUE
; // xattrs not supported or neither file has any
487 return FALSE
; // total lengths of xattr names differ
489 names_a
= xmalloc(len_a
);
490 names_b
= xmalloc(len_b
);
492 len_a
= llistxattr_or_die(a
->links
->path
, names_a
, len_a
);
493 len_b
= llistxattr_or_die(b
->links
->path
, names_b
, len_b
);
494 assert((len_a
> 0) && (len_a
== len_b
));
496 n_a
= get_xattr_name_count(names_a
, len_a
);
497 n_b
= get_xattr_name_count(names_b
, len_b
);
500 goto exit
; // numbers of xattrs differ
502 name_ptrs_a
= get_sorted_xattr_name_table(names_a
, n_a
);
503 name_ptrs_b
= get_sorted_xattr_name_table(names_b
, n_b
);
505 // We now have two sorted tables of xattr names.
507 for (i
= 0; i
< n_a
; i
++) {
508 if (handle_interrupt())
509 goto exit
; // user wants to quit
511 if (strcmp(name_ptrs_a
[i
], name_ptrs_b
[i
]) != 0)
512 goto exit
; // names at same slot differ
515 lgetxattr_or_die(a
->links
->path
, name_ptrs_a
[i
], NULL
, 0);
517 lgetxattr_or_die(b
->links
->path
, name_ptrs_b
[i
], NULL
, 0);
520 goto exit
; // xattrs with same name, different value lengths
522 value_a
= xmalloc(len_a
);
523 value_b
= xmalloc(len_b
);
525 len_a
= lgetxattr_or_die(a
->links
->path
, name_ptrs_a
[i
],
527 len_b
= lgetxattr_or_die(b
->links
->path
, name_ptrs_b
[i
],
529 assert((len_a
>= 0) && (len_a
== len_b
));
531 if (memcmp(value_a
, value_b
, len_a
) != 0)
532 goto exit
; // xattrs with same name, different values
551 #else /* !USE_XATTR */
552 static int file_xattrs_equal(const struct file
*a
, const struct file
*b
)
556 #endif /* USE_XATTR */
559 * file_may_link_to - Check whether a file may replace another one
561 * @b: The second file
563 * Check whether the two files are considered to have equal attributes and can be
564 * linked. This function does not compare the content of the files!
566 static int file_may_link_to(const struct file
*a
, const struct file
*b
)
568 return (a
->st
.st_size
!= 0 &&
569 a
->st
.st_size
== b
->st
.st_size
&&
570 a
->links
!= NULL
&& b
->links
!= NULL
&&
571 a
->st
.st_dev
== b
->st
.st_dev
&&
572 a
->st
.st_ino
!= b
->st
.st_ino
&&
573 (!opts
.respect_mode
|| a
->st
.st_mode
== b
->st
.st_mode
) &&
574 (!opts
.respect_owner
|| a
->st
.st_uid
== b
->st
.st_uid
) &&
575 (!opts
.respect_owner
|| a
->st
.st_gid
== b
->st
.st_gid
) &&
576 (!opts
.respect_time
|| a
->st
.st_mtime
== b
->st
.st_mtime
) &&
578 || strcmp(a
->links
->path
+ a
->links
->basename
,
579 b
->links
->path
+ b
->links
->basename
) == 0) &&
580 (!opts
.respect_xattrs
|| file_xattrs_equal(a
, b
)));
584 * file_compare - Compare two files to decide which should be master
586 * @b: The second file
588 * Check which of the files should be considered greater and thus serve
589 * as the master when linking (the master is the file that all equal files
590 * will be replaced with).
592 static int file_compare(const struct file
*a
, const struct file
*b
)
595 if (a
->st
.st_dev
== b
->st
.st_dev
&& a
->st
.st_ino
== b
->st
.st_ino
)
598 if (res
== 0 && opts
.maximise
)
599 res
= CMP(a
->st
.st_nlink
, b
->st
.st_nlink
);
600 if (res
== 0 && opts
.minimise
)
601 res
= CMP(b
->st
.st_nlink
, a
->st
.st_nlink
);
603 res
= opts
.keep_oldest
? CMP(b
->st
.st_mtime
, a
->st
.st_mtime
)
604 : CMP(a
->st
.st_mtime
, b
->st
.st_mtime
);
606 res
= CMP(b
->st
.st_ino
, a
->st
.st_ino
);
612 * file_link - Replace b with a link to a
614 * @b: The second file
616 * Link the file, replacing @b with the current one. The file is first
617 * linked to a temporary name, and then renamed to the name of @b, making
618 * the replace atomic (@b will always exist).
620 static int file_link(struct file
*a
, struct file
*b
)
625 assert(a
->links
!= NULL
);
626 assert(b
->links
!= NULL
);
628 ssz
= size_to_human_string(SIZE_SUFFIX_3LETTER
|
630 SIZE_DECIMAL_2DIGITS
, a
->st
.st_size
);
631 jlog(JLOG_INFO
, _("%sLinking %s to %s (-%s)"),
632 opts
.dry_run
? _("[DryRun] ") : "", a
->links
->path
, b
->links
->path
,
638 strlen(b
->links
->path
) + strlen(".hardlink-temporary") + 1;
639 char *new_path
= xmalloc(len
);
641 snprintf(new_path
, len
, "%s.hardlink-temporary",
644 if (link(a
->links
->path
, new_path
) != 0) {
645 warn(_("cannot link %s to %s"), a
->links
->path
,
649 } else if (rename(new_path
, b
->links
->path
) != 0) {
650 warn(_("cannot rename %s to %s"), a
->links
->path
,
652 unlink(new_path
); /* cleanup failed rename */
659 /* Update statistics */
662 /* Increase the link count of this file, and set stat() of other file */
666 if (b
->st
.st_nlink
== 0)
667 stats
.saved
+= a
->st
.st_size
;
669 /* Move the link from file b to a */
671 struct link
*new_link
= b
->links
;
673 b
->links
= b
->links
->next
;
674 new_link
->next
= a
->links
->next
;
675 a
->links
->next
= new_link
;
686 * inserter - Callback function for nftw()
687 * @fpath: The path of the file being visited
688 * @sb: The stat information of the file
689 * @typeflag: The type flag
690 * @ftwbuf: Contains current level of nesting and offset of basename
692 * Called by nftw() for the files. See the manual page for nftw() for
693 * further information.
695 static int inserter(const char *fpath
, const struct stat
*sb
,
696 int typeflag
, struct FTW
*ftwbuf
)
704 if (handle_interrupt())
706 if (typeflag
== FTW_DNR
|| typeflag
== FTW_NS
)
707 warn(_("cannot read %s"), fpath
);
708 if (typeflag
!= FTW_F
|| !S_ISREG(sb
->st_mode
))
711 included
= match_any_regex(opts
.include
, fpath
);
712 excluded
= match_any_regex(opts
.exclude
, fpath
);
714 if ((opts
.exclude
&& excluded
&& !included
) ||
715 (!opts
.exclude
&& opts
.include
&& !included
))
720 if ((uintmax_t) sb
->st_size
< opts
.min_size
) {
722 _("Skipped %s (smaller than configured size)"), fpath
);
726 jlog(JLOG_VERBOSE2
, " %5zu: [%ld/%ld/%zu] %s",
727 stats
.files
, sb
->st_dev
, sb
->st_ino
,
728 (size_t) sb
->st_nlink
, fpath
);
730 if ((opts
.max_size
> 0) && ((uintmax_t) sb
->st_size
> opts
.max_size
)) {
732 _("Skipped %s (greater than configured size)"), fpath
);
736 pathlen
= strlen(fpath
) + 1;
738 fil
= xcalloc(1, sizeof(*fil
));
739 fil
->links
= xcalloc(1, sizeof(struct link
) + pathlen
);
742 fil
->links
->basename
= ftwbuf
->base
;
743 fil
->links
->next
= NULL
;
745 memcpy(fil
->links
->path
, fpath
, pathlen
);
747 node
= tsearch(fil
, &files_by_ino
, compare_nodes_ino
);
753 /* Already known inode, add link to inode information */
754 assert((*node
)->st
.st_dev
== sb
->st_dev
);
755 assert((*node
)->st
.st_ino
== sb
->st_ino
);
757 fil
->links
->next
= (*node
)->links
;
758 (*node
)->links
= fil
->links
;
762 /* New inode, insert into by-size table */
763 node
= tsearch(fil
, &files
, compare_nodes
);
771 if (file_compare(fil
, *node
) >= 0) {
775 for (l
= *node
; l
!= NULL
; l
= l
->next
) {
777 && file_compare(fil
, l
->next
) < 0)
792 warn(_("cannot continue")); /* probably ENOMEM */
796 static inline size_t count_nodes(struct file
*x
)
800 for ( ; x
!= NULL
; x
= x
->next
)
807 * visitor - Callback for twalk()
808 * @nodep: Pointer to a pointer to a #struct file
809 * @which: At which point this visit is (preorder, postorder, endorder)
810 * @depth: The depth of the node in the tree
812 * Visit the nodes in the binary tree. For each node, call hardlinker()
813 * on each #struct file in the linked list of #struct file instances located
816 static void visitor(const void *nodep
, const VISIT which
, const int depth
)
818 struct file
*master
= *(struct file
**)nodep
;
819 struct file
*begin
= master
;
824 if (which
!= leaf
&& which
!= endorder
)
827 for (; master
!= NULL
; master
= master
->next
) {
828 size_t nnodes
, memsiz
;
830 if (handle_interrupt())
832 if (master
->links
== NULL
)
835 /* calculate per file max memory use */
836 nnodes
= count_nodes(master
);
840 /* per-file cache size */
841 memsiz
= opts
.cache_size
/ nnodes
;
842 /* filesiz, readsiz, memsiz */
843 ul_fileeq_set_size(&fileeq
, master
->st
.st_size
, opts
.io_size
, memsiz
);
845 for (other
= master
->next
; other
!= NULL
; other
= other
->next
) {
848 if (handle_interrupt())
851 assert(other
!= other
->next
);
852 assert(other
->st
.st_size
== master
->st
.st_size
);
857 /* check file attributes, etc. */
858 if (!file_may_link_to(master
, other
)) {
860 _("Skipped (attributes mismatch) %s"), other
->links
->path
);
864 /* initialize content comparison */
865 if (!ul_fileeq_data_associated(&master
->data
))
866 ul_fileeq_data_set_file(&master
->data
, master
->links
->path
);
867 if (!ul_fileeq_data_associated(&other
->data
))
868 ul_fileeq_data_set_file(&other
->data
, other
->links
->path
);
871 eq
= ul_fileeq(&fileeq
, &master
->data
, &other
->data
);
873 /* reduce number of open files, keep only master open */
874 ul_fileeq_data_close_file(&other
->data
);
880 _("Skipped (content mismatch) %s"), other
->links
->path
);
885 if (!file_link(master
, other
) && errno
== EMLINK
) {
886 ul_fileeq_data_deinit(&master
->data
);
891 /* don't keep master data in memory */
892 ul_fileeq_data_deinit(&master
->data
);
896 for (other
= begin
; other
!= NULL
; other
= other
->next
) {
897 if (ul_fileeq_data_associated(&other
->data
))
898 ul_fileeq_data_deinit(&other
->data
);
903 * usage - Print the program help and exit
905 static void __attribute__((__noreturn__
)) usage(void)
909 fputs(USAGE_HEADER
, out
);
910 fprintf(out
, _(" %s [options] <directory>|<file> ...\n"),
911 program_invocation_short_name
);
913 fputs(USAGE_SEPARATOR
, out
);
914 fputs(_("Consolidate duplicate files using hardlinks.\n"), out
);
916 fputs(USAGE_OPTIONS
, out
);
917 fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out
);
918 fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out
);
919 fputs(_(" -n, --dry-run don't actually link anything\n"), out
);
920 fputs(_(" -y, --method <name> file content comparison method\n"), out
);
922 fputs(_(" -f, --respect-name filenames have to be identical\n"), out
);
923 fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out
);
924 fputs(_(" -o, --ignore-owner ignore owner changes\n"), out
);
925 fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out
);
927 fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out
);
929 fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
930 " lowest hardlink count\n"), out
);
931 fputs(_(" -M, --minimize reverse the meaning of -m\n"), out
);
932 fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
933 " (lower precedence than minimize/maximize)\n"), out
);
934 fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out
);
935 fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out
);
936 fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out
);
937 fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out
);
938 fputs(_(" -b, --io-size <size> I/O buffer size for file reading (speedup, using more RAM)\n"), out
);
939 fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out
);
940 fputs(_(" -c, --content compare only file contents, same as -pot\n"), out
);
942 fputs(USAGE_SEPARATOR
, out
);
943 printf(USAGE_HELP_OPTIONS(28));
944 printf(USAGE_MAN_TAIL("hardlink(1)"));
950 * parse_options - Parse the command line options
951 * @argc: Number of options
952 * @argv: Array of options
954 static int parse_options(int argc
, char *argv
[])
956 static const char optstr
[] = "VhvnfpotXcmMOx:y:i:r:S:s:b:q";
957 static const struct option long_options
[] = {
958 {"version", no_argument
, NULL
, 'V'},
959 {"help", no_argument
, NULL
, 'h'},
960 {"verbose", no_argument
, NULL
, 'v'},
961 {"dry-run", no_argument
, NULL
, 'n'},
962 {"respect-name", no_argument
, NULL
, 'f'},
963 {"ignore-mode", no_argument
, NULL
, 'p'},
964 {"ignore-owner", no_argument
, NULL
, 'o'},
965 {"ignore-time", no_argument
, NULL
, 't'},
966 {"respect-xattrs", no_argument
, NULL
, 'X'},
967 {"maximize", no_argument
, NULL
, 'm'},
968 {"minimize", no_argument
, NULL
, 'M'},
969 {"keep-oldest", no_argument
, NULL
, 'O'},
970 {"exclude", required_argument
, NULL
, 'x'},
971 {"include", required_argument
, NULL
, 'i'},
972 {"method", required_argument
, NULL
, 'y' },
973 {"minimum-size", required_argument
, NULL
, 's'},
974 {"maximum-size", required_argument
, NULL
, 'S'},
975 {"io-size", required_argument
, NULL
, 'b'},
976 {"content", no_argument
, NULL
, 'c'},
977 {"quiet", no_argument
, NULL
, 'q'},
978 {"cache-size", required_argument
, NULL
, 'r'},
981 static const ul_excl_t excl
[] = {
985 int excl_st
[ARRAY_SIZE(excl
)] = UL_EXCL_STATUS_INIT
;
988 while ((c
= getopt_long(argc
, argv
, optstr
, long_options
, NULL
)) != -1) {
990 err_exclusive_options(c
, long_options
, excl
, excl_st
);
994 opts
.respect_mode
= FALSE
;
997 opts
.respect_owner
= FALSE
;
1000 opts
.respect_time
= FALSE
;
1003 opts
.respect_xattrs
= TRUE
;
1006 opts
.maximise
= TRUE
;
1009 opts
.minimise
= TRUE
;
1012 opts
.keep_oldest
= TRUE
;
1015 opts
.respect_name
= TRUE
;
1024 opts
.respect_mode
= FALSE
;
1025 opts
.respect_name
= FALSE
;
1026 opts
.respect_owner
= FALSE
;
1027 opts
.respect_time
= FALSE
;
1028 opts
.respect_xattrs
= FALSE
;
1034 register_regex(&opts
.exclude
, optarg
);
1037 opts
.method
= optarg
;
1040 register_regex(&opts
.include
, optarg
);
1043 opts
.min_size
= strtosize_or_err(optarg
, _("failed to parse minimum size"));
1046 opts
.max_size
= strtosize_or_err(optarg
, _("failed to parse maximum size"));
1049 opts
.cache_size
= strtosize_or_err(optarg
, _("failed to cache size"));
1052 opts
.io_size
= strtosize_or_err(optarg
, _("failed to parse I/O size"));
1057 print_version(EXIT_SUCCESS
);
1059 errtryhelp(EXIT_FAILURE
);}
1066 * to_be_called_atexit - Cleanup handler, also prints statistics.
1068 static void to_be_called_atexit(void)
1075 * sighandler - Signal handler, sets the global last_signal variable
1076 * @i: The signal number
1078 static void sighandler(int i
)
1080 if (last_signal
!= SIGINT
)
1086 int main(int argc
, char *argv
[])
1088 struct sigaction sa
;
1091 sa
.sa_handler
= sighandler
;
1092 sa
.sa_flags
= SA_RESTART
;
1093 sigfillset(&sa
.sa_mask
);
1095 /* If we receive a SIGINT, end the processing */
1096 sigaction(SIGINT
, &sa
, NULL
);
1097 sigaction(SIGUSR1
, &sa
, NULL
);
1099 /* Pretty print numeric output */
1100 setlocale(LC_NUMERIC
, "");
1102 if (atexit(to_be_called_atexit
) != 0)
1103 err(EXIT_FAILURE
, _("cannot register exit handler"));
1105 parse_options(argc
, argv
);
1108 errx(EXIT_FAILURE
, _("no directory or file specified"));
1110 gettime_monotonic(&stats
.start_time
);
1112 rc
= ul_fileeq_init(&fileeq
, opts
.method
);
1113 if (rc
!= 0 && strcmp(opts
.method
, "memcmp") != 0) {
1114 warnx(_("cannot initialize %s method, use 'memcmp' fallback"), opts
.method
);
1115 opts
.method
= "memcmp";
1116 rc
= ul_fileeq_init(&fileeq
, opts
.method
);
1119 err(EXIT_FAILURE
, _("failed to initialize files comparior"));
1121 /* default I/O size */
1122 if (!opts
.io_size
) {
1123 if (strcmp(opts
.method
, "memcmp") == 0)
1124 opts
.io_size
= 8*1024;
1126 opts
.io_size
= 1024*1024;
1129 stats
.started
= TRUE
;
1131 jlog(JLOG_VERBOSE2
, _("Scanning [device/inode/links]:"));
1132 for (; optind
< argc
; optind
++) {
1133 if (nftw(argv
[optind
], inserter
, 20, FTW_PHYS
) == -1)
1134 warn(_("cannot process %s"), argv
[optind
]);
1137 twalk(files
, visitor
);
1139 ul_fileeq_deinit(&fileeq
);