]> git.ipfire.org Git - thirdparty/util-linux.git/blame - misc-utils/hardlink.c
test_uuid_parser: fix time_t=long assumptions
[thirdparty/util-linux.git] / misc-utils / hardlink.c
CommitLineData
2180ecc8 1/* hardlink.c - Link multiple identical files together
55c000e1 2 *
2180ecc8 3 * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
cd6b8d39 4 * Copyright (C) 2021 Karel Zak <kzak@redhat.com>
55c000e1 5 *
2180ecc8 6 * SPDX-License-Identifier: MIT
0b05aab4 7 *
2180ecc8
KZ
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
55c000e1 14 *
2180ecc8
KZ
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
55c000e1 17 *
2180ecc8
KZ
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
55c000e1 25 */
5c7cac85
KZ
26#define _POSIX_C_SOURCE 200112L /* POSIX functions */
27#define _XOPEN_SOURCE 600 /* nftw() */
28
29#include <sys/types.h> /* stat */
30#include <sys/stat.h> /* stat */
31#include <sys/time.h> /* getrlimit, getrusage */
32#include <sys/resource.h> /* getrlimit, getrusage */
33#include <fcntl.h> /* posix_fadvise */
34#include <ftw.h> /* ftw */
35#include <search.h> /* tsearch() and friends */
cd6b8d39
KZ
36#include <signal.h> /* SIG*, sigaction */
37#include <getopt.h> /* getopt_long() */
5c7cac85 38#include <ctype.h> /* tolower() */
2180ecc8 39
cd6b8d39
KZ
40#include "nls.h"
41#include "c.h"
0361f744 42#include "xalloc.h"
631e6865 43#include "strutils.h"
06d8fe89 44#include "monotonic.h"
4c467ebc 45#include "optutils.h"
2180ecc8 46
43989bad
KZ
47/* Use libpcre2posix if it's available */
48#ifdef HAVE_PCRE2_POSIX
49# include <pcre2posix.h>
50# undef REG_NOSUB
5c7cac85 51# define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
2180ecc8 52#else
5c7cac85 53# include <regex.h> /* regcomp(), regsearch() */
2180ecc8 54#endif
3807e71a 55
40b119b9 56#ifdef HAVE_SYS_XATTR_H
5c7cac85 57# include <sys/xattr.h> /* listxattr, getxattr */
2180ecc8 58#endif
0ec20db8 59
4c467ebc
KZ
60static int quiet; /* don't print anything */
61
2180ecc8
KZ
62/**
63 * struct file - Information about a file
64 * @st: The stat buffer associated with the file
65 * @next: Next file with the same size
66 * @basename: The offset off the basename in the filename
67 * @path: The path of the file
68 *
69 * This contains all information we need about a file.
70 */
71struct file {
5c7cac85
KZ
72 struct stat st;
73 struct file *next;
74 struct link {
75 struct link *next;
76 int basename;
2180ecc8 77#if __STDC_VERSION__ >= 199901L
5c7cac85 78 char path[];
2180ecc8 79#elif __GNUC__
5c7cac85 80 char path[0];
2180ecc8 81#else
5c7cac85 82 char path[1];
2180ecc8 83#endif
5c7cac85 84 } *links;
3807e71a 85};
0ec20db8 86
2180ecc8
KZ
87/**
88 * enum log_level - Logging levels
2180ecc8
KZ
89 * @JLOG_SUMMARY: Default log level
90 * @JLOG_INFO: Verbose logging (verbose == 1)
5034cd9f
KZ
91 * @JLOG_VERBOSE1: Verbosity 2
92 * @JLOG_VERBOSE2: Verbosity 3
2180ecc8
KZ
93 */
94enum log_level {
5c7cac85
KZ
95 JLOG_SUMMARY,
96 JLOG_INFO,
97 JLOG_VERBOSE1,
98 JLOG_VERBOSE2
3807e71a 99};
0ec20db8 100
2180ecc8
KZ
101/**
102 * struct statistic - Statistics about the file
103 * @started: Whether we are post command-line processing
104 * @files: The number of files worked on
105 * @linked: The number of files replaced by a hardlink to a master
106 * @xattr_comparisons: The number of extended attribute comparisons
107 * @comparisons: The number of comparisons
108 * @saved: The (exaggerated) amount of space saved
06d8fe89 109 * @start_time: The time we started at
2180ecc8
KZ
110 */
111static struct statistics {
5c7cac85
KZ
112 int started;
113 size_t files;
114 size_t linked;
115 size_t xattr_comparisons;
116 size_t comparisons;
117 double saved;
118 struct timeval start_time;
2180ecc8 119} stats;
3807e71a 120
2180ecc8
KZ
121/**
122 * struct options - Processed command-line options
123 * @include: A linked list of regular expressions for the --include option
124 * @exclude: A linked list of regular expressions for the --exclude option
125 * @verbosity: The verbosity. Should be one of #enum log_level
126 * @respect_mode: Whether to respect file modes (default = TRUE)
127 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
128 * @respect_name: Whether to respect file names (default = FALSE)
129 * @respect_time: Whether to respect file modification times (default = TRUE)
130 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
131 * @maximise: Chose the file with the highest link count as master
132 * @minimise: Chose the file with the lowest link count as master
133 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
134 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
135 * @min_size: Minimum size of files to consider. (default = 1 byte)
136 */
137static struct options {
5c7cac85
KZ
138 struct regex_link {
139 regex_t preg;
140 struct regex_link *next;
141 } *include, *exclude;
142
143 signed int verbosity;
144 unsigned int respect_mode:1;
145 unsigned int respect_owner:1;
146 unsigned int respect_name:1;
147 unsigned int respect_time:1;
148 unsigned int respect_xattrs:1;
149 unsigned int maximise:1;
150 unsigned int minimise:1;
151 unsigned int keep_oldest:1;
152 unsigned int dry_run:1;
153 uintmax_t min_size;
9e7235e7 154} opts = {
5c7cac85
KZ
155 /* default setting */
156 .respect_mode = TRUE,
157 .respect_owner = TRUE,
158 .respect_time = TRUE,
159 .respect_xattrs = FALSE,
160 .keep_oldest = FALSE,
161 .min_size = 1
9e7235e7 162};
2180ecc8
KZ
163
164/*
165 * files
166 *
167 * A binary tree of files, managed using tsearch(). To see which nodes
168 * are considered equal, see compare_nodes()
169 */
170static void *files;
171static void *files_by_ino;
172
173/*
174 * last_signal
175 *
176 * The last signal we received. We store the signal here in order to be able
177 * to break out of loops gracefully and to return from our nftw() handler.
178 */
179static int last_signal;
0ec20db8 180
2180ecc8
KZ
181/**
182 * jlog - Logging for hardlink
183 * @level: The log level
184 * @format: A format string for printf()
185 */
5c7cac85 186__attribute__((format(printf, 2, 3)))
2180ecc8 187static void jlog(enum log_level level, const char *format, ...)
0ec20db8 188{
5c7cac85 189 va_list args;
2180ecc8 190
5c7cac85
KZ
191 if (quiet || level > (unsigned int)opts.verbosity)
192 return;
423e80c1 193
5c7cac85
KZ
194 va_start(args, format);
195 vfprintf(stdout, format, args);
196 va_end(args);
197 fputc('\n', stdout);
0ec20db8
DJ
198}
199
2180ecc8
KZ
200/**
201 * CMP - Compare two numerical values, return 1, 0, or -1
202 * @a: First value
203 * @b: Second value
204 *
205 * Used to compare two integers of any size while avoiding overflow.
206 */
207#define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0))
208
2180ecc8
KZ
209/**
210 * regexec_any - Match against multiple regular expressions
211 * @pregs: A linked list of regular expressions
212 * @what: The string to match against
213 *
214 * Checks whether any of the regular expressions in the list matches the
215 * string.
216 */
cd6b8d39 217static int regexec_any(struct regex_link *pregs, const char *what)
0ec20db8 218{
5c7cac85
KZ
219 for (; pregs != NULL; pregs = pregs->next) {
220 if (regexec(&pregs->preg, what, 0, NULL, 0) == 0)
221 return TRUE;
222 }
223 return FALSE;
0ec20db8
DJ
224}
225
2180ecc8
KZ
226/**
227 * compare_nodes - Node comparison function
228 * @_a: The first node (a #struct file)
229 * @_b: The second node (a #struct file)
230 *
231 * Compare the two nodes for the binary tree.
232 */
233static int compare_nodes(const void *_a, const void *_b)
94b040b0 234{
5c7cac85
KZ
235 const struct file *a = _a;
236 const struct file *b = _b;
237 int diff = 0;
bd7722af 238
5c7cac85
KZ
239 if (diff == 0)
240 diff = CMP(a->st.st_dev, b->st.st_dev);
241 if (diff == 0)
242 diff = CMP(a->st.st_size, b->st.st_size);
2180ecc8 243
5c7cac85 244 return diff;
94b040b0
JN
245}
246
2180ecc8
KZ
247/**
248 * compare_nodes_ino - Node comparison function
249 * @_a: The first node (a #struct file)
250 * @_b: The second node (a #struct file)
251 *
252 * Compare the two nodes for the binary tree.
253 */
254static int compare_nodes_ino(const void *_a, const void *_b)
94b040b0 255{
5c7cac85
KZ
256 const struct file *a = _a;
257 const struct file *b = _b;
258 int diff = 0;
259
260 if (diff == 0)
261 diff = CMP(a->st.st_dev, b->st.st_dev);
262 if (diff == 0)
263 diff = CMP(a->st.st_ino, b->st.st_ino);
264
265 /* If opts.respect_name is used, we will restrict a struct file to
266 * contain only links with the same basename to keep the rest simple.
267 */
268 if (diff == 0 && opts.respect_name)
269 diff = strcmp(a->links->path + a->links->basename,
270 b->links->path + b->links->basename);
271
272 return diff;
94b040b0
JN
273}
274
2180ecc8
KZ
275/**
276 * print_stats - Print statistics to stdout
277 */
278static void print_stats(void)
94b040b0 279{
5c7cac85
KZ
280 struct timeval end = { 0, 0 }, delta = { 0, 0 };
281 char *ssz;
06d8fe89 282
5c7cac85
KZ
283 gettime_monotonic(&end);
284 timersub(&end, &stats.start_time, &delta);
285
286 jlog(JLOG_SUMMARY, "%-15s %s", _("Mode:"),
287 opts.dry_run ? _("dry-run") : _("real"));
288 jlog(JLOG_SUMMARY, "%-15s %zu", _("Files:"), stats.files);
289 jlog(JLOG_SUMMARY, _("%-15s %zu files"), _("Linked:"), stats.linked);
06d8fe89 290
40b119b9 291#ifdef HAVE_SYS_XATTR_H
5c7cac85
KZ
292 jlog(JLOG_SUMMARY, _("%-15s %zu xattrs"), _("Compared:"),
293 stats.xattr_comparisons);
2180ecc8 294#endif
5c7cac85
KZ
295 jlog(JLOG_SUMMARY, _("%-15s %zu files"), _("Compared:"),
296 stats.comparisons);
423e80c1 297
5c7cac85
KZ
298 ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
299 SIZE_SUFFIX_SPACE |
300 SIZE_DECIMAL_2DIGITS, stats.saved);
423e80c1 301
5c7cac85
KZ
302 jlog(JLOG_SUMMARY, "%-15s %s", _("Saved:"), ssz);
303 free(ssz);
423e80c1 304
5c7cac85
KZ
305 jlog(JLOG_SUMMARY, _("%-15s %ld.%06ld seconds"), _("Duration:"),
306 (long)delta.tv_sec, (long)delta.tv_usec);
94b040b0 307}
55c000e1 308
2180ecc8
KZ
309/**
310 * handle_interrupt - Handle a signal
311 *
312 * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
313 */
cd6b8d39 314static int handle_interrupt(void)
0ec20db8 315{
5c7cac85
KZ
316 switch (last_signal) {
317 case SIGINT:
318 case SIGTERM:
319 return TRUE;
320 case SIGUSR1:
321 print_stats();
322 putchar('\n');
323 break;
324 }
325
326 last_signal = 0;
327 return FALSE;
0ec20db8
DJ
328}
329
40b119b9 330#ifdef HAVE_SYS_XATTR_H
2180ecc8 331
2180ecc8
KZ
332/**
333 * llistxattr_or_die - Wrapper for llistxattr()
334 *
335 * This does the same thing as llistxattr() except that it aborts if any error
336 * other than "not supported" is detected.
337 */
338static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
339{
5c7cac85 340 ssize_t len = llistxattr(path, list, size);
2180ecc8 341
5c7cac85
KZ
342 if (len < 0 && errno != ENOTSUP)
343 err(EXIT_FAILURE, _("cannot get xattr names for %s"), path);
c12b6394 344
5c7cac85 345 return len;
2180ecc8
KZ
346}
347
348/**
349 * lgetxattr_or_die - Wrapper for lgetxattr()
350 *
351 * This does the same thing as lgetxattr() except that it aborts upon error.
352 */
5c7cac85
KZ
353static ssize_t lgetxattr_or_die(const char *path,
354 const char *name, void *value, size_t size)
2180ecc8 355{
5c7cac85 356 ssize_t len = lgetxattr(path, name, value, size);
2180ecc8 357
5c7cac85
KZ
358 if (len < 0)
359 err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"),
360 name, path);
c12b6394 361
5c7cac85 362 return len;
2180ecc8
KZ
363}
364
365/**
366 * get_xattr_name_count - Count the number of xattr names
367 * @names: a non-empty table of concatenated, null-terminated xattr names
368 * @len: the total length of the table
369 *
370 * @Returns the number of xattr names
371 */
372static int get_xattr_name_count(const char *const names, ssize_t len)
373{
5c7cac85
KZ
374 int count = 0;
375 const char *name;
2180ecc8 376
5c7cac85
KZ
377 for (name = names; name < (names + len); name += strlen(name) + 1)
378 count++;
2180ecc8 379
5c7cac85 380 return count;
2180ecc8
KZ
381}
382
383/**
384 * cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing
385 * the names they point to.
386 */
387static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
388{
5c7cac85 389 return strcmp(*(char *const *)ptr1, *(char *const *)ptr2);
2180ecc8
KZ
390}
391
392/**
393 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
394 * @names - table of concatenated, null-terminated xattr names
395 * @n - the number of names
396 *
397 * @Returns allocated table of pointers to the names, sorted alphabetically
398 */
399static const char **get_sorted_xattr_name_table(const char *names, int n)
400{
5c7cac85
KZ
401 const char **table = xmalloc(n * sizeof(char *));
402 int i;
2180ecc8 403
5c7cac85
KZ
404 for (i = 0; i < n; i++) {
405 table[i] = names;
406 names += strlen(names) + 1;
407 }
2180ecc8 408
5c7cac85 409 qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
2180ecc8 410
5c7cac85 411 return table;
2180ecc8
KZ
412}
413
414/**
415 * file_xattrs_equal - Compare the extended attributes of two files
416 * @a: The first file
417 * @b: The second file
418 *
419 * @Returns: %TRUE if and only if extended attributes are equal
420 */
cd6b8d39 421static int file_xattrs_equal(const struct file *a, const struct file *b)
2180ecc8 422{
5c7cac85
KZ
423 ssize_t len_a;
424 ssize_t len_b;
425 char *names_a = NULL;
426 char *names_b = NULL;
427 int n_a;
428 int n_b;
429 const char **name_ptrs_a = NULL;
430 const char **name_ptrs_b = NULL;
431 void *value_a = NULL;
432 void *value_b = NULL;
433 int ret = FALSE;
434 int i;
2180ecc8 435
5c7cac85
KZ
436 assert(a->links != NULL);
437 assert(b->links != NULL);
2180ecc8 438
5c7cac85
KZ
439 jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path,
440 b->links->path);
2180ecc8 441
5c7cac85 442 stats.xattr_comparisons++;
2180ecc8 443
5c7cac85
KZ
444 len_a = llistxattr_or_die(a->links->path, NULL, 0);
445 len_b = llistxattr_or_die(b->links->path, NULL, 0);
2180ecc8 446
5c7cac85
KZ
447 if (len_a <= 0 && len_b <= 0)
448 return TRUE; // xattrs not supported or neither file has any
2180ecc8 449
5c7cac85
KZ
450 if (len_a != len_b)
451 return FALSE; // total lengths of xattr names differ
2180ecc8 452
5c7cac85
KZ
453 names_a = xmalloc(len_a);
454 names_b = xmalloc(len_b);
2180ecc8 455
5c7cac85
KZ
456 len_a = llistxattr_or_die(a->links->path, names_a, len_a);
457 len_b = llistxattr_or_die(b->links->path, names_b, len_b);
458 assert((len_a > 0) && (len_a == len_b));
2180ecc8 459
5c7cac85
KZ
460 n_a = get_xattr_name_count(names_a, len_a);
461 n_b = get_xattr_name_count(names_b, len_b);
2180ecc8 462
5c7cac85
KZ
463 if (n_a != n_b)
464 goto exit; // numbers of xattrs differ
2180ecc8 465
5c7cac85
KZ
466 name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
467 name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
2180ecc8 468
5c7cac85 469 // We now have two sorted tables of xattr names.
2180ecc8 470
5c7cac85
KZ
471 for (i = 0; i < n_a; i++) {
472 if (handle_interrupt())
473 goto exit; // user wants to quit
2180ecc8 474
5c7cac85
KZ
475 if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
476 goto exit; // names at same slot differ
2180ecc8 477
5c7cac85
KZ
478 len_a =
479 lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
480 len_b =
481 lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
2180ecc8 482
5c7cac85
KZ
483 if (len_a != len_b)
484 goto exit; // xattrs with same name, different value lengths
2180ecc8 485
5c7cac85
KZ
486 value_a = xmalloc(len_a);
487 value_b = xmalloc(len_b);
2180ecc8 488
5c7cac85
KZ
489 len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
490 value_a, len_a);
491 len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
492 value_b, len_b);
493 assert((len_a >= 0) && (len_a == len_b));
2180ecc8 494
5c7cac85
KZ
495 if (memcmp(value_a, value_b, len_a) != 0)
496 goto exit; // xattrs with same name, different values
2180ecc8 497
5c7cac85
KZ
498 free(value_a);
499 free(value_b);
500 value_a = NULL;
501 value_b = NULL;
502 }
2180ecc8 503
5c7cac85 504 ret = TRUE;
2180ecc8 505
5c7cac85
KZ
506 exit:
507 free(names_a);
508 free(names_b);
509 free(name_ptrs_a);
510 free(name_ptrs_b);
511 free(value_a);
512 free(value_b);
513 return ret;
2180ecc8 514}
5c7cac85 515#else /* !HAVE_SYS_XATTR_H */
cd6b8d39 516static int file_xattrs_equal(const struct file *a, const struct file *b)
2180ecc8 517{
5c7cac85 518 return TRUE;
2180ecc8 519}
5c7cac85 520#endif /* HAVE_SYS_XATTR_H */
2180ecc8
KZ
521
522/**
523 * file_contents_equal - Compare contents of two files for equality
524 * @a: The first file
525 * @b: The second file
526 *
527 * Compare the contents of the files for equality
528 */
cd6b8d39 529static int file_contents_equal(const struct file *a, const struct file *b)
2180ecc8 530{
5c7cac85
KZ
531 FILE *fa = NULL;
532 FILE *fb = NULL;
533 char buf_a[8192];
534 char buf_b[8192];
535 int cmp = 0; /* zero => equal */
536 off_t off = 0; /* current offset */
2180ecc8 537
5c7cac85
KZ
538 assert(a->links != NULL);
539 assert(b->links != NULL);
2180ecc8 540
5c7cac85
KZ
541 jlog(JLOG_VERBOSE1, _("Comparing %s to %s"), a->links->path,
542 b->links->path);
2180ecc8 543
5c7cac85 544 stats.comparisons++;
2180ecc8 545
5c7cac85
KZ
546 if ((fa = fopen(a->links->path, "rb")) == NULL)
547 goto err;
548 if ((fb = fopen(b->links->path, "rb")) == NULL)
549 goto err;
2180ecc8 550
cd6b8d39 551#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
5c7cac85
KZ
552 posix_fadvise(fileno(fa), 0, 0, POSIX_FADV_SEQUENTIAL);
553 posix_fadvise(fileno(fb), 0, 0, POSIX_FADV_SEQUENTIAL);
cd6b8d39 554#endif
2180ecc8 555
5c7cac85
KZ
556 while (!handle_interrupt() && cmp == 0) {
557 size_t ca;
558 size_t cb;
559
560 ca = fread(buf_a, 1, sizeof(buf_a), fa);
561 if (ca < sizeof(buf_a) && ferror(fa))
562 goto err;
563
564 cb = fread(buf_b, 1, sizeof(buf_b), fb);
565 if (cb < sizeof(buf_b) && ferror(fb))
566 goto err;
567
568 off += ca;
569
570 if ((ca != cb || ca == 0)) {
571 cmp = CMP(ca, cb);
572 break;
573 }
574 cmp = memcmp(buf_a, buf_b, ca);
575 }
576 out:
577 if (fa != NULL)
578 fclose(fa);
579 if (fb != NULL)
580 fclose(fb);
581 return !handle_interrupt() && cmp == 0;
582 err:
583 if (fa == NULL || fb == NULL)
584 warn(_("cannot open %s"), fa ? b->links->path : a->links->path);
585 else
586 warn(_("cannot read %s"),
587 ferror(fa) ? a->links->path : b->links->path);
588 cmp = 1;
589 goto out;
2180ecc8
KZ
590}
591
592/**
593 * file_may_link_to - Check whether a file may replace another one
594 * @a: The first file
595 * @b: The second file
596 *
597 * Check whether the two fies are considered equal and can be linked
598 * together. If the two files are identical, the result will be FALSE,
599 * as replacing a link with an identical one is stupid.
600 */
cd6b8d39 601static int file_may_link_to(const struct file *a, const struct file *b)
2180ecc8 602{
5c7cac85
KZ
603 return (a->st.st_size != 0 &&
604 a->st.st_size == b->st.st_size &&
605 a->links != NULL && b->links != NULL &&
606 a->st.st_dev == b->st.st_dev &&
607 a->st.st_ino != b->st.st_ino &&
608 (!opts.respect_mode || a->st.st_mode == b->st.st_mode) &&
609 (!opts.respect_owner || a->st.st_uid == b->st.st_uid) &&
610 (!opts.respect_owner || a->st.st_gid == b->st.st_gid) &&
611 (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
612 (!opts.respect_name
613 || strcmp(a->links->path + a->links->basename,
614 b->links->path + b->links->basename) == 0) &&
615 (!opts.respect_xattrs || file_xattrs_equal(a, b)) &&
616 file_contents_equal(a, b));
2180ecc8
KZ
617}
618
619/**
620 * file_compare - Compare two files to decide which should be master
621 * @a: The first file
622 * @b: The second file
623 *
624 * Check which of the files should be considered greater and thus serve
625 * as the master when linking (the master is the file that all equal files
626 * will be replaced with).
627 */
628static int file_compare(const struct file *a, const struct file *b)
629{
5c7cac85
KZ
630 int res = 0;
631 if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
632 return 0;
633
634 if (res == 0 && opts.maximise)
635 res = CMP(a->st.st_nlink, b->st.st_nlink);
636 if (res == 0 && opts.minimise)
637 res = CMP(b->st.st_nlink, a->st.st_nlink);
638 if (res == 0)
639 res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime)
640 : CMP(a->st.st_mtime, b->st.st_mtime);
641 if (res == 0)
642 res = CMP(b->st.st_ino, a->st.st_ino);
643
644 return res;
2180ecc8
KZ
645}
646
647/**
648 * file_link - Replace b with a link to a
649 * @a: The first file
650 * @b: The second file
651 *
652 * Link the file, replacing @b with the current one. The file is first
653 * linked to a temporary name, and then renamed to the name of @b, making
654 * the replace atomic (@b will always exist).
655 */
cd6b8d39 656static int file_link(struct file *a, struct file *b)
2180ecc8 657{
5c7cac85
KZ
658 char *ssz;
659
660 file_link:
661 assert(a->links != NULL);
662 assert(b->links != NULL);
663
664 ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
665 SIZE_SUFFIX_SPACE |
666 SIZE_DECIMAL_2DIGITS, a->st.st_size);
667 jlog(JLOG_INFO, _("%sLinking %s to %s (-%s)"),
668 opts.dry_run ? _("[DryRun] ") : "", a->links->path, b->links->path,
669 ssz);
670 free(ssz);
671
672 if (!opts.dry_run) {
673 size_t len =
674 strlen(b->links->path) + strlen(".hardlink-temporary") + 1;
675 char *new_path = xmalloc(len);
676
677 snprintf(new_path, len, "%s.hardlink-temporary",
678 b->links->path);
679
680 if (link(a->links->path, new_path) != 0) {
681 warn(_("cannot link %s to %s"), a->links->path,
682 new_path);
683 free(new_path);
684 return FALSE;
685 } else if (rename(new_path, b->links->path) != 0) {
686 warn(_("cannot rename %s to %s"), a->links->path,
687 new_path);
688 unlink(new_path); /* cleanup failed rename */
689 free(new_path);
690 return FALSE;
691 }
692 free(new_path);
693 }
694
695 /* Update statistics */
696 stats.linked++;
697
698 /* Increase the link count of this file, and set stat() of other file */
699 a->st.st_nlink++;
700 b->st.st_nlink--;
701
702 if (b->st.st_nlink == 0)
703 stats.saved += a->st.st_size;
704
705 /* Move the link from file b to a */
706 {
707 struct link *new_link = b->links;
708
709 b->links = b->links->next;
710 new_link->next = a->links->next;
711 a->links->next = new_link;
712 }
713
714 /* Do it again */
715 if (b->links)
716 goto file_link;
717
718 return TRUE;
2180ecc8
KZ
719}
720
721/**
722 * inserter - Callback function for nftw()
723 * @fpath: The path of the file being visited
724 * @sb: The stat information of the file
725 * @typeflag: The type flag
726 * @ftwbuf: Contains current level of nesting and offset of basename
727 *
728 * Called by nftw() for the files. See the manual page for nftw() for
729 * further information.
730 */
5c7cac85
KZ
731static int inserter(const char *fpath, const struct stat *sb,
732 int typeflag, struct FTW *ftwbuf)
2180ecc8 733{
5c7cac85
KZ
734 struct file *fil;
735 struct file **node;
736 size_t pathlen;
737 int included;
738 int excluded;
2180ecc8 739
5c7cac85
KZ
740 if (handle_interrupt())
741 return 1;
742 if (typeflag == FTW_DNR || typeflag == FTW_NS)
743 warn(_("cannot read %s"), fpath);
744 if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
745 return 0;
2180ecc8 746
5c7cac85
KZ
747 included = regexec_any(opts.include, fpath);
748 excluded = regexec_any(opts.exclude, fpath);
2180ecc8 749
5c7cac85
KZ
750 if ((opts.exclude && excluded && !included) ||
751 (!opts.exclude && opts.include && !included))
752 return 0;
2180ecc8 753
5c7cac85 754 stats.files++;
2180ecc8 755
5c7cac85
KZ
756 if ((uintmax_t) sb->st_size < opts.min_size) {
757 jlog(JLOG_VERBOSE1,
758 _("Skipped %s (smaller than configured size)"), fpath);
759 return 0;
760 }
2180ecc8 761
5c7cac85 762 jlog(JLOG_VERBOSE2, _("Visiting %s (file %zu)"), fpath, stats.files);
2180ecc8 763
5c7cac85 764 pathlen = strlen(fpath) + 1;
2180ecc8 765
5c7cac85
KZ
766 fil = xcalloc(1, sizeof(*fil));
767 fil->links = xcalloc(1, sizeof(struct link) + pathlen);
2180ecc8 768
5c7cac85
KZ
769 fil->st = *sb;
770 fil->links->basename = ftwbuf->base;
771 fil->links->next = NULL;
2180ecc8 772
5c7cac85 773 memcpy(fil->links->path, fpath, pathlen);
2180ecc8 774
5c7cac85 775 node = tsearch(fil, &files_by_ino, compare_nodes_ino);
2180ecc8 776
5c7cac85
KZ
777 if (node == NULL)
778 goto fail;
2180ecc8 779
5c7cac85
KZ
780 if (*node != fil) {
781 /* Already known inode, add link to inode information */
782 assert((*node)->st.st_dev == sb->st_dev);
783 assert((*node)->st.st_ino == sb->st_ino);
2180ecc8 784
5c7cac85
KZ
785 fil->links->next = (*node)->links;
786 (*node)->links = fil->links;
2180ecc8 787
5c7cac85
KZ
788 free(fil);
789 } else {
790 /* New inode, insert into by-size table */
791 node = tsearch(fil, &files, compare_nodes);
2180ecc8 792
5c7cac85
KZ
793 if (node == NULL)
794 goto fail;
2180ecc8 795
5c7cac85
KZ
796 if (*node != fil) {
797 struct file *l;
2180ecc8 798
5c7cac85
KZ
799 if (file_compare(fil, *node) >= 0) {
800 fil->next = *node;
801 *node = fil;
802 } else {
803 for (l = *node; l != NULL; l = l->next) {
804 if (l->next != NULL
805 && file_compare(fil, l->next) < 0)
806 continue;
2180ecc8 807
5c7cac85
KZ
808 fil->next = l->next;
809 l->next = fil;
2180ecc8 810
5c7cac85
KZ
811 break;
812 }
813 }
814 }
815 }
2180ecc8 816
5c7cac85 817 return 0;
3c52b1c8 818
5c7cac85
KZ
819 fail:
820 warn(_("cannot continue")); /* probably ENOMEM */
821 return 0;
2180ecc8
KZ
822}
823
824/**
825 * visitor - Callback for twalk()
826 * @nodep: Pointer to a pointer to a #struct file
827 * @which: At which point this visit is (preorder, postorder, endorder)
828 * @depth: The depth of the node in the tree
829 *
830 * Visit the nodes in the binary tree. For each node, call hardlinker()
831 * on each #struct file in the linked list of #struct file instances located
832 * at that node.
833 */
834static void visitor(const void *nodep, const VISIT which, const int depth)
835{
5c7cac85
KZ
836 struct file *master = *(struct file **)nodep;
837 struct file *other;
2180ecc8 838
5c7cac85 839 (void)depth;
2180ecc8 840
5c7cac85
KZ
841 if (which != leaf && which != endorder)
842 return;
2180ecc8 843
5c7cac85
KZ
844 for (; master != NULL; master = master->next) {
845 if (handle_interrupt())
846 exit(EXIT_FAILURE);
847 if (master->links == NULL)
848 continue;
2180ecc8 849
5c7cac85
KZ
850 for (other = master->next; other != NULL; other = other->next) {
851 if (handle_interrupt())
852 exit(EXIT_FAILURE);
2180ecc8 853
5c7cac85
KZ
854 assert(other != other->next);
855 assert(other->st.st_size == master->st.st_size);
2180ecc8 856
5c7cac85
KZ
857 if (other->links == NULL
858 || !file_may_link_to(master, other))
859 continue;
2180ecc8 860
5c7cac85
KZ
861 if (!file_link(master, other) && errno == EMLINK)
862 master = other;
863 }
864 }
2180ecc8
KZ
865}
866
867/**
d2c3c5a6 868 * usage - Print the program help and exit
2180ecc8 869 */
d2c3c5a6 870static void __attribute__((__noreturn__)) usage(void)
2180ecc8 871{
5c7cac85
KZ
872 FILE *out = stdout;
873
874 fputs(USAGE_HEADER, out);
875 fprintf(out, _(" %s [options] <directory>|<file> ...\n"),
876 program_invocation_short_name);
877
878 fputs(USAGE_SEPARATOR, out);
879 fputs(_("Consolidate duplicate files using hardlinks.\n"), out);
880
881 fputs(USAGE_OPTIONS, out);
882 fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out);
883 fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out);
884 fputs(_(" -n, --dry-run don't actually link anything\n"), out);
885 fputs(_(" -f, --respect-name filenames have to be identical\n"), out);
886 fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out);
887 fputs(_(" -o, --ignore-owner ignore owner changes\n"), out);
888 fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out);
40b119b9 889#ifdef HAVE_SYS_XATTR_H
5c7cac85 890 fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out);
04ae85a7 891#endif
5c7cac85
KZ
892 fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
893 " lowest hardlink count\n"), out);
894 fputs(_(" -M, --minimize reverse the meaning of -m\n"), out);
895 fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
896 " (lower precedence than minimize/maximize)\n"), out);
897 fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out);
898 fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out);
899 fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out);
900 fputs(_(" -c, --content compare only file contents, same as -pot\n"), out);
901
902 fputs(USAGE_SEPARATOR, out);
903 printf(USAGE_HELP_OPTIONS(28));
904 printf(USAGE_MAN_TAIL("hardlink(1)"));
905
906 exit(EXIT_SUCCESS);
2180ecc8
KZ
907}
908
909/**
910 * register_regex - Compile and insert a regular expression into list
911 * @pregs: Pointer to a linked list of regular expressions
912 * @regex: String containing the regular expression to be compiled
913 */
ec194114 914static void register_regex(struct regex_link **pregs, const char *regex)
2180ecc8 915{
5c7cac85
KZ
916 struct regex_link *link;
917 int err;
2180ecc8 918
5c7cac85 919 link = xmalloc(sizeof(*link));
2180ecc8 920
5c7cac85
KZ
921 if ((err = regcomp(&link->preg, regex, REG_NOSUB | REG_EXTENDED)) != 0) {
922 size_t size = regerror(err, &link->preg, NULL, 0);
923 char *buf = xmalloc(size + 1);
2180ecc8 924
5c7cac85 925 regerror(err, &link->preg, buf, size);
2180ecc8 926
5c7cac85
KZ
927 errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"),
928 regex, buf);
929 }
930 link->next = *pregs; *pregs = link;
2180ecc8
KZ
931}
932
933/**
934 * parse_options - Parse the command line options
935 * @argc: Number of options
936 * @argv: Array of options
937 */
938static int parse_options(int argc, char *argv[])
939{
5c7cac85
KZ
940 static const char optstr[] = "VhvnfpotXcmMOx:i:s:q";
941 static const struct option long_options[] = {
942 {"version", no_argument, NULL, 'V'},
943 {"help", no_argument, NULL, 'h'},
944 {"verbose", no_argument, NULL, 'v'},
945 {"dry-run", no_argument, NULL, 'n'},
946 {"respect-name", no_argument, NULL, 'f'},
947 {"ignore-mode", no_argument, NULL, 'p'},
948 {"ignore-owner", no_argument, NULL, 'o'},
949 {"ignore-time", no_argument, NULL, 't'},
950 {"respect-xattrs", no_argument, NULL, 'X'},
951 {"maximize", no_argument, NULL, 'm'},
952 {"minimize", no_argument, NULL, 'M'},
953 {"keep-oldest", no_argument, NULL, 'O'},
954 {"exclude", required_argument, NULL, 'x'},
955 {"include", required_argument, NULL, 'i'},
956 {"minimum-size", required_argument, NULL, 's'},
957 {"content", no_argument, NULL, 'c'},
958 {"quiet", no_argument, NULL, 'q'},
959 {NULL, 0, NULL, 0}
960 };
961 static const ul_excl_t excl[] = {
962 {'q', 'v'},
963 {0}
964 };
965 int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
966 int c;
967
968 while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
969
970 err_exclusive_options(c, long_options, excl, excl_st);
971
972 switch (c) {
973 case 'p':
974 opts.respect_mode = FALSE;
975 break;
976 case 'o':
977 opts.respect_owner = FALSE;
978 break;
979 case 't':
980 opts.respect_time = FALSE;
981 break;
982 case 'X':
983 opts.respect_xattrs = TRUE;
984 break;
985 case 'm':
986 opts.maximise = TRUE;
987 break;
988 case 'M':
989 opts.minimise = TRUE;
990 break;
991 case 'O':
992 opts.keep_oldest = TRUE;
993 break;
994 case 'f':
995 opts.respect_name = TRUE;
996 break;
997 case 'v':
998 opts.verbosity++;
999 break;
1000 case 'q':
1001 quiet = TRUE;
1002 break;
1003 case 'c':
1004 opts.respect_mode = FALSE;
1005 opts.respect_name = FALSE;
1006 opts.respect_owner = FALSE;
1007 opts.respect_time = FALSE;
1008 opts.respect_xattrs = FALSE;
1009 break;
1010 case 'n':
1011 opts.dry_run = 1;
1012 break;
1013 case 'x':
1014 register_regex(&opts.exclude, optarg);
1015 break;
1016 case 'i':
1017 register_regex(&opts.include, optarg);
1018 break;
1019 case 's':
1020 opts.min_size = strtosize_or_err(optarg, _("failed to parse size"));
1021 break;
1022 case 'h':
1023 usage();
1024 case 'V':
1025 print_version(EXIT_SUCCESS);
1026 default:
1027 errtryhelp(EXIT_FAILURE);}
1028 }
1029
1030 return 0;
2180ecc8
KZ
1031}
1032
1033/**
5c7cac85
KZ
1034* to_be_called_atexit - Cleanup handler, also prints statistics.
1035*/
2180ecc8
KZ
1036static void to_be_called_atexit(void)
1037{
5c7cac85
KZ
1038 if (stats.started)
1039 print_stats();
2180ecc8
KZ
1040}
1041
1042/**
5c7cac85
KZ
1043* sighandler - Signal handler, sets the global last_signal variable
1044* @i: The signal number
1045*/
2180ecc8
KZ
1046static void sighandler(int i)
1047{
5c7cac85
KZ
1048 if (last_signal != SIGINT)
1049 last_signal = i;
1050 if (i == SIGINT)
1051 putchar('\n');
2180ecc8
KZ
1052}
1053
1054int main(int argc, char *argv[])
1055{
5c7cac85 1056 struct sigaction sa;
2180ecc8 1057
5c7cac85
KZ
1058 sa.sa_handler = sighandler;
1059 sa.sa_flags = SA_RESTART;
1060 sigfillset(&sa.sa_mask);
2180ecc8 1061
5c7cac85
KZ
1062 /* If we receive a SIGINT, end the processing */
1063 sigaction(SIGINT, &sa, NULL);
1064 sigaction(SIGUSR1, &sa, NULL);
2180ecc8 1065
5c7cac85
KZ
1066 /* Pretty print numeric output */
1067 setlocale(LC_NUMERIC, "");
2180ecc8 1068
5c7cac85
KZ
1069 if (atexit(to_be_called_atexit) != 0)
1070 err(EXIT_FAILURE, _("cannot register exit handler"));
2180ecc8 1071
5c7cac85 1072 parse_options(argc, argv);
2180ecc8 1073
5c7cac85 1074 if (optind == argc)
01480c61 1075 errx(EXIT_FAILURE, _("no directory or file specified"));
2180ecc8 1076
5c7cac85
KZ
1077 gettime_monotonic(&stats.start_time);
1078 stats.started = TRUE;
2180ecc8 1079
5c7cac85
KZ
1080 for (; optind < argc; optind++) {
1081 if (nftw(argv[optind], inserter, 20, FTW_PHYS) == -1)
1082 warn(_("cannot process %s"), argv[optind]);
1083 }
2180ecc8 1084
5c7cac85
KZ
1085 twalk(files, visitor);
1086 return 0;
0ec20db8 1087}