]>
Commit | Line | Data |
---|---|---|
2180ecc8 | 1 | /* hardlink.c - Link multiple identical files together |
55c000e1 | 2 | * |
2180ecc8 | 3 | * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org> |
cd6b8d39 | 4 | * Copyright (C) 2021 Karel Zak <kzak@redhat.com> |
55c000e1 | 5 | * |
2180ecc8 | 6 | * SPDX-License-Identifier: MIT |
0b05aab4 | 7 | * |
2180ecc8 KZ |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this software and associated documentation files (the "Software"), to deal | |
10 | * in the Software without restriction, including without limitation the rights | |
11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
12 | * copies of the Software, and to permit persons to whom the Software is | |
13 | * furnished to do so, subject to the following conditions: | |
55c000e1 | 14 | * |
2180ecc8 KZ |
15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. | |
55c000e1 | 17 | * |
2180ecc8 KZ |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 | * THE SOFTWARE. | |
55c000e1 | 25 | */ |
5c7cac85 KZ |
26 | #define _POSIX_C_SOURCE 200112L /* POSIX functions */ |
27 | #define _XOPEN_SOURCE 600 /* nftw() */ | |
28 | ||
29 | #include <sys/types.h> /* stat */ | |
30 | #include <sys/stat.h> /* stat */ | |
31 | #include <sys/time.h> /* getrlimit, getrusage */ | |
32 | #include <sys/resource.h> /* getrlimit, getrusage */ | |
33 | #include <fcntl.h> /* posix_fadvise */ | |
34 | #include <ftw.h> /* ftw */ | |
35 | #include <search.h> /* tsearch() and friends */ | |
cd6b8d39 KZ |
36 | #include <signal.h> /* SIG*, sigaction */ |
37 | #include <getopt.h> /* getopt_long() */ | |
5c7cac85 | 38 | #include <ctype.h> /* tolower() */ |
66a38e97 KZ |
39 | #include <sys/ioctl.h> |
40 | ||
41 | #if defined(HAVE_LINUX_FIEMAP_H) | |
42 | # include <linux/fs.h> | |
43 | # include <linux/fiemap.h> | |
44 | # ifdef FICLONE | |
45 | # define USE_REFLINK 1 | |
46 | # endif | |
47 | #endif | |
2180ecc8 | 48 | |
cd6b8d39 KZ |
49 | #include "nls.h" |
50 | #include "c.h" | |
0361f744 | 51 | #include "xalloc.h" |
631e6865 | 52 | #include "strutils.h" |
06d8fe89 | 53 | #include "monotonic.h" |
4c467ebc | 54 | #include "optutils.h" |
259bed15 | 55 | #include "fileeq.h" |
66a38e97 | 56 | #include "statfs_magic.h" |
2180ecc8 | 57 | |
abaf378c | 58 | #include <regex.h> /* regcomp(), regexec() */ |
3807e71a | 59 | |
3854515c KZ |
60 | #if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR) |
61 | # include <sys/xattr.h> | |
62 | # define USE_XATTR 1 | |
2180ecc8 | 63 | #endif |
0ec20db8 | 64 | |
4c467ebc KZ |
65 | static int quiet; /* don't print anything */ |
66 | ||
66a38e97 KZ |
67 | #ifdef USE_REFLINK |
68 | enum { | |
69 | REFLINK_NEVER = 0, | |
70 | REFLINK_AUTO, | |
71 | REFLINK_ALWAYS | |
72 | }; | |
73 | static int reflink_mode = REFLINK_NEVER; | |
74 | static int reflinks_skip; | |
75 | #endif | |
76 | ||
259bed15 KZ |
77 | static struct ul_fileeq fileeq; |
78 | ||
2180ecc8 KZ |
79 | /** |
80 | * struct file - Information about a file | |
81 | * @st: The stat buffer associated with the file | |
82 | * @next: Next file with the same size | |
83 | * @basename: The offset off the basename in the filename | |
84 | * @path: The path of the file | |
85 | * | |
86 | * This contains all information we need about a file. | |
87 | */ | |
88 | struct file { | |
5c7cac85 | 89 | struct stat st; |
259bed15 KZ |
90 | struct ul_fileeq_data data; |
91 | ||
5c7cac85 KZ |
92 | struct file *next; |
93 | struct link { | |
94 | struct link *next; | |
95 | int basename; | |
2180ecc8 | 96 | #if __STDC_VERSION__ >= 199901L |
5c7cac85 | 97 | char path[]; |
2180ecc8 | 98 | #elif __GNUC__ |
5c7cac85 | 99 | char path[0]; |
2180ecc8 | 100 | #else |
5c7cac85 | 101 | char path[1]; |
2180ecc8 | 102 | #endif |
5c7cac85 | 103 | } *links; |
3807e71a | 104 | }; |
0ec20db8 | 105 | |
2180ecc8 KZ |
/**
 * enum log_level - Logging levels, ordered from least to most verbose
 * @JLOG_SUMMARY:  Default log level
 * @JLOG_INFO:     Verbose logging (verbose == 1)
 * @JLOG_VERBOSE1: Verbosity 2
 * @JLOG_VERBOSE2: Verbosity 3
 *
 * The numeric value of each level is compared against opts.verbosity.
 */
enum log_level {
	JLOG_SUMMARY = 0,
	JLOG_INFO = 1,
	JLOG_VERBOSE1 = 2,
	JLOG_VERBOSE2 = 3
};
0ec20db8 | 119 | |
2180ecc8 KZ |
/**
 * struct statistics - Counters reported by print_stats()
 * @started:           Whether we are post command-line processing
 * @files:             The number of files worked on
 * @linked:            The number of files replaced by a link to a master
 * @xattr_comparisons: The number of extended attribute comparisons
 * @comparisons:       The number of file comparisons
 * @ignored_reflinks:  The number printed as "Skipped reflinks:"
 * @saved:             The (exaggerated) amount of space saved
 * @start_time:        The time we started at
 */
static struct statistics {
	int started;

	size_t files;
	size_t linked;
	size_t xattr_comparisons;
	size_t comparisons;
	size_t ignored_reflinks;

	double saved;
	struct timeval start_time;
} stats;
3807e71a | 140 | |
8ff8b027 KZ |
141 | |
/* One compiled pattern in a singly linked list of regular expressions. */
struct hdl_regex {
	regex_t re;			/* POSIX compatible regex handler */

	struct hdl_regex *next;		/* following list entry, NULL at the end */
};
147 | ||
2180ecc8 KZ |
148 | /** |
149 | * struct options - Processed command-line options | |
150 | * @include: A linked list of regular expressions for the --include option | |
151 | * @exclude: A linked list of regular expressions for the --exclude option | |
152 | * @verbosity: The verbosity. Should be one of #enum log_level | |
153 | * @respect_mode: Whether to respect file modes (default = TRUE) | |
154 | * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE) | |
155 | * @respect_name: Whether to respect file names (default = FALSE) | |
156 | * @respect_time: Whether to respect file modification times (default = TRUE) | |
157 | * @respect_xattrs: Whether to respect extended attributes (default = FALSE) | |
158 | * @maximise: Chose the file with the highest link count as master | |
159 | * @minimise: Chose the file with the lowest link count as master | |
160 | * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE) | |
161 | * @dry_run: Specifies whether hardlink should not link files (default = FALSE) | |
162 | * @min_size: Minimum size of files to consider. (default = 1 byte) | |
40a82456 | 163 | * @max_size: Maximum size of files to consider, 0 means umlimited. (default = 0 byte) |
2180ecc8 KZ |
164 | */ |
165 | static struct options { | |
8ff8b027 KZ |
166 | struct hdl_regex *include; |
167 | struct hdl_regex *exclude; | |
5c7cac85 | 168 | |
259bed15 | 169 | const char *method; |
5c7cac85 KZ |
170 | signed int verbosity; |
171 | unsigned int respect_mode:1; | |
172 | unsigned int respect_owner:1; | |
173 | unsigned int respect_name:1; | |
174 | unsigned int respect_time:1; | |
175 | unsigned int respect_xattrs:1; | |
176 | unsigned int maximise:1; | |
177 | unsigned int minimise:1; | |
178 | unsigned int keep_oldest:1; | |
179 | unsigned int dry_run:1; | |
180 | uintmax_t min_size; | |
40a82456 | 181 | uintmax_t max_size; |
64c8db3c | 182 | size_t io_size; |
f3212b91 | 183 | size_t cache_size; |
9e7235e7 | 184 | } opts = { |
5c7cac85 | 185 | /* default setting */ |
259bed15 | 186 | .method = "sha256", |
5c7cac85 KZ |
187 | .respect_mode = TRUE, |
188 | .respect_owner = TRUE, | |
189 | .respect_time = TRUE, | |
190 | .respect_xattrs = FALSE, | |
191 | .keep_oldest = FALSE, | |
722762b6 | 192 | .min_size = 1, |
f3212b91 | 193 | .cache_size = 10*1024*1024 |
9e7235e7 | 194 | }; |
2180ecc8 KZ |
195 | |
/*
 * files, files_by_ino
 *
 * Binary trees of files, managed using tsearch(). `files` is ordered by
 * compare_nodes() (device and size), `files_by_ino` by compare_nodes_ino()
 * (device and inode, plus basename when names are respected).
 */
static void *files;
static void *files_by_ino;

/*
 * last_signal
 *
 * The last signal we received. We store the signal here in order to be able
 * to break out of loops gracefully and to return from our nftw() handler.
 *
 * NOTE(review): the code that stores the signal number is outside this part
 * of the file; presumably it is a signal handler, which is why the object is
 * volatile sig_atomic_t (the only object type a handler may portably write).
 */
static volatile sig_atomic_t last_signal;
0ec20db8 | 212 | |
4939964c KZ |
213 | |
214 | #define is_log_enabled(_level) (quiet == 0 && (_level) <= (unsigned int)opts.verbosity) | |
215 | ||
2180ecc8 KZ |
216 | /** |
217 | * jlog - Logging for hardlink | |
218 | * @level: The log level | |
219 | * @format: A format string for printf() | |
220 | */ | |
5c7cac85 | 221 | __attribute__((format(printf, 2, 3))) |
2180ecc8 | 222 | static void jlog(enum log_level level, const char *format, ...) |
0ec20db8 | 223 | { |
5c7cac85 | 224 | va_list args; |
2180ecc8 | 225 | |
4939964c | 226 | if (!is_log_enabled(level)) |
5c7cac85 | 227 | return; |
423e80c1 | 228 | |
5c7cac85 KZ |
229 | va_start(args, format); |
230 | vfprintf(stdout, format, args); | |
231 | va_end(args); | |
232 | fputc('\n', stdout); | |
0ec20db8 DJ |
233 | } |
234 | ||
2180ecc8 KZ |
/**
 * CMP - Three-way comparison of two numerical values
 * @a: First value
 * @b: Second value
 *
 * Evaluates to 1 if @a > @b, -1 if @a < @b and 0 otherwise. No subtraction
 * of the operands is involved, so the result cannot overflow whatever the
 * operand width. Both arguments may be evaluated more than once.
 */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
243 | ||
2180ecc8 | 244 | /** |
8ff8b027 KZ |
245 | * register_regex - Compile and insert a regular expression into list |
246 | * @pregs: Pointer to a linked list of regular expressions | |
247 | * @regex: String containing the regular expression to be compiled | |
248 | */ | |
249 | static void register_regex(struct hdl_regex **pregs, const char *regex) | |
250 | { | |
251 | struct hdl_regex *link; | |
252 | int err; | |
253 | ||
254 | link = xmalloc(sizeof(*link)); | |
255 | ||
256 | if ((err = regcomp(&link->re, regex, REG_NOSUB | REG_EXTENDED)) != 0) { | |
257 | size_t size = regerror(err, &link->re, NULL, 0); | |
258 | char *buf = xmalloc(size + 1); | |
259 | ||
260 | regerror(err, &link->re, buf, size); | |
261 | ||
262 | errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"), | |
263 | regex, buf); | |
264 | } | |
265 | link->next = *pregs; *pregs = link; | |
266 | } | |
267 | ||
268 | /** | |
269 | * match_any_regex - Match against multiple regular expressions | |
2180ecc8 KZ |
270 | * @pregs: A linked list of regular expressions |
271 | * @what: The string to match against | |
272 | * | |
273 | * Checks whether any of the regular expressions in the list matches the | |
274 | * string. | |
275 | */ | |
8ff8b027 | 276 | static int match_any_regex(struct hdl_regex *pregs, const char *what) |
0ec20db8 | 277 | { |
5c7cac85 | 278 | for (; pregs != NULL; pregs = pregs->next) { |
8ff8b027 | 279 | if (regexec(&pregs->re, what, 0, NULL, 0) == 0) |
5c7cac85 KZ |
280 | return TRUE; |
281 | } | |
282 | return FALSE; | |
0ec20db8 DJ |
283 | } |
284 | ||
2180ecc8 KZ |
285 | /** |
286 | * compare_nodes - Node comparison function | |
287 | * @_a: The first node (a #struct file) | |
288 | * @_b: The second node (a #struct file) | |
289 | * | |
290 | * Compare the two nodes for the binary tree. | |
291 | */ | |
292 | static int compare_nodes(const void *_a, const void *_b) | |
94b040b0 | 293 | { |
5c7cac85 KZ |
294 | const struct file *a = _a; |
295 | const struct file *b = _b; | |
296 | int diff = 0; | |
bd7722af | 297 | |
5c7cac85 KZ |
298 | if (diff == 0) |
299 | diff = CMP(a->st.st_dev, b->st.st_dev); | |
300 | if (diff == 0) | |
301 | diff = CMP(a->st.st_size, b->st.st_size); | |
2180ecc8 | 302 | |
5c7cac85 | 303 | return diff; |
94b040b0 JN |
304 | } |
305 | ||
2180ecc8 KZ |
306 | /** |
307 | * compare_nodes_ino - Node comparison function | |
308 | * @_a: The first node (a #struct file) | |
309 | * @_b: The second node (a #struct file) | |
310 | * | |
311 | * Compare the two nodes for the binary tree. | |
312 | */ | |
313 | static int compare_nodes_ino(const void *_a, const void *_b) | |
94b040b0 | 314 | { |
5c7cac85 KZ |
315 | const struct file *a = _a; |
316 | const struct file *b = _b; | |
317 | int diff = 0; | |
318 | ||
319 | if (diff == 0) | |
320 | diff = CMP(a->st.st_dev, b->st.st_dev); | |
321 | if (diff == 0) | |
322 | diff = CMP(a->st.st_ino, b->st.st_ino); | |
323 | ||
324 | /* If opts.respect_name is used, we will restrict a struct file to | |
325 | * contain only links with the same basename to keep the rest simple. | |
326 | */ | |
327 | if (diff == 0 && opts.respect_name) | |
328 | diff = strcmp(a->links->path + a->links->basename, | |
329 | b->links->path + b->links->basename); | |
330 | ||
331 | return diff; | |
94b040b0 JN |
332 | } |
333 | ||
2180ecc8 KZ |
334 | /** |
335 | * print_stats - Print statistics to stdout | |
336 | */ | |
337 | static void print_stats(void) | |
94b040b0 | 338 | { |
5c7cac85 KZ |
339 | struct timeval end = { 0, 0 }, delta = { 0, 0 }; |
340 | char *ssz; | |
06d8fe89 | 341 | |
5c7cac85 KZ |
342 | gettime_monotonic(&end); |
343 | timersub(&end, &stats.start_time, &delta); | |
344 | ||
66a38e97 | 345 | jlog(JLOG_SUMMARY, "%-25s %s", _("Mode:"), |
5c7cac85 | 346 | opts.dry_run ? _("dry-run") : _("real")); |
66a38e97 KZ |
347 | jlog(JLOG_SUMMARY, "%-25s %s", _("Method:"), opts.method); |
348 | jlog(JLOG_SUMMARY, "%-25s %zu", _("Files:"), stats.files); | |
349 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Linked:"), stats.linked); | |
06d8fe89 | 350 | |
3854515c | 351 | #ifdef USE_XATTR |
66a38e97 | 352 | jlog(JLOG_SUMMARY, _("%-25s %zu xattrs"), _("Compared:"), |
5c7cac85 | 353 | stats.xattr_comparisons); |
2180ecc8 | 354 | #endif |
66a38e97 | 355 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Compared:"), |
5c7cac85 | 356 | stats.comparisons); |
66a38e97 KZ |
357 | #ifdef USE_REFLINK |
358 | if (reflinks_skip) | |
359 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Skipped reflinks:"), | |
360 | stats.ignored_reflinks); | |
361 | #endif | |
5c7cac85 KZ |
362 | ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | |
363 | SIZE_SUFFIX_SPACE | | |
364 | SIZE_DECIMAL_2DIGITS, stats.saved); | |
423e80c1 | 365 | |
66a38e97 | 366 | jlog(JLOG_SUMMARY, "%-25s %s", _("Saved:"), ssz); |
5c7cac85 | 367 | free(ssz); |
423e80c1 | 368 | |
66a38e97 | 369 | jlog(JLOG_SUMMARY, _("%-25s %"PRId64".%06"PRId64" seconds"), _("Duration:"), |
63463630 | 370 | (int64_t)delta.tv_sec, (int64_t)delta.tv_usec); |
94b040b0 | 371 | } |
55c000e1 | 372 | |
2180ecc8 KZ |
373 | /** |
374 | * handle_interrupt - Handle a signal | |
375 | * | |
376 | * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals. | |
377 | */ | |
cd6b8d39 | 378 | static int handle_interrupt(void) |
0ec20db8 | 379 | { |
5c7cac85 KZ |
380 | switch (last_signal) { |
381 | case SIGINT: | |
382 | case SIGTERM: | |
383 | return TRUE; | |
384 | case SIGUSR1: | |
385 | print_stats(); | |
386 | putchar('\n'); | |
387 | break; | |
388 | } | |
389 | ||
390 | last_signal = 0; | |
391 | return FALSE; | |
0ec20db8 DJ |
392 | } |
393 | ||
3854515c | 394 | #ifdef USE_XATTR |
2180ecc8 | 395 | |
2180ecc8 KZ |
/**
 * llistxattr_or_die - Wrapper for llistxattr()
 * @path: File whose extended attribute names are listed
 * @list: Buffer receiving the names (may be NULL when @size is 0)
 * @size: Size of @list in bytes
 *
 * Behaves like llistxattr(), except that any failure other than ENOTSUP
 * terminates the program. An ENOTSUP failure is handed back to the caller
 * as a negative return value.
 */
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
{
	ssize_t rc = llistxattr(path, list, size);

	if (rc < 0) {
		if (errno != ENOTSUP)
			err(EXIT_FAILURE, _("cannot get xattr names for %s"), path);
	}

	return rc;
}
411 | ||
/**
 * lgetxattr_or_die - Wrapper for lgetxattr()
 * @path:  File to read the attribute from
 * @name:  Name of the extended attribute
 * @value: Buffer receiving the value (may be NULL when @size is 0)
 * @size:  Size of @value in bytes
 *
 * Behaves like lgetxattr(), except that every failure terminates the
 * program, so the value handed back is never negative.
 */
static ssize_t lgetxattr_or_die(const char *path,
				const char *name, void *value, size_t size)
{
	ssize_t rc = lgetxattr(path, name, value, size);

	if (rc >= 0)
		return rc;

	err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"),
	    name, path);
}
428 | ||
/**
 * get_xattr_name_count - Count the number of xattr names
 * @names: a table of concatenated, null-terminated xattr names
 * @len: the total length of the table in bytes
 *
 * Walks the table one string at a time until @len bytes are consumed.
 *
 * @Returns the number of xattr names
 */
static int get_xattr_name_count(const char *const names, ssize_t len)
{
	const char *const end = names + len;
	const char *cur = names;
	int total = 0;

	while (cur < end) {
		total++;
		cur += strlen(cur) + 1;	/* skip the name and its terminator */
	}

	return total;
}
446 | ||
/**
 * cmp_xattr_name_ptrs - qsort() comparator for a table of xattr name pointers
 * @ptr1: address of the first table slot (a pointer to a name)
 * @ptr2: address of the second table slot (a pointer to a name)
 *
 * Compares the names the two slots point to, with strcmp() semantics.
 */
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
	char *const *lhs = ptr1;
	char *const *rhs = ptr2;

	return strcmp(*lhs, *rhs);
}
455 | ||
456 | /** | |
457 | * get_sorted_xattr_name_table - Create a sorted table of xattr names. | |
458 | * @names - table of concatenated, null-terminated xattr names | |
459 | * @n - the number of names | |
460 | * | |
461 | * @Returns allocated table of pointers to the names, sorted alphabetically | |
462 | */ | |
463 | static const char **get_sorted_xattr_name_table(const char *names, int n) | |
464 | { | |
5c7cac85 KZ |
465 | const char **table = xmalloc(n * sizeof(char *)); |
466 | int i; | |
2180ecc8 | 467 | |
5c7cac85 KZ |
468 | for (i = 0; i < n; i++) { |
469 | table[i] = names; | |
470 | names += strlen(names) + 1; | |
471 | } | |
2180ecc8 | 472 | |
5c7cac85 | 473 | qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs); |
2180ecc8 | 474 | |
5c7cac85 | 475 | return table; |
2180ecc8 KZ |
476 | } |
477 | ||
478 | /** | |
479 | * file_xattrs_equal - Compare the extended attributes of two files | |
480 | * @a: The first file | |
481 | * @b: The second file | |
482 | * | |
483 | * @Returns: %TRUE if and only if extended attributes are equal | |
484 | */ | |
cd6b8d39 | 485 | static int file_xattrs_equal(const struct file *a, const struct file *b) |
2180ecc8 | 486 | { |
5c7cac85 KZ |
487 | ssize_t len_a; |
488 | ssize_t len_b; | |
489 | char *names_a = NULL; | |
490 | char *names_b = NULL; | |
491 | int n_a; | |
492 | int n_b; | |
493 | const char **name_ptrs_a = NULL; | |
494 | const char **name_ptrs_b = NULL; | |
495 | void *value_a = NULL; | |
496 | void *value_b = NULL; | |
497 | int ret = FALSE; | |
498 | int i; | |
2180ecc8 | 499 | |
5c7cac85 KZ |
500 | assert(a->links != NULL); |
501 | assert(b->links != NULL); | |
2180ecc8 | 502 | |
5c7cac85 KZ |
503 | jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path, |
504 | b->links->path); | |
2180ecc8 | 505 | |
5c7cac85 | 506 | stats.xattr_comparisons++; |
2180ecc8 | 507 | |
5c7cac85 KZ |
508 | len_a = llistxattr_or_die(a->links->path, NULL, 0); |
509 | len_b = llistxattr_or_die(b->links->path, NULL, 0); | |
2180ecc8 | 510 | |
5c7cac85 KZ |
511 | if (len_a <= 0 && len_b <= 0) |
512 | return TRUE; // xattrs not supported or neither file has any | |
2180ecc8 | 513 | |
5c7cac85 KZ |
514 | if (len_a != len_b) |
515 | return FALSE; // total lengths of xattr names differ | |
2180ecc8 | 516 | |
5c7cac85 KZ |
517 | names_a = xmalloc(len_a); |
518 | names_b = xmalloc(len_b); | |
2180ecc8 | 519 | |
5c7cac85 KZ |
520 | len_a = llistxattr_or_die(a->links->path, names_a, len_a); |
521 | len_b = llistxattr_or_die(b->links->path, names_b, len_b); | |
522 | assert((len_a > 0) && (len_a == len_b)); | |
2180ecc8 | 523 | |
5c7cac85 KZ |
524 | n_a = get_xattr_name_count(names_a, len_a); |
525 | n_b = get_xattr_name_count(names_b, len_b); | |
2180ecc8 | 526 | |
5c7cac85 KZ |
527 | if (n_a != n_b) |
528 | goto exit; // numbers of xattrs differ | |
2180ecc8 | 529 | |
5c7cac85 KZ |
530 | name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a); |
531 | name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b); | |
2180ecc8 | 532 | |
5c7cac85 | 533 | // We now have two sorted tables of xattr names. |
2180ecc8 | 534 | |
5c7cac85 KZ |
535 | for (i = 0; i < n_a; i++) { |
536 | if (handle_interrupt()) | |
537 | goto exit; // user wants to quit | |
2180ecc8 | 538 | |
5c7cac85 KZ |
539 | if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0) |
540 | goto exit; // names at same slot differ | |
2180ecc8 | 541 | |
5c7cac85 KZ |
542 | len_a = |
543 | lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0); | |
544 | len_b = | |
545 | lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0); | |
2180ecc8 | 546 | |
5c7cac85 KZ |
547 | if (len_a != len_b) |
548 | goto exit; // xattrs with same name, different value lengths | |
2180ecc8 | 549 | |
5c7cac85 KZ |
550 | value_a = xmalloc(len_a); |
551 | value_b = xmalloc(len_b); | |
2180ecc8 | 552 | |
5c7cac85 KZ |
553 | len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], |
554 | value_a, len_a); | |
555 | len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], | |
556 | value_b, len_b); | |
557 | assert((len_a >= 0) && (len_a == len_b)); | |
2180ecc8 | 558 | |
5c7cac85 KZ |
559 | if (memcmp(value_a, value_b, len_a) != 0) |
560 | goto exit; // xattrs with same name, different values | |
2180ecc8 | 561 | |
5c7cac85 KZ |
562 | free(value_a); |
563 | free(value_b); | |
564 | value_a = NULL; | |
565 | value_b = NULL; | |
566 | } | |
2180ecc8 | 567 | |
5c7cac85 | 568 | ret = TRUE; |
2180ecc8 | 569 | |
5c7cac85 KZ |
570 | exit: |
571 | free(names_a); | |
572 | free(names_b); | |
573 | free(name_ptrs_a); | |
574 | free(name_ptrs_b); | |
575 | free(value_a); | |
576 | free(value_b); | |
577 | return ret; | |
2180ecc8 | 578 | } |
3854515c | 579 | #else /* !USE_XATTR */ |
cd6b8d39 | 580 | static int file_xattrs_equal(const struct file *a, const struct file *b) |
2180ecc8 | 581 | { |
5c7cac85 | 582 | return TRUE; |
2180ecc8 | 583 | } |
3854515c | 584 | #endif /* USE_XATTR */ |
2180ecc8 | 585 | |
2180ecc8 KZ |
586 | /** |
587 | * file_may_link_to - Check whether a file may replace another one | |
588 | * @a: The first file | |
589 | * @b: The second file | |
590 | * | |
259bed15 KZ |
591 | * Check whether the two files are considered equal attributes and can be |
592 | * linked. This function does not compare content od the files! | |
2180ecc8 | 593 | */ |
cd6b8d39 | 594 | static int file_may_link_to(const struct file *a, const struct file *b) |
2180ecc8 | 595 | { |
5c7cac85 KZ |
596 | return (a->st.st_size != 0 && |
597 | a->st.st_size == b->st.st_size && | |
598 | a->links != NULL && b->links != NULL && | |
599 | a->st.st_dev == b->st.st_dev && | |
600 | a->st.st_ino != b->st.st_ino && | |
601 | (!opts.respect_mode || a->st.st_mode == b->st.st_mode) && | |
602 | (!opts.respect_owner || a->st.st_uid == b->st.st_uid) && | |
603 | (!opts.respect_owner || a->st.st_gid == b->st.st_gid) && | |
604 | (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) && | |
605 | (!opts.respect_name | |
606 | || strcmp(a->links->path + a->links->basename, | |
607 | b->links->path + b->links->basename) == 0) && | |
259bed15 | 608 | (!opts.respect_xattrs || file_xattrs_equal(a, b))); |
2180ecc8 KZ |
609 | } |
610 | ||
611 | /** | |
612 | * file_compare - Compare two files to decide which should be master | |
613 | * @a: The first file | |
614 | * @b: The second file | |
615 | * | |
616 | * Check which of the files should be considered greater and thus serve | |
617 | * as the master when linking (the master is the file that all equal files | |
618 | * will be replaced with). | |
619 | */ | |
620 | static int file_compare(const struct file *a, const struct file *b) | |
621 | { | |
5c7cac85 KZ |
622 | int res = 0; |
623 | if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino) | |
624 | return 0; | |
625 | ||
626 | if (res == 0 && opts.maximise) | |
627 | res = CMP(a->st.st_nlink, b->st.st_nlink); | |
628 | if (res == 0 && opts.minimise) | |
629 | res = CMP(b->st.st_nlink, a->st.st_nlink); | |
630 | if (res == 0) | |
631 | res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime) | |
632 | : CMP(a->st.st_mtime, b->st.st_mtime); | |
633 | if (res == 0) | |
634 | res = CMP(b->st.st_ino, a->st.st_ino); | |
635 | ||
636 | return res; | |
2180ecc8 KZ |
637 | } |
638 | ||
66a38e97 KZ |
639 | #ifdef USE_REFLINK |
640 | static inline int do_link(struct file *a, struct file *b, | |
641 | const char *new_name, int reflink) | |
642 | { | |
643 | if (reflink) { | |
644 | int dest = -1, src = -1; | |
645 | ||
2401078c | 646 | dest = open(new_name, O_CREAT|O_WRONLY|O_TRUNC, 0600); |
66a38e97 KZ |
647 | if (dest < 0) |
648 | goto fallback; | |
649 | if (fchmod(dest, b->st.st_mode) != 0) | |
650 | goto fallback; | |
651 | if (fchown(dest, b->st.st_uid, b->st.st_gid) != 0) | |
652 | goto fallback; | |
653 | src = open(a->links->path, O_RDONLY); | |
654 | if (src < 0) | |
655 | goto fallback; | |
656 | if (ioctl(dest, FICLONE, src) != 0) | |
657 | goto fallback; | |
658 | close(dest); | |
659 | close(src); | |
660 | return 0; | |
661 | fallback: | |
662 | if (dest >= 0) { | |
663 | close(dest); | |
664 | unlink(new_name); | |
665 | } | |
666 | if (src >= 0) | |
667 | close(src); | |
668 | ||
669 | if (reflink_mode == REFLINK_ALWAYS) | |
670 | return -errno; | |
671 | jlog(JLOG_VERBOSE2,_("Reflinking failed, fallback to hardlinking")); | |
672 | } | |
673 | ||
674 | return link(a->links->path, new_name); | |
675 | } | |
676 | #else | |
677 | static inline int do_link(struct file *a, | |
678 | struct file *b __attribute__((__unused__)), | |
679 | const char *new_name, | |
680 | int reflink __attribute__((__unused__))) | |
681 | { | |
682 | return link(a->links->path, new_name); | |
683 | } | |
684 | #endif /* USE_REFLINK */ | |
685 | ||
2180ecc8 KZ |
686 | /** |
687 | * file_link - Replace b with a link to a | |
688 | * @a: The first file | |
689 | * @b: The second file | |
690 | * | |
691 | * Link the file, replacing @b with the current one. The file is first | |
692 | * linked to a temporary name, and then renamed to the name of @b, making | |
693 | * the replace atomic (@b will always exist). | |
694 | */ | |
66a38e97 | 695 | static int file_link(struct file *a, struct file *b, int reflink) |
2180ecc8 | 696 | { |
5c7cac85 KZ |
697 | |
698 | file_link: | |
699 | assert(a->links != NULL); | |
700 | assert(b->links != NULL); | |
701 | ||
4939964c KZ |
702 | if (is_log_enabled(JLOG_INFO)) { |
703 | char *ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | | |
5c7cac85 KZ |
704 | SIZE_SUFFIX_SPACE | |
705 | SIZE_DECIMAL_2DIGITS, a->st.st_size); | |
66a38e97 KZ |
706 | jlog(JLOG_INFO, _("%s%sLinking %s to %s (-%s)"), |
707 | opts.dry_run ? _("[DryRun] ") : "", | |
708 | reflink ? "Ref" : "", | |
709 | a->links->path, b->links->path, | |
4939964c KZ |
710 | ssz); |
711 | free(ssz); | |
712 | } | |
5c7cac85 KZ |
713 | |
714 | if (!opts.dry_run) { | |
fd1e57a7 KZ |
715 | char *new_path; |
716 | int failed = 1; | |
5c7cac85 | 717 | |
fd1e57a7 KZ |
718 | xasprintf(&new_path, "%s.hardlink-temporary", b->links->path); |
719 | ||
66a38e97 | 720 | if (do_link(a, b, new_path, reflink) != 0) |
fd1e57a7 KZ |
721 | warn(_("cannot link %s to %s"), a->links->path, new_path); |
722 | ||
723 | else if (rename(new_path, b->links->path) != 0) { | |
724 | warn(_("cannot rename %s to %s"), a->links->path, new_path); | |
725 | unlink(new_path); | |
726 | } else | |
727 | failed = 0; | |
5c7cac85 | 728 | |
5c7cac85 | 729 | free(new_path); |
fd1e57a7 KZ |
730 | if (failed) |
731 | return FALSE; | |
5c7cac85 KZ |
732 | } |
733 | ||
734 | /* Update statistics */ | |
735 | stats.linked++; | |
736 | ||
737 | /* Increase the link count of this file, and set stat() of other file */ | |
738 | a->st.st_nlink++; | |
739 | b->st.st_nlink--; | |
740 | ||
741 | if (b->st.st_nlink == 0) | |
742 | stats.saved += a->st.st_size; | |
743 | ||
744 | /* Move the link from file b to a */ | |
745 | { | |
746 | struct link *new_link = b->links; | |
747 | ||
748 | b->links = b->links->next; | |
749 | new_link->next = a->links->next; | |
750 | a->links->next = new_link; | |
751 | } | |
752 | ||
753 | /* Do it again */ | |
754 | if (b->links) | |
755 | goto file_link; | |
756 | ||
757 | return TRUE; | |
2180ecc8 KZ |
758 | } |
759 | ||
2a28d961 KZ |
760 | static int has_fpath(struct file *node, const char *path) |
761 | { | |
762 | struct link *l; | |
763 | ||
764 | for (l = node->links; l; l = l->next) { | |
765 | if (strcmp(l->path, path) == 0) | |
766 | return 1; | |
767 | } | |
768 | ||
769 | return 0; | |
770 | } | |
771 | ||
772 | ||
2180ecc8 KZ |
773 | /** |
774 | * inserter - Callback function for nftw() | |
775 | * @fpath: The path of the file being visited | |
776 | * @sb: The stat information of the file | |
777 | * @typeflag: The type flag | |
778 | * @ftwbuf: Contains current level of nesting and offset of basename | |
779 | * | |
780 | * Called by nftw() for the files. See the manual page for nftw() for | |
781 | * further information. | |
782 | */ | |
5c7cac85 KZ |
783 | static int inserter(const char *fpath, const struct stat *sb, |
784 | int typeflag, struct FTW *ftwbuf) | |
2180ecc8 | 785 | { |
5c7cac85 KZ |
786 | struct file *fil; |
787 | struct file **node; | |
788 | size_t pathlen; | |
789 | int included; | |
790 | int excluded; | |
2180ecc8 | 791 | |
5c7cac85 KZ |
792 | if (handle_interrupt()) |
793 | return 1; | |
794 | if (typeflag == FTW_DNR || typeflag == FTW_NS) | |
795 | warn(_("cannot read %s"), fpath); | |
796 | if (typeflag != FTW_F || !S_ISREG(sb->st_mode)) | |
797 | return 0; | |
2180ecc8 | 798 | |
8ff8b027 KZ |
799 | included = match_any_regex(opts.include, fpath); |
800 | excluded = match_any_regex(opts.exclude, fpath); | |
2180ecc8 | 801 | |
5c7cac85 KZ |
802 | if ((opts.exclude && excluded && !included) || |
803 | (!opts.exclude && opts.include && !included)) | |
804 | return 0; | |
2180ecc8 | 805 | |
5c7cac85 | 806 | stats.files++; |
2180ecc8 | 807 | |
5c7cac85 KZ |
808 | if ((uintmax_t) sb->st_size < opts.min_size) { |
809 | jlog(JLOG_VERBOSE1, | |
810 | _("Skipped %s (smaller than configured size)"), fpath); | |
811 | return 0; | |
812 | } | |
2180ecc8 | 813 | |
57d9fd2b KZ |
814 | jlog(JLOG_VERBOSE2, " %5zu: [%ld/%ld/%zu] %s", |
815 | stats.files, sb->st_dev, sb->st_ino, | |
816 | (size_t) sb->st_nlink, fpath); | |
2180ecc8 | 817 | |
40a82456 DP |
818 | if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) { |
819 | jlog(JLOG_VERBOSE1, | |
820 | _("Skipped %s (greater than configured size)"), fpath); | |
821 | return 0; | |
822 | } | |
2180ecc8 | 823 | |
5c7cac85 | 824 | pathlen = strlen(fpath) + 1; |
2180ecc8 | 825 | |
5c7cac85 KZ |
826 | fil = xcalloc(1, sizeof(*fil)); |
827 | fil->links = xcalloc(1, sizeof(struct link) + pathlen); | |
2180ecc8 | 828 | |
5c7cac85 KZ |
829 | fil->st = *sb; |
830 | fil->links->basename = ftwbuf->base; | |
831 | fil->links->next = NULL; | |
2180ecc8 | 832 | |
5c7cac85 | 833 | memcpy(fil->links->path, fpath, pathlen); |
2180ecc8 | 834 | |
5c7cac85 | 835 | node = tsearch(fil, &files_by_ino, compare_nodes_ino); |
2180ecc8 | 836 | |
5c7cac85 KZ |
837 | if (node == NULL) |
838 | goto fail; | |
2180ecc8 | 839 | |
5c7cac85 KZ |
840 | if (*node != fil) { |
841 | /* Already known inode, add link to inode information */ | |
842 | assert((*node)->st.st_dev == sb->st_dev); | |
843 | assert((*node)->st.st_ino == sb->st_ino); | |
2180ecc8 | 844 | |
2a28d961 KZ |
845 | if (has_fpath(*node, fpath)) { |
846 | jlog(JLOG_VERBOSE1, | |
847 | _("Skipped %s (specified more than once)"), fpath); | |
848 | free(fil->links); | |
849 | } else { | |
850 | fil->links->next = (*node)->links; | |
851 | (*node)->links = fil->links; | |
852 | } | |
2180ecc8 | 853 | |
5c7cac85 KZ |
854 | free(fil); |
855 | } else { | |
856 | /* New inode, insert into by-size table */ | |
857 | node = tsearch(fil, &files, compare_nodes); | |
2180ecc8 | 858 | |
5c7cac85 KZ |
859 | if (node == NULL) |
860 | goto fail; | |
2180ecc8 | 861 | |
5c7cac85 KZ |
862 | if (*node != fil) { |
863 | struct file *l; | |
2180ecc8 | 864 | |
5c7cac85 KZ |
865 | if (file_compare(fil, *node) >= 0) { |
866 | fil->next = *node; | |
867 | *node = fil; | |
868 | } else { | |
869 | for (l = *node; l != NULL; l = l->next) { | |
870 | if (l->next != NULL | |
871 | && file_compare(fil, l->next) < 0) | |
872 | continue; | |
2180ecc8 | 873 | |
5c7cac85 KZ |
874 | fil->next = l->next; |
875 | l->next = fil; | |
2180ecc8 | 876 | |
5c7cac85 KZ |
877 | break; |
878 | } | |
879 | } | |
880 | } | |
881 | } | |
2180ecc8 | 882 | |
5c7cac85 | 883 | return 0; |
3c52b1c8 | 884 | |
5c7cac85 KZ |
885 | fail: |
886 | warn(_("cannot continue")); /* probably ENOMEM */ | |
887 | return 0; | |
2180ecc8 KZ |
888 | } |
889 | ||
66a38e97 KZ |
890 | #ifdef USE_REFLINK |
891 | static int is_reflink_compatible(dev_t devno, const char *filename) | |
892 | { | |
893 | static dev_t last_dev = 0; | |
894 | static int last_status = 0; | |
895 | ||
896 | if (last_dev != devno) { | |
897 | struct statfs vfs; | |
898 | ||
899 | if (statfs(filename, &vfs) != 0) | |
900 | return 0; | |
901 | ||
902 | last_dev = devno; | |
903 | switch (vfs.f_type) { | |
904 | case STATFS_BTRFS_MAGIC: | |
905 | case STATFS_XFS_MAGIC: | |
906 | last_status = 1; | |
907 | break; | |
908 | default: | |
909 | last_status = 0; | |
910 | break; | |
911 | } | |
912 | } | |
913 | ||
914 | return last_status; | |
915 | } | |
916 | ||
917 | static int is_reflink(struct file *xa, struct file *xb) | |
918 | { | |
919 | int last = 0, rc = 0; | |
920 | char abuf[BUFSIZ] = { 0 }, | |
921 | bbuf[BUFSIZ] = { 0 }; | |
922 | ||
923 | struct fiemap *amap = (struct fiemap *) abuf, | |
924 | *bmap = (struct fiemap *) bbuf; | |
925 | ||
926 | int af = open(xa->links->path, O_RDONLY), | |
927 | bf = open(xb->links->path, O_RDONLY); | |
928 | ||
2a596d70 KZ |
929 | if (af < 0 || bf < 0) |
930 | goto done; | |
931 | ||
66a38e97 KZ |
932 | do { |
933 | size_t i; | |
934 | ||
935 | amap->fm_length = ~0ULL; | |
936 | amap->fm_flags = FIEMAP_FLAG_SYNC; | |
937 | amap->fm_extent_count = (sizeof(abuf) - sizeof(*amap)) / sizeof(struct fiemap_extent); | |
938 | ||
939 | bmap->fm_length = ~0ULL; | |
940 | bmap->fm_flags = FIEMAP_FLAG_SYNC; | |
941 | bmap->fm_extent_count = (sizeof(bbuf) - sizeof(*bmap)) / sizeof(struct fiemap_extent); | |
942 | ||
943 | if (ioctl(af, FS_IOC_FIEMAP, (unsigned long) amap) < 0) | |
944 | goto done; | |
945 | if (ioctl(bf, FS_IOC_FIEMAP, (unsigned long) bmap) < 0) | |
946 | goto done; | |
947 | ||
948 | if (amap->fm_mapped_extents != bmap->fm_mapped_extents) | |
949 | goto done; | |
950 | ||
951 | for (i = 0; i < amap->fm_mapped_extents; i++) { | |
952 | struct fiemap_extent *a = &amap->fm_extents[i]; | |
953 | struct fiemap_extent *b = &bmap->fm_extents[i]; | |
954 | ||
955 | if (a->fe_logical != b->fe_logical || | |
956 | a->fe_length != b->fe_length || | |
957 | a->fe_physical != b->fe_physical) | |
958 | goto done; | |
959 | if (!(a->fe_flags & FIEMAP_EXTENT_SHARED) || | |
960 | !(b->fe_flags & FIEMAP_EXTENT_SHARED)) | |
961 | goto done; | |
962 | if (a->fe_flags & FIEMAP_EXTENT_LAST) | |
963 | last = 1; | |
964 | } | |
965 | ||
966 | bmap->fm_start = amap->fm_start = | |
967 | amap->fm_extents[amap->fm_mapped_extents - 1].fe_logical + | |
968 | amap->fm_extents[amap->fm_mapped_extents - 1].fe_length; | |
969 | } while (last == 0); | |
970 | ||
971 | rc = 1; | |
972 | done: | |
2a596d70 KZ |
973 | if (af >= 0) |
974 | close(af); | |
975 | if (bf >= 0) | |
976 | close(bf); | |
66a38e97 KZ |
977 | return rc; |
978 | } | |
979 | #endif /* USE_REFLINK */ | |
980 | ||
259bed15 KZ |
981 | static inline size_t count_nodes(struct file *x) |
982 | { | |
983 | size_t ct = 0; | |
984 | ||
985 | for ( ; x != NULL; x = x->next) | |
986 | ct++; | |
987 | ||
988 | return ct; | |
989 | } | |
990 | ||
2180ecc8 KZ |
991 | /** |
992 | * visitor - Callback for twalk() | |
993 | * @nodep: Pointer to a pointer to a #struct file | |
994 | * @which: At which point this visit is (preorder, postorder, endorder) | |
995 | * @depth: The depth of the node in the tree | |
996 | * | |
997 | * Visit the nodes in the binary tree. For each node, call hardlinker() | |
998 | * on each #struct file in the linked list of #struct file instances located | |
999 | * at that node. | |
1000 | */ | |
1001 | static void visitor(const void *nodep, const VISIT which, const int depth) | |
1002 | { | |
5c7cac85 | 1003 | struct file *master = *(struct file **)nodep; |
259bed15 | 1004 | struct file *begin = master; |
5c7cac85 | 1005 | struct file *other; |
2180ecc8 | 1006 | |
5c7cac85 | 1007 | (void)depth; |
2180ecc8 | 1008 | |
5c7cac85 KZ |
1009 | if (which != leaf && which != endorder) |
1010 | return; | |
2180ecc8 | 1011 | |
5c7cac85 | 1012 | for (; master != NULL; master = master->next) { |
259bed15 | 1013 | size_t nnodes, memsiz; |
66a38e97 | 1014 | int may_reflink = 0; |
ee4c3249 | 1015 | |
5c7cac85 KZ |
1016 | if (handle_interrupt()) |
1017 | exit(EXIT_FAILURE); | |
1018 | if (master->links == NULL) | |
1019 | continue; | |
2180ecc8 | 1020 | |
259bed15 KZ |
1021 | /* calculate per file max memory use */ |
1022 | nnodes = count_nodes(master); | |
1023 | if (!nnodes) | |
1024 | continue; | |
f3212b91 KZ |
1025 | |
1026 | /* per-file cache size */ | |
1027 | memsiz = opts.cache_size / nnodes; | |
64c8db3c KZ |
1028 | /* filesiz, readsiz, memsiz */ |
1029 | ul_fileeq_set_size(&fileeq, master->st.st_size, opts.io_size, memsiz); | |
259bed15 | 1030 | |
66a38e97 KZ |
1031 | #ifdef USE_REFLINK |
1032 | if (reflink_mode || reflinks_skip) { | |
1033 | may_reflink = | |
1034 | reflink_mode == REFLINK_ALWAYS ? 1 : | |
1035 | is_reflink_compatible(master->st.st_dev, | |
1036 | master->links->path); | |
1037 | } | |
1038 | #endif | |
5c7cac85 | 1039 | for (other = master->next; other != NULL; other = other->next) { |
259bed15 KZ |
1040 | int eq; |
1041 | ||
5c7cac85 KZ |
1042 | if (handle_interrupt()) |
1043 | exit(EXIT_FAILURE); | |
2180ecc8 | 1044 | |
5c7cac85 KZ |
1045 | assert(other != other->next); |
1046 | assert(other->st.st_size == master->st.st_size); | |
2180ecc8 | 1047 | |
af5f0078 KZ |
1048 | if (!other->links) |
1049 | continue; | |
1050 | ||
259bed15 | 1051 | /* check file attributes, etc. */ |
af5f0078 KZ |
1052 | if (!file_may_link_to(master, other)) { |
1053 | jlog(JLOG_VERBOSE2, | |
1054 | _("Skipped (attributes mismatch) %s"), other->links->path); | |
5c7cac85 | 1055 | continue; |
af5f0078 | 1056 | } |
66a38e97 KZ |
1057 | #ifdef USE_REFLINK |
1058 | if (may_reflink && reflinks_skip && is_reflink(master, other)) { | |
1059 | jlog(JLOG_VERBOSE2, | |
1060 | _("Skipped (already reflink) %s"), other->links->path); | |
1061 | stats.ignored_reflinks++; | |
1062 | continue; | |
1063 | } | |
1064 | #endif | |
259bed15 KZ |
1065 | /* initialize content comparison */ |
1066 | if (!ul_fileeq_data_associated(&master->data)) | |
1067 | ul_fileeq_data_set_file(&master->data, master->links->path); | |
1068 | if (!ul_fileeq_data_associated(&other->data)) | |
1069 | ul_fileeq_data_set_file(&other->data, other->links->path); | |
1070 | ||
1071 | /* compare files */ | |
1072 | eq = ul_fileeq(&fileeq, &master->data, &other->data); | |
1073 | ||
1074 | /* reduce number of open files, keep only master open */ | |
1075 | ul_fileeq_data_close_file(&other->data); | |
1076 | ||
1077 | stats.comparisons++; | |
1078 | ||
af5f0078 KZ |
1079 | if (!eq) { |
1080 | jlog(JLOG_VERBOSE2, | |
1081 | _("Skipped (content mismatch) %s"), other->links->path); | |
259bed15 | 1082 | continue; |
af5f0078 | 1083 | } |
259bed15 KZ |
1084 | |
1085 | /* link files */ | |
66a38e97 | 1086 | if (!file_link(master, other, may_reflink) && errno == EMLINK) { |
259bed15 | 1087 | ul_fileeq_data_deinit(&master->data); |
5c7cac85 | 1088 | master = other; |
259bed15 | 1089 | } |
5c7cac85 | 1090 | } |
259bed15 KZ |
1091 | |
1092 | /* don't keep master data in memory */ | |
1093 | ul_fileeq_data_deinit(&master->data); | |
1094 | } | |
1095 | ||
1096 | /* final cleanup */ | |
1097 | for (other = begin; other != NULL; other = other->next) { | |
1098 | if (ul_fileeq_data_associated(&other->data)) | |
1099 | ul_fileeq_data_deinit(&other->data); | |
5c7cac85 | 1100 | } |
2180ecc8 KZ |
1101 | } |
1102 | ||
1103 | /** | |
d2c3c5a6 | 1104 | * usage - Print the program help and exit |
2180ecc8 | 1105 | */ |
d2c3c5a6 | 1106 | static void __attribute__((__noreturn__)) usage(void) |
2180ecc8 | 1107 | { |
5c7cac85 KZ |
1108 | FILE *out = stdout; |
1109 | ||
1110 | fputs(USAGE_HEADER, out); | |
1111 | fprintf(out, _(" %s [options] <directory>|<file> ...\n"), | |
1112 | program_invocation_short_name); | |
1113 | ||
1114 | fputs(USAGE_SEPARATOR, out); | |
1115 | fputs(_("Consolidate duplicate files using hardlinks.\n"), out); | |
1116 | ||
1117 | fputs(USAGE_OPTIONS, out); | |
1118 | fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out); | |
1119 | fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out); | |
1120 | fputs(_(" -n, --dry-run don't actually link anything\n"), out); | |
259bed15 KZ |
1121 | fputs(_(" -y, --method <name> file content comparison method\n"), out); |
1122 | ||
5c7cac85 KZ |
1123 | fputs(_(" -f, --respect-name filenames have to be identical\n"), out); |
1124 | fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out); | |
1125 | fputs(_(" -o, --ignore-owner ignore owner changes\n"), out); | |
1126 | fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out); | |
3854515c | 1127 | #ifdef USE_XATTR |
5c7cac85 | 1128 | fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out); |
66a38e97 KZ |
1129 | #endif |
1130 | #ifdef USE_REFLINK | |
1131 | fputs(_(" --reflink[=<when>] create clone/CoW copies (auto, always, never)\n"), out); | |
1132 | fputs(_(" --skip-reflinks skip already cloned files (enabled on --reflink)\n"), out); | |
04ae85a7 | 1133 | #endif |
5c7cac85 KZ |
1134 | fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n" |
1135 | " lowest hardlink count\n"), out); | |
1136 | fputs(_(" -M, --minimize reverse the meaning of -m\n"), out); | |
1137 | fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n" | |
1138 | " (lower precedence than minimize/maximize)\n"), out); | |
1139 | fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out); | |
1140 | fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out); | |
1141 | fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out); | |
40a82456 | 1142 | fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out); |
64c8db3c | 1143 | fputs(_(" -b, --io-size <size> I/O buffer size for file reading (speedup, using more RAM)\n"), out); |
f3212b91 | 1144 | fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out); |
5c7cac85 KZ |
1145 | fputs(_(" -c, --content compare only file contents, same as -pot\n"), out); |
1146 | ||
1147 | fputs(USAGE_SEPARATOR, out); | |
1148 | printf(USAGE_HELP_OPTIONS(28)); | |
1149 | printf(USAGE_MAN_TAIL("hardlink(1)")); | |
1150 | ||
1151 | exit(EXIT_SUCCESS); | |
2180ecc8 KZ |
1152 | } |
1153 | ||
2180ecc8 KZ |
1154 | /** |
1155 | * parse_options - Parse the command line options | |
1156 | * @argc: Number of options | |
1157 | * @argv: Array of options | |
1158 | */ | |
1159 | static int parse_options(int argc, char *argv[]) | |
1160 | { | |
66a38e97 KZ |
1161 | enum { |
1162 | OPT_REFLINK = CHAR_MAX + 1, | |
1163 | OPT_SKIP_RELINKS | |
1164 | }; | |
40a82456 | 1165 | static const char optstr[] = "VhvnfpotXcmMOx:y:i:r:S:s:b:q"; |
5c7cac85 KZ |
1166 | static const struct option long_options[] = { |
1167 | {"version", no_argument, NULL, 'V'}, | |
1168 | {"help", no_argument, NULL, 'h'}, | |
1169 | {"verbose", no_argument, NULL, 'v'}, | |
1170 | {"dry-run", no_argument, NULL, 'n'}, | |
1171 | {"respect-name", no_argument, NULL, 'f'}, | |
1172 | {"ignore-mode", no_argument, NULL, 'p'}, | |
1173 | {"ignore-owner", no_argument, NULL, 'o'}, | |
1174 | {"ignore-time", no_argument, NULL, 't'}, | |
1175 | {"respect-xattrs", no_argument, NULL, 'X'}, | |
1176 | {"maximize", no_argument, NULL, 'm'}, | |
1177 | {"minimize", no_argument, NULL, 'M'}, | |
1178 | {"keep-oldest", no_argument, NULL, 'O'}, | |
1179 | {"exclude", required_argument, NULL, 'x'}, | |
1180 | {"include", required_argument, NULL, 'i'}, | |
259bed15 | 1181 | {"method", required_argument, NULL, 'y' }, |
5c7cac85 | 1182 | {"minimum-size", required_argument, NULL, 's'}, |
40a82456 | 1183 | {"maximum-size", required_argument, NULL, 'S'}, |
66a38e97 KZ |
1184 | #ifdef USE_REFLINK |
1185 | {"reflink", optional_argument, NULL, OPT_REFLINK }, | |
1186 | {"skip-reflinks", no_argument, NULL, OPT_SKIP_RELINKS }, | |
1187 | #endif | |
64c8db3c | 1188 | {"io-size", required_argument, NULL, 'b'}, |
5c7cac85 KZ |
1189 | {"content", no_argument, NULL, 'c'}, |
1190 | {"quiet", no_argument, NULL, 'q'}, | |
f3212b91 | 1191 | {"cache-size", required_argument, NULL, 'r'}, |
5c7cac85 KZ |
1192 | {NULL, 0, NULL, 0} |
1193 | }; | |
1194 | static const ul_excl_t excl[] = { | |
1195 | {'q', 'v'}, | |
1196 | {0} | |
1197 | }; | |
1198 | int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; | |
1199 | int c; | |
1200 | ||
1201 | while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) { | |
1202 | ||
1203 | err_exclusive_options(c, long_options, excl, excl_st); | |
1204 | ||
1205 | switch (c) { | |
1206 | case 'p': | |
1207 | opts.respect_mode = FALSE; | |
1208 | break; | |
1209 | case 'o': | |
1210 | opts.respect_owner = FALSE; | |
1211 | break; | |
1212 | case 't': | |
1213 | opts.respect_time = FALSE; | |
1214 | break; | |
1215 | case 'X': | |
1216 | opts.respect_xattrs = TRUE; | |
1217 | break; | |
1218 | case 'm': | |
1219 | opts.maximise = TRUE; | |
1220 | break; | |
1221 | case 'M': | |
1222 | opts.minimise = TRUE; | |
1223 | break; | |
1224 | case 'O': | |
1225 | opts.keep_oldest = TRUE; | |
1226 | break; | |
1227 | case 'f': | |
1228 | opts.respect_name = TRUE; | |
1229 | break; | |
1230 | case 'v': | |
1231 | opts.verbosity++; | |
1232 | break; | |
1233 | case 'q': | |
1234 | quiet = TRUE; | |
1235 | break; | |
1236 | case 'c': | |
1237 | opts.respect_mode = FALSE; | |
1238 | opts.respect_name = FALSE; | |
1239 | opts.respect_owner = FALSE; | |
1240 | opts.respect_time = FALSE; | |
1241 | opts.respect_xattrs = FALSE; | |
1242 | break; | |
1243 | case 'n': | |
1244 | opts.dry_run = 1; | |
1245 | break; | |
1246 | case 'x': | |
1247 | register_regex(&opts.exclude, optarg); | |
1248 | break; | |
259bed15 KZ |
1249 | case 'y': |
1250 | opts.method = optarg; | |
1251 | break; | |
5c7cac85 KZ |
1252 | case 'i': |
1253 | register_regex(&opts.include, optarg); | |
1254 | break; | |
1255 | case 's': | |
40a82456 | 1256 | opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size")); |
5c7cac85 | 1257 | break; |
722762b6 | 1258 | case 'S': |
40a82456 | 1259 | opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size")); |
5c7cac85 | 1260 | break; |
f3212b91 KZ |
1261 | case 'r': |
1262 | opts.cache_size = strtosize_or_err(optarg, _("failed to cache size")); | |
1263 | break; | |
64c8db3c KZ |
1264 | case 'b': |
1265 | opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size")); | |
722762b6 | 1266 | break; |
66a38e97 KZ |
1267 | #ifdef USE_REFLINK |
1268 | case OPT_REFLINK: | |
1269 | reflink_mode = REFLINK_AUTO; | |
1270 | if (optarg) { | |
1271 | if (strcmp(optarg, "auto") == 0) | |
1272 | reflink_mode = REFLINK_AUTO; | |
1273 | else if (strcmp(optarg, "always") == 0) | |
1274 | reflink_mode = REFLINK_ALWAYS; | |
1275 | else if (strcmp(optarg, "never") == 0) | |
1276 | reflink_mode = REFLINK_NEVER; | |
1277 | else | |
1278 | errx(EXIT_FAILURE, _("unsupported reflink mode; %s"), optarg); | |
1279 | } | |
1280 | if (reflink_mode != REFLINK_NEVER) | |
1281 | reflinks_skip = 1; | |
1282 | break; | |
1283 | case OPT_SKIP_RELINKS: | |
1284 | reflinks_skip = 1; | |
1285 | break; | |
1286 | #endif | |
5c7cac85 KZ |
1287 | case 'h': |
1288 | usage(); | |
1289 | case 'V': | |
1290 | print_version(EXIT_SUCCESS); | |
1291 | default: | |
1292 | errtryhelp(EXIT_FAILURE);} | |
1293 | } | |
1294 | ||
1295 | return 0; | |
2180ecc8 KZ |
1296 | } |
1297 | ||
1298 | /** | |
5c7cac85 KZ |
1299 | * to_be_called_atexit - Cleanup handler, also prints statistics. |
1300 | */ | |
2180ecc8 KZ |
1301 | static void to_be_called_atexit(void) |
1302 | { | |
5c7cac85 KZ |
1303 | if (stats.started) |
1304 | print_stats(); | |
2180ecc8 KZ |
1305 | } |
1306 | ||
1307 | /** | |
5c7cac85 KZ |
1308 | * sighandler - Signal handler, sets the global last_signal variable |
1309 | * @i: The signal number | |
1310 | */ | |
2180ecc8 KZ |
1311 | static void sighandler(int i) |
1312 | { | |
5c7cac85 KZ |
1313 | if (last_signal != SIGINT) |
1314 | last_signal = i; | |
1315 | if (i == SIGINT) | |
1316 | putchar('\n'); | |
2180ecc8 KZ |
1317 | } |
1318 | ||
1319 | int main(int argc, char *argv[]) | |
1320 | { | |
5c7cac85 | 1321 | struct sigaction sa; |
259bed15 | 1322 | int rc; |
2180ecc8 | 1323 | |
5c7cac85 KZ |
1324 | sa.sa_handler = sighandler; |
1325 | sa.sa_flags = SA_RESTART; | |
1326 | sigfillset(&sa.sa_mask); | |
2180ecc8 | 1327 | |
5c7cac85 KZ |
1328 | /* If we receive a SIGINT, end the processing */ |
1329 | sigaction(SIGINT, &sa, NULL); | |
1330 | sigaction(SIGUSR1, &sa, NULL); | |
2180ecc8 | 1331 | |
47470044 BS |
1332 | /* Localize messages, number formatting, and anything else. */ |
1333 | setlocale(LC_ALL, ""); | |
1334 | bindtextdomain(PACKAGE, LOCALEDIR); | |
1335 | textdomain(PACKAGE); | |
2180ecc8 | 1336 | |
5c7cac85 KZ |
1337 | if (atexit(to_be_called_atexit) != 0) |
1338 | err(EXIT_FAILURE, _("cannot register exit handler")); | |
2180ecc8 | 1339 | |
5c7cac85 | 1340 | parse_options(argc, argv); |
2180ecc8 | 1341 | |
5c7cac85 | 1342 | if (optind == argc) |
01480c61 | 1343 | errx(EXIT_FAILURE, _("no directory or file specified")); |
2180ecc8 | 1344 | |
5c7cac85 | 1345 | gettime_monotonic(&stats.start_time); |
722762b6 | 1346 | |
259bed15 KZ |
1347 | rc = ul_fileeq_init(&fileeq, opts.method); |
1348 | if (rc != 0 && strcmp(opts.method, "memcmp") != 0) { | |
1349 | warnx(_("cannot initialize %s method, use 'memcmp' fallback"), opts.method); | |
1350 | opts.method = "memcmp"; | |
1351 | rc = ul_fileeq_init(&fileeq, opts.method); | |
1352 | } | |
1353 | if (rc < 0) | |
1354 | err(EXIT_FAILURE, _("failed to initialize files comparior")); | |
722762b6 | 1355 | |
64c8db3c KZ |
1356 | /* defautl I/O size */ |
1357 | if (!opts.io_size) { | |
1358 | if (strcmp(opts.method, "memcmp") == 0) | |
1359 | opts.io_size = 8*1024; | |
1360 | else | |
1361 | opts.io_size = 1024*1024; | |
1362 | } | |
722762b6 | 1363 | |
5c7cac85 | 1364 | stats.started = TRUE; |
2180ecc8 | 1365 | |
ee4c3249 | 1366 | jlog(JLOG_VERBOSE2, _("Scanning [device/inode/links]:")); |
5c7cac85 | 1367 | for (; optind < argc; optind++) { |
a66fbaaf KZ |
1368 | char *path = realpath(argv[optind], NULL); |
1369 | ||
1370 | if (!path) { | |
1371 | warn(_("cannot get realpath: %s"), argv[optind]); | |
1372 | continue; | |
1373 | } | |
1374 | if (nftw(path, inserter, 20, FTW_PHYS) == -1) | |
1375 | warn(_("cannot process %s"), path); | |
1376 | free(path); | |
5c7cac85 | 1377 | } |
2180ecc8 | 1378 | |
5c7cac85 | 1379 | twalk(files, visitor); |
722762b6 | 1380 | |
259bed15 | 1381 | ul_fileeq_deinit(&fileeq); |
5c7cac85 | 1382 | return 0; |
0ec20db8 | 1383 | } |