]>
Commit | Line | Data |
---|---|---|
2180ecc8 | 1 | /* hardlink.c - Link multiple identical files together |
55c000e1 | 2 | * |
2180ecc8 | 3 | * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org> |
cd6b8d39 | 4 | * Copyright (C) 2021 Karel Zak <kzak@redhat.com> |
55c000e1 | 5 | * |
2180ecc8 | 6 | * SPDX-License-Identifier: MIT |
0b05aab4 | 7 | * |
2180ecc8 KZ |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this software and associated documentation files (the "Software"), to deal | |
10 | * in the Software without restriction, including without limitation the rights | |
11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
12 | * copies of the Software, and to permit persons to whom the Software is | |
13 | * furnished to do so, subject to the following conditions: | |
55c000e1 | 14 | * |
2180ecc8 KZ |
15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. | |
55c000e1 | 17 | * |
2180ecc8 KZ |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 | * THE SOFTWARE. | |
55c000e1 | 25 | */ |
5c7cac85 KZ |
26 | #define _POSIX_C_SOURCE 200112L /* POSIX functions */ |
27 | #define _XOPEN_SOURCE 600 /* nftw() */ | |
28 | ||
29 | #include <sys/types.h> /* stat */ | |
30 | #include <sys/stat.h> /* stat */ | |
31 | #include <sys/time.h> /* getrlimit, getrusage */ | |
32 | #include <sys/resource.h> /* getrlimit, getrusage */ | |
33 | #include <fcntl.h> /* posix_fadvise */ | |
34 | #include <ftw.h> /* ftw */ | |
35 | #include <search.h> /* tsearch() and friends */ | |
cd6b8d39 KZ |
36 | #include <signal.h> /* SIG*, sigaction */ |
37 | #include <getopt.h> /* getopt_long() */ | |
5c7cac85 | 38 | #include <ctype.h> /* tolower() */ |
66a38e97 KZ |
39 | #include <sys/ioctl.h> |
40 | ||
3671d4a8 | 41 | #if defined(HAVE_LINUX_FIEMAP_H) && defined(HAVE_SYS_VFS_H) |
66a38e97 KZ |
42 | # include <linux/fs.h> |
43 | # include <linux/fiemap.h> | |
44 | # ifdef FICLONE | |
45 | # define USE_REFLINK 1 | |
46 | # endif | |
47 | #endif | |
2180ecc8 | 48 | |
cd6b8d39 KZ |
49 | #include "nls.h" |
50 | #include "c.h" | |
0361f744 | 51 | #include "xalloc.h" |
631e6865 | 52 | #include "strutils.h" |
06d8fe89 | 53 | #include "monotonic.h" |
4c467ebc | 54 | #include "optutils.h" |
259bed15 | 55 | #include "fileeq.h" |
478b9d47 KZ |
56 | |
57 | #ifdef USE_REFLINK | |
58 | # include "statfs_magic.h" | |
59 | #endif | |
2180ecc8 | 60 | |
abaf378c | 61 | #include <regex.h> /* regcomp(), regexec() */ |
3807e71a | 62 | |
3854515c KZ |
63 | #if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR) |
64 | # include <sys/xattr.h> | |
65 | # define USE_XATTR 1 | |
2180ecc8 | 66 | #endif |
0ec20db8 | 67 | |
static int quiet;		/* don't print anything */
static int rootbasesz;		/* size of the directory for nftw() */

#ifdef USE_REFLINK
/*
 * Reflink policy. do_link() falls back to a plain link() when reflinking
 * fails, unless the mode is REFLINK_ALWAYS.
 */
enum {
	REFLINK_NEVER  = 0,
	REFLINK_AUTO,
	REFLINK_ALWAYS
};
static int reflink_mode = REFLINK_NEVER;
static int reflinks_skip;	/* when set, print_stats() reports skipped reflinks */
#endif

/* NOTE(review): presumably the shared state of the fileeq.h comparison API -- confirm */
static struct ul_fileeq fileeq;
82 | ||
2180ecc8 KZ |
/**
 * struct file - Information about a file
 * @st:       The stat buffer associated with the file
 * @data:     Per-file data of the fileeq comparison code
 *            (NOTE(review): semantics are defined in fileeq.h -- confirm)
 * @next:     Next file with the same size
 * @links:    Singly linked list of the paths known for this file
 *
 * Every element of @links carries:
 * @next:     The next path in the list
 * @basename: The offset of the basename in @path
 * @dirname:  The offset in @path where the directory part that follows the
 *            root directory starts (set from rootbasesz)
 * @path:     The path of the file
 *
 * This contains all information we need about a file.
 */
struct file {
	struct stat st;
	struct ul_fileeq_data data;

	struct file *next;
	struct link {
		struct link *next;
		int basename;
		int dirname;
		/* trailing path storage; the declaration depends on the compiler */
#if __STDC_VERSION__ >= 199901L
		char path[];
#elif __GNUC__
		char path[0];
#else
		char path[1];
#endif
	} *links;
};
0ec20db8 | 110 | |
2180ecc8 KZ |
/**
 * enum log_level - Logging levels
 * @JLOG_SUMMARY: Default log level
 * @JLOG_INFO: Verbose logging (verbose == 1)
 * @JLOG_VERBOSE1: Verbosity 2
 * @JLOG_VERBOSE2: Verbosity 3
 *
 * A message is printed by jlog() when its level is not greater than
 * opts.verbosity and quiet mode is off.
 */
enum log_level {
	JLOG_SUMMARY,
	JLOG_INFO,
	JLOG_VERBOSE1,
	JLOG_VERBOSE2
};
0ec20db8 | 124 | |
2180ecc8 KZ |
/**
 * struct statistics - Statistics about the run
 * @started: Whether we are post command-line processing
 * @files: The number of files worked on
 * @linked: The number of files replaced by a hardlink to a master
 * @xattr_comparisons: The number of extended attribute comparisons
 * @comparisons: The number of comparisons
 * @ignored_reflinks: The counter print_stats() reports as "Skipped reflinks"
 * @saved: The (exaggerated) amount of space saved
 * @start_time: The time we started at
 */
static struct statistics {
	int started;
	size_t files;
	size_t linked;
	size_t xattr_comparisons;
	size_t comparisons;
	size_t ignored_reflinks;
	double saved;
	struct timeval start_time;
} stats;
3807e71a | 145 | |
8ff8b027 KZ |
146 | |
/**
 * struct hdl_regex - One compiled regular expression in a linked list
 * @re:   The compiled POSIX regular expression
 * @next: The next expression in the list, or NULL
 */
struct hdl_regex {
	regex_t re;			/* POSIX compatible regex handler */

	struct hdl_regex *next;
};
152 | ||
2180ecc8 KZ |
/**
 * struct options - Processed command-line options
 * @include: A linked list of regular expressions for the --include option
 * @exclude: A linked list of regular expressions for the --exclude option
 * @method: Name of the content comparison method (default = "sha256" when
 *          USE_FILEEQ_CRYPTOAPI is defined, "memcmp" otherwise)
 * @verbosity: The verbosity. Should be one of #enum log_level
 * @respect_mode: Whether to respect file modes (default = TRUE)
 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
 * @respect_name: Whether to respect file names (default = FALSE)
 * @respect_dir: Whether to respect directory names (default = FALSE)
 * @respect_time: Whether to respect file modification times (default = TRUE)
 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
 * @maximise: Choose the file with the highest link count as master
 * @minimise: Choose the file with the lowest link count as master
 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
 * @min_size: Minimum size of files to consider. (default = 1 byte)
 * @max_size: Maximum size of files to consider, 0 means unlimited. (default = 0 byte)
 * @io_size: NOTE(review): presumably the I/O block size used when reading
 *           file content -- confirm against the option parser
 * @cache_size: NOTE(review): presumably the size of the comparison cache in
 *              bytes -- confirm (default = 10 MiB)
 */
static struct options {
	struct hdl_regex *include;
	struct hdl_regex *exclude;

	const char *method;
	signed int verbosity;
	unsigned int respect_mode:1;
	unsigned int respect_owner:1;
	unsigned int respect_name:1;
	unsigned int respect_dir:1;
	unsigned int respect_time:1;
	unsigned int respect_xattrs:1;
	unsigned int maximise:1;
	unsigned int minimise:1;
	unsigned int keep_oldest:1;
	unsigned int dry_run:1;
	uintmax_t min_size;
	uintmax_t max_size;
	size_t io_size;
	size_t cache_size;
} opts = {
	/* default setting */
#ifdef USE_FILEEQ_CRYPTOAPI
	.method = "sha256",
#else
	.method = "memcmp",
#endif
	.respect_mode = TRUE,
	.respect_owner = TRUE,
	.respect_time = TRUE,
	.respect_xattrs = FALSE,
	.keep_oldest = FALSE,
	.min_size = 1,
	.cache_size = 10*1024*1024
};
2180ecc8 KZ |
205 | |
/*
 * files
 *
 * A binary tree of files, managed using tsearch(). To see which nodes
 * are considered equal, see compare_nodes()
 */
static void *files;

/*
 * files_by_ino
 *
 * A second tsearch() tree; inserter() adds to it using
 * compare_nodes_ino() as the ordering function.
 */
static void *files_by_ino;

/*
 * last_signal
 *
 * The last signal we received. We store the signal here in order to be able
 * to break out of loops gracefully and to return from our nftw() handler.
 * handle_interrupt() inspects the value and clears it again, except for
 * SIGINT and SIGTERM.
 */
static volatile sig_atomic_t last_signal;
0ec20db8 | 222 | |
4939964c KZ |
223 | |
224 | #define is_log_enabled(_level) (quiet == 0 && (_level) <= (unsigned int)opts.verbosity) | |
225 | ||
2180ecc8 KZ |
226 | /** |
227 | * jlog - Logging for hardlink | |
228 | * @level: The log level | |
229 | * @format: A format string for printf() | |
230 | */ | |
5c7cac85 | 231 | __attribute__((format(printf, 2, 3))) |
2180ecc8 | 232 | static void jlog(enum log_level level, const char *format, ...) |
0ec20db8 | 233 | { |
5c7cac85 | 234 | va_list args; |
2180ecc8 | 235 | |
4939964c | 236 | if (!is_log_enabled(level)) |
5c7cac85 | 237 | return; |
423e80c1 | 238 | |
5c7cac85 KZ |
239 | va_start(args, format); |
240 | vfprintf(stdout, format, args); | |
241 | va_end(args); | |
242 | fputc('\n', stdout); | |
0ec20db8 DJ |
243 | } |
244 | ||
2180ecc8 KZ |
/**
 * CMP - Three-way comparison of two numerical values
 * @a: First value
 * @b: Second value
 *
 * Evaluates to 1 if @a is greater than @b, to -1 if it is smaller and to 0
 * otherwise. Only relational operators are used (no subtraction of the
 * operands), so integers of any size can be compared without overflow.
 * Both arguments are evaluated more than once.
 */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
253 | ||
2180ecc8 | 254 | /** |
8ff8b027 KZ |
255 | * register_regex - Compile and insert a regular expression into list |
256 | * @pregs: Pointer to a linked list of regular expressions | |
257 | * @regex: String containing the regular expression to be compiled | |
258 | */ | |
259 | static void register_regex(struct hdl_regex **pregs, const char *regex) | |
260 | { | |
261 | struct hdl_regex *link; | |
262 | int err; | |
263 | ||
264 | link = xmalloc(sizeof(*link)); | |
265 | ||
266 | if ((err = regcomp(&link->re, regex, REG_NOSUB | REG_EXTENDED)) != 0) { | |
267 | size_t size = regerror(err, &link->re, NULL, 0); | |
268 | char *buf = xmalloc(size + 1); | |
269 | ||
270 | regerror(err, &link->re, buf, size); | |
271 | ||
272 | errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"), | |
273 | regex, buf); | |
274 | } | |
275 | link->next = *pregs; *pregs = link; | |
276 | } | |
277 | ||
278 | /** | |
279 | * match_any_regex - Match against multiple regular expressions | |
2180ecc8 KZ |
280 | * @pregs: A linked list of regular expressions |
281 | * @what: The string to match against | |
282 | * | |
283 | * Checks whether any of the regular expressions in the list matches the | |
284 | * string. | |
285 | */ | |
8ff8b027 | 286 | static int match_any_regex(struct hdl_regex *pregs, const char *what) |
0ec20db8 | 287 | { |
5c7cac85 | 288 | for (; pregs != NULL; pregs = pregs->next) { |
8ff8b027 | 289 | if (regexec(&pregs->re, what, 0, NULL, 0) == 0) |
5c7cac85 KZ |
290 | return TRUE; |
291 | } | |
292 | return FALSE; | |
0ec20db8 DJ |
293 | } |
294 | ||
2180ecc8 KZ |
295 | /** |
296 | * compare_nodes - Node comparison function | |
297 | * @_a: The first node (a #struct file) | |
298 | * @_b: The second node (a #struct file) | |
299 | * | |
300 | * Compare the two nodes for the binary tree. | |
301 | */ | |
302 | static int compare_nodes(const void *_a, const void *_b) | |
94b040b0 | 303 | { |
5c7cac85 KZ |
304 | const struct file *a = _a; |
305 | const struct file *b = _b; | |
306 | int diff = 0; | |
bd7722af | 307 | |
5c7cac85 KZ |
308 | if (diff == 0) |
309 | diff = CMP(a->st.st_dev, b->st.st_dev); | |
310 | if (diff == 0) | |
311 | diff = CMP(a->st.st_size, b->st.st_size); | |
2180ecc8 | 312 | |
5c7cac85 | 313 | return diff; |
94b040b0 JN |
314 | } |
315 | ||
a9b1dfd9 KZ |
316 | /* Compare only filenames */ |
317 | static inline int filename_strcmp(const struct file *a, const struct file *b) | |
318 | { | |
319 | return strcmp( a->links->path + a->links->basename, | |
320 | b->links->path + b->links->basename); | |
321 | } | |
322 | ||
323 | /** | |
324 | * Compare only directory names (ignores root directory and basename (filename)) | |
325 | * | |
326 | * The complete path conrains three fragments: | |
327 | * | |
328 | * <rootdir> is specified on hardlink command line | |
329 | * <dirname> is all betweehn rootdir and filename | |
330 | * <filename> is last component (aka basename) | |
331 | */ | |
332 | static inline int dirname_strcmp(const struct file *a, const struct file *b) | |
333 | { | |
334 | int diff = 0; | |
335 | int asz = a->links->basename - a->links->dirname, | |
336 | bsz = b->links->basename - b->links->dirname; | |
337 | ||
338 | diff = CMP(asz, bsz); | |
339 | ||
340 | if (diff == 0) { | |
341 | const char *a_start, *b_start; | |
342 | ||
343 | a_start = a->links->path + a->links->dirname; | |
344 | b_start = b->links->path + b->links->dirname; | |
345 | ||
346 | diff = strncmp(a_start, b_start, asz); | |
347 | } | |
348 | return diff; | |
349 | } | |
350 | ||
2180ecc8 KZ |
351 | /** |
352 | * compare_nodes_ino - Node comparison function | |
353 | * @_a: The first node (a #struct file) | |
354 | * @_b: The second node (a #struct file) | |
355 | * | |
356 | * Compare the two nodes for the binary tree. | |
357 | */ | |
358 | static int compare_nodes_ino(const void *_a, const void *_b) | |
94b040b0 | 359 | { |
5c7cac85 KZ |
360 | const struct file *a = _a; |
361 | const struct file *b = _b; | |
362 | int diff = 0; | |
363 | ||
364 | if (diff == 0) | |
365 | diff = CMP(a->st.st_dev, b->st.st_dev); | |
366 | if (diff == 0) | |
367 | diff = CMP(a->st.st_ino, b->st.st_ino); | |
368 | ||
369 | /* If opts.respect_name is used, we will restrict a struct file to | |
370 | * contain only links with the same basename to keep the rest simple. | |
371 | */ | |
372 | if (diff == 0 && opts.respect_name) | |
a9b1dfd9 KZ |
373 | diff = filename_strcmp(a, b); |
374 | if (diff == 0 && opts.respect_dir) | |
375 | diff = dirname_strcmp(a, b); | |
5c7cac85 KZ |
376 | |
377 | return diff; | |
94b040b0 JN |
378 | } |
379 | ||
2180ecc8 KZ |
380 | /** |
381 | * print_stats - Print statistics to stdout | |
382 | */ | |
383 | static void print_stats(void) | |
94b040b0 | 384 | { |
5c7cac85 KZ |
385 | struct timeval end = { 0, 0 }, delta = { 0, 0 }; |
386 | char *ssz; | |
06d8fe89 | 387 | |
5c7cac85 KZ |
388 | gettime_monotonic(&end); |
389 | timersub(&end, &stats.start_time, &delta); | |
390 | ||
66a38e97 | 391 | jlog(JLOG_SUMMARY, "%-25s %s", _("Mode:"), |
5c7cac85 | 392 | opts.dry_run ? _("dry-run") : _("real")); |
66a38e97 KZ |
393 | jlog(JLOG_SUMMARY, "%-25s %s", _("Method:"), opts.method); |
394 | jlog(JLOG_SUMMARY, "%-25s %zu", _("Files:"), stats.files); | |
395 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Linked:"), stats.linked); | |
06d8fe89 | 396 | |
3854515c | 397 | #ifdef USE_XATTR |
66a38e97 | 398 | jlog(JLOG_SUMMARY, _("%-25s %zu xattrs"), _("Compared:"), |
5c7cac85 | 399 | stats.xattr_comparisons); |
2180ecc8 | 400 | #endif |
66a38e97 | 401 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Compared:"), |
5c7cac85 | 402 | stats.comparisons); |
66a38e97 KZ |
403 | #ifdef USE_REFLINK |
404 | if (reflinks_skip) | |
405 | jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Skipped reflinks:"), | |
406 | stats.ignored_reflinks); | |
407 | #endif | |
5c7cac85 KZ |
408 | ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | |
409 | SIZE_SUFFIX_SPACE | | |
410 | SIZE_DECIMAL_2DIGITS, stats.saved); | |
423e80c1 | 411 | |
66a38e97 | 412 | jlog(JLOG_SUMMARY, "%-25s %s", _("Saved:"), ssz); |
5c7cac85 | 413 | free(ssz); |
423e80c1 | 414 | |
66a38e97 | 415 | jlog(JLOG_SUMMARY, _("%-25s %"PRId64".%06"PRId64" seconds"), _("Duration:"), |
63463630 | 416 | (int64_t)delta.tv_sec, (int64_t)delta.tv_usec); |
94b040b0 | 417 | } |
55c000e1 | 418 | |
2180ecc8 KZ |
419 | /** |
420 | * handle_interrupt - Handle a signal | |
421 | * | |
422 | * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals. | |
423 | */ | |
cd6b8d39 | 424 | static int handle_interrupt(void) |
0ec20db8 | 425 | { |
5c7cac85 KZ |
426 | switch (last_signal) { |
427 | case SIGINT: | |
428 | case SIGTERM: | |
429 | return TRUE; | |
430 | case SIGUSR1: | |
431 | print_stats(); | |
432 | putchar('\n'); | |
433 | break; | |
434 | } | |
435 | ||
436 | last_signal = 0; | |
437 | return FALSE; | |
0ec20db8 DJ |
438 | } |
439 | ||
3854515c | 440 | #ifdef USE_XATTR |
2180ecc8 | 441 | |
2180ecc8 KZ |
/**
 * llistxattr_or_die - Wrapper for llistxattr()
 * @path: The file to query
 * @list: Buffer receiving the names (may be NULL when @size is 0)
 * @size: Size of @list
 *
 * Behaves like llistxattr(), but terminates the program on every error
 * except ENOTSUP. In the ENOTSUP case the negative result of llistxattr()
 * is handed back to the caller.
 */
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
{
	ssize_t rc = llistxattr(path, list, size);
	int fatal = (rc < 0 && errno != ENOTSUP);

	if (fatal)
		err(EXIT_FAILURE, _("cannot get xattr names for %s"), path);

	return rc;
}
457 | ||
/**
 * lgetxattr_or_die - Wrapper for lgetxattr()
 * @path:  The file to query
 * @name:  The name of the extended attribute
 * @value: Buffer receiving the value (may be NULL when @size is 0)
 * @size:  Size of @value
 *
 * Behaves like lgetxattr(), but terminates the program on any error, so
 * the result is never negative.
 */
static ssize_t lgetxattr_or_die(const char *path,
				const char *name, void *value, size_t size)
{
	ssize_t rc = lgetxattr(path, name, value, size);

	if (rc >= 0)
		return rc;

	err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"),
	    name, path);

	return rc;
}
474 | ||
/**
 * get_xattr_name_count - Count the number of xattr names
 * @names: a non-empty table of concatenated, null-terminated xattr names
 * @len: the total length of the table
 *
 * @Returns the number of xattr names
 */
static int get_xattr_name_count(const char *const names, ssize_t len)
{
	const char *const end = names + len;
	const char *cur = names;
	int n = 0;

	/* hop from one terminating NUL to the start of the next name */
	while (cur < end) {
		cur += strlen(cur) + 1;
		n++;
	}

	return n;
}
492 | ||
/**
 * cmp_xattr_name_ptrs - qsort() callback for tables of xattr names
 *
 * Both arguments point to a pointer to a name; the result is the strcmp()
 * order of the two names.
 */
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
	const char *const *lhs = ptr1;
	const char *const *rhs = ptr2;

	return strcmp(*lhs, *rhs);
}
501 | ||
502 | /** | |
503 | * get_sorted_xattr_name_table - Create a sorted table of xattr names. | |
504 | * @names - table of concatenated, null-terminated xattr names | |
505 | * @n - the number of names | |
506 | * | |
507 | * @Returns allocated table of pointers to the names, sorted alphabetically | |
508 | */ | |
509 | static const char **get_sorted_xattr_name_table(const char *names, int n) | |
510 | { | |
5c7cac85 KZ |
511 | const char **table = xmalloc(n * sizeof(char *)); |
512 | int i; | |
2180ecc8 | 513 | |
5c7cac85 KZ |
514 | for (i = 0; i < n; i++) { |
515 | table[i] = names; | |
516 | names += strlen(names) + 1; | |
517 | } | |
2180ecc8 | 518 | |
5c7cac85 | 519 | qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs); |
2180ecc8 | 520 | |
5c7cac85 | 521 | return table; |
2180ecc8 KZ |
522 | } |
523 | ||
524 | /** | |
525 | * file_xattrs_equal - Compare the extended attributes of two files | |
526 | * @a: The first file | |
527 | * @b: The second file | |
528 | * | |
529 | * @Returns: %TRUE if and only if extended attributes are equal | |
530 | */ | |
cd6b8d39 | 531 | static int file_xattrs_equal(const struct file *a, const struct file *b) |
2180ecc8 | 532 | { |
5c7cac85 KZ |
533 | ssize_t len_a; |
534 | ssize_t len_b; | |
535 | char *names_a = NULL; | |
536 | char *names_b = NULL; | |
537 | int n_a; | |
538 | int n_b; | |
539 | const char **name_ptrs_a = NULL; | |
540 | const char **name_ptrs_b = NULL; | |
541 | void *value_a = NULL; | |
542 | void *value_b = NULL; | |
543 | int ret = FALSE; | |
544 | int i; | |
2180ecc8 | 545 | |
5c7cac85 KZ |
546 | assert(a->links != NULL); |
547 | assert(b->links != NULL); | |
2180ecc8 | 548 | |
5c7cac85 KZ |
549 | jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path, |
550 | b->links->path); | |
2180ecc8 | 551 | |
5c7cac85 | 552 | stats.xattr_comparisons++; |
2180ecc8 | 553 | |
5c7cac85 KZ |
554 | len_a = llistxattr_or_die(a->links->path, NULL, 0); |
555 | len_b = llistxattr_or_die(b->links->path, NULL, 0); | |
2180ecc8 | 556 | |
5c7cac85 KZ |
557 | if (len_a <= 0 && len_b <= 0) |
558 | return TRUE; // xattrs not supported or neither file has any | |
2180ecc8 | 559 | |
5c7cac85 KZ |
560 | if (len_a != len_b) |
561 | return FALSE; // total lengths of xattr names differ | |
2180ecc8 | 562 | |
5c7cac85 KZ |
563 | names_a = xmalloc(len_a); |
564 | names_b = xmalloc(len_b); | |
2180ecc8 | 565 | |
5c7cac85 KZ |
566 | len_a = llistxattr_or_die(a->links->path, names_a, len_a); |
567 | len_b = llistxattr_or_die(b->links->path, names_b, len_b); | |
568 | assert((len_a > 0) && (len_a == len_b)); | |
2180ecc8 | 569 | |
5c7cac85 KZ |
570 | n_a = get_xattr_name_count(names_a, len_a); |
571 | n_b = get_xattr_name_count(names_b, len_b); | |
2180ecc8 | 572 | |
5c7cac85 KZ |
573 | if (n_a != n_b) |
574 | goto exit; // numbers of xattrs differ | |
2180ecc8 | 575 | |
5c7cac85 KZ |
576 | name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a); |
577 | name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b); | |
2180ecc8 | 578 | |
5c7cac85 | 579 | // We now have two sorted tables of xattr names. |
2180ecc8 | 580 | |
5c7cac85 KZ |
581 | for (i = 0; i < n_a; i++) { |
582 | if (handle_interrupt()) | |
583 | goto exit; // user wants to quit | |
2180ecc8 | 584 | |
5c7cac85 KZ |
585 | if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0) |
586 | goto exit; // names at same slot differ | |
2180ecc8 | 587 | |
5c7cac85 KZ |
588 | len_a = |
589 | lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0); | |
590 | len_b = | |
591 | lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0); | |
2180ecc8 | 592 | |
5c7cac85 KZ |
593 | if (len_a != len_b) |
594 | goto exit; // xattrs with same name, different value lengths | |
2180ecc8 | 595 | |
5c7cac85 KZ |
596 | value_a = xmalloc(len_a); |
597 | value_b = xmalloc(len_b); | |
2180ecc8 | 598 | |
5c7cac85 KZ |
599 | len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], |
600 | value_a, len_a); | |
601 | len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], | |
602 | value_b, len_b); | |
603 | assert((len_a >= 0) && (len_a == len_b)); | |
2180ecc8 | 604 | |
5c7cac85 KZ |
605 | if (memcmp(value_a, value_b, len_a) != 0) |
606 | goto exit; // xattrs with same name, different values | |
2180ecc8 | 607 | |
5c7cac85 KZ |
608 | free(value_a); |
609 | free(value_b); | |
610 | value_a = NULL; | |
611 | value_b = NULL; | |
612 | } | |
2180ecc8 | 613 | |
5c7cac85 | 614 | ret = TRUE; |
2180ecc8 | 615 | |
5c7cac85 KZ |
616 | exit: |
617 | free(names_a); | |
618 | free(names_b); | |
619 | free(name_ptrs_a); | |
620 | free(name_ptrs_b); | |
621 | free(value_a); | |
622 | free(value_b); | |
623 | return ret; | |
2180ecc8 | 624 | } |
3854515c | 625 | #else /* !USE_XATTR */ |
cd6b8d39 | 626 | static int file_xattrs_equal(const struct file *a, const struct file *b) |
2180ecc8 | 627 | { |
5c7cac85 | 628 | return TRUE; |
2180ecc8 | 629 | } |
3854515c | 630 | #endif /* USE_XATTR */ |
2180ecc8 | 631 | |
2180ecc8 KZ |
632 | /** |
633 | * file_may_link_to - Check whether a file may replace another one | |
634 | * @a: The first file | |
635 | * @b: The second file | |
636 | * | |
259bed15 KZ |
637 | * Check whether the two files are considered equal attributes and can be |
638 | * linked. This function does not compare content od the files! | |
2180ecc8 | 639 | */ |
cd6b8d39 | 640 | static int file_may_link_to(const struct file *a, const struct file *b) |
2180ecc8 | 641 | { |
5c7cac85 KZ |
642 | return (a->st.st_size != 0 && |
643 | a->st.st_size == b->st.st_size && | |
644 | a->links != NULL && b->links != NULL && | |
645 | a->st.st_dev == b->st.st_dev && | |
646 | a->st.st_ino != b->st.st_ino && | |
647 | (!opts.respect_mode || a->st.st_mode == b->st.st_mode) && | |
648 | (!opts.respect_owner || a->st.st_uid == b->st.st_uid) && | |
649 | (!opts.respect_owner || a->st.st_gid == b->st.st_gid) && | |
650 | (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) && | |
a9b1dfd9 KZ |
651 | (!opts.respect_name || filename_strcmp(a, b) == 0) && |
652 | (!opts.respect_dir || dirname_strcmp(a, b) == 0) && | |
259bed15 | 653 | (!opts.respect_xattrs || file_xattrs_equal(a, b))); |
2180ecc8 KZ |
654 | } |
655 | ||
656 | /** | |
657 | * file_compare - Compare two files to decide which should be master | |
658 | * @a: The first file | |
659 | * @b: The second file | |
660 | * | |
661 | * Check which of the files should be considered greater and thus serve | |
662 | * as the master when linking (the master is the file that all equal files | |
663 | * will be replaced with). | |
664 | */ | |
665 | static int file_compare(const struct file *a, const struct file *b) | |
666 | { | |
5c7cac85 KZ |
667 | int res = 0; |
668 | if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino) | |
669 | return 0; | |
670 | ||
671 | if (res == 0 && opts.maximise) | |
672 | res = CMP(a->st.st_nlink, b->st.st_nlink); | |
673 | if (res == 0 && opts.minimise) | |
674 | res = CMP(b->st.st_nlink, a->st.st_nlink); | |
675 | if (res == 0) | |
676 | res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime) | |
677 | : CMP(a->st.st_mtime, b->st.st_mtime); | |
678 | if (res == 0) | |
679 | res = CMP(b->st.st_ino, a->st.st_ino); | |
680 | ||
681 | return res; | |
2180ecc8 KZ |
682 | } |
683 | ||
66a38e97 KZ |
684 | #ifdef USE_REFLINK |
685 | static inline int do_link(struct file *a, struct file *b, | |
686 | const char *new_name, int reflink) | |
687 | { | |
688 | if (reflink) { | |
689 | int dest = -1, src = -1; | |
690 | ||
2401078c | 691 | dest = open(new_name, O_CREAT|O_WRONLY|O_TRUNC, 0600); |
66a38e97 KZ |
692 | if (dest < 0) |
693 | goto fallback; | |
694 | if (fchmod(dest, b->st.st_mode) != 0) | |
695 | goto fallback; | |
696 | if (fchown(dest, b->st.st_uid, b->st.st_gid) != 0) | |
697 | goto fallback; | |
698 | src = open(a->links->path, O_RDONLY); | |
699 | if (src < 0) | |
700 | goto fallback; | |
701 | if (ioctl(dest, FICLONE, src) != 0) | |
702 | goto fallback; | |
703 | close(dest); | |
704 | close(src); | |
705 | return 0; | |
706 | fallback: | |
707 | if (dest >= 0) { | |
708 | close(dest); | |
709 | unlink(new_name); | |
710 | } | |
711 | if (src >= 0) | |
712 | close(src); | |
713 | ||
714 | if (reflink_mode == REFLINK_ALWAYS) | |
715 | return -errno; | |
716 | jlog(JLOG_VERBOSE2,_("Reflinking failed, fallback to hardlinking")); | |
717 | } | |
718 | ||
719 | return link(a->links->path, new_name); | |
720 | } | |
721 | #else | |
722 | static inline int do_link(struct file *a, | |
723 | struct file *b __attribute__((__unused__)), | |
724 | const char *new_name, | |
725 | int reflink __attribute__((__unused__))) | |
726 | { | |
727 | return link(a->links->path, new_name); | |
728 | } | |
729 | #endif /* USE_REFLINK */ | |
730 | ||
2180ecc8 KZ |
731 | /** |
732 | * file_link - Replace b with a link to a | |
733 | * @a: The first file | |
734 | * @b: The second file | |
735 | * | |
736 | * Link the file, replacing @b with the current one. The file is first | |
737 | * linked to a temporary name, and then renamed to the name of @b, making | |
738 | * the replace atomic (@b will always exist). | |
739 | */ | |
66a38e97 | 740 | static int file_link(struct file *a, struct file *b, int reflink) |
2180ecc8 | 741 | { |
5c7cac85 KZ |
742 | |
743 | file_link: | |
744 | assert(a->links != NULL); | |
745 | assert(b->links != NULL); | |
746 | ||
4939964c KZ |
747 | if (is_log_enabled(JLOG_INFO)) { |
748 | char *ssz = size_to_human_string(SIZE_SUFFIX_3LETTER | | |
5c7cac85 KZ |
749 | SIZE_SUFFIX_SPACE | |
750 | SIZE_DECIMAL_2DIGITS, a->st.st_size); | |
66a38e97 KZ |
751 | jlog(JLOG_INFO, _("%s%sLinking %s to %s (-%s)"), |
752 | opts.dry_run ? _("[DryRun] ") : "", | |
753 | reflink ? "Ref" : "", | |
754 | a->links->path, b->links->path, | |
4939964c KZ |
755 | ssz); |
756 | free(ssz); | |
757 | } | |
5c7cac85 KZ |
758 | |
759 | if (!opts.dry_run) { | |
fd1e57a7 KZ |
760 | char *new_path; |
761 | int failed = 1; | |
5c7cac85 | 762 | |
fd1e57a7 KZ |
763 | xasprintf(&new_path, "%s.hardlink-temporary", b->links->path); |
764 | ||
66a38e97 | 765 | if (do_link(a, b, new_path, reflink) != 0) |
fd1e57a7 KZ |
766 | warn(_("cannot link %s to %s"), a->links->path, new_path); |
767 | ||
768 | else if (rename(new_path, b->links->path) != 0) { | |
769 | warn(_("cannot rename %s to %s"), a->links->path, new_path); | |
770 | unlink(new_path); | |
771 | } else | |
772 | failed = 0; | |
5c7cac85 | 773 | |
5c7cac85 | 774 | free(new_path); |
fd1e57a7 KZ |
775 | if (failed) |
776 | return FALSE; | |
5c7cac85 KZ |
777 | } |
778 | ||
779 | /* Update statistics */ | |
780 | stats.linked++; | |
781 | ||
782 | /* Increase the link count of this file, and set stat() of other file */ | |
783 | a->st.st_nlink++; | |
784 | b->st.st_nlink--; | |
785 | ||
786 | if (b->st.st_nlink == 0) | |
787 | stats.saved += a->st.st_size; | |
788 | ||
789 | /* Move the link from file b to a */ | |
790 | { | |
791 | struct link *new_link = b->links; | |
792 | ||
793 | b->links = b->links->next; | |
794 | new_link->next = a->links->next; | |
795 | a->links->next = new_link; | |
796 | } | |
797 | ||
798 | /* Do it again */ | |
799 | if (b->links) | |
800 | goto file_link; | |
801 | ||
802 | return TRUE; | |
2180ecc8 KZ |
803 | } |
804 | ||
2a28d961 KZ |
805 | static int has_fpath(struct file *node, const char *path) |
806 | { | |
807 | struct link *l; | |
808 | ||
809 | for (l = node->links; l; l = l->next) { | |
810 | if (strcmp(l->path, path) == 0) | |
811 | return 1; | |
812 | } | |
813 | ||
814 | return 0; | |
815 | } | |
816 | ||
817 | ||
2180ecc8 KZ |
818 | /** |
819 | * inserter - Callback function for nftw() | |
820 | * @fpath: The path of the file being visited | |
821 | * @sb: The stat information of the file | |
822 | * @typeflag: The type flag | |
823 | * @ftwbuf: Contains current level of nesting and offset of basename | |
824 | * | |
825 | * Called by nftw() for the files. See the manual page for nftw() for | |
826 | * further information. | |
827 | */ | |
5c7cac85 KZ |
828 | static int inserter(const char *fpath, const struct stat *sb, |
829 | int typeflag, struct FTW *ftwbuf) | |
2180ecc8 | 830 | { |
5c7cac85 KZ |
831 | struct file *fil; |
832 | struct file **node; | |
833 | size_t pathlen; | |
834 | int included; | |
835 | int excluded; | |
2180ecc8 | 836 | |
5c7cac85 KZ |
837 | if (handle_interrupt()) |
838 | return 1; | |
839 | if (typeflag == FTW_DNR || typeflag == FTW_NS) | |
840 | warn(_("cannot read %s"), fpath); | |
841 | if (typeflag != FTW_F || !S_ISREG(sb->st_mode)) | |
842 | return 0; | |
2180ecc8 | 843 | |
8ff8b027 KZ |
844 | included = match_any_regex(opts.include, fpath); |
845 | excluded = match_any_regex(opts.exclude, fpath); | |
2180ecc8 | 846 | |
5c7cac85 KZ |
847 | if ((opts.exclude && excluded && !included) || |
848 | (!opts.exclude && opts.include && !included)) | |
849 | return 0; | |
2180ecc8 | 850 | |
5c7cac85 | 851 | stats.files++; |
2180ecc8 | 852 | |
5c7cac85 KZ |
853 | if ((uintmax_t) sb->st_size < opts.min_size) { |
854 | jlog(JLOG_VERBOSE1, | |
855 | _("Skipped %s (smaller than configured size)"), fpath); | |
856 | return 0; | |
857 | } | |
2180ecc8 | 858 | |
710e8ecb | 859 | jlog(JLOG_VERBOSE2, " %5zu: [%" PRIu64 "/%" PRIu64 "/%zu] %s", |
57d9fd2b KZ |
860 | stats.files, sb->st_dev, sb->st_ino, |
861 | (size_t) sb->st_nlink, fpath); | |
2180ecc8 | 862 | |
40a82456 DP |
863 | if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) { |
864 | jlog(JLOG_VERBOSE1, | |
865 | _("Skipped %s (greater than configured size)"), fpath); | |
866 | return 0; | |
867 | } | |
2180ecc8 | 868 | |
5c7cac85 | 869 | pathlen = strlen(fpath) + 1; |
2180ecc8 | 870 | |
5c7cac85 KZ |
871 | fil = xcalloc(1, sizeof(*fil)); |
872 | fil->links = xcalloc(1, sizeof(struct link) + pathlen); | |
2180ecc8 | 873 | |
5c7cac85 KZ |
874 | fil->st = *sb; |
875 | fil->links->basename = ftwbuf->base; | |
a9b1dfd9 | 876 | fil->links->dirname = rootbasesz; |
5c7cac85 | 877 | fil->links->next = NULL; |
2180ecc8 | 878 | |
5c7cac85 | 879 | memcpy(fil->links->path, fpath, pathlen); |
2180ecc8 | 880 | |
5c7cac85 | 881 | node = tsearch(fil, &files_by_ino, compare_nodes_ino); |
2180ecc8 | 882 | |
5c7cac85 KZ |
883 | if (node == NULL) |
884 | goto fail; | |
2180ecc8 | 885 | |
5c7cac85 KZ |
886 | if (*node != fil) { |
887 | /* Already known inode, add link to inode information */ | |
888 | assert((*node)->st.st_dev == sb->st_dev); | |
889 | assert((*node)->st.st_ino == sb->st_ino); | |
2180ecc8 | 890 | |
2a28d961 KZ |
891 | if (has_fpath(*node, fpath)) { |
892 | jlog(JLOG_VERBOSE1, | |
893 | _("Skipped %s (specified more than once)"), fpath); | |
894 | free(fil->links); | |
895 | } else { | |
896 | fil->links->next = (*node)->links; | |
897 | (*node)->links = fil->links; | |
898 | } | |
2180ecc8 | 899 | |
5c7cac85 KZ |
900 | free(fil); |
901 | } else { | |
902 | /* New inode, insert into by-size table */ | |
903 | node = tsearch(fil, &files, compare_nodes); | |
2180ecc8 | 904 | |
5c7cac85 KZ |
905 | if (node == NULL) |
906 | goto fail; | |
2180ecc8 | 907 | |
5c7cac85 KZ |
908 | if (*node != fil) { |
909 | struct file *l; | |
2180ecc8 | 910 | |
5c7cac85 KZ |
911 | if (file_compare(fil, *node) >= 0) { |
912 | fil->next = *node; | |
913 | *node = fil; | |
914 | } else { | |
915 | for (l = *node; l != NULL; l = l->next) { | |
916 | if (l->next != NULL | |
917 | && file_compare(fil, l->next) < 0) | |
918 | continue; | |
2180ecc8 | 919 | |
5c7cac85 KZ |
920 | fil->next = l->next; |
921 | l->next = fil; | |
2180ecc8 | 922 | |
5c7cac85 KZ |
923 | break; |
924 | } | |
925 | } | |
926 | } | |
927 | } | |
2180ecc8 | 928 | |
5c7cac85 | 929 | return 0; |
3c52b1c8 | 930 | |
5c7cac85 KZ |
931 | fail: |
932 | warn(_("cannot continue")); /* probably ENOMEM */ | |
933 | return 0; | |
2180ecc8 KZ |
934 | } |
935 | ||
66a38e97 KZ |
936 | #ifdef USE_REFLINK |
937 | static int is_reflink_compatible(dev_t devno, const char *filename) | |
938 | { | |
939 | static dev_t last_dev = 0; | |
940 | static int last_status = 0; | |
941 | ||
942 | if (last_dev != devno) { | |
943 | struct statfs vfs; | |
944 | ||
945 | if (statfs(filename, &vfs) != 0) | |
946 | return 0; | |
947 | ||
948 | last_dev = devno; | |
949 | switch (vfs.f_type) { | |
950 | case STATFS_BTRFS_MAGIC: | |
951 | case STATFS_XFS_MAGIC: | |
952 | last_status = 1; | |
953 | break; | |
954 | default: | |
955 | last_status = 0; | |
956 | break; | |
957 | } | |
958 | } | |
959 | ||
960 | return last_status; | |
961 | } | |
962 | ||
963 | static int is_reflink(struct file *xa, struct file *xb) | |
964 | { | |
965 | int last = 0, rc = 0; | |
966 | char abuf[BUFSIZ] = { 0 }, | |
967 | bbuf[BUFSIZ] = { 0 }; | |
968 | ||
969 | struct fiemap *amap = (struct fiemap *) abuf, | |
970 | *bmap = (struct fiemap *) bbuf; | |
971 | ||
972 | int af = open(xa->links->path, O_RDONLY), | |
973 | bf = open(xb->links->path, O_RDONLY); | |
974 | ||
2a596d70 KZ |
975 | if (af < 0 || bf < 0) |
976 | goto done; | |
977 | ||
66a38e97 KZ |
978 | do { |
979 | size_t i; | |
980 | ||
981 | amap->fm_length = ~0ULL; | |
982 | amap->fm_flags = FIEMAP_FLAG_SYNC; | |
983 | amap->fm_extent_count = (sizeof(abuf) - sizeof(*amap)) / sizeof(struct fiemap_extent); | |
984 | ||
985 | bmap->fm_length = ~0ULL; | |
986 | bmap->fm_flags = FIEMAP_FLAG_SYNC; | |
987 | bmap->fm_extent_count = (sizeof(bbuf) - sizeof(*bmap)) / sizeof(struct fiemap_extent); | |
988 | ||
989 | if (ioctl(af, FS_IOC_FIEMAP, (unsigned long) amap) < 0) | |
990 | goto done; | |
991 | if (ioctl(bf, FS_IOC_FIEMAP, (unsigned long) bmap) < 0) | |
992 | goto done; | |
993 | ||
d829e785 KZ |
994 | if (amap->fm_mapped_extents == 0 || |
995 | amap->fm_mapped_extents != bmap->fm_mapped_extents) | |
66a38e97 KZ |
996 | goto done; |
997 | ||
998 | for (i = 0; i < amap->fm_mapped_extents; i++) { | |
999 | struct fiemap_extent *a = &amap->fm_extents[i]; | |
1000 | struct fiemap_extent *b = &bmap->fm_extents[i]; | |
1001 | ||
1002 | if (a->fe_logical != b->fe_logical || | |
1003 | a->fe_length != b->fe_length || | |
1004 | a->fe_physical != b->fe_physical) | |
1005 | goto done; | |
1006 | if (!(a->fe_flags & FIEMAP_EXTENT_SHARED) || | |
1007 | !(b->fe_flags & FIEMAP_EXTENT_SHARED)) | |
1008 | goto done; | |
1009 | if (a->fe_flags & FIEMAP_EXTENT_LAST) | |
1010 | last = 1; | |
1011 | } | |
1012 | ||
1013 | bmap->fm_start = amap->fm_start = | |
1014 | amap->fm_extents[amap->fm_mapped_extents - 1].fe_logical + | |
1015 | amap->fm_extents[amap->fm_mapped_extents - 1].fe_length; | |
1016 | } while (last == 0); | |
1017 | ||
1018 | rc = 1; | |
1019 | done: | |
2a596d70 KZ |
1020 | if (af >= 0) |
1021 | close(af); | |
1022 | if (bf >= 0) | |
1023 | close(bf); | |
66a38e97 KZ |
1024 | return rc; |
1025 | } | |
1026 | #endif /* USE_REFLINK */ | |
1027 | ||
259bed15 KZ |
1028 | static inline size_t count_nodes(struct file *x) |
1029 | { | |
1030 | size_t ct = 0; | |
1031 | ||
1032 | for ( ; x != NULL; x = x->next) | |
1033 | ct++; | |
1034 | ||
1035 | return ct; | |
1036 | } | |
1037 | ||
2180ecc8 KZ |
1038 | /** |
1039 | * visitor - Callback for twalk() | |
1040 | * @nodep: Pointer to a pointer to a #struct file | |
1041 | * @which: At which point this visit is (preorder, postorder, endorder) | |
1042 | * @depth: The depth of the node in the tree | |
1043 | * | |
1044 | * Visit the nodes in the binary tree. For each node, call hardlinker() | |
1045 | * on each #struct file in the linked list of #struct file instances located | |
1046 | * at that node. | |
1047 | */ | |
1048 | static void visitor(const void *nodep, const VISIT which, const int depth) | |
1049 | { | |
5c7cac85 | 1050 | struct file *master = *(struct file **)nodep; |
259bed15 | 1051 | struct file *begin = master; |
5c7cac85 | 1052 | struct file *other; |
2180ecc8 | 1053 | |
5c7cac85 | 1054 | (void)depth; |
2180ecc8 | 1055 | |
5c7cac85 KZ |
1056 | if (which != leaf && which != endorder) |
1057 | return; | |
2180ecc8 | 1058 | |
5c7cac85 | 1059 | for (; master != NULL; master = master->next) { |
259bed15 | 1060 | size_t nnodes, memsiz; |
66a38e97 | 1061 | int may_reflink = 0; |
ee4c3249 | 1062 | |
5c7cac85 KZ |
1063 | if (handle_interrupt()) |
1064 | exit(EXIT_FAILURE); | |
1065 | if (master->links == NULL) | |
1066 | continue; | |
2180ecc8 | 1067 | |
259bed15 KZ |
1068 | /* calculate per file max memory use */ |
1069 | nnodes = count_nodes(master); | |
1070 | if (!nnodes) | |
1071 | continue; | |
f3212b91 KZ |
1072 | |
1073 | /* per-file cache size */ | |
1074 | memsiz = opts.cache_size / nnodes; | |
64c8db3c KZ |
1075 | /* filesiz, readsiz, memsiz */ |
1076 | ul_fileeq_set_size(&fileeq, master->st.st_size, opts.io_size, memsiz); | |
259bed15 | 1077 | |
66a38e97 KZ |
1078 | #ifdef USE_REFLINK |
1079 | if (reflink_mode || reflinks_skip) { | |
1080 | may_reflink = | |
1081 | reflink_mode == REFLINK_ALWAYS ? 1 : | |
1082 | is_reflink_compatible(master->st.st_dev, | |
1083 | master->links->path); | |
1084 | } | |
1085 | #endif | |
5c7cac85 | 1086 | for (other = master->next; other != NULL; other = other->next) { |
259bed15 KZ |
1087 | int eq; |
1088 | ||
5c7cac85 KZ |
1089 | if (handle_interrupt()) |
1090 | exit(EXIT_FAILURE); | |
2180ecc8 | 1091 | |
5c7cac85 KZ |
1092 | assert(other != other->next); |
1093 | assert(other->st.st_size == master->st.st_size); | |
2180ecc8 | 1094 | |
af5f0078 KZ |
1095 | if (!other->links) |
1096 | continue; | |
1097 | ||
259bed15 | 1098 | /* check file attributes, etc. */ |
af5f0078 KZ |
1099 | if (!file_may_link_to(master, other)) { |
1100 | jlog(JLOG_VERBOSE2, | |
1101 | _("Skipped (attributes mismatch) %s"), other->links->path); | |
5c7cac85 | 1102 | continue; |
af5f0078 | 1103 | } |
66a38e97 KZ |
1104 | #ifdef USE_REFLINK |
1105 | if (may_reflink && reflinks_skip && is_reflink(master, other)) { | |
1106 | jlog(JLOG_VERBOSE2, | |
1107 | _("Skipped (already reflink) %s"), other->links->path); | |
1108 | stats.ignored_reflinks++; | |
1109 | continue; | |
1110 | } | |
1111 | #endif | |
259bed15 KZ |
1112 | /* initialize content comparison */ |
1113 | if (!ul_fileeq_data_associated(&master->data)) | |
1114 | ul_fileeq_data_set_file(&master->data, master->links->path); | |
1115 | if (!ul_fileeq_data_associated(&other->data)) | |
1116 | ul_fileeq_data_set_file(&other->data, other->links->path); | |
1117 | ||
1118 | /* compare files */ | |
1119 | eq = ul_fileeq(&fileeq, &master->data, &other->data); | |
1120 | ||
1121 | /* reduce number of open files, keep only master open */ | |
1122 | ul_fileeq_data_close_file(&other->data); | |
1123 | ||
1124 | stats.comparisons++; | |
1125 | ||
af5f0078 KZ |
1126 | if (!eq) { |
1127 | jlog(JLOG_VERBOSE2, | |
1128 | _("Skipped (content mismatch) %s"), other->links->path); | |
259bed15 | 1129 | continue; |
af5f0078 | 1130 | } |
259bed15 KZ |
1131 | |
1132 | /* link files */ | |
66a38e97 | 1133 | if (!file_link(master, other, may_reflink) && errno == EMLINK) { |
259bed15 | 1134 | ul_fileeq_data_deinit(&master->data); |
5c7cac85 | 1135 | master = other; |
259bed15 | 1136 | } |
5c7cac85 | 1137 | } |
259bed15 KZ |
1138 | |
1139 | /* don't keep master data in memory */ | |
1140 | ul_fileeq_data_deinit(&master->data); | |
1141 | } | |
1142 | ||
1143 | /* final cleanup */ | |
1144 | for (other = begin; other != NULL; other = other->next) { | |
1145 | if (ul_fileeq_data_associated(&other->data)) | |
1146 | ul_fileeq_data_deinit(&other->data); | |
5c7cac85 | 1147 | } |
2180ecc8 KZ |
1148 | } |
1149 | ||
1150 | /** | |
d2c3c5a6 | 1151 | * usage - Print the program help and exit |
2180ecc8 | 1152 | */ |
d2c3c5a6 | 1153 | static void __attribute__((__noreturn__)) usage(void) |
2180ecc8 | 1154 | { |
5c7cac85 KZ |
1155 | FILE *out = stdout; |
1156 | ||
1157 | fputs(USAGE_HEADER, out); | |
1158 | fprintf(out, _(" %s [options] <directory>|<file> ...\n"), | |
1159 | program_invocation_short_name); | |
1160 | ||
1161 | fputs(USAGE_SEPARATOR, out); | |
1162 | fputs(_("Consolidate duplicate files using hardlinks.\n"), out); | |
1163 | ||
1164 | fputs(USAGE_OPTIONS, out); | |
85a956a7 KZ |
1165 | fputs(_(" -c, --content compare only file contents, same as -pot\n"), out); |
1166 | fputs(_(" -b, --io-size <size> I/O buffer size for file reading\n" | |
1167 | " (speedup, using more RAM)\n"), out); | |
a9b1dfd9 | 1168 | fputs(_(" -d, --respect-dir directory names have to be identical\n"), out); |
85a956a7 KZ |
1169 | fputs(_(" -f, --respect-name filenames have to be identical\n"), out); |
1170 | fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out); | |
1171 | fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n" | |
1172 | " lowest hardlink count\n"), out); | |
1173 | fputs(_(" -M, --minimize reverse the meaning of -m\n"), out); | |
1174 | fputs(_(" -n, --dry-run don't actually link anything\n"), out); | |
5c7cac85 | 1175 | fputs(_(" -o, --ignore-owner ignore owner changes\n"), out); |
85a956a7 KZ |
1176 | fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n" |
1177 | " (lower precedence than minimize/maximize)\n"), out); | |
1178 | fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out); | |
1179 | fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out); | |
1180 | fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out); | |
1181 | fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out); | |
1182 | fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out); | |
5c7cac85 | 1183 | fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out); |
85a956a7 KZ |
1184 | fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out); |
1185 | fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out); | |
3854515c | 1186 | #ifdef USE_XATTR |
5c7cac85 | 1187 | fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out); |
66a38e97 | 1188 | #endif |
85a956a7 KZ |
1189 | fputs(_(" -y, --method <name> file content comparison method\n"), out); |
1190 | ||
66a38e97 KZ |
1191 | #ifdef USE_REFLINK |
1192 | fputs(_(" --reflink[=<when>] create clone/CoW copies (auto, always, never)\n"), out); | |
1193 | fputs(_(" --skip-reflinks skip already cloned files (enabled on --reflink)\n"), out); | |
04ae85a7 | 1194 | #endif |
5c7cac85 | 1195 | fputs(USAGE_SEPARATOR, out); |
bad4c729 MY |
1196 | fprintf(out, USAGE_HELP_OPTIONS(28)); |
1197 | fprintf(out, USAGE_MAN_TAIL("hardlink(1)")); | |
5c7cac85 KZ |
1198 | |
1199 | exit(EXIT_SUCCESS); | |
2180ecc8 KZ |
1200 | } |
1201 | ||
2180ecc8 KZ |
1202 | /** |
1203 | * parse_options - Parse the command line options | |
1204 | * @argc: Number of options | |
1205 | * @argv: Array of options | |
1206 | */ | |
1207 | static int parse_options(int argc, char *argv[]) | |
1208 | { | |
66a38e97 KZ |
1209 | enum { |
1210 | OPT_REFLINK = CHAR_MAX + 1, | |
1211 | OPT_SKIP_RELINKS | |
1212 | }; | |
a9b1dfd9 | 1213 | static const char optstr[] = "VhvndfpotXcmMOx:y:i:r:S:s:b:q"; |
5c7cac85 KZ |
1214 | static const struct option long_options[] = { |
1215 | {"version", no_argument, NULL, 'V'}, | |
1216 | {"help", no_argument, NULL, 'h'}, | |
1217 | {"verbose", no_argument, NULL, 'v'}, | |
1218 | {"dry-run", no_argument, NULL, 'n'}, | |
1219 | {"respect-name", no_argument, NULL, 'f'}, | |
a9b1dfd9 | 1220 | {"respect-dir", no_argument, NULL, 'd'}, |
5c7cac85 KZ |
1221 | {"ignore-mode", no_argument, NULL, 'p'}, |
1222 | {"ignore-owner", no_argument, NULL, 'o'}, | |
1223 | {"ignore-time", no_argument, NULL, 't'}, | |
1224 | {"respect-xattrs", no_argument, NULL, 'X'}, | |
1225 | {"maximize", no_argument, NULL, 'm'}, | |
1226 | {"minimize", no_argument, NULL, 'M'}, | |
1227 | {"keep-oldest", no_argument, NULL, 'O'}, | |
1228 | {"exclude", required_argument, NULL, 'x'}, | |
1229 | {"include", required_argument, NULL, 'i'}, | |
259bed15 | 1230 | {"method", required_argument, NULL, 'y' }, |
5c7cac85 | 1231 | {"minimum-size", required_argument, NULL, 's'}, |
40a82456 | 1232 | {"maximum-size", required_argument, NULL, 'S'}, |
66a38e97 KZ |
1233 | #ifdef USE_REFLINK |
1234 | {"reflink", optional_argument, NULL, OPT_REFLINK }, | |
1235 | {"skip-reflinks", no_argument, NULL, OPT_SKIP_RELINKS }, | |
1236 | #endif | |
64c8db3c | 1237 | {"io-size", required_argument, NULL, 'b'}, |
5c7cac85 KZ |
1238 | {"content", no_argument, NULL, 'c'}, |
1239 | {"quiet", no_argument, NULL, 'q'}, | |
f3212b91 | 1240 | {"cache-size", required_argument, NULL, 'r'}, |
5c7cac85 KZ |
1241 | {NULL, 0, NULL, 0} |
1242 | }; | |
1243 | static const ul_excl_t excl[] = { | |
1244 | {'q', 'v'}, | |
1245 | {0} | |
1246 | }; | |
1247 | int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; | |
a9b1dfd9 | 1248 | int c, content_only = 0; |
5c7cac85 KZ |
1249 | |
1250 | while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) { | |
1251 | ||
1252 | err_exclusive_options(c, long_options, excl, excl_st); | |
1253 | ||
1254 | switch (c) { | |
1255 | case 'p': | |
1256 | opts.respect_mode = FALSE; | |
1257 | break; | |
1258 | case 'o': | |
1259 | opts.respect_owner = FALSE; | |
1260 | break; | |
1261 | case 't': | |
1262 | opts.respect_time = FALSE; | |
1263 | break; | |
1264 | case 'X': | |
1265 | opts.respect_xattrs = TRUE; | |
1266 | break; | |
1267 | case 'm': | |
1268 | opts.maximise = TRUE; | |
1269 | break; | |
1270 | case 'M': | |
1271 | opts.minimise = TRUE; | |
1272 | break; | |
1273 | case 'O': | |
1274 | opts.keep_oldest = TRUE; | |
1275 | break; | |
1276 | case 'f': | |
1277 | opts.respect_name = TRUE; | |
1278 | break; | |
a9b1dfd9 KZ |
1279 | case 'd': |
1280 | opts.respect_dir = TRUE; | |
1281 | break; | |
5c7cac85 KZ |
1282 | case 'v': |
1283 | opts.verbosity++; | |
1284 | break; | |
1285 | case 'q': | |
1286 | quiet = TRUE; | |
1287 | break; | |
1288 | case 'c': | |
a9b1dfd9 | 1289 | content_only = 1; |
5c7cac85 KZ |
1290 | break; |
1291 | case 'n': | |
1292 | opts.dry_run = 1; | |
1293 | break; | |
1294 | case 'x': | |
1295 | register_regex(&opts.exclude, optarg); | |
1296 | break; | |
259bed15 KZ |
1297 | case 'y': |
1298 | opts.method = optarg; | |
1299 | break; | |
5c7cac85 KZ |
1300 | case 'i': |
1301 | register_regex(&opts.include, optarg); | |
1302 | break; | |
1303 | case 's': | |
40a82456 | 1304 | opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size")); |
5c7cac85 | 1305 | break; |
722762b6 | 1306 | case 'S': |
40a82456 | 1307 | opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size")); |
5c7cac85 | 1308 | break; |
f3212b91 | 1309 | case 'r': |
cd8d4b53 | 1310 | opts.cache_size = strtosize_or_err(optarg, _("failed to parse cache size")); |
f3212b91 | 1311 | break; |
64c8db3c KZ |
1312 | case 'b': |
1313 | opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size")); | |
722762b6 | 1314 | break; |
66a38e97 KZ |
1315 | #ifdef USE_REFLINK |
1316 | case OPT_REFLINK: | |
1317 | reflink_mode = REFLINK_AUTO; | |
1318 | if (optarg) { | |
1319 | if (strcmp(optarg, "auto") == 0) | |
1320 | reflink_mode = REFLINK_AUTO; | |
1321 | else if (strcmp(optarg, "always") == 0) | |
1322 | reflink_mode = REFLINK_ALWAYS; | |
1323 | else if (strcmp(optarg, "never") == 0) | |
1324 | reflink_mode = REFLINK_NEVER; | |
1325 | else | |
1326 | errx(EXIT_FAILURE, _("unsupported reflink mode; %s"), optarg); | |
1327 | } | |
1328 | if (reflink_mode != REFLINK_NEVER) | |
1329 | reflinks_skip = 1; | |
1330 | break; | |
1331 | case OPT_SKIP_RELINKS: | |
1332 | reflinks_skip = 1; | |
1333 | break; | |
1334 | #endif | |
5c7cac85 KZ |
1335 | case 'h': |
1336 | usage(); | |
1337 | case 'V': | |
6a2baa1a KZ |
1338 | { |
1339 | static const char *features[] = { | |
1340 | #ifdef USE_REFLINK | |
1341 | "reflink", | |
1342 | #endif | |
1343 | #ifdef USE_FILEEQ_CRYPTOAPI | |
1344 | "cryptoapi", | |
1345 | #endif | |
1346 | NULL | |
1347 | }; | |
1348 | print_version_with_features(EXIT_SUCCESS, features); | |
1349 | } | |
5c7cac85 | 1350 | default: |
6a2baa1a KZ |
1351 | errtryhelp(EXIT_FAILURE); |
1352 | } | |
5c7cac85 KZ |
1353 | } |
1354 | ||
a9b1dfd9 KZ |
1355 | if (content_only) { |
1356 | opts.respect_mode = FALSE; | |
1357 | opts.respect_name = FALSE; | |
1358 | opts.respect_dir = FALSE; | |
1359 | opts.respect_owner = FALSE; | |
1360 | opts.respect_time = FALSE; | |
1361 | opts.respect_xattrs = FALSE; | |
1362 | } | |
5c7cac85 | 1363 | return 0; |
2180ecc8 KZ |
1364 | } |
1365 | ||
1366 | /** | |
5c7cac85 KZ |
1367 | * to_be_called_atexit - Cleanup handler, also prints statistics. |
1368 | */ | |
2180ecc8 KZ |
1369 | static void to_be_called_atexit(void) |
1370 | { | |
5c7cac85 KZ |
1371 | if (stats.started) |
1372 | print_stats(); | |
2180ecc8 KZ |
1373 | } |
1374 | ||
1375 | /** | |
5c7cac85 KZ |
1376 | * sighandler - Signal handler, sets the global last_signal variable |
1377 | * @i: The signal number | |
1378 | */ | |
2180ecc8 KZ |
1379 | static void sighandler(int i) |
1380 | { | |
5c7cac85 KZ |
1381 | if (last_signal != SIGINT) |
1382 | last_signal = i; | |
1383 | if (i == SIGINT) | |
ab35a4fb CR |
1384 | /* can't use stdio on signal handler */ |
1385 | ignore_result(write(STDOUT_FILENO, "\n", sizeof("\n")-1)); | |
2180ecc8 KZ |
1386 | } |
1387 | ||
1388 | int main(int argc, char *argv[]) | |
1389 | { | |
5c7cac85 | 1390 | struct sigaction sa; |
259bed15 | 1391 | int rc; |
2180ecc8 | 1392 | |
5c7cac85 KZ |
1393 | sa.sa_handler = sighandler; |
1394 | sa.sa_flags = SA_RESTART; | |
1395 | sigfillset(&sa.sa_mask); | |
2180ecc8 | 1396 | |
5c7cac85 KZ |
1397 | /* If we receive a SIGINT, end the processing */ |
1398 | sigaction(SIGINT, &sa, NULL); | |
1399 | sigaction(SIGUSR1, &sa, NULL); | |
2180ecc8 | 1400 | |
47470044 BS |
1401 | /* Localize messages, number formatting, and anything else. */ |
1402 | setlocale(LC_ALL, ""); | |
1403 | bindtextdomain(PACKAGE, LOCALEDIR); | |
1404 | textdomain(PACKAGE); | |
2180ecc8 | 1405 | |
5c7cac85 KZ |
1406 | if (atexit(to_be_called_atexit) != 0) |
1407 | err(EXIT_FAILURE, _("cannot register exit handler")); | |
2180ecc8 | 1408 | |
5c7cac85 | 1409 | parse_options(argc, argv); |
2180ecc8 | 1410 | |
5c7cac85 | 1411 | if (optind == argc) |
01480c61 | 1412 | errx(EXIT_FAILURE, _("no directory or file specified")); |
2180ecc8 | 1413 | |
5c7cac85 | 1414 | gettime_monotonic(&stats.start_time); |
722762b6 | 1415 | |
259bed15 KZ |
1416 | rc = ul_fileeq_init(&fileeq, opts.method); |
1417 | if (rc != 0 && strcmp(opts.method, "memcmp") != 0) { | |
d2d297bd | 1418 | jlog(JLOG_INFO, _("cannot initialize %s method, use 'memcmp' fallback"), opts.method); |
259bed15 KZ |
1419 | opts.method = "memcmp"; |
1420 | rc = ul_fileeq_init(&fileeq, opts.method); | |
1421 | } | |
1422 | if (rc < 0) | |
1423 | err(EXIT_FAILURE, _("failed to initialize files comparior")); | |
722762b6 | 1424 | |
64c8db3c KZ |
1425 | /* defautl I/O size */ |
1426 | if (!opts.io_size) { | |
1427 | if (strcmp(opts.method, "memcmp") == 0) | |
1428 | opts.io_size = 8*1024; | |
1429 | else | |
1430 | opts.io_size = 1024*1024; | |
1431 | } | |
722762b6 | 1432 | |
5c7cac85 | 1433 | stats.started = TRUE; |
2180ecc8 | 1434 | |
ee4c3249 | 1435 | jlog(JLOG_VERBOSE2, _("Scanning [device/inode/links]:")); |
5c7cac85 | 1436 | for (; optind < argc; optind++) { |
a66fbaaf KZ |
1437 | char *path = realpath(argv[optind], NULL); |
1438 | ||
1439 | if (!path) { | |
1440 | warn(_("cannot get realpath: %s"), argv[optind]); | |
1441 | continue; | |
1442 | } | |
a9b1dfd9 KZ |
1443 | if (opts.respect_dir) |
1444 | rootbasesz = strlen(path); | |
a66fbaaf KZ |
1445 | if (nftw(path, inserter, 20, FTW_PHYS) == -1) |
1446 | warn(_("cannot process %s"), path); | |
1447 | free(path); | |
a9b1dfd9 | 1448 | rootbasesz = 0; |
5c7cac85 | 1449 | } |
2180ecc8 | 1450 | |
5c7cac85 | 1451 | twalk(files, visitor); |
722762b6 | 1452 | |
259bed15 | 1453 | ul_fileeq_deinit(&fileeq); |
5c7cac85 | 1454 | return 0; |
0ec20db8 | 1455 | } |