* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#define _POSIX_C_SOURCE 200112L /* POSIX functions */
-#define _XOPEN_SOURCE 600 /* nftw() */
-
-#include <sys/types.h> /* stat */
-#include <sys/stat.h> /* stat */
-#include <sys/time.h> /* getrlimit, getrusage */
-#include <sys/resource.h> /* getrlimit, getrusage */
-#include <fcntl.h> /* posix_fadvise */
-#include <ftw.h> /* ftw */
-#include <search.h> /* tsearch() and friends */
+#define _POSIX_C_SOURCE 200112L /* POSIX functions */
+#define _XOPEN_SOURCE 600 /* nftw() */
+
+#include <sys/types.h> /* stat */
+#include <sys/stat.h> /* stat */
+#include <sys/time.h> /* getrlimit, getrusage */
+#include <sys/resource.h> /* getrlimit, getrusage */
+#include <fcntl.h> /* posix_fadvise */
+#include <ftw.h> /* ftw */
+#include <search.h> /* tsearch() and friends */
#include <signal.h> /* SIG*, sigaction */
#include <getopt.h> /* getopt_long() */
-#include <ctype.h> /* tolower() */
+#include <ctype.h> /* tolower() */
+#include <sys/ioctl.h>
+
+#if defined(HAVE_LINUX_FIEMAP_H) && defined(HAVE_SYS_VFS_H)
+# include <linux/fs.h>
+# include <linux/fiemap.h>
+# ifdef FICLONE
+# define USE_REFLINK 1
+# endif
+#endif
#include "nls.h"
#include "c.h"
#include "xalloc.h"
+#include "strutils.h"
+#include "monotonic.h"
+#include "optutils.h"
+#include "fileeq.h"
-/* Use libpcre2posix if it's available */
-#ifdef HAVE_PCRE2_POSIX
-# include <pcre2posix.h>
-# undef REG_NOSUB
-# define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
-#else
-# include <regex.h> /* regcomp(), regsearch() */
+#ifdef USE_REFLINK
+# include "statfs_magic.h"
#endif
-#ifdef HAVE_SYS_XATTR_H
-# include <sys/xattr.h> /* listxattr, getxattr */
+#include <regex.h> /* regcomp(), regexec() */
+
+#if defined(HAVE_SYS_XATTR_H) && defined(HAVE_LLISTXATTR) && defined(HAVE_LGETXATTR)
+# include <sys/xattr.h>
+# define USE_XATTR 1
#endif
+static int quiet; /* don't print anything */
+static int rootbasesz; /* size of the directory for nftw() */
+
+#ifdef USE_REFLINK
+enum {
+ REFLINK_NEVER = 0,
+ REFLINK_AUTO,
+ REFLINK_ALWAYS
+};
+static int reflink_mode = REFLINK_NEVER;
+static int reflinks_skip;
+#endif
+
+static struct ul_fileeq fileeq;
+
/**
* struct file - Information about a file
* @st: The stat buffer associated with the file
* This contains all information we need about a file.
*/
struct file {
- struct stat st;
- struct file *next;
- struct link {
- struct link *next;
- int basename;
+ struct stat st;
+ struct ul_fileeq_data data;
+
+ struct file *next;
+ struct link {
+ struct link *next;
+ int basename;
+ int dirname;
#if __STDC_VERSION__ >= 199901L
- char path[];
+ char path[];
#elif __GNUC__
- char path[0];
+ char path[0];
#else
- char path[1];
+ char path[1];
#endif
- } *links;
+ } *links;
};
/**
* enum log_level - Logging levels
- * @JLOG_SYSFAT: Fatal error message with errno, will be printed to stderr
- * @JLOG_FATAL: Fatal error message with errno, will be printed to stderr
- * @JLOG_SYSERR: Error message with errno, will be printed to stderr
- * @JLOG_ERROR: Error message, will be printed to stderr
* @JLOG_SUMMARY: Default log level
* @JLOG_INFO: Verbose logging (verbose == 1)
- * @JLOG_DEBUG1: Verbosity 2
- * @JLOG_DEBUG2: Verbosity 3
+ * @JLOG_VERBOSE1: Verbosity 2
+ * @JLOG_VERBOSE2: Verbosity 3
*/
enum log_level {
- JLOG_SYSFAT = -4,
- JLOG_FATAL = -3,
- JLOG_SYSERR = -2,
- JLOG_ERROR = -1,
- JLOG_SUMMARY,
- JLOG_INFO,
- JLOG_DEBUG1,
- JLOG_DEBUG2
+ JLOG_SUMMARY,
+ JLOG_INFO,
+ JLOG_VERBOSE1,
+ JLOG_VERBOSE2
};
/**
* @xattr_comparisons: The number of extended attribute comparisons
* @comparisons: The number of comparisons
* @saved: The (exaggerated) amount of space saved
- * @start_time: The time we started at, in seconds since some unspecified point
+ * @start_time: The time we started at
*/
static struct statistics {
- int started;
- size_t files;
- size_t linked;
- size_t xattr_comparisons;
- size_t comparisons;
- double saved;
- double start_time;
+ int started;
+ size_t files;
+ size_t linked;
+ size_t xattr_comparisons;
+ size_t comparisons;
+ size_t ignored_reflinks;
+ double saved;
+ struct timeval start_time;
} stats;
+
+struct hdl_regex {
+ regex_t re; /* POSIX compatible regex handler */
+
+ struct hdl_regex *next;
+};
+
/**
* struct options - Processed command-line options
* @include: A linked list of regular expressions for the --include option
* @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
* @dry_run: Specifies whether hardlink should not link files (default = FALSE)
* @min_size: Minimum size of files to consider. (default = 1 byte)
+ * @max_size: Maximum size of files to consider, 0 means unlimited. (default = 0 bytes)
*/
static struct options {
- struct regex_link {
- regex_t preg;
- struct regex_link *next;
- } *include, *exclude;
- signed int verbosity;
- unsigned int respect_mode:1;
- unsigned int respect_owner:1;
- unsigned int respect_name:1;
- unsigned int respect_time:1;
- unsigned int respect_xattrs:1;
- unsigned int maximise:1;
- unsigned int minimise:1;
- unsigned int keep_oldest:1;
- unsigned int dry_run:1;
- unsigned long long min_size;
+ struct hdl_regex *include;
+ struct hdl_regex *exclude;
+
+ const char *method;
+ signed int verbosity;
+ unsigned int respect_mode:1;
+ unsigned int respect_owner:1;
+ unsigned int respect_name:1;
+ unsigned int respect_dir:1;
+ unsigned int respect_time:1;
+ unsigned int respect_xattrs:1;
+ unsigned int maximise:1;
+ unsigned int minimise:1;
+ unsigned int keep_oldest:1;
+ unsigned int dry_run:1;
+ uintmax_t min_size;
+ uintmax_t max_size;
+ size_t io_size;
+ size_t cache_size;
} opts = {
- /* default setting */
- .respect_mode = TRUE,
- .respect_owner = TRUE,
- .respect_time = TRUE,
- .respect_xattrs = FALSE,
- .keep_oldest = FALSE,
- .min_size = 1
+ /* default setting */
+#ifdef USE_FILEEQ_CRYPTOAPI
+ .method = "sha256",
+#else
+ .method = "memcmp",
+#endif
+ .respect_mode = TRUE,
+ .respect_owner = TRUE,
+ .respect_time = TRUE,
+ .respect_xattrs = FALSE,
+ .keep_oldest = FALSE,
+ .min_size = 1,
+ .cache_size = 10*1024*1024
};
/*
* The last signal we received. We store the signal here in order to be able
* to break out of loops gracefully and to return from our nftw() handler.
*/
-static int last_signal;
+static volatile sig_atomic_t last_signal;
+
+
+#define is_log_enabled(_level) (quiet == 0 && (_level) <= (unsigned int)opts.verbosity)
-__attribute__ ((format(printf, 2, 3)))
/**
* jlog - Logging for hardlink
* @level: The log level
* @format: A format string for printf()
*/
+__attribute__((format(printf, 2, 3)))
static void jlog(enum log_level level, const char *format, ...)
{
- FILE *stream = (level >= 0) ? stdout : stderr;
- int errno_ = errno;
- va_list args;
-
- if (level <= opts.verbosity) {
- if (level <= JLOG_FATAL)
- fprintf(stream, "ERROR: ");
- else if (level < 0)
- fprintf(stream, "WARNING: ");
- va_start(args, format);
- vfprintf(stream, format, args);
- va_end(args);
- if (level == JLOG_SYSERR || level == JLOG_SYSFAT)
- fprintf(stream, ": %s\n", strerror(errno_));
- else
- fputc('\n', stream);
- }
+ va_list args;
+
+ if (!is_log_enabled(level))
+ return;
+
+ va_start(args, format);
+ vfprintf(stdout, format, args);
+ va_end(args);
+ fputc('\n', stdout);
}
/**
#define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0))
/**
- * format - Print a human-readable name for the given size
- * @bytes: A number specifying an amount of bytes
- *
- * Uses a double. The result with infinity and NaN is most likely
- * not pleasant.
+ * register_regex - Compile and insert a regular expression into list
+ * @pregs: Pointer to a linked list of regular expressions
+ * @regex: String containing the regular expression to be compiled
*/
-static const char *format(double bytes)
+static void register_regex(struct hdl_regex **pregs, const char *regex)
{
- static char buf[256];
-
- if (bytes >= 1024 * 1024 * 1024)
- snprintf(buf, sizeof(buf), "%.2f GiB", (bytes / 1024 / 1024 / 1024));
- else if (bytes >= 1024 * 1024)
- snprintf(buf, sizeof(buf), "%.2f MiB", (bytes / 1024 / 1024));
- else if (bytes >= 1024)
- snprintf(buf, sizeof(buf), "%.2f KiB", (bytes / 1024));
- else
- snprintf(buf, sizeof(buf), "%.0f bytes", bytes);
-
- return buf;
-}
+ struct hdl_regex *link;
+ int err;
-/**
- * gettime() - Get the current time from the system
- */
-static double gettime(void)
-{
- struct timeval tv = { 0, 0 };
+ link = xmalloc(sizeof(*link));
+
+ if ((err = regcomp(&link->re, regex, REG_NOSUB | REG_EXTENDED)) != 0) {
+ size_t size = regerror(err, &link->re, NULL, 0);
+ char *buf = xmalloc(size + 1);
- if (gettimeofday(&tv, NULL) != 0)
- jlog(JLOG_SYSERR, "Cannot read current time");
+ regerror(err, &link->re, buf, size);
- return (double) tv.tv_sec + (double) tv.tv_usec / 1000000;
+ errx(EXIT_FAILURE, _("could not compile regular expression %s: %s"),
+ regex, buf);
+ }
+ link->next = *pregs; *pregs = link;
}
/**
- * regexec_any - Match against multiple regular expressions
+ * match_any_regex - Match against multiple regular expressions
* @pregs: A linked list of regular expressions
* @what: The string to match against
*
* Checks whether any of the regular expressions in the list matches the
* string.
*/
-static int regexec_any(struct regex_link *pregs, const char *what)
+static int match_any_regex(struct hdl_regex *pregs, const char *what)
{
- for (; pregs != NULL; pregs = pregs->next)
- if (regexec(&pregs->preg, what, 0, NULL, 0) == 0)
- return TRUE;
- return FALSE;
+ for (; pregs != NULL; pregs = pregs->next) {
+ if (regexec(&pregs->re, what, 0, NULL, 0) == 0)
+ return TRUE;
+ }
+ return FALSE;
}
/**
*/
static int compare_nodes(const void *_a, const void *_b)
{
- const struct file *a = _a;
- const struct file *b = _b;
- int diff = 0;
+ const struct file *a = _a;
+ const struct file *b = _b;
+ int diff = 0;
- if (diff == 0)
- diff = CMP(a->st.st_dev, b->st.st_dev);
- if (diff == 0)
- diff = CMP(a->st.st_size, b->st.st_size);
+ if (diff == 0)
+ diff = CMP(a->st.st_dev, b->st.st_dev);
+ if (diff == 0)
+ diff = CMP(a->st.st_size, b->st.st_size);
- return diff;
+ return diff;
+}
+
+/*
+ * Compare only the file names: the part of the first link's path that
+ * starts at its basename offset. The result follows strcmp() conventions.
+ */
+static inline int filename_strcmp(const struct file *a, const struct file *b)
+{
+	const char *name_a = a->links->path + a->links->basename;
+	const char *name_b = b->links->path + b->links->basename;
+
+	return strcmp(name_a, name_b);
+}
+
+/**
+ * Compare only directory names (ignores root directory and basename (filename))
+ *
+ * The complete path contains three fragments:
+ *
+ * <rootdir> is specified on hardlink command line
+ * <dirname> is all between rootdir and filename
+ * <filename> is last component (aka basename)
+ */
+static inline int dirname_strcmp(const struct file *a, const struct file *b)
+{
+	const struct link *la = a->links;
+	const struct link *lb = b->links;
+	int len_a = la->basename - la->dirname;
+	int len_b = lb->basename - lb->dirname;
+
+	/* <dirname> fragments of different length order by their length */
+	if (len_a != len_b)
+		return CMP(len_a, len_b);
+
+	/* same length: compare just the bytes between rootdir and filename */
+	return strncmp(la->path + la->dirname, lb->path + lb->dirname, len_a);
}
/**
*/
static int compare_nodes_ino(const void *_a, const void *_b)
{
- const struct file *a = _a;
- const struct file *b = _b;
- int diff = 0;
-
- if (diff == 0)
- diff = CMP(a->st.st_dev, b->st.st_dev);
- if (diff == 0)
- diff = CMP(a->st.st_ino, b->st.st_ino);
-
- /* If opts.respect_name is used, we will restrict a struct file to
- * contain only links with the same basename to keep the rest simple.
- */
- if (diff == 0 && opts.respect_name)
- diff = strcmp(a->links->path + a->links->basename,
- b->links->path + b->links->basename);
-
- return diff;
+ const struct file *a = _a;
+ const struct file *b = _b;
+ int diff = 0;
+
+ if (diff == 0)
+ diff = CMP(a->st.st_dev, b->st.st_dev);
+ if (diff == 0)
+ diff = CMP(a->st.st_ino, b->st.st_ino);
+
+ /* If opts.respect_name is used, we will restrict a struct file to
+ * contain only links with the same basename to keep the rest simple.
+ */
+ if (diff == 0 && opts.respect_name)
+ diff = filename_strcmp(a, b);
+ if (diff == 0 && opts.respect_dir)
+ diff = dirname_strcmp(a, b);
+
+ return diff;
}
/**
*/
static void print_stats(void)
{
- jlog(JLOG_SUMMARY, "Mode: %s", opts.dry_run ? "dry-run" : "real");
- jlog(JLOG_SUMMARY, "Files: %zu", stats.files);
- jlog(JLOG_SUMMARY, "Linked: %zu files", stats.linked);
-#ifdef HAVE_SYS_XATTR_H
- jlog(JLOG_SUMMARY, "Compared: %zu xattrs", stats.xattr_comparisons);
+ struct timeval end = { 0, 0 }, delta = { 0, 0 };
+ char *ssz;
+
+ gettime_monotonic(&end);
+ timersub(&end, &stats.start_time, &delta);
+
+ jlog(JLOG_SUMMARY, "%-25s %s", _("Mode:"),
+ opts.dry_run ? _("dry-run") : _("real"));
+ jlog(JLOG_SUMMARY, "%-25s %s", _("Method:"), opts.method);
+ jlog(JLOG_SUMMARY, "%-25s %zu", _("Files:"), stats.files);
+ jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Linked:"), stats.linked);
+
+#ifdef USE_XATTR
+ jlog(JLOG_SUMMARY, _("%-25s %zu xattrs"), _("Compared:"),
+ stats.xattr_comparisons);
#endif
- jlog(JLOG_SUMMARY, "Compared: %zu files", stats.comparisons);
- jlog(JLOG_SUMMARY, "Saved: %s", format(stats.saved));
- jlog(JLOG_SUMMARY, "Duration: %.2f seconds", gettime() - stats.start_time);
+ jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Compared:"),
+ stats.comparisons);
+#ifdef USE_REFLINK
+ if (reflinks_skip)
+ jlog(JLOG_SUMMARY, _("%-25s %zu files"), _("Skipped reflinks:"),
+ stats.ignored_reflinks);
+#endif
+ ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
+ SIZE_SUFFIX_SPACE |
+ SIZE_DECIMAL_2DIGITS, stats.saved);
+
+ jlog(JLOG_SUMMARY, "%-25s %s", _("Saved:"), ssz);
+ free(ssz);
+
+ jlog(JLOG_SUMMARY, _("%-25s %"PRId64".%06"PRId64" seconds"), _("Duration:"),
+ (int64_t)delta.tv_sec, (int64_t)delta.tv_usec);
}
/**
*/
static int handle_interrupt(void)
{
- switch (last_signal) {
- case SIGINT:
- case SIGTERM:
- return TRUE;
- case SIGUSR1:
- print_stats();
- putchar('\n');
- break;
- }
-
- last_signal = 0;
- return FALSE;
+ switch (last_signal) {
+ case SIGINT:
+ case SIGTERM:
+ return TRUE;
+ case SIGUSR1:
+ print_stats();
+ putchar('\n');
+ break;
+ }
+
+ last_signal = 0;
+ return FALSE;
}
-#ifdef HAVE_SYS_XATTR_H
+#ifdef USE_XATTR
/**
* llistxattr_or_die - Wrapper for llistxattr()
*/
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
{
- ssize_t len = llistxattr(path, list, size);
+ ssize_t len = llistxattr(path, list, size);
+
+ if (len < 0 && errno != ENOTSUP)
+ err(EXIT_FAILURE, _("cannot get xattr names for %s"), path);
- if (len < 0 && errno != ENOTSUP) {
- jlog(JLOG_SYSFAT, "Cannot get xattr names for %s", path);
- exit(1);
- }
- return len;
+ return len;
}
/**
*
* This does the same thing as lgetxattr() except that it aborts upon error.
*/
-static ssize_t lgetxattr_or_die(const char *path, const char *name, void *value,
- size_t size)
+static ssize_t lgetxattr_or_die(const char *path,
+ const char *name, void *value, size_t size)
{
- ssize_t len = lgetxattr(path, name, value, size);
+ ssize_t len = lgetxattr(path, name, value, size);
- if (len < 0) {
- jlog(JLOG_SYSFAT, "Cannot get xattr value of %s for %s", name, path);
- exit(1);
- }
- return len;
+ if (len < 0)
+ err(EXIT_FAILURE, _("cannot get xattr value of %s for %s"),
+ name, path);
+
+ return len;
}
/**
*/
static int get_xattr_name_count(const char *const names, ssize_t len)
{
- int count = 0;
- const char *name;
+ int count = 0;
+ const char *name;
- for (name = names; name < (names + len); name += strlen(name) + 1)
- count++;
+ for (name = names; name < (names + len); name += strlen(name) + 1)
+ count++;
- return count;
+ return count;
}
/**
*/
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
- return strcmp(*(char *const *) ptr1, *(char *const *) ptr2);
+ return strcmp(*(char *const *)ptr1, *(char *const *)ptr2);
}
/**
*/
static const char **get_sorted_xattr_name_table(const char *names, int n)
{
- const char **table = xmalloc(n * sizeof(char *));
- int i;
+ const char **table = xmalloc(n * sizeof(char *));
+ int i;
- for (i = 0; i < n; i++) {
- table[i] = names;
- names += strlen(names) + 1;
- }
+ for (i = 0; i < n; i++) {
+ table[i] = names;
+ names += strlen(names) + 1;
+ }
- qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
+ qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
- return table;
+ return table;
}
/**
*/
static int file_xattrs_equal(const struct file *a, const struct file *b)
{
- ssize_t len_a;
- ssize_t len_b;
- char *names_a = NULL;
- char *names_b = NULL;
- int n_a;
- int n_b;
- const char **name_ptrs_a = NULL;
- const char **name_ptrs_b = NULL;
- void *value_a = NULL;
- void *value_b = NULL;
- int ret = FALSE;
- int i;
+ ssize_t len_a;
+ ssize_t len_b;
+ char *names_a = NULL;
+ char *names_b = NULL;
+ int n_a;
+ int n_b;
+ const char **name_ptrs_a = NULL;
+ const char **name_ptrs_b = NULL;
+ void *value_a = NULL;
+ void *value_b = NULL;
+ int ret = FALSE;
+ int i;
- assert(a->links != NULL);
- assert(b->links != NULL);
+ assert(a->links != NULL);
+ assert(b->links != NULL);
- jlog(JLOG_DEBUG1, "Comparing xattrs of %s to %s", a->links->path,
- b->links->path);
+ jlog(JLOG_VERBOSE1, _("Comparing xattrs of %s to %s"), a->links->path,
+ b->links->path);
- stats.xattr_comparisons++;
+ stats.xattr_comparisons++;
- len_a = llistxattr_or_die(a->links->path, NULL, 0);
- len_b = llistxattr_or_die(b->links->path, NULL, 0);
+ len_a = llistxattr_or_die(a->links->path, NULL, 0);
+ len_b = llistxattr_or_die(b->links->path, NULL, 0);
- if (len_a <= 0 && len_b <= 0)
- return TRUE; // xattrs not supported or neither file has any
+ if (len_a <= 0 && len_b <= 0)
+ return TRUE; // xattrs not supported or neither file has any
- if (len_a != len_b)
- return FALSE; // total lengths of xattr names differ
+ if (len_a != len_b)
+ return FALSE; // total lengths of xattr names differ
- names_a = xmalloc(len_a);
- names_b = xmalloc(len_b);
+ names_a = xmalloc(len_a);
+ names_b = xmalloc(len_b);
- len_a = llistxattr_or_die(a->links->path, names_a, len_a);
- len_b = llistxattr_or_die(b->links->path, names_b, len_b);
- assert((len_a > 0) && (len_a == len_b));
+ len_a = llistxattr_or_die(a->links->path, names_a, len_a);
+ len_b = llistxattr_or_die(b->links->path, names_b, len_b);
+ assert((len_a > 0) && (len_a == len_b));
- n_a = get_xattr_name_count(names_a, len_a);
- n_b = get_xattr_name_count(names_b, len_b);
+ n_a = get_xattr_name_count(names_a, len_a);
+ n_b = get_xattr_name_count(names_b, len_b);
- if (n_a != n_b)
- goto exit; // numbers of xattrs differ
+ if (n_a != n_b)
+ goto exit; // numbers of xattrs differ
- name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
- name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
+ name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
+ name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
- // We now have two sorted tables of xattr names.
+ // We now have two sorted tables of xattr names.
- for (i = 0; i < n_a; i++) {
- if (handle_interrupt())
- goto exit; // user wants to quit
+ for (i = 0; i < n_a; i++) {
+ if (handle_interrupt())
+ goto exit; // user wants to quit
- if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
- goto exit; // names at same slot differ
+ if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
+ goto exit; // names at same slot differ
- len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
- len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
+ len_a =
+ lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
+ len_b =
+ lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
- if (len_a != len_b)
- goto exit; // xattrs with same name, different value lengths
+ if (len_a != len_b)
+ goto exit; // xattrs with same name, different value lengths
- value_a = xmalloc(len_a);
- value_b = xmalloc(len_b);
+ value_a = xmalloc(len_a);
+ value_b = xmalloc(len_b);
- len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
- value_a, len_a);
- len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
- value_b, len_b);
- assert((len_a >= 0) && (len_a == len_b));
+ len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
+ value_a, len_a);
+ len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
+ value_b, len_b);
+ assert((len_a >= 0) && (len_a == len_b));
- if (memcmp(value_a, value_b, len_a) != 0)
- goto exit; // xattrs with same name, different values
+ if (memcmp(value_a, value_b, len_a) != 0)
+ goto exit; // xattrs with same name, different values
- free(value_a);
- free(value_b);
- value_a = NULL;
- value_b = NULL;
- }
+ free(value_a);
+ free(value_b);
+ value_a = NULL;
+ value_b = NULL;
+ }
- ret = TRUE;
+ ret = TRUE;
- exit:
- free(names_a);
- free(names_b);
- free(name_ptrs_a);
- free(name_ptrs_b);
- free(value_a);
- free(value_b);
- return ret;
+ exit:
+ free(names_a);
+ free(names_b);
+ free(name_ptrs_a);
+ free(name_ptrs_b);
+ free(value_a);
+ free(value_b);
+ return ret;
}
-#else
+#else /* !USE_XATTR */
static int file_xattrs_equal(const struct file *a, const struct file *b)
{
- return TRUE;
-}
-#endif
-
-/**
- * file_contents_equal - Compare contents of two files for equality
- * @a: The first file
- * @b: The second file
- *
- * Compare the contents of the files for equality
- */
-static int file_contents_equal(const struct file *a, const struct file *b)
-{
- FILE *fa = NULL;
- FILE *fb = NULL;
- char buf_a[8192];
- char buf_b[8192];
- int cmp = 0; /* zero => equal */
- off_t off = 0; /* current offset */
-
- assert(a->links != NULL);
- assert(b->links != NULL);
-
- jlog(JLOG_DEBUG1, "Comparing %s to %s", a->links->path, b->links->path);
-
- stats.comparisons++;
-
- if ((fa = fopen(a->links->path, "rb")) == NULL)
- goto err;
- if ((fb = fopen(b->links->path, "rb")) == NULL)
- goto err;
-
-#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
- posix_fadvise(fileno(fa), 0, 0, POSIX_FADV_SEQUENTIAL);
- posix_fadvise(fileno(fb), 0, 0, POSIX_FADV_SEQUENTIAL);
-#endif
-
- while (!handle_interrupt() && cmp == 0) {
- size_t ca;
- size_t cb;
-
- ca = fread(buf_a, 1, sizeof(buf_a), fa);
- if (ca < sizeof(buf_a) && ferror(fa))
- goto err;
-
- cb = fread(buf_b, 1, sizeof(buf_b), fb);
- if (cb < sizeof(buf_b) && ferror(fb))
- goto err;
-
- off += ca;
-
- if ((ca != cb || ca == 0)) {
- cmp = CMP(ca, cb);
- break;
- }
- cmp = memcmp(buf_a, buf_b, ca);
- }
- out:
- if (fa != NULL)
- fclose(fa);
- if (fb != NULL)
- fclose(fb);
- return !handle_interrupt() && cmp == 0;
- err:
- if (fa == NULL || fb == NULL)
- jlog(JLOG_SYSERR, "Cannot open %s",
- fa ? b->links->path : a->links->path);
- else
- jlog(JLOG_SYSERR, "Cannot read %s",
- ferror(fa) ? a->links->path : b->links->path);
- cmp = 1;
- goto out;
+ return TRUE;
}
+#endif /* USE_XATTR */
/**
* file_may_link_to - Check whether a file may replace another one
* @a: The first file
* @b: The second file
*
- * Check whether the two fies are considered equal and can be linked
- * together. If the two files are identical, the result will be FALSE,
- * as replacing a link with an identical one is stupid.
+ * Check whether the two files have equal attributes and can be linked.
+ * This function does not compare the content of the files!
*/
static int file_may_link_to(const struct file *a, const struct file *b)
{
- return (a->st.st_size != 0 &&
- a->st.st_size == b->st.st_size &&
- a->links != NULL && b->links != NULL &&
- a->st.st_dev == b->st.st_dev &&
- a->st.st_ino != b->st.st_ino &&
- (!opts.respect_mode || a->st.st_mode == b->st.st_mode) &&
- (!opts.respect_owner || a->st.st_uid == b->st.st_uid) &&
- (!opts.respect_owner || a->st.st_gid == b->st.st_gid) &&
- (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
- (!opts.respect_name
- || strcmp(a->links->path + a->links->basename,
- b->links->path + b->links->basename) == 0) &&
- (!opts.respect_xattrs || file_xattrs_equal(a, b)) &&
- file_contents_equal(a, b));
+ return (a->st.st_size != 0 &&
+ a->st.st_size == b->st.st_size &&
+ a->links != NULL && b->links != NULL &&
+ a->st.st_dev == b->st.st_dev &&
+ a->st.st_ino != b->st.st_ino &&
+ (!opts.respect_mode || a->st.st_mode == b->st.st_mode) &&
+ (!opts.respect_owner || a->st.st_uid == b->st.st_uid) &&
+ (!opts.respect_owner || a->st.st_gid == b->st.st_gid) &&
+ (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
+ (!opts.respect_name || filename_strcmp(a, b) == 0) &&
+ (!opts.respect_dir || dirname_strcmp(a, b) == 0) &&
+ (!opts.respect_xattrs || file_xattrs_equal(a, b)));
}
/**
*/
static int file_compare(const struct file *a, const struct file *b)
{
- int res = 0;
- if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
- return 0;
-
- if (res == 0 && opts.maximise)
- res = CMP(a->st.st_nlink, b->st.st_nlink);
- if (res == 0 && opts.minimise)
- res = CMP(b->st.st_nlink, a->st.st_nlink);
- if (res == 0)
- res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime)
- : CMP(a->st.st_mtime, b->st.st_mtime);
- if (res == 0)
- res = CMP(b->st.st_ino, a->st.st_ino);
-
- return res;
+ int res = 0;
+ if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
+ return 0;
+
+ if (res == 0 && opts.maximise)
+ res = CMP(a->st.st_nlink, b->st.st_nlink);
+ if (res == 0 && opts.minimise)
+ res = CMP(b->st.st_nlink, a->st.st_nlink);
+ if (res == 0)
+ res = opts.keep_oldest ? CMP(b->st.st_mtime, a->st.st_mtime)
+ : CMP(a->st.st_mtime, b->st.st_mtime);
+ if (res == 0)
+ res = CMP(b->st.st_ino, a->st.st_ino);
+
+ return res;
+}
+
+#ifdef USE_REFLINK
+/*
+ * do_link - Create @new_name as a link to the first path of @a
+ * @a: file to link to
+ * @b: file being replaced; its mode, uid and gid are applied to a reflink
+ * @new_name: path of the link to create
+ * @reflink: non-zero to try a FICLONE clone before hardlinking
+ *
+ * Returns 0 on success. When cloning fails and reflink_mode is
+ * REFLINK_ALWAYS, returns the negated errno of the failing step and leaves
+ * errno set to it; otherwise falls back to link() and returns its result.
+ */
+static inline int do_link(struct file *a, struct file *b,
+			  const char *new_name, int reflink)
+{
+	if (reflink) {
+		int dest = -1, src = -1;
+		int saved_errno;
+
+		dest = open(new_name, O_CREAT|O_WRONLY|O_TRUNC, 0600);
+		if (dest < 0)
+			goto fallback;
+		if (fchmod(dest, b->st.st_mode) != 0)
+			goto fallback;
+		if (fchown(dest, b->st.st_uid, b->st.st_gid) != 0)
+			goto fallback;
+		src = open(a->links->path, O_RDONLY);
+		if (src < 0)
+			goto fallback;
+		if (ioctl(dest, FICLONE, src) != 0)
+			goto fallback;
+		close(dest);
+		close(src);
+		return 0;
+fallback:
+		/*
+		 * Remember why cloning failed: the close() and unlink()
+		 * calls below may overwrite errno.
+		 */
+		saved_errno = errno;
+
+		if (dest >= 0) {
+			close(dest);
+			unlink(new_name);
+		}
+		if (src >= 0)
+			close(src);
+
+		if (reflink_mode == REFLINK_ALWAYS) {
+			/* file_link() reports this failure through warn() */
+			errno = saved_errno;
+			return -saved_errno;
+		}
+		jlog(JLOG_VERBOSE2, _("Reflinking failed, fallback to hardlinking"));
+	}
+
+	return link(a->links->path, new_name);
}
+#else
+/*
+ * Variant used when USE_REFLINK is not defined: @b and @reflink are
+ * ignored and @new_name is always created with link().
+ */
+static inline int do_link(struct file *a,
+			  struct file *b __attribute__((__unused__)),
+			  const char *new_name,
+			  int reflink __attribute__((__unused__)))
+{
+	const char *existing = a->links->path;
+
+	return link(existing, new_name);
+}
+#endif /* USE_REFLINK */
/**
* file_link - Replace b with a link to a
* linked to a temporary name, and then renamed to the name of @b, making
* the replace atomic (@b will always exist).
*/
-static int file_link(struct file *a, struct file *b)
+static int file_link(struct file *a, struct file *b, int reflink)
{
- file_link:
- assert(a->links != NULL);
- assert(b->links != NULL);
-
- jlog(JLOG_INFO, "%sLinking %s to %s (-%s)",
- opts.dry_run ? "[DryRun] " : "", a->links->path, b->links->path,
- format(a->st.st_size));
-
- if (!opts.dry_run) {
- size_t len = strlen(b->links->path) + strlen(".hardlink-temporary") + 1;
- char *new_path = xmalloc(len);
-
- snprintf(new_path, len, "%s.hardlink-temporary", b->links->path);
-
- if (link(a->links->path, new_path) != 0) {
- jlog(JLOG_SYSERR, "Cannot link %s to %s", a->links->path, new_path);
- free(new_path);
- return FALSE;
- } else if (rename(new_path, b->links->path) != 0) {
- jlog(JLOG_SYSERR, "Cannot rename %s to %s", a->links->path,
- new_path);
- unlink(new_path); /* cleanup failed rename */
- free(new_path);
- return FALSE;
- }
- free(new_path);
- }
-
- /* Update statistics */
- stats.linked++;
-
- /* Increase the link count of this file, and set stat() of other file */
- a->st.st_nlink++;
- b->st.st_nlink--;
-
- if (b->st.st_nlink == 0)
- stats.saved += a->st.st_size;
-
- /* Move the link from file b to a */
- {
- struct link *new_link = b->links;
-
- b->links = b->links->next;
- new_link->next = a->links->next;
- a->links->next = new_link;
- }
-
- // Do it again
- if (b->links)
- goto file_link;
-
- return TRUE;
+
+ file_link:
+	/* each pass handles the first remaining path of @b */
+	assert(a->links != NULL);
+	assert(b->links != NULL);
+
+	/* build the human-readable size only when the message is printed */
+	if (is_log_enabled(JLOG_INFO)) {
+		char *ssz = size_to_human_string(SIZE_SUFFIX_3LETTER |
+						 SIZE_SUFFIX_SPACE |
+						 SIZE_DECIMAL_2DIGITS, a->st.st_size);
+		jlog(JLOG_INFO, _("%s%sLinking %s to %s (-%s)"),
+		     opts.dry_run ? _("[DryRun] ") : "",
+		     reflink ? "Ref" : "",
+		     a->links->path, b->links->path,
+		     ssz);
+		free(ssz);
+	}
+
+	if (!opts.dry_run) {
+		char *new_path;
+		int failed = 1;
+
+		/* link under a temporary name, then rename over @b's path */
+		xasprintf(&new_path, "%s.hardlink-temporary", b->links->path);
+
+		if (do_link(a, b, new_path, reflink) != 0)
+			warn(_("cannot link %s to %s"), a->links->path, new_path);
+
+		else if (rename(new_path, b->links->path) != 0) {
+			/* name the two paths that were passed to rename() */
+			warn(_("cannot rename %s to %s"), new_path, b->links->path);
+			unlink(new_path);	/* cleanup failed rename */
+		} else
+			failed = 0;
+
+		free(new_path);
+		if (failed)
+			return FALSE;
+	}
+
+	/* Update statistics */
+	stats.linked++;
+
+	/* Increase the link count of this file, and set stat() of other file */
+	a->st.st_nlink++;
+	b->st.st_nlink--;
+
+	/* space counts as saved only once the last link to @b is gone */
+	if (b->st.st_nlink == 0)
+		stats.saved += a->st.st_size;
+
+	/* Move the link from file b to a */
+	{
+		struct link *new_link = b->links;
+
+		b->links = b->links->next;
+		new_link->next = a->links->next;
+		a->links->next = new_link;
+	}
+
+	/* Do it again */
+	if (b->links)
+		goto file_link;
+
+	return TRUE;
+}
+
+/* has_fpath - return 1 if @path equals the path of any link of @node, else 0 */
+static int has_fpath(struct file *node, const char *path)
+{
+	struct link *cur = node->links;
+
+	while (cur != NULL) {
+		if (!strcmp(cur->path, path))
+			return 1;
+		cur = cur->next;
+	}
+
+	return 0;
}
+
/**
* inserter - Callback function for nftw()
* @fpath: The path of the file being visited
* Called by nftw() for the files. See the manual page for nftw() for
* further information.
*/
-static int inserter(const char *fpath, const struct stat *sb, int typeflag,
- struct FTW *ftwbuf)
+static int inserter(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
{
- struct file *fil;
- struct file **node;
- size_t pathlen;
- int included;
- int excluded;
-
- if (handle_interrupt())
- return 1;
- if (typeflag == FTW_DNR || typeflag == FTW_NS)
- jlog(JLOG_SYSERR, "Cannot read %s", fpath);
- if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
- return 0;
-
- included = regexec_any(opts.include, fpath);
- excluded = regexec_any(opts.exclude, fpath);
-
- if ((opts.exclude && excluded && !included) ||
- (!opts.exclude && opts.include && !included))
- return 0;
-
- stats.files++;
-
- if (sb->st_size < opts.min_size) {
- jlog(JLOG_DEBUG1, "Skipped %s (smaller than configured size)", fpath);
- return 0;
- }
-
- jlog(JLOG_DEBUG2, "Visiting %s (file %zu)", fpath, stats.files);
-
- pathlen = strlen(fpath) + 1;
-
- fil = xcalloc(1, sizeof(*fil));
- fil->links = xcalloc(1, sizeof(struct link) + pathlen);
-
- fil->st = *sb;
- fil->links->basename = ftwbuf->base;
- fil->links->next = NULL;
-
- memcpy(fil->links->path, fpath, pathlen);
-
- node = tsearch(fil, &files_by_ino, compare_nodes_ino);
-
- if (node == NULL)
- return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
-
- if (*node != fil) {
- /* Already known inode, add link to inode information */
- assert((*node)->st.st_dev == sb->st_dev);
- assert((*node)->st.st_ino == sb->st_ino);
-
- fil->links->next = (*node)->links;
- (*node)->links = fil->links;
-
- free(fil);
- } else {
- /* New inode, insert into by-size table */
- node = tsearch(fil, &files, compare_nodes);
-
- if (node == NULL)
- return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
+	struct file *fil;
+	struct file **node;
+	size_t pathlen;
+	int included;
+	int excluded;
+
+	/* a non-zero return value makes nftw() stop the walk */
+	if (handle_interrupt())
+		return 1;
+	if (typeflag == FTW_DNR || typeflag == FTW_NS)
+		warn(_("cannot read %s"), fpath);
+	if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
+		return 0;
+
+	included = match_any_regex(opts.include, fpath);
+	excluded = match_any_regex(opts.exclude, fpath);
+
+	/*
+	 * Skip a file that is excluded and not explicitly included, or, when
+	 * only include patterns exist, a file that matches none of them.
+	 */
+	if ((opts.exclude && excluded && !included) ||
+	    (!opts.exclude && opts.include && !included))
+		return 0;
+
+	stats.files++;
+
+	if ((uintmax_t) sb->st_size < opts.min_size) {
+		jlog(JLOG_VERBOSE1,
+		     _("Skipped %s (smaller than configured size)"), fpath);
+		return 0;
+	}
+
+	/*
+	 * dev_t and ino_t are not guaranteed to be 64-bit wide; cast them so
+	 * the arguments match the PRIu64 conversions.
+	 */
+	jlog(JLOG_VERBOSE2, " %5zu: [%" PRIu64 "/%" PRIu64 "/%zu] %s",
+	     stats.files, (uint64_t) sb->st_dev, (uint64_t) sb->st_ino,
+	     (size_t) sb->st_nlink, fpath);
+
+	if ((opts.max_size > 0) && ((uintmax_t) sb->st_size > opts.max_size)) {
+		jlog(JLOG_VERBOSE1,
+		     _("Skipped %s (greater than configured size)"), fpath);
+		return 0;
+	}
+
+	pathlen = strlen(fpath) + 1;
+
+	/* the path is stored in the flexible array at the end of struct link */
+	fil = xcalloc(1, sizeof(*fil));
+	fil->links = xcalloc(1, sizeof(struct link) + pathlen);
+
+	fil->st = *sb;
+	fil->links->basename = ftwbuf->base;
+	fil->links->dirname = rootbasesz;
+	fil->links->next = NULL;
+
+	memcpy(fil->links->path, fpath, pathlen);
+
+	node = tsearch(fil, &files_by_ino, compare_nodes_ino);
+
+	if (node == NULL)
+		goto fail;
+
+	if (*node != fil) {
+		/* Already known inode, add link to inode information */
+		assert((*node)->st.st_dev == sb->st_dev);
+		assert((*node)->st.st_ino == sb->st_ino);
+
+		if (has_fpath(*node, fpath)) {
+			jlog(JLOG_VERBOSE1,
+			     _("Skipped %s (specified more than once)"), fpath);
+			free(fil->links);
+		} else {
+			/* the known node takes ownership of the new link */
+			fil->links->next = (*node)->links;
+			(*node)->links = fil->links;
+		}
+
+		free(fil);
+	} else {
+		/* New inode, insert into by-size table */
+		node = tsearch(fil, &files, compare_nodes);
+
+		if (node == NULL)
+			goto fail;
+
+		if (*node != fil) {
+			struct file *l;
+
+			/*
+			 * Another file with the same device and size is
+			 * already in the tree: chain @fil into that node's
+			 * ->next list at the position chosen by
+			 * file_compare().
+			 */
+			if (file_compare(fil, *node) >= 0) {
+				fil->next = *node;
+				*node = fil;
+			} else {
+				for (l = *node; l != NULL; l = l->next) {
+					if (l->next != NULL
+					    && file_compare(fil, l->next) < 0)
+						continue;
+
+					fil->next = l->next;
+					l->next = fil;
+
+					break;
+				}
+			}
+		}
+	}
+
+	return 0;
+
+ fail:
+	/* NOTE(review): returns 0, so nftw() keeps walking after this warning */
+	warn(_("cannot continue"));	/* probably ENOMEM */
+	return 0;
+}
- if (*node != fil) {
- struct file *l;
+#ifdef USE_REFLINK
+/*
+ * is_reflink_compatible - is @filename on a filesystem treated as reflink-capable?
+ * @devno:    device number of the file, used as the key of a one-entry cache
+ * @filename: path handed to statfs() when the cache does not match @devno
+ *
+ * Returns 1 when the statfs() magic is Btrfs or XFS, otherwise 0. The answer
+ * for the most recently probed device is kept in function-local statics; a
+ * failing statfs() returns 0 and leaves that cached answer untouched.
+ */
+static int is_reflink_compatible(dev_t devno, const char *filename)
+{
+	static dev_t cached_dev = 0;
+	static int cached_answer = 0;
+	struct statfs fsinfo;
+
+	/* same device as the previous probe: reuse the stored answer */
+	if (cached_dev == devno)
+		return cached_answer;
+
+	if (statfs(filename, &fsinfo) != 0)
+		return 0;
+
+	cached_dev = devno;
+
+	switch (fsinfo.f_type) {
+	case STATFS_BTRFS_MAGIC:
+	case STATFS_XFS_MAGIC:
+		cached_answer = 1;
+		break;
+	default:
+		cached_answer = 0;
+		break;
+	}
+
+	return cached_answer;
+}
- if (file_compare(fil, *node) >= 0) {
- fil->next = *node;
- *node = fil;
- } else {
- for (l = *node; l != NULL; l = l->next) {
- if (l->next != NULL && file_compare(fil, l->next) < 0)
- continue;
+/*
+ * is_reflink - check whether two files already share all of their extents
+ * @xa: first file; the path of its first link is opened read-only
+ * @xb: second file; the path of its first link is opened read-only
+ *
+ * Queries the FIEMAP extent lists of both files, one buffer-full at a time,
+ * and walks them in lock step. Returns 1 only if every pair of extents has
+ * the same logical offset, length and physical offset and both extents carry
+ * FIEMAP_EXTENT_SHARED. Returns 0 on any mismatch, when open() or the ioctl
+ * fails, or when a FIEMAP call reports no extents at all.
+ */
+static int is_reflink(struct file *xa, struct file *xb)
+{
+	int last = 0, rc = 0;
+	char abuf[BUFSIZ] = { 0 },
+	     bbuf[BUFSIZ] = { 0 };
+
+	struct fiemap *amap = (struct fiemap *) abuf,
+		      *bmap = (struct fiemap *) bbuf;
+
+	int af = open(xa->links->path, O_RDONLY),
+	    bf = open(xb->links->path, O_RDONLY);
+
+	if (af < 0 || bf < 0)
+		goto done;
+
+	do {
+		size_t i;
+		struct fiemap_extent *tail;
+
+		/* ask for as many extents as fit behind the header in each buffer */
+		amap->fm_length = ~0ULL;
+		amap->fm_flags = FIEMAP_FLAG_SYNC;
+		amap->fm_extent_count = (sizeof(abuf) - sizeof(*amap)) / sizeof(struct fiemap_extent);
+
+		bmap->fm_length = ~0ULL;
+		bmap->fm_flags = FIEMAP_FLAG_SYNC;
+		bmap->fm_extent_count = (sizeof(bbuf) - sizeof(*bmap)) / sizeof(struct fiemap_extent);
+
+		if (ioctl(af, FS_IOC_FIEMAP, (unsigned long) amap) < 0)
+			goto done;
+		if (ioctl(bf, FS_IOC_FIEMAP, (unsigned long) bmap) < 0)
+			goto done;
+
+		if (amap->fm_mapped_extents != bmap->fm_mapped_extents)
+			goto done;
+
+		/*
+		 * No extents were returned (for example a file that is one
+		 * big hole). Nothing is shared, and continuing would index
+		 * fm_extents[] with "0 - 1" and never set "last".
+		 */
+		if (amap->fm_mapped_extents == 0)
+			goto done;
+
+		for (i = 0; i < amap->fm_mapped_extents; i++) {
+			struct fiemap_extent *a = &amap->fm_extents[i];
+			struct fiemap_extent *b = &bmap->fm_extents[i];
+
+			if (a->fe_logical != b->fe_logical ||
+			    a->fe_length != b->fe_length ||
+			    a->fe_physical != b->fe_physical)
+				goto done;
+			if (!(a->fe_flags & FIEMAP_EXTENT_SHARED) ||
+			    !(b->fe_flags & FIEMAP_EXTENT_SHARED))
+				goto done;
+			if (a->fe_flags & FIEMAP_EXTENT_LAST)
+				last = 1;
+		}
+
+		/* next round starts right after the last extent seen so far */
+		tail = &amap->fm_extents[amap->fm_mapped_extents - 1];
+		bmap->fm_start = amap->fm_start =
+			tail->fe_logical + tail->fe_length;
+	} while (last == 0);
+
+	rc = 1;
+done:
+	if (af >= 0)
+		close(af);
+	if (bf >= 0)
+		close(bf);
+	return rc;
+}
+#endif /* USE_REFLINK */
- fil->next = l->next;
- l->next = fil;
+/* count_nodes - number of entries on the ->next chain that starts at @x (0 for NULL) */
+static inline size_t count_nodes(struct file *x)
+{
+	size_t total = 0;
- break;
- }
- }
- }
- }
+	while (x != NULL) {
+		total++;
+		x = x->next;
+	}
- return 0;
+	return total;
}
/**
+ * visitor - tree-walk callback that compares and links one chain of files
+ * @nodep: points at the tree slot holding the head of a ->next chain of files
+ * @which: kind of visit; only leaf and endorder visits do any work
+ * @depth: unused
+ *
+ * Each chain entry that still has links takes a turn as "master" and is
+ * compared with every later entry of the chain; entries whose content is
+ * equal are passed to file_link(). The process exits with EXIT_FAILURE as
+ * soon as handle_interrupt() returns non-zero.
*/
static void visitor(const void *nodep, const VISIT which, const int depth)
{
- struct file *master = *(struct file **) nodep;
- struct file *other;
-
- (void) depth;
-
- if (which != leaf && which != endorder)
- return;
-
- for (; master != NULL; master = master->next) {
- if (handle_interrupt())
- exit(1);
- if (master->links == NULL)
- continue;
-
- for (other = master->next; other != NULL; other = other->next) {
- if (handle_interrupt())
- exit(1);
-
- assert(other != other->next);
- assert(other->st.st_size == master->st.st_size);
-
- if (other->links == NULL || !file_may_link_to(master, other))
- continue;
-
- if (!file_link(master, other) && errno == EMLINK)
- master = other;
- }
- }
+ struct file *master = *(struct file **)nodep;
+ /* chain head, remembered for the final cleanup pass below */
+ struct file *begin = master;
+ struct file *other;
+
+ (void)depth;
+
+ /* ignore preorder/postorder visits so that a node is processed only once */
+ if (which != leaf && which != endorder)
+ return;
+
+ for (; master != NULL; master = master->next) {
+ size_t nnodes, memsiz;
+ int may_reflink = 0;
+
+ if (handle_interrupt())
+ exit(EXIT_FAILURE);
+ /* entries without links are not usable as master */
+ if (master->links == NULL)
+ continue;
+
+ /* calculate per file max memory use */
+ nnodes = count_nodes(master);
+ if (!nnodes)
+ continue;
+
+ /* per-file cache size */
+ memsiz = opts.cache_size / nnodes;
+ /* filesiz, readsiz, memsiz */
+ ul_fileeq_set_size(&fileeq, master->st.st_size, opts.io_size, memsiz);
+
+#ifdef USE_REFLINK
+ /* REFLINK_ALWAYS skips the filesystem probe; otherwise ask is_reflink_compatible() */
+ if (reflink_mode || reflinks_skip) {
+ may_reflink =
+ reflink_mode == REFLINK_ALWAYS ? 1 :
+ is_reflink_compatible(master->st.st_dev,
+ master->links->path);
+ }
+#endif
+ /* compare master with every later entry of the chain */
+ for (other = master->next; other != NULL; other = other->next) {
+ int eq;
+
+ if (handle_interrupt())
+ exit(EXIT_FAILURE);
+
+ assert(other != other->next);
+ assert(other->st.st_size == master->st.st_size);
+
+ if (!other->links)
+ continue;
+
+ /* check file attributes, etc. */
+ if (!file_may_link_to(master, other)) {
+ jlog(JLOG_VERBOSE2,
+ _("Skipped (attributes mismatch) %s"), other->links->path);
+ continue;
+ }
+#ifdef USE_REFLINK
+ if (may_reflink && reflinks_skip && is_reflink(master, other)) {
+ jlog(JLOG_VERBOSE2,
+ _("Skipped (already reflink) %s"), other->links->path);
+ stats.ignored_reflinks++;
+ continue;
+ }
+#endif
+ /* initialize content comparison */
+ if (!ul_fileeq_data_associated(&master->data))
+ ul_fileeq_data_set_file(&master->data, master->links->path);
+ if (!ul_fileeq_data_associated(&other->data))
+ ul_fileeq_data_set_file(&other->data, other->links->path);
+
+ /* compare files */
+ eq = ul_fileeq(&fileeq, &master->data, &other->data);
+
+ /* reduce number of open files, keep only master open */
+ ul_fileeq_data_close_file(&other->data);
+
+ stats.comparisons++;
+
+ if (!eq) {
+ jlog(JLOG_VERBOSE2,
+ _("Skipped (content mismatch) %s"), other->links->path);
+ continue;
+ }
+
+ /* link files */
+ /*
+ * When file_link() reports failure with errno EMLINK, drop the
+ * current master's comparison data and continue with "other"
+ * as the new master.
+ */
+ if (!file_link(master, other, may_reflink) && errno == EMLINK) {
+ ul_fileeq_data_deinit(&master->data);
+ master = other;
+ }
+ }
+
+ /* don't keep master data in memory */
+ ul_fileeq_data_deinit(&master->data);
+ }
+
+ /* final cleanup */
+ /* walk the whole chain again and release any comparison data still associated */
+ for (other = begin; other != NULL; other = other->next) {
+ if (ul_fileeq_data_associated(&other->data))
+ ul_fileeq_data_deinit(&other->data);
+ }
}
/**
+ * usage - print the help text to stdout and exit with EXIT_SUCCESS
+ *
+ * Never returns. Options that depend on optional features (-X, --reflink,
+ * --skip-reflinks) are listed only when USE_XATTR / USE_REFLINK is defined.
*/
static void __attribute__((__noreturn__)) usage(void)
{
- FILE *out = stdout;
-
- fputs(USAGE_HEADER, out);
- fprintf(out, _(" %s [options] <directory>|<file> ...\n"), program_invocation_short_name);
-
- fputs(USAGE_SEPARATOR, out);
- fputs(_("Consolidate duplicate files using hardlinks.\n"), out);
-
- fputs(USAGE_OPTIONS, out);
- fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out);
- fputs(_(" -n, --dry-run don't actually link anything\n"), out);
- fputs(_(" -f, --respect-name filenames have to be identical\n"), out);
- fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out);
- fputs(_(" -o, --ignore-owner ignore owner changes\n"), out);
- fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out);
-#ifdef HAVE_SYS_XATTR_H
- fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out);
+ FILE *out = stdout;
+
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] <directory>|<file> ...\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Consolidate duplicate files using hardlinks.\n"), out);
+
+ /* option list, ordered by short option letter */
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -c, --content compare only file contents, same as -pot\n"), out);
+ fputs(_(" -b, --io-size <size> I/O buffer size for file reading\n"
+ " (speedup, using more RAM)\n"), out);
+ fputs(_(" -d, --respect-dir directory names have to be identical\n"), out);
+ fputs(_(" -f, --respect-name filenames have to be identical\n"), out);
+ fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out);
+ fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
+ " lowest hardlink count\n"), out);
+ fputs(_(" -M, --minimize reverse the meaning of -m\n"), out);
+ fputs(_(" -n, --dry-run don't actually link anything\n"), out);
+ fputs(_(" -o, --ignore-owner ignore owner changes\n"), out);
+ fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
+ " (lower precedence than minimize/maximize)\n"), out);
+ fputs(_(" -p, --ignore-mode ignore changes of file mode\n"), out);
+ fputs(_(" -q, --quiet quiet mode - don't print anything\n"), out);
+ fputs(_(" -r, --cache-size <size> memory limit for cached file content data\n"), out);
+ fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out);
+ fputs(_(" -S, --maximum-size <size> maximum size for files.\n"), out);
+ fputs(_(" -t, --ignore-time ignore timestamps (when testing for equality)\n"), out);
+ fputs(_(" -v, --verbose verbose output (repeat for more verbosity)\n"), out);
+ fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out);
+ /* -X is listed only when xattr support is compiled in */
+#ifdef USE_XATTR
+ fputs(_(" -X, --respect-xattrs respect extended attributes\n"), out);
#endif
- fputs(_(" -m, --maximize maximize the hardlink count, remove the file with\n"
- " lowest hardlink count\n"), out);
- fputs(_(" -M, --minimize reverse the meaning of -m\n"), out);
- fputs(_(" -O, --keep-oldest keep the oldest file of multiple equal files\n"
- " (lower precedence than minimize/maximize)\n"), out);
- fputs(_(" -x, --exclude <regex> regular expression to exclude files\n"), out);
- fputs(_(" -i, --include <regex> regular expression to include files/dirs\n"), out);
- fputs(_(" -s, --minimum-size <size> minimum size for files.\n"), out);
- fputs(_(" -c, --content compare only file contents, same as -pot\n"), out);
-
- fputs(USAGE_SEPARATOR, out);
- printf(USAGE_HELP_OPTIONS(28));
-
- printf(USAGE_MAN_TAIL("hardlink(1)"));
-
- exit(EXIT_SUCCESS);
-}
-
-/**
- * register_regex - Compile and insert a regular expression into list
- * @pregs: Pointer to a linked list of regular expressions
- * @regex: String containing the regular expression to be compiled
- */
-static int register_regex(struct regex_link **pregs, const char *regex)
-{
- struct regex_link *link;
- int err;
-
- link = xmalloc(sizeof(*link));
-
- if ((err = regcomp(&link->preg, regex, REG_NOSUB | REG_EXTENDED)) != 0) {
- size_t size = regerror(err, &link->preg, NULL, 0);
- char *buf = xmalloc(size + 1);
-
- regerror(err, &link->preg, buf, size);
+ fputs(_(" -y, --method <name> file content comparison method\n"), out);
- jlog(JLOG_FATAL, "Could not compile regular expression %s: %s",
- regex, buf);
- free(buf);
- free(link);
- return 1;
- }
+ /* long-only reflink options are listed only when reflink support is compiled in */
+#ifdef USE_REFLINK
+ fputs(_(" --reflink[=<when>] create clone/CoW copies (auto, always, never)\n"), out);
+ fputs(_(" --skip-reflinks skip already cloned files (enabled on --reflink)\n"), out);
+#endif
+ fputs(USAGE_SEPARATOR, out);
+ fprintf(out, USAGE_HELP_OPTIONS(28));
+ fprintf(out, USAGE_MAN_TAIL("hardlink(1)"));
- link->next = *pregs;
- *pregs = link;
- return 0;
+ exit(EXIT_SUCCESS);
}
/**
+ * parse_options - parse the command line into opts and related globals
+ * @argc: argument count as passed to main()
+ * @argv: argument vector as passed to main()
+ *
+ * Returns 0. Unknown options end in errtryhelp(EXIT_FAILURE), an unsupported
+ * --reflink argument in errx(); size arguments go through strtosize_or_err()
+ * (presumably fatal on a parse error - confirm against strutils).
*/
static int parse_options(int argc, char *argv[])
{
- static const char optstr[] = "VhvnfpotXcmMOx:i:s:";
- static const struct option long_options[] = {
- {"version", no_argument, NULL, 'V'},
- {"help", no_argument, NULL, 'h'},
- {"verbose", no_argument, NULL, 'v'},
- {"dry-run", no_argument, NULL, 'n'},
- {"respect-name", no_argument, NULL, 'f'},
- {"ignore-mode", no_argument, NULL, 'p'},
- {"ignore-owner", no_argument, NULL, 'o'},
- {"ignore-time", no_argument, NULL, 't'},
- {"respect-xattrs", no_argument, NULL, 'X'},
- {"maximize", no_argument, NULL, 'm'},
- {"minimize", no_argument, NULL, 'M'},
- {"keep-oldest", no_argument, NULL, 'O'},
- {"exclude", required_argument, NULL, 'x'},
- {"include", required_argument, NULL, 'i'},
- {"minimum-size", required_argument, NULL, 's'},
- {"content", no_argument, NULL, 'c'},
- {NULL, 0, NULL, 0}
- };
-
- int opt;
- char unit = '\0';
-
-
- while ((opt = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
- switch (opt) {
- case 'p':
- opts.respect_mode = FALSE;
- break;
- case 'o':
- opts.respect_owner = FALSE;
- break;
- case 't':
- opts.respect_time = FALSE;
- break;
- case 'X':
- opts.respect_xattrs = TRUE;
- break;
- case 'm':
- opts.maximise = TRUE;
- break;
- case 'M':
- opts.minimise = TRUE;
- break;
- case 'O':
- opts.keep_oldest = TRUE;
- break;
- case 'f':
- opts.respect_name = TRUE;
- break;
- case 'v':
- opts.verbosity++;
- break;
- case 'c':
- opts.respect_mode = FALSE;
- opts.respect_name = FALSE;
- opts.respect_owner = FALSE;
- opts.respect_time = FALSE;
- opts.respect_xattrs = FALSE;
- break;
- case 'n':
- opts.dry_run = 1;
- break;
- case 'x':
- if (register_regex(&opts.exclude, optarg) != 0)
- return 1;
- break;
- case 'i':
- if (register_regex(&opts.include, optarg) != 0)
- return 1;
- break;
- case 's':
- if (sscanf(optarg, "%llu%c", &opts.min_size, &unit) < 1) {
- jlog(JLOG_ERROR, "Invalid option given to -s: %s", optarg);
- return 1;
- }
- switch (tolower(unit)) {
- case '\0':
- break;
- case 't':
- opts.min_size *= 1024;
- case 'g':
- opts.min_size *= 1024;
- case 'm':
- opts.min_size *= 1024;
- case 'k':
- opts.min_size *= 1024;
- break;
- default:
- jlog(JLOG_ERROR, "Unknown unit indicator %c.", unit);
- return 1;
- }
- jlog(JLOG_DEBUG1, "Using minimum size of %lld bytes.",
- opts.min_size);
- break;
-
- case 'h':
- usage();
- case 'V':
- print_version(EXIT_SUCCESS);
- default:
- errtryhelp(EXIT_FAILURE);
- }
- }
- return 0;
+ /* long-only options get codes above CHAR_MAX so they cannot equal a short option letter */
+ enum {
+ OPT_REFLINK = CHAR_MAX + 1,
+ OPT_SKIP_RELINKS
+ };
+ static const char optstr[] = "VhvndfpotXcmMOx:y:i:r:S:s:b:q";
+ static const struct option long_options[] = {
+ {"version", no_argument, NULL, 'V'},
+ {"help", no_argument, NULL, 'h'},
+ {"verbose", no_argument, NULL, 'v'},
+ {"dry-run", no_argument, NULL, 'n'},
+ {"respect-name", no_argument, NULL, 'f'},
+ {"respect-dir", no_argument, NULL, 'd'},
+ {"ignore-mode", no_argument, NULL, 'p'},
+ {"ignore-owner", no_argument, NULL, 'o'},
+ {"ignore-time", no_argument, NULL, 't'},
+ {"respect-xattrs", no_argument, NULL, 'X'},
+ {"maximize", no_argument, NULL, 'm'},
+ {"minimize", no_argument, NULL, 'M'},
+ {"keep-oldest", no_argument, NULL, 'O'},
+ {"exclude", required_argument, NULL, 'x'},
+ {"include", required_argument, NULL, 'i'},
+ {"method", required_argument, NULL, 'y' },
+ {"minimum-size", required_argument, NULL, 's'},
+ {"maximum-size", required_argument, NULL, 'S'},
+#ifdef USE_REFLINK
+ {"reflink", optional_argument, NULL, OPT_REFLINK },
+ {"skip-reflinks", no_argument, NULL, OPT_SKIP_RELINKS },
+#endif
+ {"io-size", required_argument, NULL, 'b'},
+ {"content", no_argument, NULL, 'c'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"cache-size", required_argument, NULL, 'r'},
+ {NULL, 0, NULL, 0}
+ };
+ /* -q and -v are declared mutually exclusive */
+ static const ul_excl_t excl[] = {
+ {'q', 'v'},
+ {0}
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+ int c, content_only = 0;
+
+ while ((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
+
+ err_exclusive_options(c, long_options, excl, excl_st);
+
+ switch (c) {
+ case 'p':
+ opts.respect_mode = FALSE;
+ break;
+ case 'o':
+ opts.respect_owner = FALSE;
+ break;
+ case 't':
+ opts.respect_time = FALSE;
+ break;
+ case 'X':
+ opts.respect_xattrs = TRUE;
+ break;
+ case 'm':
+ opts.maximise = TRUE;
+ break;
+ case 'M':
+ opts.minimise = TRUE;
+ break;
+ case 'O':
+ opts.keep_oldest = TRUE;
+ break;
+ case 'f':
+ opts.respect_name = TRUE;
+ break;
+ case 'd':
+ opts.respect_dir = TRUE;
+ break;
+ case 'v':
+ opts.verbosity++;
+ break;
+ case 'q':
+ quiet = TRUE;
+ break;
+ case 'c':
+ /* only remembered here; applied after the loop */
+ content_only = 1;
+ break;
+ case 'n':
+ opts.dry_run = 1;
+ break;
+ case 'x':
+ register_regex(&opts.exclude, optarg);
+ break;
+ case 'y':
+ opts.method = optarg;
+ break;
+ case 'i':
+ register_regex(&opts.include, optarg);
+ break;
+ case 's':
+ opts.min_size = strtosize_or_err(optarg, _("failed to parse minimum size"));
+ break;
+ case 'S':
+ opts.max_size = strtosize_or_err(optarg, _("failed to parse maximum size"));
+ break;
+ case 'r':
+ opts.cache_size = strtosize_or_err(optarg, _("failed to parse cache size"));
+ break;
+ case 'b':
+ opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size"));
+ break;
+#ifdef USE_REFLINK
+ case OPT_REFLINK:
+ /* a bare --reflink means "auto" */
+ reflink_mode = REFLINK_AUTO;
+ if (optarg) {
+ if (strcmp(optarg, "auto") == 0)
+ reflink_mode = REFLINK_AUTO;
+ else if (strcmp(optarg, "always") == 0)
+ reflink_mode = REFLINK_ALWAYS;
+ else if (strcmp(optarg, "never") == 0)
+ reflink_mode = REFLINK_NEVER;
+ else
+ errx(EXIT_FAILURE, _("unsupported reflink mode; %s"), optarg);
+ }
+ /* any mode other than "never" also enables skipping of existing reflinks */
+ if (reflink_mode != REFLINK_NEVER)
+ reflinks_skip = 1;
+ break;
+ case OPT_SKIP_RELINKS:
+ reflinks_skip = 1;
+ break;
+#endif
+ case 'h':
+ /* usage() is declared noreturn, so there is no fall-through here */
+ usage();
+ case 'V':
+ {
+ static const char *features[] = {
+#ifdef USE_REFLINK
+ "reflink",
+#endif
+#ifdef USE_FILEEQ_CRYPTOAPI
+ "cryptoapi",
+#endif
+ NULL
+ };
+ /* NOTE(review): no break follows - presumably this call exits; confirm */
+ print_version_with_features(EXIT_SUCCESS, features);
+ }
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* -c is applied last, so it overrides -f, -d and -X wherever they appear on the command line */
+ if (content_only) {
+ opts.respect_mode = FALSE;
+ opts.respect_name = FALSE;
+ opts.respect_dir = FALSE;
+ opts.respect_owner = FALSE;
+ opts.respect_time = FALSE;
+ opts.respect_xattrs = FALSE;
+ }
+ return 0;
}
/**
- * to_be_called_atexit - Cleanup handler, also prints statistics.
- */
+ * to_be_called_atexit - exit hook registered through atexit()
+ *
+ * Prints the statistics, but only once stats.started has been set.
+ */
static void to_be_called_atexit(void)
{
- if (stats.started)
- print_stats();
+	/* nothing to report if processing never began */
+	if (!stats.started)
+		return;
+
+	print_stats();
}
/**
- * sighandler - Signal handler, sets the global last_signal variable
- * @i: The signal number
- */
+ * sighandler - Signal handler, sets the global last_signal variable
+ * @i: The signal number
+ *
+ * A SIGINT that was already recorded is never overwritten by a later signal.
+ * On SIGINT a newline is written to stdout with write(). errno is saved on
+ * entry and restored on exit because write() may modify it, and interrupted
+ * code (e.g. the "errno == EMLINK" test after file_link()) relies on its value.
+ */
static void sighandler(int i)
{
- if (last_signal != SIGINT)
- last_signal = i;
- if (i == SIGINT)
- putchar('\n');
+	int saved_errno = errno;
+
+	if (last_signal != SIGINT)
+		last_signal = i;
+	if (i == SIGINT) {
+		/* can't use stdio on signal handler */
+		ignore_result(write(STDOUT_FILENO, "\n", sizeof("\n")-1));
+	}
+
+	errno = saved_errno;
}
+/*
+ * main - scan the paths given on the command line, then compare and link
+ *
+ * Installs the signal handler, sets up the locale, registers the exit hook,
+ * parses options, initializes the file comparison method (falling back to
+ * "memcmp"), collects files with nftw()/inserter and finally walks the
+ * collected tree with visitor. Returns 0; fatal setup errors exit through
+ * err()/errx().
+ */
int main(int argc, char *argv[])
{
- struct sigaction sa;
-
- sa.sa_handler = sighandler;
- sa.sa_flags = SA_RESTART;
- sigfillset(&sa.sa_mask);
-
- /* If we receive a SIGINT, end the processing */
- sigaction(SIGINT, &sa, NULL);
- sigaction(SIGUSR1, &sa, NULL);
-
- /* Pretty print numeric output */
- setlocale(LC_NUMERIC, "");
- stats.start_time = gettime();
-
- if (atexit(to_be_called_atexit) != 0) {
- jlog(JLOG_SYSFAT, "Cannot register exit handler");
- return 1;
- }
-
- if (parse_options(argc, argv) != 0)
- return 1;
-
- if (optind == argc) {
- jlog(JLOG_FATAL, "Expected file or directory names");
- return 1;
- }
-
- stats.started = TRUE;
-
- for (; optind < argc; optind++)
- if (nftw(argv[optind], inserter, 20, FTW_PHYS) == -1)
- jlog(JLOG_SYSERR, "Cannot process %s", argv[optind]);
-
- twalk(files, visitor);
-
- return 0;
+ struct sigaction sa;
+ int rc;
+
+ sa.sa_handler = sighandler;
+ sa.sa_flags = SA_RESTART;
+ sigfillset(&sa.sa_mask);
+
+ /* If we receive a SIGINT, end the processing */
+ sigaction(SIGINT, &sa, NULL);
+ sigaction(SIGUSR1, &sa, NULL);
+
+ /* Localize messages, number formatting, and anything else. */
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+
+ if (atexit(to_be_called_atexit) != 0)
+ err(EXIT_FAILURE, _("cannot register exit handler"));
+
+ parse_options(argc, argv);
+
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no directory or file specified"));
+
+ gettime_monotonic(&stats.start_time);
+
+ /* try the requested comparison method first; on failure retry once with "memcmp" */
+ rc = ul_fileeq_init(&fileeq, opts.method);
+ if (rc != 0 && strcmp(opts.method, "memcmp") != 0) {
+ jlog(JLOG_INFO, _("cannot initialize %s method, use 'memcmp' fallback"), opts.method);
+ opts.method = "memcmp";
+ rc = ul_fileeq_init(&fileeq, opts.method);
+ }
+ if (rc < 0)
+ err(EXIT_FAILURE, _("failed to initialize files comparior"));
+
+ /* default I/O size: 8 KiB for memcmp, 1 MiB for any other method */
+ if (!opts.io_size) {
+ if (strcmp(opts.method, "memcmp") == 0)
+ opts.io_size = 8*1024;
+ else
+ opts.io_size = 1024*1024;
+ }
+
+ /* from here on the exit hook prints statistics */
+ stats.started = TRUE;
+
+ jlog(JLOG_VERBOSE2, _("Scanning [device/inode/links]:"));
+ for (; optind < argc; optind++) {
+ /* realpath() allocates the resolved path; it is freed below */
+ char *path = realpath(argv[optind], NULL);
+
+ if (!path) {
+ warn(_("cannot get realpath: %s"), argv[optind]);
+ continue;
+ }
+ /* inserter stores rootbasesz as the dirname field of each link it creates */
+ if (opts.respect_dir)
+ rootbasesz = strlen(path);
+ if (nftw(path, inserter, 20, FTW_PHYS) == -1)
+ warn(_("cannot process %s"), path);
+ free(path);
+ rootbasesz = 0;
+ }
+
+ /* compare and link everything collected above */
+ twalk(files, visitor);
+
+ ul_fileeq_deinit(&fileeq);
+ return 0;
}