-/*
- * hardlink - consolidate duplicate files via hardlinks
+/* hardlink.c - Link multiple identical files together
*
- * Copyright (C) 2018 Red Hat, Inc. All rights reserved.
- * Written by Jakub Jelinek <jakub@redhat.com>
+ * Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
*
- * Copyright (C) 2019 Karel Zak <kzak@redhat.com>
+ * SPDX-License-Identifier: MIT
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
*
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
-#include <sys/types.h>
-#include <stdlib.h>
+
+#define _GNU_SOURCE /* GNU extensions (optional) */
+#define _POSIX_C_SOURCE 200112L /* POSIX functions */
+#define _XOPEN_SOURCE 600 /* nftw() */
+
+#define _FILE_OFFSET_BITS 64 /* Large file support */
+#define _LARGEFILE_SOURCE /* Large file support */
+#define _LARGE_FILES /* AIX apparently */
+
+#include <sys/types.h> /* stat */
+#include <sys/stat.h> /* stat */
+#include <sys/time.h> /* getrlimit, getrusage */
+#include <sys/resource.h> /* getrlimit, getrusage */
+#include <unistd.h> /* stat */
+#include <fcntl.h> /* posix_fadvise */
+#include <ftw.h> /* ftw */
+#include <search.h> /* tsearch() and friends */
+
+#include <errno.h> /* strerror, errno */
+#include <locale.h> /* setlocale */
+#include <signal.h> /* SIG*, sigaction */
+#include <stdio.h> /* stderr, fprint */
+#include <stdarg.h> /* va_arg */
+#include <stdlib.h> /* free(), realloc() */
+#include <string.h> /* strcmp() and friends */
+#include <assert.h> /* assert() */
+#include <ctype.h> /* tolower() */
+
+/* Some boolean names for clarity; the implicit enumerator values make FALSE == 0 and TRUE == 1 */
+typedef enum hl_bool {
+ FALSE,
+ TRUE
+} hl_bool;
+
+/* The makefile sets this for us and creates config.h */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* If we don't have getopt_long(), fall back to plain getopt(); the long option table is then ignored */
+#ifdef HAVE_GETOPT_LONG
#include <getopt.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <errno.h>
-#ifdef HAVE_PCRE
-# define PCRE2_CODE_UNIT_WIDTH 8
-# include <pcre2.h>
+#else
+#define getopt_long(argc, argv, shrt, lng, index) getopt((argc), (argv), (shrt))
#endif
-#include "c.h"
-#include "xalloc.h"
-#include "nls.h"
-#include "closestream.h"
+/* For systems without posix_fadvise */
+#ifndef HAVE_POSIX_FADVISE
+#define posix_fadvise(fd, offset, len, advise) (void) 0
+#endif
-#define NHASH (1<<17) /* Must be a power of 2! */
-#define NBUF 64
+/* __attribute__ is fairly GNU-specific, define a no-op alternative elsewhere */
+#ifndef __GNUC__
+#define __attribute__(attributes)
+#endif
-struct hardlink_file;
+/*
+ * Pick the regular-expression backend. PCRE2's POSIX wrapper is preferred,
+ * then PCRE's, then the system <regex.h>. All of them are used through the
+ * POSIX regcomp()/regexec() interface, so the rest of the file does not care
+ * which one was selected.
+ */
+#if defined(HAVE_libpcre2_posix)
+#include <pcre2posix.h>
+#undef REG_NOSUB
+#define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
+#elif defined(HAVE_libpcreposix)
+#include <pcreposix.h>
+#undef REG_NOSUB
+#define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
+#else
+#include <regex.h> /* regcomp(), regexec() */
+#endif
-struct hardlink_hash {
- struct hardlink_hash *next;
- struct hardlink_file *chain;
- off_t size;
- time_t mtime;
-};
+#ifdef HAVE_XATTR
+#include <sys/xattr.h> /* listxattr, getxattr */
+#endif
-struct hardlink_dir {
- struct hardlink_dir *next;
- char name[];
+/**
+ * struct file - Information about a file
+ * @st: The stat buffer associated with the file
+ * @next: Next file with the same size
+ * @links: Singly linked list of path entries for this file (presumably one
+ *         entry per path name under which the file was found - confirm
+ *         against the code that builds the list)
+ * @links.next: The next entry of the list
+ * @links.basename: The offset of the basename in the filename
+ * @links.path: The path of the file, stored inline as the trailing array
+ *
+ * This contains all information we need about a file.
+ *
+ * The trailing path array is declared as a C99 flexible array member when
+ * __STDC_VERSION__ says C99 or later, as a GNU zero-length array when only
+ * __GNUC__ is set, and as a one-element array as the last resort.
+ */
+struct file {
+ struct stat st;
+ struct file *next;
+ struct link {
+ struct link *next;
+ int basename;
+#if __STDC_VERSION__ >= 199901L
+ char path[];
+#elif __GNUC__
+ char path[0];
+#else
+ char path[1];
+#endif
+ } *links;
};
-struct hardlink_file {
- struct hardlink_file *next;
- ino_t ino;
- dev_t dev;
- unsigned int cksum;
- char name[];
+/**
+ * enum log_level - Logging levels
+ * @JLOG_SYSFAT: Fatal error message with errno, will be printed to stderr
+ * @JLOG_FATAL: Fatal error message (no errno text), will be printed to stderr
+ * @JLOG_SYSERR: Error message with errno, will be printed to stderr
+ * @JLOG_ERROR: Error message, will be printed to stderr
+ * @JLOG_SUMMARY: Default log level
+ * @JLOG_INFO: Verbose logging (verbose == 1)
+ * @JLOG_DEBUG1: Verbosity 2
+ * @JLOG_DEBUG2: Verbosity 3
+ *
+ * The error levels are negative and the informational levels count up from
+ * JLOG_SUMMARY == 0, so a message level can be compared numerically against
+ * the configured verbosity.
+ */
+enum log_level {
+ JLOG_SYSFAT = -4,
+ JLOG_FATAL = -3,
+ JLOG_SYSERR = -2,
+ JLOG_ERROR = -1,
+ JLOG_SUMMARY,
+ JLOG_INFO,
+ JLOG_DEBUG1,
+ JLOG_DEBUG2
};
-struct hardlink_dynstr {
- char *buf;
- size_t alloc;
-};
+/**
+ * struct statistics - Statistics about the current run
+ * @started: Whether we are post command-line processing
+ * @files: The number of files worked on
+ * @linked: The number of files replaced by a hardlink to a master
+ * @xattr_comparisons: The number of extended attribute comparisons
+ * @comparisons: The number of comparisons
+ * @saved: The (exaggerated) amount of space saved
+ * @start_time: The time we started at, in seconds since some unspecified point
+ *
+ * The single file-scope instance of this structure is named stats.
+ */
+static struct statistics {
+ hl_bool started;
+ size_t files;
+ size_t linked;
+ size_t xattr_comparisons;
+ size_t comparisons;
+ double saved;
+ double start_time;
+} stats;
-struct hardlink_ctl {
- struct hardlink_dir *dirs;
- struct hardlink_hash *hps[NHASH];
- char iobuf1[BUFSIZ];
- char iobuf2[BUFSIZ];
- /* summary counters */
- unsigned long long ndirs;
- unsigned long long nobjects;
- unsigned long long nregfiles;
- unsigned long long ncomp;
- unsigned long long nlinks;
- unsigned long long nsaved;
- /* current device */
- dev_t dev;
- /* flags */
- unsigned int verbose;
- unsigned int
- no_link:1,
- content_only:1,
- force:1;
-};
-/* ctl is in global scope due use in atexit() */
-struct hardlink_ctl global_ctl;
+/**
+ * struct options - Processed command-line options
+ * @include: A linked list of regular expressions for the --include option
+ * @exclude: A linked list of regular expressions for the --exclude option
+ * @verbosity: The verbosity. Should be one of #enum log_level
+ * @respect_mode: Whether to respect file modes (default = TRUE)
+ * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
+ * @respect_name: Whether to respect file names (default = FALSE)
+ * @respect_time: Whether to respect file modification times (default = TRUE)
+ * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
+ * @maximise: Choose the file with the highest link count as master
+ * @minimise: Choose the file with the lowest link count as master
+ * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
+ * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
+ * @min_size: Minimum size of files to consider. (default = 1 byte)
+ *
+ * Each element of the @include and @exclude lists is a struct regex_link,
+ * which pairs one compiled expression (preg) with the pointer to the next
+ * element. The boolean settings are one-bit bit-fields. The single file-scope
+ * instance of this structure is named opts.
+ */
+static struct options {
+ struct regex_link {
+ regex_t preg;
+ struct regex_link *next;
+ } *include, *exclude;
+ signed int verbosity;
+ unsigned int respect_mode:1;
+ unsigned int respect_owner:1;
+ unsigned int respect_name:1;
+ unsigned int respect_time:1;
+ unsigned int respect_xattrs:1;
+ unsigned int maximise:1;
+ unsigned int minimise:1;
+ unsigned int keep_oldest:1;
+ unsigned int dry_run:1;
+ unsigned long long min_size;
+} opts;
+
+/*
+ * files
+ *
+ * A binary tree of files, managed using tsearch(). To see which nodes
+ * are considered equal, see compare_nodes()
+ *
+ * files_by_ino
+ *
+ * A second tree root. NOTE(review): presumably the same kind of tsearch()
+ * tree, ordered by compare_nodes_ino() - confirm against the code that
+ * inserts into it.
+ */
+static void *files;
+static void *files_by_ino;
+
+/*
+ * last_signal
+ *
+ * The last signal we received. We store the signal here in order to be able
+ * to break out of loops gracefully and to return from our nftw() handler.
+ *
+ * The value is recorded when a signal arrives and polled later by the normal
+ * program flow (see handle_interrupt()). It is therefore declared as
+ * volatile sig_atomic_t, the object type that ISO C guarantees can be
+ * written from a signal handler and read back intact by the interrupted code.
+ */
+static volatile sig_atomic_t last_signal;
-__attribute__ ((always_inline))
-static inline unsigned int hash(off_t size, time_t mtime)
+__attribute__ ((format(printf, 2, 3)))
+/**
+ * jlog - Logging for hardlink
+ * @level: The log level
+ * @format: A format string for printf()
+ *
+ * The message is printed only if @level does not exceed opts.verbosity.
+ * Negative levels are written to stderr, all other levels to stdout. Levels
+ * up to and including JLOG_FATAL are prefixed with "ERROR: ", the remaining
+ * negative levels with "WARNING: ". For JLOG_SYSERR and JLOG_SYSFAT the
+ * strerror() text of the errno value seen on entry is appended. The message
+ * is always terminated by a newline.
+ */
+static void jlog(enum log_level level, const char *format, ...)
{
- return (size ^ mtime) & (NHASH - 1);
+ FILE *stream = (level >= 0) ? stdout : stderr;
+ int errno_ = errno; /* snapshot taken before any stdio call below can change errno */
+ va_list args;
+
+ if (level <= opts.verbosity) {
+ if (level <= JLOG_FATAL)
+ fprintf(stream, "ERROR: ");
+ else if (level < 0)
+ fprintf(stream, "WARNING: ");
+ va_start(args, format);
+ vfprintf(stream, format, args);
+ va_end(args);
+ if (level == JLOG_SYSERR || level == JLOG_SYSFAT)
+ fprintf(stream, ": %s\n", strerror(errno_));
+ else
+ fputc('\n', stream);
+ }
}
-__attribute__ ((always_inline))
-static inline int stcmp(struct stat *st1, struct stat *st2, int content_scope)
+/**
+ * CMP - Compare two numerical values, return 1, 0, or -1
+ * @a: First value
+ * @b: Second value
+ *
+ * Used to compare two integers of any size while avoiding overflow: the
+ * result is derived from comparisons, not from a subtraction. Each argument
+ * may be evaluated twice, so pass expressions without side effects.
+ */
+#define CMP(a, b) ((a) > (b) ? 1 : ((a) < (b) ? -1 : 0))
+
+/**
+ * format - Return a human-readable string for the given size
+ * @bytes: A number specifying an amount of bytes
+ *
+ * Uses a double. The result with infinity and NaN is most likely
+ * not pleasant.
+ *
+ * Values of at least 1024 are scaled to KiB, MiB or GiB (powers of 1024) and
+ * shown with two decimals; smaller values are shown as whole bytes.
+ *
+ * Returns: a pointer to a function-local static buffer. The next call
+ * overwrites it, so use or copy the text before calling format() again.
+ */
+static const char *format(double bytes)
{
- if (content_scope)
- return st1->st_size != st2->st_size;
-
- return st1->st_mode != st2->st_mode
- || st1->st_uid != st2->st_uid
- || st1->st_gid != st2->st_gid
- || st1->st_size != st2->st_size
- || st1->st_mtime != st2->st_mtime;
+ static char buf[256];
+
+ if (bytes >= 1024 * 1024 * 1024)
+ snprintf(buf, sizeof(buf), "%.2f GiB", (bytes / 1024 / 1024 / 1024));
+ else if (bytes >= 1024 * 1024)
+ snprintf(buf, sizeof(buf), "%.2f MiB", (bytes / 1024 / 1024));
+ else if (bytes >= 1024)
+ snprintf(buf, sizeof(buf), "%.2f KiB", (bytes / 1024));
+ else
+ snprintf(buf, sizeof(buf), "%.0f bytes", bytes);
+
+ return buf;
}
-static void print_summary(void)
+/**
+ * gettime() - Get the current time from the system
+ *
+ * Reads the clock with gettimeofday(). If that call fails, the failure is
+ * logged at JLOG_SYSERR and the result is computed from whatever tv holds
+ * (it is initialised to zero).
+ *
+ * Returns: tv_sec plus tv_usec / 1000000, i.e. seconds as a double.
+ */
+static double gettime(void)
{
- struct hardlink_ctl const *const ctl = &global_ctl;
-
- if (!ctl->verbose)
- return;
-
- if (ctl->verbose > 1 && ctl->nlinks)
- fputc('\n', stdout);
-
- printf(_("Directories: %9lld\n"), ctl->ndirs);
- printf(_("Objects: %9lld\n"), ctl->nobjects);
- printf(_("Regular files: %9lld\n"), ctl->nregfiles);
- printf(_("Comparisons: %9lld\n"), ctl->ncomp);
- printf( "%s%9lld\n", (ctl->no_link ?
- _("Would link: ") :
- _("Linked: ")), ctl->nlinks);
- printf( "%s %9lld\n", (ctl->no_link ?
- _("Would save: ") :
- _("Saved: ")), ctl->nsaved);
+ struct timeval tv = { 0, 0 };
+
+ if (gettimeofday(&tv, NULL) != 0)
+ jlog(JLOG_SYSERR, "Cannot read current time");
+
+ return (double) tv.tv_sec + (double) tv.tv_usec / 1000000;
}
-static void __attribute__((__noreturn__)) usage(void)
+/**
+ * regexec_any - Match against multiple regular expressions
+ * @pregs: A linked list of regular expressions
+ * @what: The string to match against
+ *
+ * Checks whether any of the regular expressions in the list matches the
+ * string. The list is walked in order and the walk stops at the first match.
+ * regexec() is called without a match array, so only the yes/no answer of
+ * each expression is used.
+ *
+ * Returns: %TRUE if at least one expression matches; %FALSE otherwise, which
+ * includes the case of an empty (NULL) list.
+ */
+static hl_bool regexec_any(struct regex_link *pregs, const char *what)
{
- fputs(USAGE_HEADER, stdout);
- printf(_(" %s [options] directory...\n"), program_invocation_short_name);
-
- fputs(USAGE_SEPARATOR, stdout);
- puts(_("Consolidate duplicate files using hardlinks."));
-
- fputs(USAGE_OPTIONS, stdout);
- puts(_(" -c, --content compare only contents, ignore permission, etc."));
- puts(_(" -n, --dry-run don't actually link anything"));
- puts(_(" -v, --verbose print summary after hardlinking"));
- puts(_(" -vv print every hardlinked file and summary"));
- puts(_(" -f, --force force hardlinking across filesystems"));
- puts(_(" -x, --exclude <regex> exclude files matching pattern"));
-
- fputs(USAGE_SEPARATOR, stdout);
- printf(USAGE_HELP_OPTIONS(16)); /* char offset to align option descriptions */
- printf(USAGE_MAN_TAIL("hardlink(1)"));
- exit(EXIT_SUCCESS);
+ for (; pregs != NULL; pregs = pregs->next)
+ if (regexec(&pregs->preg, what, 0, NULL, 0) == 0)
+ return TRUE;
+ return FALSE;
}
-__attribute__ ((always_inline))
-static inline size_t add2(size_t a, size_t b)
+/**
+ * compare_nodes - Node comparison function
+ * @_a: The first node (a #struct file)
+ * @_b: The second node (a #struct file)
+ *
+ * Compare the two nodes for the binary tree. Nodes are ordered by device
+ * number first and by file size second, so two files on the same device
+ * with the same size compare as equal.
+ *
+ * Returns: -1, 0 or 1, as produced by CMP().
+ */
+static int compare_nodes(const void *_a, const void *_b)
{
- size_t sum = a + b;
+ const struct file *a = _a;
+ const struct file *b = _b;
+ int diff = 0;
- if (sum < a)
- errx(EXIT_FAILURE, _("integer overflow"));
- return sum;
+ if (diff == 0)
+ diff = CMP(a->st.st_dev, b->st.st_dev);
+ if (diff == 0)
+ diff = CMP(a->st.st_size, b->st.st_size);
+
+ return diff;
}
-__attribute__ ((always_inline))
-static inline size_t add3(size_t a, size_t b, size_t c)
+/**
+ * compare_nodes_ino - Node comparison function
+ * @_a: The first node (a #struct file)
+ * @_b: The second node (a #struct file)
+ *
+ * Compare the two nodes for the binary tree. Nodes are ordered by device
+ * number first and by inode number second. When opts.respect_name is set,
+ * nodes with the same device and inode are further ordered by the basename
+ * of their first link, using strcmp(); in that mode both nodes must have a
+ * non-NULL links pointer because it is dereferenced without a check.
+ *
+ * Returns: a negative value, 0 or a positive value.
+ */
+static int compare_nodes_ino(const void *_a, const void *_b)
{
- return add2(add2(a, b), c);
+ const struct file *a = _a;
+ const struct file *b = _b;
+ int diff = 0;
+
+ if (diff == 0)
+ diff = CMP(a->st.st_dev, b->st.st_dev);
+ if (diff == 0)
+ diff = CMP(a->st.st_ino, b->st.st_ino);
+
+ /* If opts.respect_name is used, we will restrict a struct file to
+ * contain only links with the same basename to keep the rest simple.
+ */
+ if (diff == 0 && opts.respect_name)
+ diff = strcmp(a->links->path + a->links->basename,
+ b->links->path + b->links->basename);
+
+ return diff;
}
-static void growstr(struct hardlink_dynstr *str, size_t newlen)
+/**
+ * print_stats - Print statistics to stdout
+ *
+ * Every line is emitted through jlog() at JLOG_SUMMARY, so nothing is shown
+ * when opts.verbosity is below that level. The xattr comparison counter is
+ * reported only in builds with HAVE_XATTR. The duration is the difference
+ * between the current gettime() value and stats.start_time.
+ */
+static void print_stats(void)
{
- if (newlen < str->alloc)
- return;
- str->buf = xrealloc(str->buf, str->alloc = add2(newlen, 1));
+ jlog(JLOG_SUMMARY, "Mode: %s", opts.dry_run ? "dry-run" : "real");
+ jlog(JLOG_SUMMARY, "Files: %zu", stats.files);
+ jlog(JLOG_SUMMARY, "Linked: %zu files", stats.linked);
+#ifdef HAVE_XATTR
+ jlog(JLOG_SUMMARY, "Compared: %zu xattrs", stats.xattr_comparisons);
+#endif
+ jlog(JLOG_SUMMARY, "Compared: %zu files", stats.comparisons);
+ jlog(JLOG_SUMMARY, "Saved: %s", format(stats.saved));
+ jlog(JLOG_SUMMARY, "Duration: %.2f seconds", gettime() - stats.start_time);
}
-static void process_path(struct hardlink_ctl *ctl, const char *name)
+/**
+ * handle_interrupt - Handle a signal
+ *
+ * Acts on the signal number stored in last_signal. For SIGINT and SIGTERM
+ * the function returns before last_signal is cleared, so later calls keep
+ * returning %TRUE. For SIGUSR1 the statistics are printed, followed by an
+ * empty line. In every case other than SIGINT/SIGTERM, last_signal is reset
+ * to 0 before returning.
+ *
+ * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
+ */
+static hl_bool handle_interrupt(void)
{
- struct stat st, st2, st3;
- const size_t namelen = strlen(name);
-
- ctl->nobjects++;
- if (lstat(name, &st))
- return;
-
- if (st.st_dev != ctl->dev && !ctl->force) {
- if (ctl->dev)
- errx(EXIT_FAILURE,
- _("%s is on different filesystem than the rest "
- "(use -f option to override)."), name);
- ctl->dev = st.st_dev;
- }
- if (S_ISDIR(st.st_mode)) {
- struct hardlink_dir *dp = xmalloc(add3(sizeof(*dp), namelen, 1));
- memcpy(dp->name, name, namelen + 1);
- dp->next = ctl->dirs;
- ctl->dirs = dp;
-
- } else if (S_ISREG(st.st_mode)) {
- int fd, i;
- struct hardlink_file *fp, *fp2;
- struct hardlink_hash *hp;
- const char *n1, *n2;
- unsigned int buf[NBUF];
- int cksumsize = sizeof(buf);
- unsigned int cksum;
- time_t mtime = ctl->content_only ? 0 : st.st_mtime;
- unsigned int hsh = hash(st.st_size, mtime);
- off_t fsize;
-
- ctl->nregfiles++;
- if (ctl->verbose > 1)
- printf("%s\n", name);
-
- fd = open(name, O_RDONLY);
- if (fd < 0)
- return;
-
- if ((size_t)st.st_size < sizeof(buf)) {
- cksumsize = st.st_size;
- memset(((char *)buf) + cksumsize, 0,
- (sizeof(buf) - cksumsize) % sizeof(buf[0]));
- }
- if (read(fd, buf, cksumsize) != cksumsize) {
- close(fd);
- return;
- }
- cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]);
- for (i = 0, cksum = 0; i < cksumsize; i++) {
- if (cksum + buf[i] < cksum)
- cksum += buf[i] + 1;
- else
- cksum += buf[i];
- }
- for (hp = ctl->hps[hsh]; hp; hp = hp->next) {
- if (hp->size == st.st_size && hp->mtime == mtime)
- break;
- }
- if (!hp) {
- hp = xmalloc(sizeof(*hp));
- hp->size = st.st_size;
- hp->mtime = mtime;
- hp->chain = NULL;
- hp->next = ctl->hps[hsh];
- ctl->hps[hsh] = hp;
- }
- for (fp = hp->chain; fp; fp = fp->next) {
- if (fp->cksum == cksum)
- break;
- }
- for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) {
- if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) {
- close(fd);
- return;
- }
- }
- for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) {
-
- if (!lstat(fp2->name, &st2) && S_ISREG(st2.st_mode) &&
- !stcmp(&st, &st2, ctl->content_only) &&
- st2.st_ino != st.st_ino &&
- st2.st_dev == st.st_dev) {
-
- int fd2 = open(fp2->name, O_RDONLY);
- if (fd2 < 0)
- continue;
-
- if (fstat(fd2, &st2) || !S_ISREG(st2.st_mode)
- || st2.st_size == 0) {
- close(fd2);
- continue;
- }
- ctl->ncomp++;
- lseek(fd, 0, SEEK_SET);
-
- for (fsize = st.st_size; fsize > 0;
- fsize -= (off_t)sizeof(ctl->iobuf1)) {
- ssize_t xsz;
- ssize_t rsize = fsize > (ssize_t) sizeof(ctl->iobuf1) ?
- (ssize_t) sizeof(ctl->iobuf1) : fsize;
-
- if ((xsz = read(fd, ctl->iobuf1, rsize)) != rsize)
- warn(_("cannot read %s"), name);
- else if ((xsz = read(fd2, ctl->iobuf2, rsize)) != rsize)
- warn(_("cannot read %s"), fp2->name);
-
- if (xsz != rsize) {
- close(fd);
- close(fd2);
- return;
- }
- if (memcmp(ctl->iobuf1, ctl->iobuf2, rsize) != 0)
- break;
- }
- close(fd2);
- if (fsize > 0)
- continue;
- if (lstat(name, &st3)) {
- warn(_("cannot stat %s"), name);
- close(fd);
- return;
- }
- st3.st_atime = st.st_atime;
- if (stcmp(&st, &st3, 0)) {
- warnx(_("file %s changed underneath us"), name);
- close(fd);
- return;
- }
- n1 = fp2->name;
- n2 = name;
-
- if (!ctl->no_link) {
- const char *suffix =
- ".$$$___cleanit___$$$";
- const size_t suffixlen = strlen(suffix);
- size_t n2len = strlen(n2);
- struct hardlink_dynstr nam2 = { NULL, 0 };
-
- growstr(&nam2, add2(n2len, suffixlen));
- memcpy(nam2.buf, n2, n2len);
- memcpy(&nam2.buf[n2len], suffix,
- suffixlen + 1);
- /* First create a temporary link to n1 under a new name */
- if (link(n1, nam2.buf)) {
- warn(_("failed to hardlink %s to %s (create temporary link as %s failed)"),
- n1, n2, nam2.buf);
- free(nam2.buf);
- continue;
- }
- /* Then rename into place over the existing n2 */
- if (rename(nam2.buf, n2)) {
- warn(_("failed to hardlink %s to %s (rename temporary link to %s failed)"),
- n1, n2, n2);
- /* Something went wrong, try to remove the now redundant temporary link */
- if (unlink(nam2.buf))
- warn(_("failed to remove temporary link %s"), nam2.buf);
- free(nam2.buf);
- continue;
- }
- free(nam2.buf);
- }
- ctl->nlinks++;
- if (st3.st_nlink > 1) {
- /* We actually did not save anything this time, since the link second argument
- had some other links as well. */
- if (ctl->verbose > 1)
- printf(_(" %s %s to %s\n"),
- (ctl->no_link ? _("Would link") : _("Linked")),
- n1, n2);
- } else {
- ctl->nsaved += ((st.st_size + 4095) / 4096) * 4096;
- if (ctl->verbose > 1)
- printf(_(" %s %s to %s, %s %jd\n"),
- (ctl->no_link ? _("Would link") : _("Linked")),
- n1, n2,
- (ctl->no_link ? _("would save") : _("saved")),
- (intmax_t)st.st_size);
- }
- close(fd);
- return;
- }
- }
- fp2 = xmalloc(add3(sizeof(*fp2), namelen, 1));
- close(fd);
- fp2->ino = st.st_ino;
- fp2->dev = st.st_dev;
- fp2->cksum = cksum;
- memcpy(fp2->name, name, namelen + 1);
-
- if (fp) {
- fp2->next = fp->next;
- fp->next = fp2;
- } else {
- fp2->next = hp->chain;
- hp->chain = fp2;
- }
- return;
- }
+ switch (last_signal) {
+ case SIGINT:
+ case SIGTERM:
+ return TRUE; /* returns before last_signal is cleared below */
+ case SIGUSR1:
+ print_stats();
+ putchar('\n');
+ break;
+ }
+
+ last_signal = 0; /* the pending signal, if any, has been dealt with */
+ return FALSE;
}
-int main(int argc, char **argv)
+#ifdef HAVE_XATTR
+
+/**
+ * malloc_or_die -- Wrapper for malloc()
+ * @size: The number of bytes to allocate; zero is allowed
+ *
+ * This does the same thing as malloc() except that it never returns NULL:
+ * if memory can't be allocated, the failure is logged at JLOG_SYSFAT and the
+ * program exits with status 1.
+ *
+ * A request for zero bytes is rounded up to one byte. malloc(0) is allowed to
+ * return NULL without being out of memory, and that must not be mistaken for
+ * an allocation failure.
+ *
+ * Returns: a pointer that the caller releases with free().
+ */
+static void *malloc_or_die(size_t size)
{
- int ch;
- int i;
-#ifdef HAVE_PCRE
- int errornumber;
- PCRE2_SIZE erroroffset;
- pcre2_code *re = NULL;
- PCRE2_SPTR exclude_pattern = NULL;
- pcre2_match_data *match_data = NULL;
-#endif
- struct hardlink_dynstr nam1 = { NULL, 0 };
- struct hardlink_ctl *ctl = &global_ctl;
-
- static const struct option longopts[] = {
- { "content", no_argument, NULL, 'c' },
- { "dry-run", no_argument, NULL, 'n' },
- { "exclude", required_argument, NULL, 'x' },
- { "force", no_argument, NULL, 'f' },
- { "help", no_argument, NULL, 'h' },
- { "verbose", no_argument, NULL, 'v' },
- { "version", no_argument, NULL, 'V' },
- { NULL, 0, NULL, 0 },
- };
-
- setlocale(LC_ALL, "");
- bindtextdomain(PACKAGE, LOCALEDIR);
- textdomain(PACKAGE);
- close_stdout_atexit();
-
- while ((ch = getopt_long(argc, argv, "cnvfx:Vh", longopts, NULL)) != -1) {
- switch (ch) {
- case 'n':
- ctl->no_link = 1;
- break;
- case 'v':
- ctl->verbose++;
- break;
- case 'c':
- ctl->content_only = 1;
- break;
- case 'f':
- ctl->force = 1;
- break;
- case 'x':
-#ifdef HAVE_PCRE
- exclude_pattern = (PCRE2_SPTR) optarg;
+ /* Never ask for zero bytes, so that NULL always means a real failure */
+ void *mem = malloc(size ? size : 1);
+
+ if (!mem) {
+ jlog(JLOG_SYSFAT, "Cannot allocate memory");
+ exit(1);
+ }
+ return mem;
+}
+
+/**
+ * llistxattr_or_die - Wrapper for llistxattr()
+ * @path: The file whose attribute names are listed
+ * @list: The buffer receiving the names, passed through to llistxattr()
+ * @size: The size of @list, passed through to llistxattr()
+ *
+ * This does the same thing as llistxattr() except that it aborts if any error
+ * other than "not supported" is detected: the error is logged at JLOG_SYSFAT
+ * and the program exits with status 1.
+ *
+ * Returns: the value returned by llistxattr(). This is still negative when
+ * the call failed with ENOTSUP, so callers have to handle that case.
+ */
+static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
+{
+ ssize_t len = llistxattr(path, list, size);
+
+ if (len < 0 && errno != ENOTSUP) {
+ jlog(JLOG_SYSFAT, "Cannot get xattr names for %s", path);
+ exit(1);
+ }
+ return len;
+}
+
+/**
+ * lgetxattr_or_die - Wrapper for lgetxattr()
+ * @path: The file whose attribute is read
+ * @name: The name of the attribute
+ * @value: The buffer receiving the value, passed through to lgetxattr()
+ * @size: The size of @value, passed through to lgetxattr()
+ *
+ * This does the same thing as lgetxattr() except that it aborts upon error:
+ * any negative result is logged at JLOG_SYSFAT and the program exits with
+ * status 1.
+ *
+ * Returns: the non-negative value returned by lgetxattr().
+ */
+static ssize_t lgetxattr_or_die(const char *path, const char *name, void *value,
+ size_t size)
+{
+ ssize_t len = lgetxattr(path, name, value, size);
+
+ if (len < 0) {
+ jlog(JLOG_SYSFAT, "Cannot get xattr value of %s for %s", name, path);
+ exit(1);
+ }
+ return len;
+}
+
+/**
+ * get_xattr_name_count - Count the number of xattr names
+ * @names: a non-empty table of concatenated, null-terminated xattr names
+ * @len: the total length of the table
+ *
+ * The table is walked name by name, stepping over each string and its
+ * terminating null byte. The walk relies on strlen(), so the last name in the
+ * table must be null-terminated within @len bytes.
+ *
+ * @Returns the number of xattr names
+ */
+static int get_xattr_name_count(const char *const names, ssize_t len)
+{
+ int count = 0;
+ const char *name;
+
+ for (name = names; name < (names + len); name += strlen(name) + 1)
+ count++;
+
+ return count;
+}
+
+/**
+ * cmp_xattr_name_ptrs - Compare two pointers to xattr names by comparing
+ * the names they point to.
+ * @ptr1: Pointer to the first table slot (a pointer to a string)
+ * @ptr2: Pointer to the second table slot (a pointer to a string)
+ *
+ * Has the signature of a qsort() comparison callback.
+ *
+ * Returns: the strcmp() result for the two names.
+ */
+static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
+{
+ return strcmp(*(char *const *) ptr1, *(char *const *) ptr2);
+}
+
+/**
+ * get_sorted_xattr_name_table - Create a sorted table of xattr names.
+ * @names - table of concatenated, null-terminated xattr names
+ * @n - the number of names
+ *
+ * Only the pointer table is allocated. Its entries point into @names, so
+ * @names must stay valid for as long as the table is used. The names are
+ * ordered by strcmp().
+ *
+ * @Returns allocated table of pointers to the names, sorted alphabetically;
+ * the caller releases it with free()
+ */
+static const char **get_sorted_xattr_name_table(const char *names, int n)
+{
+ const char **table = malloc_or_die(n * sizeof(char *));
+ int i;
+
+ for (i = 0; i < n; i++) {
+ table[i] = names;
+ names += strlen(names) + 1; // step over the name and its null byte
+ }
+
+ qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
+
+ return table;
+}
+
+/**
+ * file_xattrs_equal - Compare the extended attributes of two files
+ * @a: The first file
+ * @b: The second file
+ *
+ * The first path recorded for each file is queried with llistxattr() and
+ * lgetxattr(). The attribute names of both files are sorted, so the order in
+ * which the system lists them does not matter, and then names and values are
+ * compared pairwise.
+ *
+ * Every name list and every value is fetched twice: once to learn its size
+ * and once to read it. Extended attributes are external state that another
+ * process may change between those two calls. If the second call reports a
+ * length that no longer matches, the files are treated as different instead
+ * of tripping an assertion.
+ *
+ * @Returns: %TRUE if and only if extended attributes are equal. %FALSE is also
+ * returned when handle_interrupt() reports that the user wants to quit.
+ */
+static hl_bool file_xattrs_equal(const struct file *a, const struct file *b)
+{
+ ssize_t len_a;
+ ssize_t len_b;
+ char *names_a = NULL;
+ char *names_b = NULL;
+ int n_a;
+ int n_b;
+ const char **name_ptrs_a = NULL;
+ const char **name_ptrs_b = NULL;
+ void *value_a = NULL;
+ void *value_b = NULL;
+ hl_bool ret = FALSE;
+ int i;
+
+ assert(a->links != NULL);
+ assert(b->links != NULL);
+
+ jlog(JLOG_DEBUG1, "Comparing xattrs of %s to %s", a->links->path,
+ b->links->path);
+
+ stats.xattr_comparisons++;
+
+ len_a = llistxattr_or_die(a->links->path, NULL, 0);
+ len_b = llistxattr_or_die(b->links->path, NULL, 0);
+
+ if (len_a <= 0 && len_b <= 0)
+ return TRUE; // xattrs not supported or neither file has any
+
+ if (len_a != len_b)
+ return FALSE; // total lengths of xattr names differ
+
+ names_a = malloc_or_die(len_a);
+ names_b = malloc_or_die(len_b);
+
+ len_a = llistxattr_or_die(a->links->path, names_a, len_a);
+ len_b = llistxattr_or_die(b->links->path, names_b, len_b);
+
+ if (len_a <= 0 || len_a != len_b)
+ goto exit; // a name list changed after it was sized
+
+ n_a = get_xattr_name_count(names_a, len_a);
+ n_b = get_xattr_name_count(names_b, len_b);
+
+ if (n_a != n_b)
+ goto exit; // numbers of xattrs differ
+
+ name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
+ name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
+
+ // We now have two sorted tables of xattr names.
+
+ for (i = 0; i < n_a; i++) {
+ if (handle_interrupt())
+ goto exit; // user wants to quit
+
+ if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
+ goto exit; // names at same slot differ
+
+ len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
+ len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
+
+ if (len_a != len_b)
+ goto exit; // xattrs with same name, different value lengths
+
+ if (len_a == 0)
+ continue; // both values are empty: equal, nothing to fetch
+
+ value_a = malloc_or_die(len_a);
+ value_b = malloc_or_die(len_b);
+
+ len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
+ value_a, len_a);
+ len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
+ value_b, len_b);
+
+ if (len_a != len_b)
+ goto exit; // a value changed size after it was measured
+
+ if (memcmp(value_a, value_b, len_a) != 0)
+ goto exit; // xattrs with same name, different values
+
+ free(value_a);
+ free(value_b);
+ value_a = NULL; // keeps the cleanup below from freeing them twice
+ value_b = NULL;
+ }
+
+ ret = TRUE;
+
+ exit:
+ free(names_a);
+ free(names_b);
+ free(name_ptrs_a);
+ free(name_ptrs_b);
+ free(value_a);
+ free(value_b);
+ return ret;
+}
#else
- errx(EXIT_FAILURE,
- _("option --exclude not supported (built without pcre2)"));
+static hl_bool file_xattrs_equal(const struct file *a, const struct file *b) /* variant used when HAVE_XATTR is not defined */
+{
+ return TRUE; /* nothing is compared in this build, so the attributes never count as different */
+}
#endif
- break;
- case 'V':
- print_version(EXIT_SUCCESS);
- case 'h':
- usage();
- default:
- errtryhelp(EXIT_FAILURE);
- }
- }
-
- if (optind == argc) {
- warnx(_("no directory specified"));
- errtryhelp(EXIT_FAILURE);
- }
-
-#ifdef HAVE_PCRE
- if (exclude_pattern) {
- re = pcre2_compile(exclude_pattern, /* the pattern */
- PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */
- 0, /* default options */
- &errornumber, &erroroffset, NULL); /* use default compile context */
- if (!re) {
- PCRE2_UCHAR buffer[256];
- pcre2_get_error_message(errornumber, buffer,
- sizeof(buffer));
- errx(EXIT_FAILURE, _("pattern error at offset %d: %s"),
- (int)erroroffset, buffer);
- }
- match_data = pcre2_match_data_create_from_pattern(re, NULL);
- }
+
+/**
+ * file_contents_equal - Compare contents of two files for equality
+ * @a: The first file
+ * @b: The second file
+ *
+ * Open the first path of each file and compare the data in chunks of
+ * sizeof(buf_a) bytes. Every call increments stats.comparisons.
+ *
+ * Returns non-zero only if both files could be opened and read, the data
+ * and lengths are equal, and handle_interrupt() reports nothing. Failures
+ * to open or read a file are logged and make the result zero.
+ */
+static hl_bool file_contents_equal(const struct file *a, const struct file *b)
+{
+    FILE *fa = NULL;
+    FILE *fb = NULL;
+    char buf_a[8192];
+    char buf_b[8192];
+    int cmp = 0;                /* zero => equal */
+
+    assert(a->links != NULL);
+    assert(b->links != NULL);
+
+    jlog(JLOG_DEBUG1, "Comparing %s to %s", a->links->path, b->links->path);
+
+    stats.comparisons++;
+
+    if ((fa = fopen(a->links->path, "rb")) == NULL)
+        goto err;
+    if ((fb = fopen(b->links->path, "rb")) == NULL)
+        goto err;
+
+    /* Advisory only: the return values are intentionally not checked */
+    posix_fadvise(fileno(fa), 0, 0, POSIX_FADV_SEQUENTIAL);
+    posix_fadvise(fileno(fb), 0, 0, POSIX_FADV_SEQUENTIAL);
+
+    while (!handle_interrupt() && cmp == 0) {
+        size_t ca;
+        size_t cb;
+
+        /* A short read is only an error if the stream says so */
+        ca = fread(buf_a, 1, sizeof(buf_a), fa);
+        if (ca < sizeof(buf_a) && ferror(fa))
+            goto err;
+
+        cb = fread(buf_b, 1, sizeof(buf_b), fb);
+        if (cb < sizeof(buf_b) && ferror(fb))
+            goto err;
+
+        /* Different chunk sizes => different lengths; 0 == 0 => both at EOF */
+        if (ca != cb || ca == 0) {
+            cmp = CMP(ca, cb);
+            break;
+        }
+        cmp = memcmp(buf_a, buf_b, ca);
+    }
+ out:
+    if (fa != NULL)
+        fclose(fa);
+    if (fb != NULL)
+        fclose(fb);
+    return !handle_interrupt() && cmp == 0;
+ err:
+    /* A NULL stream means fopen() failed; otherwise a read failed */
+    if (fa == NULL || fb == NULL)
+        jlog(JLOG_SYSERR, "Cannot open %s",
+             fa ? b->links->path : a->links->path);
+    else
+        jlog(JLOG_SYSERR, "Cannot read %s",
+             ferror(fa) ? a->links->path : b->links->path);
+    cmp = 1;
+    goto out;
+}
+
+/**
+ * file_may_link_to - Check whether a file may replace another one
+ * @a: The first file
+ * @b: The second file
+ *
+ * Check whether the two files are considered equal and can be linked
+ * together. Files that already share an inode yield FALSE, because
+ * replacing a link with an identical one would achieve nothing. The cheap
+ * stat-based checks run first; the xattr and content comparisons are only
+ * reached when everything before them matched.
+ */
+static hl_bool file_may_link_to(const struct file *a, const struct file *b)
+{
+    /* Empty files and files of different size are never candidates */
+    if (a->st.st_size == 0)
+        return FALSE;
+    if (a->st.st_size != b->st.st_size)
+        return FALSE;
+
+    /* Both sides need at least one remaining path */
+    if (a->links == NULL || b->links == NULL)
+        return FALSE;
+
+    /* Same device required, and not already the same inode */
+    if (a->st.st_dev != b->st.st_dev)
+        return FALSE;
+    if (a->st.st_ino == b->st.st_ino)
+        return FALSE;
+
+    /* Optional metadata constraints */
+    if (opts.respect_mode && a->st.st_mode != b->st.st_mode)
+        return FALSE;
+    if (opts.respect_owner && a->st.st_uid != b->st.st_uid)
+        return FALSE;
+    if (opts.respect_owner && a->st.st_gid != b->st.st_gid)
+        return FALSE;
+    if (opts.respect_time && a->st.st_mtime != b->st.st_mtime)
+        return FALSE;
+    if (opts.respect_name &&
+        strcmp(a->links->path + a->links->basename,
+               b->links->path + b->links->basename) != 0)
+        return FALSE;
+    if (opts.respect_xattrs && !file_xattrs_equal(a, b))
+        return FALSE;
+
+    /* The content comparison comes last */
+    return file_contents_equal(a, b);
+}
+
+/**
+ * file_compare - Compare two files to decide which should be master
+ * @a: The first file
+ * @b: The second file
+ *
+ * Decide which of the two files ranks higher and should therefore serve as
+ * the master when linking (the master is the file that all equal files will
+ * be replaced with). Two entries with the same device and inode compare
+ * equal. Otherwise the criteria are applied in this order until one of them
+ * differs: link count (only with opts.maximise / opts.minimise), mtime
+ * (direction chosen by opts.keep_oldest), and finally the inode number.
+ */
+static int file_compare(const struct file *a, const struct file *b)
+{
+    int order;
+
+    if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
+        return 0;
+
+    if (opts.maximise) {
+        order = CMP(a->st.st_nlink, b->st.st_nlink);
+        if (order != 0)
+            return order;
+    }
+
+    if (opts.minimise) {
+        order = CMP(b->st.st_nlink, a->st.st_nlink);
+        if (order != 0)
+            return order;
+    }
+
+    if (opts.keep_oldest)
+        order = CMP(b->st.st_mtime, a->st.st_mtime);
+    else
+        order = CMP(a->st.st_mtime, b->st.st_mtime);
+    if (order != 0)
+        return order;
+
+    /* Last resort: the inode number, compared in reverse */
+    order = CMP(b->st.st_ino, a->st.st_ino);
+    return order;
+}
+
+/**
+ * file_link - Replace b with a link to a
+ * @a: The first file
+ * @b: The second file
+ *
+ * Replace every path of @b with a hard link to @a. For each path, @a is
+ * first linked to a temporary name next to it, which is then renamed over
+ * the path of @b, making the replacement atomic (@b will always exist).
+ * With opts.dry_run set, nothing on disk is touched but the bookkeeping
+ * (statistics, link counts, link lists) is still updated.
+ *
+ * Returns TRUE once all paths of @b were moved to @a, and FALSE as soon as
+ * link() or rename() fails. On failure errno holds the error of the failed
+ * call, so that callers can test it (e.g. for EMLINK).
+ */
+static hl_bool file_link(struct file *a, struct file *b)
+{
+    do {
+        assert(a->links != NULL);
+        assert(b->links != NULL);
+
+        jlog(JLOG_INFO, "%sLinking %s to %s (-%s)",
+             opts.dry_run ? "[DryRun] " : "", a->links->path, b->links->path,
+             format(a->st.st_size));
+
+        if (!opts.dry_run) {
+            size_t len = strlen(b->links->path) + strlen(".hardlink-temporary") + 1;
+            char *new_path = malloc(len);
+            int saved_errno;
+
+            if (new_path == NULL) {
+                jlog(JLOG_SYSFAT, "Cannot allocate memory");
+                exit(1);
+            }
+
+            snprintf(new_path, len, "%s.hardlink-temporary", b->links->path);
+
+            if (link(a->links->path, new_path) != 0) {
+                /* Logging and free() may clobber errno; keep it for the caller */
+                saved_errno = errno;
+                jlog(JLOG_SYSERR, "Cannot link %s to %s", a->links->path, new_path);
+                free(new_path);
+                errno = saved_errno;
+                return FALSE;
+            }
+
+            if (rename(new_path, b->links->path) != 0) {
+                saved_errno = errno;
+                /* Report the paths that were actually passed to rename() */
+                jlog(JLOG_SYSERR, "Cannot rename %s to %s", new_path,
+                     b->links->path);
+                unlink(new_path);       /* cleanup failed rename */
+                free(new_path);
+                errno = saved_errno;
+                return FALSE;
+            }
+            free(new_path);
+        }
+
+        /* Update statistics */
+        stats.linked++;
+
+        /* Increase the link count of this file, and set stat() of other file */
+        a->st.st_nlink++;
+        b->st.st_nlink--;
+
+        /* Space is only counted as saved once the last link of b is gone */
+        if (b->st.st_nlink == 0)
+            stats.saved += a->st.st_size;
+
+        /* Move the link from file b to a */
+        {
+            struct link *new_link = b->links;
+
+            b->links = b->links->next;
+            new_link->next = a->links->next;
+            a->links->next = new_link;
+        }
+
+        /* Repeat for every remaining path of b */
+    } while (b->links != NULL);
+
+    return TRUE;
+}
+
+/**
+ * inserter - Callback function for nftw()
+ * @fpath: The path of the file being visited
+ * @sb: The stat information of the file
+ * @typeflag: The type flag
+ * @ftwbuf: Contains current level of nesting and offset of basename
+ *
+ * Called by nftw() for each visited entry. Regular files that pass the
+ * include/exclude patterns and the minimum size are recorded: a path whose
+ * inode is already known is added to that inode's list of links, a new
+ * inode is added to the by-size tree, ordered by file_compare().
+ *
+ * Returns 0 to continue the walk and 1 to stop it (interrupt or allocation
+ * failure). See the manual page for nftw() for further information.
+ */
+static int inserter(const char *fpath, const struct stat *sb, int typeflag,
+                    struct FTW *ftwbuf)
+{
+    struct file *entry;
+    struct file **slot;
+    struct file *prev;
+    size_t path_size;
+    hl_bool is_included;
+    hl_bool is_excluded;
+
+    if (handle_interrupt())
+        return 1;
+    if (typeflag == FTW_DNR || typeflag == FTW_NS)
+        jlog(JLOG_SYSERR, "Cannot read %s", fpath);
+    if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
+        return 0;
+
+    is_included = regexec_any(opts.include, fpath);
+    is_excluded = regexec_any(opts.exclude, fpath);
+
+    if (opts.exclude) {
+        /* With exclude patterns, an include match rescues the file */
+        if (is_excluded && !is_included)
+            return 0;
+    } else if (opts.include && !is_included) {
+        /* Only include patterns given: the file has to match one */
+        return 0;
+    }
+
+    stats.files++;
+
+    if (sb->st_size < opts.min_size) {
+        jlog(JLOG_DEBUG1, "Skipped %s (smaller than configured size)", fpath);
+        return 0;
+    }
+
+    jlog(JLOG_DEBUG2, "Visiting %s (file %zu)", fpath, stats.files);
+
+    /* Size of the path including its terminating NUL */
+    path_size = strlen(fpath) + 1;
+
+    entry = calloc(1, sizeof(*entry));
+    if (entry == NULL) {
+        jlog(JLOG_SYSFAT, "Cannot continue");
+        return 1;
+    }
+
+    /* The path is stored in the same allocation, right after the link */
+    entry->links = calloc(1, sizeof(struct link) + path_size);
+    if (entry->links == NULL) {
+        jlog(JLOG_SYSFAT, "Cannot continue");
+        return 1;
+    }
+
+    entry->st = *sb;
+    entry->links->basename = ftwbuf->base;
+    entry->links->next = NULL;
+    memcpy(entry->links->path, fpath, path_size);
+
+    slot = tsearch(entry, &files_by_ino, compare_nodes_ino);
+    if (slot == NULL) {
+        jlog(JLOG_SYSFAT, "Cannot continue");
+        return 1;
+    }
+
+    if (*slot != entry) {
+        /* Already known inode, add link to inode information */
+        assert((*slot)->st.st_dev == sb->st_dev);
+        assert((*slot)->st.st_ino == sb->st_ino);
+
+        entry->links->next = (*slot)->links;
+        (*slot)->links = entry->links;
+
+        free(entry);
+        return 0;
+    }
+
+    /* New inode, insert into by-size table */
+    slot = tsearch(entry, &files, compare_nodes);
+    if (slot == NULL) {
+        jlog(JLOG_SYSFAT, "Cannot continue");
+        return 1;
+    }
+
+    /* First file for this tree node: nothing to chain */
+    if (*slot == entry)
+        return 0;
+
+    if (file_compare(entry, *slot) >= 0) {
+        /* Ranks at least as high as the current head: becomes the head */
+        entry->next = *slot;
+        *slot = entry;
+        return 0;
+    }
+
+    /* Skip all successors that still rank above the new entry */
+    prev = *slot;
+    while (prev->next != NULL && file_compare(entry, prev->next) < 0)
+        prev = prev->next;
+
+    entry->next = prev->next;
+    prev->next = entry;
+
+    return 0;
+}
+
+/**
+ * visitor - Callback for twalk()
+ * @nodep: Pointer to a pointer to a #struct file
+ * @which: At which point this visit is (preorder, postorder, endorder)
+ * @depth: The depth of the node in the tree
+ *
+ * Handle each tree node once (as a leaf, or at endorder). Every
+ * #struct file in the node's list that still has links acts as master in
+ * turn; each later list entry that file_may_link_to() accepts is passed to
+ * file_link(). The process exits with status 1 if handle_interrupt()
+ * reports an interrupt.
+ */
+static void visitor(const void *nodep, const VISIT which, const int depth)
+{
+    struct file *keeper = *(struct file **) nodep;
+    struct file *candidate;
+
+    (void) depth;
+
+    if (!(which == leaf || which == endorder))
+        return;
+
+    for (; keeper != NULL; keeper = keeper->next) {
+        if (handle_interrupt())
+            exit(1);
+        if (keeper->links == NULL)
+            continue;
+
+        for (candidate = keeper->next; candidate != NULL;
+             candidate = candidate->next) {
+            if (handle_interrupt())
+                exit(1);
+
+            assert(candidate != candidate->next);
+            assert(candidate->st.st_size == keeper->st.st_size);
+
+            if (candidate->links == NULL)
+                continue;
+            if (!file_may_link_to(keeper, candidate))
+                continue;
+            if (file_link(keeper, candidate))
+                continue;
+
+            /* Linking failed with EMLINK: continue with the candidate as master */
+            if (errno == EMLINK)
+                keeper = candidate;
+        }
+    }
+}
+
+/**
+ * version - Print the program version and exit
+ *
+ * Writes the version and the compile date/time to standard output. Does
+ * not return; the process exits with status 0.
+ */
+static int version(void)
+{
+    fputs("hardlink 0.3 RC2\n", stdout);
+    printf("Compiled %s at %s\n", __DATE__, __TIME__);
+    exit(EXIT_SUCCESS);
+}
+
+/**
+ * help - Print the program help and exit
+ * @name: The name of the program executable (argv[0])
+ *
+ * Writes the usage text to standard output. Does not return; the process
+ * exits with status 0.
+ */
+static int help(const char *name)
+{
+    printf("Usage: %s [options] directory|file ...\n", name);
+    puts("Options:");
+    puts(" -V, --version show program's version number and exit");
+    puts(" -h, --help show this help message and exit");
+    puts(" -v, --verbose Increase verbosity (repeat for more verbosity)");
+    puts(" -n, --dry-run Modify nothing, just print what would happen");
+    puts(" -f, --respect-name Filenames have to be identical");
+    puts(" -p, --ignore-mode Ignore changes of file mode");
+    puts(" -o, --ignore-owner Ignore owner changes");
+    puts(" -t, --ignore-time Ignore timestamps (when testing for equality)");
+#ifdef HAVE_XATTR
+    puts(" -X, --respect-xattrs Respect extended attributes");
#endif
- atexit(print_summary);
-
- for (i = optind; i < argc; i++)
- process_path(ctl, argv[i]);
-
- while (ctl->dirs) {
- DIR *dh;
- struct dirent *di;
- struct hardlink_dir *dp = ctl->dirs;
- size_t nam1baselen = strlen(dp->name);
-
- ctl->dirs = dp->next;
- growstr(&nam1, add2(nam1baselen, 1));
- memcpy(nam1.buf, dp->name, nam1baselen);
- free(dp);
- nam1.buf[nam1baselen++] = '/';
- nam1.buf[nam1baselen] = 0;
- dh = opendir(nam1.buf);
-
- if (dh == NULL)
- continue;
- ctl->ndirs++;
-
- while ((di = readdir(dh)) != NULL) {
- if (!di->d_name[0])
- continue;
- if (di->d_name[0] == '.') {
- if (!di->d_name[1] || !strcmp(di->d_name, ".."))
- continue;
- }
-#ifdef HAVE_PCRE
- if (re && pcre2_match(re, /* compiled regex */
- (PCRE2_SPTR) di->d_name, strlen(di->d_name), 0, /* start at offset 0 */
- 0, /* default options */
- match_data, /* block for storing the result */
- NULL) /* use default match context */
- >=0) {
- if (ctl->verbose) {
- nam1.buf[nam1baselen] = 0;
- printf(_("Skipping %s%s\n"), nam1.buf, di->d_name);
- }
- continue;
- }
+    puts(" -m, --maximize Maximize the hardlink count, remove the file with");
+    puts(" lowest hardlink count");
+    puts(" -M, --minimize Reverse the meaning of -m");
+    puts(" -O, --keep-oldest Keep the oldest file of multiple equal files");
+    puts(" (lower precedence than minimize/maximize)");
+    puts(" -x REGEXP, --exclude=REGEXP");
+    puts(" Regular expression to exclude files");
+    puts(" -i REGEXP, --include=REGEXP");
+    puts(" Regular expression to include files/dirs");
+    puts(" -s <num>[K,M,G], --minimum-size=<num>[K,M,G]");
+    puts(" Minimum size for files. Optional suffix");
+    puts(" allows for using KiB, MiB, or GiB");
+    puts("");
+    puts("Compatibility options to Jakub Jelinek's hardlink:");
+    puts(" -c Compare only file contents, same as -pot");
+
+#ifndef HAVE_GETOPT_LONG
+    puts("");
+    puts("Your system only supports the short option names given above.");
#endif
- {
- size_t subdirlen;
- growstr(&nam1,
- add2(nam1baselen, subdirlen =
- strlen(di->d_name)));
- memcpy(&nam1.buf[nam1baselen], di->d_name,
- add2(subdirlen, 1));
- }
- process_path(ctl, nam1.buf);
- }
- closedir(dh);
- }
-#ifdef HAVE_PCRE
- pcre2_code_free(re);
- pcre2_match_data_free(match_data);
+    exit(0);
+}
+
+/**
+ * register_regex - Compile and insert a regular expression into list
+ * @pregs: Pointer to a linked list of regular expressions
+ * @regex: String containing the regular expression to be compiled
+ *
+ * The expression is compiled as an extended regular expression without
+ * sub-match reporting and pushed onto the front of @pregs.
+ *
+ * Returns 0 on success and 1 if the expression does not compile (the error
+ * is logged). Exits the process if memory cannot be allocated.
+ */
+static int register_regex(struct regex_link **pregs, const char *regex)
+{
+    struct regex_link *entry = malloc(sizeof(*entry));
+    int rc;
+
+    if (entry == NULL) {
+        jlog(JLOG_SYSFAT, "Cannot allocate memory");
+        exit(1);
+    }
+
+    rc = regcomp(&entry->preg, regex, REG_NOSUB | REG_EXTENDED);
+
+    if (rc == 0) {
+        /* Success: the new entry becomes the head of the list */
+        entry->next = *pregs;
+        *pregs = entry;
+        return 0;
+    }
+
+    {
+        /* A first regerror() call with no buffer yields the needed size */
+        size_t size = regerror(rc, &entry->preg, NULL, 0);
+        char *message = malloc(size + 1);
+
+        if (message == NULL) {
+            jlog(JLOG_SYSFAT, "Cannot allocate memory");
+            exit(1);
+        }
+
+        regerror(rc, &entry->preg, message, size);
+
+        jlog(JLOG_FATAL, "Could not compile regular expression %s: %s",
+             regex, message);
+        free(message);
+    }
+
+    free(entry);
+    return 1;
+}
+
+/**
+ * parse_options - Parse the command line options
+ * @argc: Number of options
+ * @argv: Array of options
+ *
+ * Sets the defaults in the global opts structure and then applies the
+ * options found on the command line. -h and -V print their text and exit.
+ *
+ * Returns 0 on success and 1 if an option or its argument is invalid.
+ */
+static int parse_options(int argc, char *argv[])
+{
+    static const char optstr[] = "VhvnfpotXcmMOx:i:s:";
+#ifdef HAVE_GETOPT_LONG
+    static const struct option long_options[] = {
+        {"version", no_argument, NULL, 'V'},
+        {"help", no_argument, NULL, 'h'},
+        {"verbose", no_argument, NULL, 'v'},
+        {"dry-run", no_argument, NULL, 'n'},
+        {"respect-name", no_argument, NULL, 'f'},
+        {"ignore-mode", no_argument, NULL, 'p'},
+        {"ignore-owner", no_argument, NULL, 'o'},
+        {"ignore-time", no_argument, NULL, 't'},
+        {"respect-xattrs", no_argument, NULL, 'X'},
+        {"maximize", no_argument, NULL, 'm'},
+        {"minimize", no_argument, NULL, 'M'},
+        {"keep-oldest", no_argument, NULL, 'O'},
+        {"exclude", required_argument, NULL, 'x'},
+        {"include", required_argument, NULL, 'i'},
+        {"minimum-size", required_argument, NULL, 's'},
+        {NULL, 0, NULL, 0}
+    };
#endif
- return 0;
+
+    int opt;
+    char unit = '\0';
+
+    opts.respect_mode = TRUE;
+    opts.respect_owner = TRUE;
+    opts.respect_time = TRUE;
+    opts.respect_xattrs = FALSE;
+    opts.keep_oldest = FALSE;
+    opts.min_size = 1;
+
+    while ((opt = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
+        switch (opt) {
+        case 'p':
+            opts.respect_mode = FALSE;
+            break;
+        case 'o':
+            opts.respect_owner = FALSE;
+            break;
+        case 't':
+            opts.respect_time = FALSE;
+            break;
+        case 'X':
+            opts.respect_xattrs = TRUE;
+            break;
+        case 'm':
+            opts.maximise = TRUE;
+            break;
+        case 'M':
+            opts.minimise = TRUE;
+            break;
+        case 'O':
+            opts.keep_oldest = TRUE;
+            break;
+        case 'f':
+            opts.respect_name = TRUE;
+            break;
+        case 'v':
+            opts.verbosity++;
+            break;
+        case 'c':
+            opts.respect_mode = FALSE;
+            opts.respect_name = FALSE;
+            opts.respect_owner = FALSE;
+            opts.respect_time = FALSE;
+            opts.respect_xattrs = FALSE;
+            break;
+        case 'n':
+            opts.dry_run = 1;
+            break;
+        case 'h':
+            return help(argv[0]);
+        case 'V':
+            return version();
+        case 'x':
+            if (register_regex(&opts.exclude, optarg) != 0)
+                return 1;
+            break;
+        case 'i':
+            if (register_regex(&opts.include, optarg) != 0)
+                return 1;
+            break;
+        case 's':
+            /*
+             * sscanf() leaves unit untouched when no suffix follows the
+             * number, so clear any suffix left over from an earlier -s.
+             */
+            unit = '\0';
+            if (sscanf(optarg, "%llu%c", &opts.min_size, &unit) < 1) {
+                jlog(JLOG_ERROR, "Invalid option given to -s: %s", optarg);
+                return 1;
+            }
+            /* <ctype.h> functions need a value representable as unsigned char */
+            switch (tolower((unsigned char) unit)) {
+            case '\0':
+                break;
+            case 't':
+                opts.min_size *= 1024;
+                /* fallthrough */
+            case 'g':
+                opts.min_size *= 1024;
+                /* fallthrough */
+            case 'm':
+                opts.min_size *= 1024;
+                /* fallthrough */
+            case 'k':
+                opts.min_size *= 1024;
+                break;
+            default:
+                jlog(JLOG_ERROR, "Unknown unit indicator %c.", unit);
+                return 1;
+            }
+            /* Printed with the same conversion the value was scanned with */
+            jlog(JLOG_DEBUG1, "Using minimum size of %llu bytes.",
+                 opts.min_size);
+            break;
+        case '?':
+            return 1;
+        default:
+            jlog(JLOG_ERROR, "Unexpected invalid option: -%c\n", opt);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * to_be_called_atexit - Exit handler that prints the statistics
+ *
+ * Registered with atexit(). The statistics are only printed once
+ * stats.started has been set.
+ */
+static void to_be_called_atexit(void)
+{
+    if (!stats.started)
+        return;
+
+    print_stats();
+}
+
+/**
+ * sighandler - Signal handler, sets the global last_signal variable
+ * @i: The signal number
+ *
+ * Stores @i in last_signal unless a SIGINT is already recorded there, and
+ * writes a newline to standard output when @i is SIGINT. Only
+ * async-signal-safe calls are made, and errno is left as it was found.
+ */
+static void sighandler(int i)
+{
+    int saved_errno = errno;
+
+    if (last_signal != SIGINT)
+        last_signal = i;
+
+    if (i == SIGINT) {
+        /* stdio is not async-signal-safe, so use write() instead of putchar() */
+        if (write(STDOUT_FILENO, "\n", 1) < 0) {
+            /* nothing sensible can be done about it in a signal handler */
+        }
+    }
+
+    errno = saved_errno;
+}
+
+/**
+ * main - Program entry point
+ * @argc: Number of command line arguments
+ * @argv: The command line arguments
+ *
+ * Installs the signal handler, parses the options, walks every file or
+ * directory named on the command line with inserter() and finally lets
+ * visitor() process the collected files.
+ *
+ * Returns 0 on success and 1 on invalid usage or setup failure.
+ */
+int main(int argc, char *argv[])
+{
+    struct sigaction action;
+
+    sigfillset(&action.sa_mask);
+    action.sa_flags = SA_RESTART;
+    action.sa_handler = sighandler;
+
+    /* If we receive a SIGINT, end the processing */
+    sigaction(SIGINT, &action, NULL);
+    sigaction(SIGUSR1, &action, NULL);
+
+    /* Pretty print numeric output */
+    setlocale(LC_NUMERIC, "");
+    stats.start_time = gettime();
+
+    if (atexit(to_be_called_atexit) != 0) {
+        jlog(JLOG_SYSFAT, "Cannot register exit handler");
+        return 1;
+    }
+
+    if (parse_options(argc, argv) != 0)
+        return 1;
+
+    /* At least one path has to follow the options */
+    if (optind == argc) {
+        jlog(JLOG_FATAL, "Expected file or directory names");
+        return 1;
+    }
+
+    /* From here on the exit handler prints the statistics */
+    stats.started = TRUE;
+
+    while (optind < argc) {
+        if (nftw(argv[optind], inserter, 20, FTW_PHYS) == -1)
+            jlog(JLOG_SYSERR, "Cannot process %s", argv[optind]);
+        optind++;
+    }
+
+    twalk(files, visitor);
+
+    return 0;
}