]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
move fsr over from xfsdump
authorChristoph Hellwig <hch@lst.de>
Sat, 24 Jan 2009 13:59:31 +0000 (14:59 +0100)
committerChristoph Hellwig <hch@brick.lst.de>
Sat, 24 Jan 2009 13:59:31 +0000 (14:59 +0100)
There's really no reason to hide fsr in xfsdump, so move it over
to xfsprogs now that we have a 3.0 release with some major shaking
pending.  The only code change is to replace the single attr_setf call
with a fsetxattr to avoid a dependency on libattr.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nathan Scott <nscott@aconex.com>
Makefile
fsr/Makefile [new file with mode: 0644]
fsr/xfs_fsr.c [new file with mode: 0644]
man/man8/xfs_fsr.8 [new file with mode: 0644]

index b1a59d60403a729c782e384f596cb10922d1ad36..b450150961d3ff589c6dd4efd3d27a330d18c74f 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ LDIRT = config.log .dep config.status config.cache confdefs.h conftest* \
        Logs/* built .census install.* install-dev.* *.gz
 
 LIB_SUBDIRS = include libxfs libxlog libxcmd libhandle libdisk
-TOOL_SUBDIRS = copy db estimate fsck growfs io logprint mkfs quota \
+TOOL_SUBDIRS = copy db estimate fsck fsr growfs io logprint mkfs quota \
                mdrestore repair rtcp m4 man doc po debian build
 
 SUBDIRS = $(LIB_SUBDIRS) $(TOOL_SUBDIRS)
diff --git a/fsr/Makefile b/fsr/Makefile
new file mode 100644 (file)
index 0000000..b62447a
--- /dev/null
@@ -0,0 +1,19 @@
+#
+# Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+LTCOMMAND = xfs_fsr
+CFILES = xfs_fsr.c
+LLDLIBS = $(LIBHANDLE)
+
+default: $(LTCOMMAND)
+
+include $(BUILDRULES)
+
+install: default
+       $(INSTALL) -m 755 -d $(PKG_BIN_DIR)
+       $(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_BIN_DIR)
+install-dev:
diff --git a/fsr/xfs_fsr.c b/fsr/xfs_fsr.c
new file mode 100644 (file)
index 0000000..9f2bf24
--- /dev/null
@@ -0,0 +1,1600 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <xfs/xfs.h>
+#include <xfs/jdm.h>
+#include <xfs/xfs_dfrag.h>
+
+#include <fcntl.h>
+#include <errno.h>
+#include <malloc.h>
+#include <mntent.h>
+#include <syslog.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/xattr.h>
+
+
+#ifndef XFS_XFLAG_NODEFRAG
+#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */
+#endif
+
+#define _PATH_FSRLAST  "/var/tmp/.fsrlast_xfs"
+
+char *progname;
+
+int vflag;
+int gflag;
+static int Mflag;
+/* static int nflag; */
+int dflag = 0;
+/* static int sflag; */
+int argv_blksz_dio;
+extern int max_ext_size;
+static int npasses = 10;
+static int startpass = 0;
+
+struct getbmap  *outmap = NULL;
+int             outmap_size = 0;
+int            RealUid;
+int            tmp_agi;
+static __int64_t       minimumfree = 2048;
+
+#define MNTTYPE_XFS             "xfs"
+
+#define SMBUFSZ                1024
+#define ROOT           0
+#define NULLFD         -1
+#define GRABSZ         64
+#define TARGETRANGE    10
+#define        V_NONE          0
+#define        V_OVERVIEW      1
+#define        V_ALL           2
+#define BUFFER_SIZE    (1<<16)
+#define BUFFER_MAX     (1<<24)
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+static time_t howlong = 7200;          /* default seconds of reorganizing */
+static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
+static char *mtab = MOUNTED;
+static time_t endtime;
+static time_t starttime;
+static xfs_ino_t       leftoffino = 0;
+static int     pagesize;
+
+void usage(int ret);
+static int  fsrfile(char *fname, xfs_ino_t ino);
+static int  fsrfile_common( char *fname, char *tname, char *mnt,
+                            int fd, xfs_bstat_t *statp);
+static int  packfile(char *fname, char *tname, int fd,
+                     xfs_bstat_t *statp, struct fsxattr *fsxp);
+static void fsrdir(char *dirname);
+static int  fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
+static void initallfs(char *mtab);
+static void fsrallfs(int howlong, char *leftofffile);
+static void fsrall_cleanup(int timeout);
+static int  getnextents(int);
+int xfsrtextsize(int fd);
+int xfs_getrt(int fd, struct statvfs64 *sfbp);
+char * gettmpname(char *fname);
+char * getparent(char *fname);
+int fsrprintf(const char *fmt, ...);
+int read_fd_bmap(int, xfs_bstat_t *, int *);
+int cmp(const void *, const void *);
+static void tmp_init(char *mnt);
+static char * tmp_next(char *mnt);
+static void tmp_close(char *mnt);
+int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
+static int getmntany(FILE *, struct mntent *, struct mntent *, struct stat64 *);
+
+xfs_fsop_geom_v1_t fsgeom;     /* geometry of active mounted system */
+
+#define NMOUNT 64
+static int numfs;
+
+typedef struct fsdesc {
+       char *dev;
+       char *mnt;
+       int  npass;
+} fsdesc_t;
+
+fsdesc_t       *fs, *fsbase, *fsend;
+int            fsbufsize = 10; /* A starting value */
+int            nfrags = 0;     /* Debug option: Coerse into specific number
+                                * of extents */
+int            openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
+
+int
+xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
+{
+    return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
+}
+
+int
+xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
+{
+    xfs_fsop_bulkreq_t  bulkreq;
+
+    bulkreq.lastip = lastip;
+    bulkreq.icount = 1;
+    bulkreq.ubuffer = ubuffer;
+    bulkreq.ocount = NULL;
+    return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
+}
+
+int
+xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
+                    xfs_bstat_t *ubuffer, __s32 *ocount)
+{
+    xfs_fsop_bulkreq_t  bulkreq;
+
+    bulkreq.lastip = lastip;
+    bulkreq.icount = icount;
+    bulkreq.ubuffer = ubuffer;
+    bulkreq.ocount = ocount;
+    return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
+}
+
+int
+xfs_swapext(int fd, xfs_swapext_t *sx)
+{
+    return ioctl(fd, XFS_IOC_SWAPEXT, sx);
+}
+
+int
+xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
+{
+    return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
+}
+
+void
+aborter(int unused)
+{
+       fsrall_cleanup(1);
+       exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+       struct stat64 sb, sb2;
+       char *argname;
+       char *cp;
+       int c;
+       struct mntent mntpref;
+       register struct mntent *mntp;
+       struct mntent ment;
+       register FILE *mtabp;
+
+       setlinebuf(stdout);
+       progname = basename(argv[0]);
+
+       setlocale(LC_ALL, "");
+       bindtextdomain(PACKAGE, LOCALEDIR);
+       textdomain(PACKAGE);
+
+       gflag = ! isatty(0);
+
+       while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1 )
+               switch (c) {
+               case 'M':
+                       Mflag = 1;
+                       break;
+               case 'g':
+                       gflag = 1;
+                       break;
+               case 'n':
+                       /* nflag = 1; */
+                       break;
+               case 'v':
+                       ++vflag;
+                       break;
+               case 'd':
+                       dflag = 1;
+                       break;
+               case 's':               /* frag stats only */
+                       /* sflag = 1; */
+                       fprintf(stderr,
+                               _("%s: Stats not yet supported for XFS\n"),
+                               progname);
+                       usage(1);
+                       break;
+               case 't':
+                       howlong = atoi(optarg);
+                       break;
+               case 'f':
+                       leftofffile = optarg;
+                       break;
+               case 'm':
+                       mtab = optarg;
+                       break;
+               case 'b':
+                       argv_blksz_dio = atoi(optarg);
+                       break;
+               case 'p':
+                       npasses = atoi(optarg);
+                       break;
+               case 'C':
+                       /* Testing opt: coerses frag count in result */
+                       if (getenv("FSRXFSTEST") != NULL) {
+                               nfrags = atoi(optarg);
+                               openopts |= O_SYNC;
+                       }
+                       break;
+               case 'V':
+                       printf(_("%s version %s\n"), progname, VERSION);
+                       exit(0);
+               default:
+                       usage(1);
+               }
+       if (vflag)
+               setbuf(stdout, NULL);
+
+       starttime = time(0);
+
+       /* Save the caller's real uid */
+       RealUid = getuid();
+
+       pagesize = getpagesize();
+
+       if (optind < argc) {
+               for (; optind < argc; optind++) {
+                       argname = argv[optind];
+                       mntp = NULL;
+                       if (lstat64(argname, &sb) < 0) {
+                               fprintf(stderr,
+                                       _("%s: could not stat: %s: %s\n"),
+                                       progname, argname, strerror(errno));
+                               continue;
+                       }
+                       if (S_ISLNK(sb.st_mode) && stat64(argname, &sb2) == 0 &&
+                           (S_ISBLK(sb2.st_mode) || S_ISCHR(sb2.st_mode)))
+                               sb = sb2;
+                       if (S_ISBLK(sb.st_mode) || (S_ISDIR(sb.st_mode))) {
+                               if ((mtabp = setmntent(mtab, "r")) == NULL) {
+                                       fprintf(stderr,
+                                               _("%s: cannot read %s\n"),
+                                               progname, mtab);
+                                       exit(1);
+                               }
+                               bzero(&mntpref, sizeof(mntpref));
+                               if (S_ISDIR(sb.st_mode))
+                                       mntpref.mnt_dir = argname;
+                               else
+                                       mntpref.mnt_fsname = argname;
+
+                               if (getmntany(mtabp, &ment, &mntpref, &sb) &&
+                                   strcmp(ment.mnt_type, MNTTYPE_XFS) == 0) {
+                                       mntp = &ment;
+                                       if (S_ISBLK(sb.st_mode)) {
+                                               cp = mntp->mnt_dir;
+                                               if (cp == NULL ||
+                                                   stat64(cp, &sb2) < 0) {
+                                                       fprintf(stderr, _(
+                                               "%s: could not stat: %s: %s\n"),
+                                                       progname, argname,
+                                                       strerror(errno));
+                                                       continue;
+                                               }
+                                               sb = sb2;
+                                               argname = cp;
+                                       }
+                               }
+                       }
+                       if (mntp != NULL) {
+                               fsrfs(mntp->mnt_dir, 0, 100);
+                       } else if (S_ISCHR(sb.st_mode)) {
+                               fprintf(stderr, _(
+                                       "%s: char special not supported: %s\n"),
+                                       progname, argname);
+                               exit(1);
+                       } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
+                               if (!platform_test_xfs_path(argname)) {
+                                       fprintf(stderr, _(
+                                       "%s: cannot defragment: %s: Not XFS\n"),
+                                       progname, argname);
+                                       continue;
+                               }
+                               if (S_ISDIR(sb.st_mode))
+                                       fsrdir(argname);
+                               else
+                                       fsrfile(argname, sb.st_ino);
+                       } else {
+                               printf(
+                       _("%s: not fsys dev, dir, or reg file, ignoring\n"),
+                                       argname);
+                       }
+               }
+       } else {
+               initallfs(mtab);
+               fsrallfs(howlong, leftofffile);
+       }
+       return 0;
+}
+
+void
+usage(int ret)
+{
+       fprintf(stderr, _(
+"Usage: %s [-d] [-v] [-n] [-s] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
+"       %s [-d] [-v] [-n] [-s] [-g] xfsdev | dir | file ...\n\n"
+"Options:\n"
+"       -n              Do nothing, only interesting with -v. Not\n"
+"                       effective with in mtab mode.\n"
+"       -s             Print statistics only.\n"
+"       -g              Print to syslog (default if stdout not a tty).\n"
+"       -t time         How long to run in seconds.\n"
+"       -p passes      Number of passes before terminating global re-org.\n"
+"       -f leftoff      Use this instead of %s.\n"
+"       -m mtab         Use something other than /etc/mtab.\n"
+"       -d              Debug, print even more.\n"
+"       -v             Verbose, more -v's more verbose.\n"
+               ), progname, progname, _PATH_FSRLAST);
+       exit(ret);
+}
+
+/*
+ * initallfs -- read the mount table and set up an internal form
+ */
+static void
+initallfs(char *mtab)
+{
+       FILE *fp;
+       struct mntent *mp;
+       int mi;
+       char *cp;
+       struct stat64 sb;
+
+       fp = setmntent(mtab, "r");
+       if (fp == NULL) {
+               fsrprintf(_("could not open mtab file: %s\n"), mtab);
+               exit(1);
+       }
+
+       /* malloc a number of descriptors, increased later if needed */
+       if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
+               fsrprintf(_("out of memory: %s\n"), strerror(errno));
+               exit(1);
+       }
+       fsend = (fsbase + fsbufsize - 1);
+
+       /* find all rw xfs file systems */
+       mi = 0;
+       fs = fsbase;
+       while ((mp = getmntent(fp))) {
+               int rw = 0;
+
+               if (strcmp(mp->mnt_type, MNTTYPE_XFS ) != 0 ||
+                   stat64(mp->mnt_fsname, &sb) == -1 ||
+                   !S_ISBLK(sb.st_mode))
+                       continue;
+
+               cp = strtok(mp->mnt_opts,",");
+               do {
+                       if (strcmp("rw", cp) == 0)
+                               rw++;
+               } while ((cp = strtok(NULL, ",")) != NULL);
+               if (rw == 0) {
+                       if (dflag)
+                               fsrprintf(_("Skipping %s: not mounted rw\n"),
+                                       mp->mnt_fsname);
+                       continue;
+               }
+
+               if (mi == fsbufsize) {
+                       fsbufsize += NMOUNT;
+                       if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
+                                     fsbufsize * sizeof(fsdesc_t))) == NULL) {
+                               fsrprintf(_("out of memory: %s\n"),
+                                       strerror(errno));
+                               exit(1);
+                       }
+                       if (!fsbase) {
+                               fsrprintf(_("out of memory on realloc: %s\n"),
+                                         strerror(errno));
+                               exit(1);
+                       }
+                       fs = (fsbase + mi);  /* Needed ? */
+               }
+
+               fs->dev = strdup(mp->mnt_fsname);
+               fs->mnt = strdup(mp->mnt_dir);
+
+               if (fs->mnt == NULL || fs->mnt == NULL) {
+                       fsrprintf(_("strdup(%s) failed\n"), mp->mnt_fsname);
+                       exit(1);
+               }
+               mi++;
+               fs++;
+       }
+       numfs = mi;
+       fsend = (fsbase + numfs);
+       endmntent(fp);
+       if (numfs == 0) {
+               fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
+               exit(0);
+       }
+       if (vflag || dflag) {
+               fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
+                          numfs);
+               if (dflag)
+                       for (fs = fsbase; fs < fsend; fs++)
+                           fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
+       }
+}
+
+static void
+fsrallfs(int howlong, char *leftofffile)
+{
+       int fd;
+       int error;
+       int found = 0;
+       char *fsname;
+       char buf[SMBUFSZ];
+       int mdonly = Mflag;
+       char *ptr;
+       xfs_ino_t startino = 0;
+       fsdesc_t *fsp;
+       struct stat64 sb, sb2;
+
+       fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
+
+       endtime = starttime + howlong;
+       fs = fsbase;
+
+       /* where'd we leave off last time? */
+       if (lstat64(leftofffile, &sb) == 0) {
+               if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
+                       fsrprintf(_("%s: open failed\n"), leftofffile);
+               }
+               else if ( fstat64(fd, &sb2) == 0) {
+                       /*
+                        * Verify that lstat & fstat point to the
+                        * same regular file (no links/no quick spoofs)
+                        */
+                       if ( (sb.st_dev  != sb2.st_dev) ||
+                            (sb.st_ino  != sb2.st_ino) ||
+                            ((sb.st_mode & S_IFMT) != S_IFREG) ||
+                            ((sb2.st_mode & S_IFMT) != S_IFREG) ||
+                            (sb2.st_uid  != ROOT) ||
+                            (sb2.st_nlink != 1)
+                          )
+                       {
+                               fsrprintf(_("Can't use %s: mode=0%o own=%d"
+                                       " nlink=%d\n"),
+                                       leftofffile, sb.st_mode,
+                                       sb.st_uid, sb.st_nlink);
+                               close(fd);
+                               fd = NULLFD;
+                       }
+               }
+               else {
+                       close(fd);
+                       fd = NULLFD;
+               }
+       }
+       else {
+               fd = NULLFD;
+       }
+
+       if (fd != NULLFD) {
+               if (read(fd, buf, SMBUFSZ) == -1) {
+                       fs = fsbase;
+                       fsrprintf(_("could not read %s, starting with %s\n"),
+                               leftofffile, *fs->dev);
+               } else {
+                       for (fs = fsbase; fs < fsend; fs++) {
+                               fsname = fs->dev;
+                               if ((strncmp(buf,fsname,strlen(fsname)) == 0)
+                                   && buf[strlen(fsname)] == ' ') {
+                                       found = 1;
+                                       break;
+                               }
+                       }
+                       if (! found)
+                               fs = fsbase;
+
+                       ptr = strchr(buf, ' ');
+                       if (ptr) {
+                               startpass = atoi(++ptr);
+                               ptr = strchr(ptr, ' ');
+                               if (ptr) {
+                                       startino = strtoull(++ptr, NULL, 10);
+                               }
+                       }
+                       if (startpass < 0)
+                               startpass = 0;
+
+                       /* Init pass counts */
+                       for (fsp = fsbase; fsp < fs; fsp++) {
+                               fsp->npass = startpass + 1;
+                       }
+                       for (fsp = fs; fsp <= fsend; fsp++) {
+                               fsp->npass = startpass;
+                       }
+               }
+               close(fd);
+       }
+
+       if (vflag) {
+               fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
+                         fs->npass, (unsigned long long)startino,
+                         fs->dev, fs->mnt);
+       }
+
+       signal(SIGABRT, aborter);
+       signal(SIGHUP, aborter);
+       signal(SIGINT, aborter);
+       signal(SIGQUIT, aborter);
+       signal(SIGTERM, aborter);
+
+       /* reorg for 'howlong' -- checked in 'fsrfs' */
+       while (endtime > time(0)) {
+               pid_t pid;
+               if (fs == fsend)
+                       fs = fsbase;
+               if (fs->npass == npasses) {
+                       fsrprintf(_("Completed all %d passes\n"), npasses);
+                       break;
+               }
+               if (npasses > 1 && !fs->npass)
+                       Mflag = 1;
+               else
+                       Mflag = mdonly;
+               pid = fork();
+               switch(pid) {
+               case -1:
+                       fsrprintf(_("couldn't fork sub process:"));
+                       exit(1);
+                       break;
+               case 0:
+                       error = fsrfs(fs->mnt, startino, TARGETRANGE);
+                       exit (error);
+                       break;
+               default:
+                       wait(&error);
+                       close(fd);
+                       if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
+                               /* child timed out & did fsrall_cleanup */
+                               exit(0);
+                       }
+                       break;
+               }
+               startino = 0;  /* reset after the first time through */
+               fs->npass++;
+               fs++;
+       }
+       fsrall_cleanup(endtime <= time(0));
+}
+
+/*
+ * fsrall_cleanup -- close files, print next starting location, etc.
+ */
+static void
+fsrall_cleanup(int timeout)
+{
+       int fd;
+       int ret;
+       char buf[SMBUFSZ];
+
+       /* record where we left off */
+       unlink(leftofffile);
+       fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
+       if (fd == -1)
+               fsrprintf(_("open(%s) failed: %s\n"),
+                         leftofffile, strerror(errno));
+       else {
+               if (timeout) {
+                       ret = sprintf(buf, "%s %d %llu\n", fs->dev,
+                               fs->npass, (unsigned long long)leftoffino);
+                       if (write(fd, buf, ret) < strlen(buf))
+                               fsrprintf(_("write(%s) failed: %s\n"),
+                                       leftofffile, strerror(errno));
+                       close(fd);
+               }
+       }
+
+       if (timeout)
+               fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
+                       progname, startpass, fs->npass,
+                       time(0) - endtime + howlong);
+}
+
+/*
+ * fsrfs -- reorganize a file system
+ */
+static int
+fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
+{
+
+       int     fsfd, fd;
+       int     count = 0;
+       int     ret;
+       __s32   buflenout;
+       xfs_bstat_t buf[GRABSZ];
+       char    fname[64];
+       char    *tname;
+       jdm_fshandle_t  *fshandlep;
+       xfs_ino_t       lastino = startino;
+
+       fsrprintf(_("%s start inode=%llu\n"), mntdir,
+               (unsigned long long)startino);
+
+       fshandlep = jdm_getfshandle( mntdir );
+       if ( ! fshandlep ) {
+               fsrprintf(_("unable to get handle: %s: %s\n"),
+                         mntdir, strerror( errno ));
+               return -1;
+       }
+
+       if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
+               fsrprintf(_("unable to open: %s: %s\n"),
+                         mntdir, strerror( errno ));
+               return -1;
+       }
+
+       if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
+               fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
+                         mntdir);
+               return -1;
+       }
+
+       tmp_init(mntdir);
+
+       while ((ret = xfs_bulkstat(fsfd,
+                               &lastino, GRABSZ, &buf[0], &buflenout) == 0)) {
+               xfs_bstat_t *p;
+               xfs_bstat_t *endp;
+
+               if (buflenout == 0)
+                       goto out0;
+
+               /* Each loop through, defrag targetrange percent of the files */
+               count = (buflenout * targetrange) / 100;
+
+               qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
+
+               for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
+                       /* Do some obvious checks now */
+                       if (((p->bs_mode & S_IFMT) != S_IFREG) ||
+                            (p->bs_extents < 2))
+                               continue;
+
+                       if ((fd = jdm_open(fshandlep, p, O_RDWR)) < 0) {
+                               /* This probably means the file was
+                                * removed while in progress of handling
+                                * it.  Just quietly ignore this file.
+                                */
+                               if (dflag)
+                                       fsrprintf(_("could not open: "
+                                               "inode %llu\n"), p->bs_ino);
+                               continue;
+                       }
+
+                       /* Don't know the pathname, so make up something */
+                       sprintf(fname, "ino=%lld", (long long)p->bs_ino);
+
+                       /* Get a tmp file name */
+                       tname = tmp_next(mntdir);
+
+                       ret = fsrfile_common(fname, tname, mntdir, fd, p);
+
+                       leftoffino = p->bs_ino;
+
+                       close(fd);
+
+                       if (ret == 0) {
+                               if (--count <= 0)
+                                       break;
+                       }
+               }
+               if (endtime && endtime < time(0)) {
+                       tmp_close(mntdir);
+                       close(fsfd);
+                       fsrall_cleanup(1);
+                       exit(1);
+               }
+       }
+       if (ret < 0)
+               fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
+out0:
+       tmp_close(mntdir);
+       close(fsfd);
+       return 0;
+}
+
+/*
+ * To compare bstat structs for qsort.
+ */
+int
+cmp(const void *s1, const void *s2)
+{
+       return( ((xfs_bstat_t *)s2)->bs_extents -
+               ((xfs_bstat_t *)s1)->bs_extents);
+
+}
+
+/*
+ * reorganize by directory hierarchy.
+ * Stay in dev (a restriction based on structure of this program -- either
+ * call efs_{n,u}mount() around each file, something smarter or this)
+ */
+static void
+fsrdir(char *dirname)
+{
+       fsrprintf(_("%s: Directory defragmentation not supported\n"), dirname);
+}
+
+/*
+ * Sets up the defragmentation of a file based on the
+ * filepath.  It collects the bstat information, does
+ * an open on the file and passes this all to fsrfile_common.
+ */
+static int
+fsrfile(char *fname, xfs_ino_t ino)
+{
+       xfs_bstat_t     statbuf;
+       jdm_fshandle_t  *fshandlep;
+       int     fd, fsfd;
+       int     error = 0;
+       char    *tname;
+
+       fshandlep = jdm_getfshandle(getparent (fname) );
+       if (! fshandlep) {
+               fsrprintf(_("unable to construct sys handle for %s: %s\n"),
+                       fname, strerror(errno));
+               return -1;
+       }
+
+       /*
+        * Need to open something on the same filesystem as the
+        * file.  Open the parent.
+        */
+       fsfd = open(getparent(fname), O_RDONLY);
+       if (fsfd < 0) {
+               fsrprintf(_("unable to open sys handle for %s: %s\n"),
+                       fname, strerror(errno));
+               return -1;
+       }
+
+       if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
+               fsrprintf(_("unable to get bstat on %s: %s\n"),
+                       fname, strerror(errno));
+               close(fsfd);
+               return -1;
+       }
+
+       fd = jdm_open( fshandlep, &statbuf, O_RDWR);
+       if (fd < 0) {
+               fsrprintf(_("unable to open handle %s: %s\n"),
+                       fname, strerror(errno));
+               close(fsfd);
+               return -1;
+       }
+
+       /* Get the fs geometry */
+       if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
+               fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
+               close(fsfd);
+               return -1;
+       }
+
+       close(fsfd);
+
+       tname = gettmpname(fname);
+
+       if (tname)
+               error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
+
+       close(fd);
+
+       return error;
+}
+
+
+/*
+ * This is the common defrag code for either a full fs
+ * defragmentation or a single file.  Check as much as
+ * possible with the file, fork a process to setuid to the
+ * target file owner's uid and defragment the file.
+ * This is done so the new extents created in a tmp file are
+ * reflected in the owners' quota without having to do any
+ * special code in the kernel.  When the existing extents
+ * are removed, the quotas will be correct.  It's ugly but
+ * it saves us from doing some quota  re-construction in
+ * the extent swap.  The price is that the defragmentation
+ * will fail if the owner of the target file is already at
+ * their quota limit.
+ */
+static int
+fsrfile_common(
+       char            *fname,
+       char            *tname,
+       char            *fsname,
+       int             fd,
+       xfs_bstat_t     *statp)
+{
+       int             error;
+       struct statvfs64 vfss;
+       struct fsxattr  fsx;
+       unsigned long   bsize;
+
+       if (vflag)
+               fsrprintf("%s\n", fname);
+
+       if (fsync(fd) < 0) {
+               fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
+               return -1;
+       }
+
+       if (statp->bs_size == 0) {
+               if (vflag)
+                       fsrprintf(_("%s: zero size, ignoring\n"), fname);
+               return(0);
+       }
+
+       /* Check if a mandatory lock is set on the file to try and
+        * avoid blocking indefinitely on the reads later. Note that
+        * someone could still set a mandatory lock after this check
+        * but before all reads have completed to block fsr reads.
+        * This change just closes the window a bit.
+        */
+       if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
+               struct flock fl;
+
+               fl.l_type = F_RDLCK;
+               fl.l_whence = SEEK_SET;
+               fl.l_start = (off_t)0;
+               fl.l_len = 0;
+               if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
+                       if (vflag)
+                               fsrprintf(_("locking check failed: %s\n"),
+                                       fname);
+                       return(-1);
+               }
+               if (fl.l_type != F_UNLCK) {
+                       /* Mandatory lock is set */
+                       if (vflag)
+                               fsrprintf(_("mandatory lock: %s: ignoring\n"),
+                                       fname);
+                       return(-1);
+               }
+       }
+
+       /*
+        * Check if there is room to copy the file.
+        *
+        * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
+        * not the optimal I/O size as struct stat.
+        */
+       if (statvfs64(fsname ? fsname : fname, &vfss) < 0) {
+               fsrprintf(_("unable to get fs stat on %s: %s\n"),
+                       fname, strerror(errno));
+               return -1;
+       }
+       bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
+       if (statp->bs_blksize * statp->bs_blocks >
+           vfss.f_bfree * bsize - minimumfree) {
+               fsrprintf(_("insufficient freespace for: %s: "
+                           "size=%lld: ignoring\n"), fname,
+                           statp->bs_blksize * statp->bs_blocks);
+               return 1;
+       }
+
+       if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
+               fsrprintf(_("failed to get inode attrs: %s\n"), fname);
+               return(-1);
+       }
+       if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) {
+               if (vflag)
+                       fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
+               return(0);
+       }
+       if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) {
+               if (vflag)
+                       fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
+                           fname);
+               return(0);
+       }
+       if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
+               if (xfs_getrt(fd, &vfss) < 0) {
+                       fsrprintf(_("cannot get realtime geometry for: %s\n"),
+                               fname);
+                       return(-1);
+               }
+               if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
+                       fsrprintf(_("low on realtime free space: %s: "
+                               "ignoring file\n"), fname);
+                       return(-1);
+               }
+       }
+
+       if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
+               fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
+               return -1;
+       }
+
+       /*
+        * Previously the code forked here, & the child changed it's uid to
+        * that of the file's owner and then called packfile(), to keep
+        * quota counts correct.  (defragged files could use fewer blocks).
+        *
+        * Instead, just fchown() the temp file to the uid,gid of the
+        * file we're defragging, in packfile().
+        */
+
+       if ((error = packfile(fname, tname, fd, statp, &fsx)))
+               return error;
+       return -1; /* no error */
+}
+
+
+/*
+ * Do the defragmentation of a single file.
+ * We already are pretty sure we can and want to
+ * defragment the file.  Create the tmp file, copy
+ * the data (maintaining holes) and call the kernel
+ * extent swap routinte.
+ */
+static int
+packfile(char *fname, char *tname, int fd,
+        xfs_bstat_t *statp, struct fsxattr *fsxp)
+{
+       int             tfd;
+       int             srval;
+       int             nextents, extent, cur_nextents, new_nextents;
+       unsigned        blksz_dio;
+       unsigned        dio_min;
+       struct dioattr  dio;
+       static xfs_swapext_t   sx;
+       struct xfs_flock64  space;
+       off64_t         cnt, pos;
+       void            *fbuf;
+       int             ct, wc, wc_b4;
+       char            ffname[SMBUFSZ];
+       int             ffd = -1;
+
+       /*
+        * Work out the extent map - nextents will be set to the
+        * minimum number of extents needed for the file (taking
+        * into account holes), cur_nextents is the current number
+        * of extents.
+        */
+       nextents = read_fd_bmap(fd, statp, &cur_nextents);
+
+       if (cur_nextents == 1 || cur_nextents <= nextents) {
+               if (vflag)
+                       fsrprintf(_("%s already fully defragmented.\n"), fname);
+               return 1; /* indicates no change/no error */
+       }
+
+       if (dflag)
+               fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
+                         fname, cur_nextents, (cur_nextents - nextents),
+                         tname);
+
+       if ((tfd = open(tname, openopts, 0666)) < 0) {
+               if (vflag)
+                       fsrprintf(_("could not open tmp file: %s: %s\n"),
+                                  tname, strerror(errno));
+               return -1;
+       }
+       unlink(tname);
+
+       /* Setup extended attributes */
+       if (statp->bs_xflags & XFS_XFLAG_HASATTR) {
+               if (fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE) != 0) {
+                       fsrprintf(_("could not set ATTR on tmp: %s:\n"), tname);
+                       close(tfd);
+                       return -1;
+               }
+               if (dflag)
+                       fsrprintf(_("%s set temp attr\n"), tname);
+       }
+
+       /* Setup extended inode flags, project identifier, etc */
+       if (fsxp->fsx_xflags || fsxp->fsx_projid) {
+               if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) {
+                       fsrprintf(_("could not set inode attrs on tmp: %s\n"),
+                               tname);
+                       close(tfd);
+                       return -1;
+               }
+       }
+
+       if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
+               fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
+               close(tfd);
+               return -1;
+       }
+
+       dio_min = dio.d_miniosz;
+       if (statp->bs_size <= dio_min) {
+               blksz_dio = dio_min;
+       } else {
+               blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
+               if (argv_blksz_dio != 0)
+                       blksz_dio = min(argv_blksz_dio, blksz_dio);
+               blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
+       }
+
+       if (dflag) {
+               fsrprintf(_("DEBUG: "
+                       "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
+                       statp->bs_size, blksz_dio, dio.d_miniosz,
+                       dio.d_maxiosz, pagesize);
+       }
+
+       if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
+               fsrprintf(_("could not allocate buf: %s\n"), tname);
+               close(tfd);
+               return -1;
+       }
+
+       if (nfrags) {
+               /* Create new tmp file in same AG as first */
+               sprintf(ffname, "%s.frag", tname);
+
+               /* Open the new file for sync writes */
+               if ((ffd = open(ffname, openopts, 0666)) < 0) {
+                       fsrprintf(_("could not open fragfile: %s : %s\n"),
+                                  ffname, strerror(errno));
+                       close(tfd);
+                       free(fbuf);
+                       return -1;
+               }
+               unlink(ffname);
+       }
+
+       /* Loop through block map allocating new extents */
+       for (extent = 0; extent < nextents; extent++) {
+               pos = outmap[extent].bmv_offset;
+               if (outmap[extent].bmv_block == -1) {
+                       space.l_whence = SEEK_SET;
+                       space.l_start = pos;
+                       space.l_len = outmap[extent].bmv_length;
+                       if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
+                               fsrprintf(_("could not trunc tmp %s\n"),
+                                          tname);
+                       }
+                       lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+                       continue;
+               } else if (outmap[extent].bmv_length == 0) {
+                       /* to catch holes at the beginning of the file */
+                       continue;
+               }
+               if (! nfrags) {
+                       space.l_whence = SEEK_CUR;
+                       space.l_start = 0;
+                       space.l_len = outmap[extent].bmv_length;
+
+                       if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
+                               fsrprintf(_("could not pre-allocate tmp space:"
+                                       " %s\n"), tname);
+                               close(tfd);
+                               free(fbuf);
+                               return -1;
+                       }
+                       lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+               }
+       } /* end of space allocation loop */
+
+       if (lseek64(tfd, 0, SEEK_SET)) {
+               fsrprintf(_("Couldn't rewind on temporary file\n"));
+               close(tfd);
+               free(fbuf);
+               return -1;
+       }
+
+       /* Check if the temporary file has fewer extents */
+       new_nextents = getnextents(tfd);
+       if (dflag)
+               fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
+       if (cur_nextents <= new_nextents) {
+               if (vflag)
+                       fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
+               free(fbuf);
+               close(tfd);
+               return 1; /* no change/no error */
+       }
+
+       /* Loop through block map copying the file. */
+       for (extent = 0; extent < nextents; extent++) {
+               pos = outmap[extent].bmv_offset;
+               if (outmap[extent].bmv_block == -1) {
+                       lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+                       lseek64(fd, outmap[extent].bmv_length, SEEK_CUR);
+                       continue;
+               } else if (outmap[extent].bmv_length == 0) {
+                       /* to catch holes at the beginning of the file */
+                       continue;
+               }
+               for (cnt = outmap[extent].bmv_length; cnt > 0;
+                    cnt -= ct, pos += ct) {
+                       if (nfrags && --nfrags) {
+                               ct = min(cnt, dio_min);
+                       } else if (cnt % dio_min == 0) {
+                               ct = min(cnt, blksz_dio);
+                       } else {
+                               ct = min(cnt + dio_min - (cnt % dio_min),
+                                       blksz_dio);
+                       }
+                       ct = read(fd, fbuf, ct);
+                       if (ct == 0) {
+                               /* EOF, stop trying to read */
+                               extent = nextents;
+                               break;
+                       }
+                       /* Ensure we do direct I/O to correct block
+                        * boundaries.
+                        */
+                       if (ct % dio_min != 0) {
+                               wc = ct + dio_min - (ct % dio_min);
+                       } else {
+                               wc = ct;
+                       }
+                       wc_b4 = wc;
+                       if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
+                               if (ct < 0)
+                                       fsrprintf(_("bad read of %d bytes "
+                                               "from %s: %s\n"), wc_b4,
+                                               fname, strerror(errno));
+                               else if (wc < 0)
+                                       fsrprintf(_("bad write of %d bytes "
+                                               "to %s: %s\n"), wc_b4,
+                                               tname, strerror(errno));
+                               else {
+                                       /*
+                                        * Might be out of space
+                                        *
+                                        * Try to finish write
+                                        */
+                                       int resid = ct-wc;
+
+                                       if ((wc = write(tfd, ((char *)fbuf)+wc,
+                                                       resid)) == resid) {
+                                               /* worked on second attempt? */
+                                               continue;
+                                       }
+                                       else if (wc < 0) {
+                                               fsrprintf(_("bad write2 of %d "
+                                                       "bytes to %s: %s\n"),
+                                                       resid, tname,
+                                                       strerror(errno));
+                                       } else {
+                                               fsrprintf(_("bad copy to %s\n"),
+                                                       tname);
+                                       }
+                               }
+                               free(fbuf);
+                               close(tfd);
+                               return -1;
+                       }
+                       if (nfrags) {
+                               /* Do a matching write to the tmp file */
+                               wc = wc_b4;
+                               if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
+                                       fsrprintf(_("bad write of %d bytes "
+                                               "to %s: %s\n"),
+                                               wc_b4, ffname, strerror(errno));
+                               }
+                       }
+               }
+       }
+       ftruncate64(tfd, statp->bs_size);
+       if (ffd > 0) close(ffd);
+       fsync(tfd);
+
+       free(fbuf);
+
+       sx.sx_stat     = *statp; /* struct copy */
+       sx.sx_version  = XFS_SX_VERSION;
+       sx.sx_fdtarget = fd;
+       sx.sx_fdtmp    = tfd;
+       sx.sx_offset   = 0;
+       sx.sx_length   = statp->bs_size;
+
+       /* switch to the owner's id, to keep quota in line */
+        if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
+                if (vflag)
+                        fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
+                                   tname, strerror(errno));
+               close(tfd);
+                return -1;
+        }
+
+       /* Swap the extents */
+       srval = xfs_swapext(fd, &sx);
+       if (srval < 0) {
+               if (errno == ENOTSUP) {
+                       if (vflag || dflag)
+                          fsrprintf(_("%s: file type not supported\n"), fname);
+               } else if (errno == EFAULT) {
+                       /* The file has changed since we started the copy */
+                       if (vflag || dflag)
+                          fsrprintf(_("%s: file modified defrag aborted\n"),
+                                    fname);
+               } else if (errno == EBUSY) {
+                       /* Timestamp has changed or mmap'ed file */
+                       if (vflag || dflag)
+                          fsrprintf(_("%s: file busy\n"), fname);
+               } else {
+                       fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
+                                 fname, strerror(errno));
+               }
+               close(tfd);
+               return -1;
+       }
+
+       /* Report progress */
+       if (vflag)
+               fsrprintf(_("extents before:%d after:%d %s %s\n"),
+                         cur_nextents, new_nextents,
+                         (new_nextents <= nextents ? "DONE" : "    " ),
+                         fname);
+       close(tfd);
+       return 0;
+}
+
+char *
+gettmpname(char *fname)
+{
+       static char     buf[PATH_MAX+1];
+       char            sbuf[SMBUFSZ];
+       char            *ptr;
+
+       sprintf(sbuf, "/.fsr%d", getpid());
+
+       strcpy(buf, fname);
+       ptr = strrchr(buf, '/');
+       if (ptr) {
+               *ptr = '\0';
+       } else {
+               strcpy(buf, ".");
+       }
+
+       if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
+               fsrprintf(_("tmp file name too long: %s\n"), fname);
+               return(NULL);
+       }
+
+       strcat(buf, sbuf);
+
+       return(buf);
+}
+
+char *
+getparent(char *fname)
+{
+       static char     buf[PATH_MAX+1];
+       char            *ptr;
+
+       strcpy(buf, fname);
+       ptr = strrchr(buf, '/');
+       if (ptr) {
+               if (ptr == &buf[0])
+                       ++ptr;
+               *ptr = '\0';
+       } else {
+               strcpy(buf, ".");
+       }
+
+       return(buf);
+}
+
+/*
+ * Read in block map of the input file, coalesce contiguous
+ * extents into a single range, keep all holes. Convert from 512 byte
+ * blocks to bytes.
+ *
+ * This code was borrowed from mv.c with some minor mods.
+ */
+#define MAPSIZE        128
+#define        OUTMAP_SIZE_INCREMENT   MAPSIZE
+
+int    read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
+{
+       int             i, cnt;
+       struct getbmap  map[MAPSIZE];
+
+#define        BUMP_CNT        \
+       if (++cnt >= outmap_size) { \
+               outmap_size += OUTMAP_SIZE_INCREMENT; \
+               outmap = (struct getbmap *)realloc(outmap, \
+                                          outmap_size*sizeof(*outmap)); \
+               if (outmap == NULL) { \
+                       fsrprintf(_("realloc failed: %s\n"), \
+                               strerror(errno)); \
+                       exit(1); \
+               } \
+       }
+
+       /*      Initialize the outmap array.  It always grows - never shrinks.
+        *      Left-over memory allocation is saved for the next files.
+        */
+       if (outmap_size == 0) {
+               outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
+               outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
+               if (!outmap) {
+                       fsrprintf(_("malloc failed: %s\n"),
+                               strerror(errno));
+                       exit(1);
+               }
+       }
+
+       outmap[0].bmv_block = 0;
+       outmap[0].bmv_offset = 0;
+       outmap[0].bmv_length = sin->bs_size;
+
+       /*
+        * If a non regular file is involved then forget holes
+        */
+
+       if (!S_ISREG(sin->bs_mode))
+               return(1);
+
+       outmap[0].bmv_length = 0;
+
+       map[0].bmv_offset = 0;
+       map[0].bmv_block = 0;
+       map[0].bmv_entries = 0;
+       map[0].bmv_count = MAPSIZE;
+       map[0].bmv_length = -1;
+
+       cnt = 0;
+       *cur_nextents = 0;
+
+       do {
+               if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
+                       fsrprintf(_("failed reading extents: inode %llu"),
+                                (unsigned long long)sin->bs_ino);
+                       exit(1);
+               }
+
+               /* Concatenate extents together and replicate holes into
+                * the output map.
+                */
+               *cur_nextents += map[0].bmv_entries;
+               for (i = 0; i < map[0].bmv_entries; i++) {
+                       if (map[i + 1].bmv_block == -1) {
+                               BUMP_CNT;
+                               outmap[cnt] = map[i+1];
+                       } else if (outmap[cnt].bmv_block == -1) {
+                               BUMP_CNT;
+                               outmap[cnt] = map[i+1];
+                       } else {
+                               outmap[cnt].bmv_length += map[i + 1].bmv_length;
+                       }
+               }
+       } while (map[0].bmv_entries == (MAPSIZE-1));
+       for (i = 0; i <= cnt; i++) {
+               outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
+               outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
+       }
+
+       outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
+
+       return(cnt+1);
+}
+
+/*
+ * Read the block map and return the number of extents.
+ */
+int
+getnextents(int fd)
+{
+       int             nextents;
+       struct getbmap  map[MAPSIZE];
+
+       map[0].bmv_offset = 0;
+       map[0].bmv_block = 0;
+       map[0].bmv_entries = 0;
+       map[0].bmv_count = MAPSIZE;
+       map[0].bmv_length = -1;
+
+       nextents = 0;
+
+       do {
+               if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
+                       fsrprintf(_("failed reading extents"));
+                       exit(1);
+               }
+
+               nextents += map[0].bmv_entries;
+       } while (map[0].bmv_entries == (MAPSIZE-1));
+
+       return(nextents);
+}
+
+/*
+ * Get the fs geometry
+ */
+int
+xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
+{
+       if (xfs_fsgeometry(fd, fsgeom) < 0) {
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Get xfs realtime space information
+ */
+int
+xfs_getrt(int fd, struct statvfs64 *sfbp)
+{
+       unsigned long   bsize;
+       unsigned long   factor;
+       xfs_fsop_counts_t cnt;
+
+       if (!fsgeom.rtblocks)
+               return -1;
+
+       if (xfs_fscounts(fd, &cnt) < 0) {
+               close(fd);
+               return -1;
+       }
+       bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
+       factor = fsgeom.blocksize / bsize;         /* currently this is == 1 */
+       sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
+       return 0;
+}
+
+int
+fsrprintf(const char *fmt, ...)
+{
+       va_list ap;
+
+       va_start(ap, fmt);
+       if (gflag) {
+               static int didopenlog;
+               if (!didopenlog) {
+                       openlog("fsr", LOG_PID, LOG_USER);
+                       didopenlog = 1;
+               }
+               vsyslog(LOG_INFO, fmt, ap);
+       } else
+               vprintf(fmt, ap);
+       va_end(ap);
+       return 0;
+}
+
+/*
+ * emulate getmntany
+ */
+static int
+getmntany(FILE *fp, struct mntent *mp, struct mntent *mpref, struct stat64 *s)
+{
+       struct mntent *t;
+       struct stat64 ms;
+
+       while ((t = getmntent(fp))) {
+               if (mpref->mnt_fsname) {        /* device */
+                       if (stat64(t->mnt_fsname, &ms) < 0)
+                               continue;
+                       if (s->st_rdev != ms.st_rdev)
+                               continue;
+               }
+               if (mpref->mnt_dir) {           /* mount point */
+                       if (stat64(t->mnt_dir, &ms) < 0)
+                               continue;
+                       if (s->st_ino != ms.st_ino || s->st_dev != ms.st_dev)
+                               continue;
+               }
+               *mp = *t;
+               break;
+       }
+       return (t != NULL);
+}
+
+
+/*
+ * Initialize a directory for tmp file use.  This is used
+ * by the full filesystem defragmentation when we're walking
+ * the inodes and do not know the path for the individual
+ * files.  Multiple directories are used to spread out the
+ * tmp data around to different ag's (since file data is
+ * usually allocated to the same ag as the directory and
+ * directories allocated round robin from the same
+ * parent directory).
+ */
+static void
+tmp_init(char *mnt)
+{
+       int     i;
+       static char     buf[SMBUFSZ];
+       mode_t  mask;
+
+       tmp_agi = 0;
+       sprintf(buf, "%s/.fsr", mnt);
+
+       mask = umask(0);
+       if (mkdir(buf, 0700) < 0) {
+               if (errno == EEXIST) {
+                       if (dflag)
+                               fsrprintf(_("tmpdir already exists: %s\n"),
+                                               buf);
+               } else {
+                       fsrprintf(_("could not create tmpdir: %s: %s\n"),
+                                       buf, strerror(errno));
+                       exit(-1);
+               }
+       }
+       for (i=0; i < fsgeom.agcount; i++) {
+               sprintf(buf, "%s/.fsr/ag%d", mnt, i);
+               if (mkdir(buf, 0777) < 0) {
+                       if (errno == EEXIST) {
+                               if (dflag)
+                                       fsrprintf(
+                                       _("tmpdir already exists: %s\n"), buf);
+                       } else {
+                               fsrprintf(_("cannot create tmpdir: %s: %s\n"),
+                                      buf, strerror(errno));
+                               exit(-1);
+                       }
+               }
+       }
+       (void)umask(mask);
+       return;
+}
+
+static char *
+tmp_next(char *mnt)
+{
+       static char     buf[SMBUFSZ];
+
+       sprintf(buf, "%s/.fsr/ag%d/tmp%d",
+               ( (strcmp(mnt, "/") == 0) ? "" : mnt),
+               tmp_agi,
+               getpid());
+
+       if (++tmp_agi == fsgeom.agcount)
+               tmp_agi = 0;
+
+       return(buf);
+}
+
+static void
+tmp_close(char *mnt)
+{
+       static char     buf[SMBUFSZ];
+       int i;
+
+       /* No data is ever actually written so we can just do rmdir's */
+       for (i=0; i < fsgeom.agcount; i++) {
+               sprintf(buf, "%s/.fsr/ag%d", mnt, i);
+               if (rmdir(buf) < 0) {
+                       if (errno != ENOENT) {
+                               fsrprintf(
+                                       _("could not remove tmpdir: %s: %s\n"),
+                                       buf, strerror(errno));
+                       }
+               }
+       }
+       sprintf(buf, "%s/.fsr", mnt);
+       if (rmdir(buf) < 0) {
+               if (errno != ENOENT) {
+                       fsrprintf(_("could not remove tmpdir: %s: %s\n"),
+                                 buf, strerror(errno));
+               }
+       }
+}
diff --git a/man/man8/xfs_fsr.8 b/man/man8/xfs_fsr.8
new file mode 100644 (file)
index 0000000..bf5e942
--- /dev/null
@@ -0,0 +1,171 @@
+.TH xfs_fsr 8
+.SH NAME
+xfs_fsr \- filesystem reorganizer for XFS
+.SH SYNOPSIS
+.nf
+\f3xfs_fsr\f1 [\f3\-v\f1] \c
+[\f3\-t\f1 seconds] [\f3\-f\f1 leftoff] [\f3\-m\f1 mtab]
+\f3xfs_fsr\f1 [\f3\-v\f1] \c
+[xfsdev | file] ...
+.fi
+.SH DESCRIPTION
+.I xfs_fsr
+is applicable only to XFS filesystems.
+.PP
+.I xfs_fsr
+improves the organization of mounted filesystems.
+The reorganization algorithm operates on one file at a time,
+compacting or otherwise improving the layout of
+the file extents (contiguous blocks of file data).
+.PP
+The following options are accepted by
+.IR xfs_fsr .
+The
+.BR \-m ,
+.BR \-t ,
+and
+.B \-f
+options have no meaning if any filesystems
+or files are specified on the command line.
+.TP 13 
+.BI \-m " mtab"
+Use this file for the list of filesystems to reorganize.
+The default is to use
+.IR /etc/mtab .
+.TP
+.BI \-t " seconds"
+How long to reorganize.
+The default is 7200 (2 hours).
+.TP
+.BI \-f " leftoff"
+Use this file instead of
+.I /var/tmp/.fsrlast
+to read the state of where to start and as the file
+to store the state of where reorganization left off.
+.TP
+.B \-v
+Verbose.
+Print cryptic information about
+each file being reorganized.
+.PP
+When invoked with no arguments
+.I xfs_fsr
+reorganizes all regular files in all mounted filesystems.
+.I xfs_fsr
+makes many cycles over
+.I /etc/mtab
+each time making a single pass over each XFS filesystem.
+Each pass goes through and selects files
+that have the largest number of extents.  It attempts
+to defragment the top 10% of these files on each pass.
+.PP
+It runs for up to two hours after which it records the filesystem
+where it left off, so it can start there the next time.
+This information is stored in the file
+.I /var/tmp/.fsrlast_xfs.
+If the information found here
+is somehow inconsistent or out of date
+it is ignored
+and reorganization starts at the beginning of the first
+filesystem found in
+.IR /etc/mtab .
+.PP
+.I xfs_fsr
+can be called with one or more arguments
+naming filesystems (block device name),
+and files to reorganize.
+In this mode
+.I xfs_fsr
+does not read or write
+.I /var/tmp/.fsrlast_xfs
+nor does it run for a fixed time interval.
+It makes one pass through each specified regular file and
+all regular files in each specified filesystem.
+A command line name referring to a symbolic link
+(except to a file system device),
+FIFO, or UNIX domain socket
+generates a warning message, but is otherwise ignored.
+While traversing the filesystem these types
+of files are silently skipped.
+.SH FILES
+.PD 0
+.TP 21
+/etc/mtab
+contains default list of filesystems to reorganize.
+.TP 21
+/var/tmp/.fsrlast_xfs
+records the state where reorganization left off.
+.PD
+.SH "SEE ALSO"
+xfs_fsr(8),
+mkfs.xfs(8),
+xfs_ncheck(8),
+xfs(5).
+.SH "NOTES"
+.I xfs_fsr
+improves the layout of extents for each file by copying the entire
+file to a temporary location and then interchanging the data extents 
+of the target and temporary files in an atomic manner.  
+This method requires that enough free disk space be available to copy 
+any given file and that the space be less fragmented than the original
+file.
+It also requires the owner of the file to have enough remaining
+filespace quota to do the copy on systems running quotas.
+.I xfs_fsr
+generates a warning message if space is not sufficient to improve
+the target file.
+.PP
+A temporary file used in improving a file given on the command line
+is created in the same parent directory of the target file and
+is prefixed by the string '\f3.fsr\f1'.   
+The temporary files used in improving an entire XFS device are stored
+in a directory at the root of the target device and use the same
+naming scheme.
+The temporary files are unlinked upon creation so data will not be
+readable by any other process.
+.PP
+.I xfs_fsr
+does not operate on files that are currently mapped in memory.
+A 'file busy' error can be seen for these files if the verbose
+flag (\f3-v\f1) is set.
+.PP
+Files marked as no\-defrag will be skipped. The
+.IR xfs_io (8)
+chattr command with the f attribute can be used to set or clear 
+this flag. Files and directories created in a directory with the 
+no\-defrag flag will inherit the attribute.
+.PP
+An entry in
+.I /etc/mtab
+or the file specified using the
+.B \-m
+option must have the
+.B rw
+option specified for read and write access.
+If this option is not present, then
+.I xfs_fsr
+skips the
+filesystem described by that line.
+See the
+.IR fstab (5)
+reference page for
+more details.
+.PP
+In general we do not foresee the need to run
+.I xfs_fsr
+on system partitions such as
+.IR / ,
+.I /boot
+and
+.I /usr
+as in general these will not suffer from fragmentation.
+There are also issues with defragmenting files
+.IR lilo (8)
+uses to boot your system. It is recommended that these files
+should be flagged as no\-defrag with the
+.IR xfs_io (8)
+chattr command. Should these files be moved by
+.I xfs_fsr
+then you must rerun
+.I lilo
+before you reboot or you may have an unbootable system.