From: Christoph Hellwig <hch@lst.de>
Date: Sat, 24 Jan 2009 13:59:31 +0000 (+0100)
Subject: move fsr over from xfsdump
X-Git-Tag: v3.0.0~21
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c988ea914a658e68cf7cdd7d4a5ffc24d1541f9b;p=thirdparty%2Fxfsprogs-dev.git

move fsr over from xfsdump

There's really no reason to hide fsr in xfsdump, so move it over
to xfsprogs now that we have a 3.0 release with some major shaking
pending.  The only code change is to replace the single attr_setf call
with a fsetxattr to avoid a dependency on libattr.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nathan Scott <nscott@aconex.com>
---

diff --git a/Makefile b/Makefile
index b1a59d604..b45015096 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ LDIRT = config.log .dep config.status config.cache confdefs.h conftest* \
 	Logs/* built .census install.* install-dev.* *.gz
 
 LIB_SUBDIRS = include libxfs libxlog libxcmd libhandle libdisk
-TOOL_SUBDIRS = copy db estimate fsck growfs io logprint mkfs quota \
+TOOL_SUBDIRS = copy db estimate fsck fsr growfs io logprint mkfs quota \
 		mdrestore repair rtcp m4 man doc po debian build
 
 SUBDIRS = $(LIB_SUBDIRS) $(TOOL_SUBDIRS)
diff --git a/fsr/Makefile b/fsr/Makefile
new file mode 100644
index 000000000..b62447ac7
--- /dev/null
+++ b/fsr/Makefile
@@ -0,0 +1,19 @@
+#
+# Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+LTCOMMAND = xfs_fsr
+CFILES = xfs_fsr.c
+LLDLIBS = $(LIBHANDLE)
+
+default: $(LTCOMMAND)
+
+include $(BUILDRULES)
+
+install: default
+	$(INSTALL) -m 755 -d $(PKG_BIN_DIR)
+	$(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_BIN_DIR)
+install-dev:
diff --git a/fsr/xfs_fsr.c b/fsr/xfs_fsr.c
new file mode 100644
index 000000000..9f2bf24c7
--- /dev/null
+++ b/fsr/xfs_fsr.c
@@ -0,0 +1,1600 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <xfs/xfs.h>
+#include <xfs/jdm.h>
+#include <xfs/xfs_dfrag.h>
+
+#include <fcntl.h>
+#include <errno.h>
+#include <malloc.h>
+#include <mntent.h>
+#include <syslog.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/xattr.h>
+
+
+#ifndef XFS_XFLAG_NODEFRAG
+#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependency, remove later */
+#endif
+
+#define _PATH_FSRLAST	"/var/tmp/.fsrlast_xfs"
+
+char *progname;		/* basename(argv[0]), set in main() */
+
+int vflag;		/* verbosity; bumped once per -v */
+int gflag;		/* set by -g, or when stdin is not a tty */
+static int Mflag;	/* set by -M; fsrallfs() overrides it per pass */
+/* static int nflag; */
+int dflag = 0;		/* set by -d: debug output */
+/* static int sflag; */
+int argv_blksz_dio;	/* -b value; caps packfile()'s direct I/O size */
+extern int max_ext_size;
+static int npasses = 10;	/* -p value: passes before fsrallfs() stops */
+static int startpass = 0;	/* pass number read from the leftoff file */
+
+struct getbmap  *outmap = NULL;	/* extent map walked by packfile() */
+int             outmap_size = 0;
+int		RealUid;	/* getuid() of the caller */
+int		tmp_agi;
+static __int64_t	minimumfree = 2048;	/* bytes held back in space checks */
+
+#define MNTTYPE_XFS             "xfs"
+
+#define SMBUFSZ		1024
+#define ROOT		0
+#define NULLFD		-1
+#define GRABSZ		64
+#define TARGETRANGE	10
+#define	V_NONE		0
+#define	V_OVERVIEW	1
+#define	V_ALL		2
+#define BUFFER_SIZE	(1<<16)
+#define BUFFER_MAX	(1<<24)
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+static time_t howlong = 7200;		/* default seconds of reorganizing */
+static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
+static char *mtab = MOUNTED;
+static time_t endtime;
+static time_t starttime;
+static xfs_ino_t	leftoffino = 0;
+static int	pagesize;
+
+void usage(int ret);
+static int  fsrfile(char *fname, xfs_ino_t ino);
+static int  fsrfile_common( char *fname, char *tname, char *mnt,
+                            int fd, xfs_bstat_t *statp);
+static int  packfile(char *fname, char *tname, int fd,
+                     xfs_bstat_t *statp, struct fsxattr *fsxp);
+static void fsrdir(char *dirname);
+static int  fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
+static void initallfs(char *mtab);
+static void fsrallfs(int howlong, char *leftofffile);
+static void fsrall_cleanup(int timeout);
+static int  getnextents(int);
+int xfsrtextsize(int fd);
+int xfs_getrt(int fd, struct statvfs64 *sfbp);
+char * gettmpname(char *fname);
+char * getparent(char *fname);
+int fsrprintf(const char *fmt, ...);
+int read_fd_bmap(int, xfs_bstat_t *, int *);
+int cmp(const void *, const void *);
+static void tmp_init(char *mnt);
+static char * tmp_next(char *mnt);
+static void tmp_close(char *mnt);
+int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
+static int getmntany(FILE *, struct mntent *, struct mntent *, struct stat64 *);
+
+xfs_fsop_geom_v1_t fsgeom;	/* geometry of active mounted system */
+
+#define NMOUNT 64
+static int numfs;
+
+typedef struct fsdesc {
+	char *dev;	/* strdup'ed mnt_fsname of the mount entry */
+	char *mnt;	/* strdup'ed mnt_dir of the mount entry */
+	int  npass;	/* passes counted so far; compared with npasses */
+} fsdesc_t;
+
+fsdesc_t	*fs, *fsbase, *fsend;
+int		fsbufsize = 10;	/* A starting value */
+int		nfrags = 0;	/* Debug option: Coerce into specific number
+				 * of extents */
+int		openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
+
+/* Pass geom to the XFS_IOC_FSGEOMETRY_V1 ioctl on fd; returns ioctl()'s result. */
+int xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
+{
+	return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
+}
+
+/* Build a one-entry bulk request and issue XFS_IOC_FSBULKSTAT_SINGLE on fd. */
+int xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
+{
+	xfs_fsop_bulkreq_t	req;
+
+	req.ubuffer = ubuffer;
+	req.lastip = lastip;
+	req.ocount = NULL;	/* no output count pointer is supplied */
+	req.icount = 1;
+	return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &req);
+}
+
+/* Pack the arguments into a bulk request and issue XFS_IOC_FSBULKSTAT on fd. */
+int xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
+		 xfs_bstat_t *ubuffer, __s32 *ocount)
+{
+	xfs_fsop_bulkreq_t	req;
+
+	req.ocount = ocount;
+	req.ubuffer = ubuffer;
+	req.icount = icount;
+	req.lastip = lastip;
+	return ioctl(fd, XFS_IOC_FSBULKSTAT, &req);
+}
+
+/* Pass sx to the XFS_IOC_SWAPEXT ioctl on fd; returns ioctl()'s result. */
+int xfs_swapext(int fd, xfs_swapext_t *sx)
+{
+	return ioctl(fd, XFS_IOC_SWAPEXT, sx);
+}
+
+/* Pass counts to the XFS_IOC_FSCOUNTS ioctl on fd; returns ioctl()'s result. */
+int xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
+{
+	return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
+}
+
+/* Signal handler (see fsrallfs): run the timeout cleanup, then exit(1). */
+void aborter(int unused)
+{
+	fsrall_cleanup(1);
+	exit(1);
+}
+
+/*
+ * Defragment each xfsdev/dir/file argument, or all rw XFS mounts if none.
+ */
+int
+main(int argc, char **argv)
+{
+	struct stat64 sb, sb2;
+	char *argname;
+	char *cp;
+	int c, found;
+	struct mntent mntpref;
+	register struct mntent *mntp;
+	struct mntent ment;
+	register FILE *mtabp;
+
+	setlinebuf(stdout);
+	progname = basename(argv[0]);
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+
+	gflag = ! isatty(0);
+
+	while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1 )
+		switch (c) {
+		case 'M':
+			Mflag = 1;
+			break;
+		case 'g':
+			gflag = 1;
+			break;
+		case 'n':
+			/* nflag = 1; */
+			break;
+		case 'v':
+			++vflag;
+			break;
+		case 'd':
+			dflag = 1;
+			break;
+		case 's':		/* frag stats only */
+			/* sflag = 1; */
+			fprintf(stderr,
+				_("%s: Stats not yet supported for XFS\n"),
+				progname);
+			usage(1);
+			break;
+		case 't':
+			howlong = atoi(optarg);
+			break;
+		case 'f':
+			leftofffile = optarg;
+			break;
+		case 'm':
+			mtab = optarg;
+			break;
+		case 'b':
+			argv_blksz_dio = atoi(optarg);
+			break;
+		case 'p':
+			npasses = atoi(optarg);
+			break;
+		case 'C':
+			/* Testing opt: coerces frag count in result */
+			if (getenv("FSRXFSTEST") != NULL) {
+				nfrags = atoi(optarg);
+				openopts |= O_SYNC;
+			}
+			break;
+		case 'V':
+			printf(_("%s version %s\n"), progname, VERSION);
+			exit(0);
+		default:
+			usage(1);
+		}
+	if (vflag)
+		setbuf(stdout, NULL);
+
+	starttime = time(0);
+	RealUid = getuid();	/* save the caller's real uid */
+	pagesize = getpagesize();
+
+	if (optind < argc) {
+		for (; optind < argc; optind++) {
+			argname = argv[optind];
+			mntp = NULL;
+			if (lstat64(argname, &sb) < 0) {
+				fprintf(stderr,
+					_("%s: could not stat: %s: %s\n"),
+					progname, argname, strerror(errno));
+				continue;
+			}
+			if (S_ISLNK(sb.st_mode) && stat64(argname, &sb2) == 0 &&
+			    (S_ISBLK(sb2.st_mode) || S_ISCHR(sb2.st_mode)))
+				sb = sb2;
+			if (S_ISBLK(sb.st_mode) || (S_ISDIR(sb.st_mode))) {
+				if ((mtabp = setmntent(mtab, "r")) == NULL) {
+					fprintf(stderr,
+						_("%s: cannot read %s\n"),
+						progname, mtab);
+					exit(1);
+				}
+				bzero(&mntpref, sizeof(mntpref));
+				if (S_ISDIR(sb.st_mode))
+					mntpref.mnt_dir = argname;
+				else
+					mntpref.mnt_fsname = argname;
+				found = getmntany(mtabp, &ment, &mntpref, &sb) &&
+					strcmp(ment.mnt_type, MNTTYPE_XFS) == 0;
+				endmntent(mtabp);	/* one stream per argument used to leak */
+				if (found) {
+					mntp = &ment;
+					if (S_ISBLK(sb.st_mode)) {
+						cp = mntp->mnt_dir;
+						if (cp == NULL ||
+						    stat64(cp, &sb2) < 0) {
+							fprintf(stderr, _(
+						"%s: could not stat: %s: %s\n"),
+							progname, argname,
+							strerror(errno));
+							continue;
+						}
+						sb = sb2;
+						argname = cp;
+					}
+				}
+			}
+			if (mntp != NULL) {
+				fsrfs(mntp->mnt_dir, 0, 100);
+			} else if (S_ISCHR(sb.st_mode)) {
+				fprintf(stderr, _(
+					"%s: char special not supported: %s\n"),
+				        progname, argname);
+				exit(1);
+			} else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
+				if (!platform_test_xfs_path(argname)) {
+					fprintf(stderr, _(
+				        "%s: cannot defragment: %s: Not XFS\n"),
+				        progname, argname);
+					continue;
+				}
+				if (S_ISDIR(sb.st_mode))
+					fsrdir(argname);
+				else
+					fsrfile(argname, sb.st_ino);
+			} else {
+				printf(
+			_("%s: not fsys dev, dir, or reg file, ignoring\n"),
+					argname);
+			}
+		}
+	} else {
+		initallfs(mtab);
+		fsrallfs(howlong, leftofffile);
+	}
+	return 0;
+}
+
+/* Print the usage text to stderr and exit with status ret; never returns. */
+void usage(int ret)
+{
+	fprintf(stderr, _(
+"Usage: %s [-d] [-v] [-n] [-s] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
+"       %s [-d] [-v] [-n] [-s] [-g] xfsdev | dir | file ...\n\n"
+"Options:\n"
+"       -n              Do nothing, only interesting with -v. Not\n"
+"                       effective with in mtab mode.\n"
+"       -s		Print statistics only.\n"
+"       -g              Print to syslog (default if stdout not a tty).\n"
+"       -t time         How long to run in seconds.\n"
+"       -p passes	Number of passes before terminating global re-org.\n"
+"       -f leftoff      Use this instead of %s.\n"
+"       -m mtab         Use something other than /etc/mtab.\n"
+"       -d              Debug, print even more.\n"
+"       -v		Verbose, more -v's more verbose.\n"
+		), progname, progname, _PATH_FSRLAST);
+	exit(ret);
+}
+
+/*
+ * initallfs -- read the mount table and set up an internal form
+ *
+ * Fills fsbase[0..numfs) (fsend is one past the last entry) with every
+ * XFS mount in mtab that sits on a block device and is mounted "rw".
+ */
+static void
+initallfs(char *mtab)
+{
+	FILE *fp;
+	struct mntent *mp;
+	int mi;
+	char *cp;
+	struct stat64 sb;
+
+	fp = setmntent(mtab, "r");
+	if (fp == NULL) {
+		fsrprintf(_("could not open mtab file: %s\n"), mtab);
+		exit(1);
+	}
+
+	/* malloc a number of descriptors, increased later if needed */
+	if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
+		fsrprintf(_("out of memory: %s\n"), strerror(errno));
+		exit(1);
+	}
+	fsend = (fsbase + fsbufsize - 1);
+
+	/* find all rw xfs file systems */
+	mi = 0;
+	fs = fsbase;
+	while ((mp = getmntent(fp))) {
+		int rw = 0;
+
+		if (strcmp(mp->mnt_type, MNTTYPE_XFS ) != 0 ||
+		    stat64(mp->mnt_fsname, &sb) == -1 ||
+		    !S_ISBLK(sb.st_mode))
+			continue;
+
+		/* strtok() yields NULL right away for an empty option string */
+		for (cp = strtok(mp->mnt_opts, ","); cp != NULL;
+		     cp = strtok(NULL, ",")) {
+			if (strcmp("rw", cp) == 0)
+				rw++;
+		}
+		if (rw == 0) {
+			if (dflag)
+				fsrprintf(_("Skipping %s: not mounted rw\n"),
+					mp->mnt_fsname);
+			continue;
+		}
+
+		if (mi == fsbufsize) {
+			fsbufsize += NMOUNT;
+			if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
+			              fsbufsize * sizeof(fsdesc_t))) == NULL) {
+				fsrprintf(_("out of memory: %s\n"),
+					strerror(errno));
+				exit(1);
+			}
+			fs = (fsbase + mi);  /* realloc may have moved it */
+		}
+
+		fs->dev = strdup(mp->mnt_fsname);
+		fs->mnt = strdup(mp->mnt_dir);
+		fs->npass = 0;	/* malloc'ed memory is not zeroed */
+
+		if (fs->dev == NULL || fs->mnt == NULL) {
+			fsrprintf(_("strdup(%s) failed\n"), mp->mnt_fsname);
+			exit(1);
+		}
+		mi++;
+		fs++;
+	}
+	numfs = mi;
+	fsend = (fsbase + numfs);
+	endmntent(fp);
+	if (numfs == 0) {
+		fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
+		exit(0);
+	}
+	if (vflag || dflag) {
+		fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
+		           numfs);
+		if (dflag)
+			for (fs = fsbase; fs < fsend; fs++)
+			    fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
+	}
+}
+
+/*
+ * fsrallfs -- reorganize the filesystems found by initallfs() in turn,
+ * each pass in a forked child, resuming from the state saved in
+ * leftofffile, until howlong seconds are up or npasses is reached.
+ */
+static void
+fsrallfs(int howlong, char *leftofffile)
+{
+	int fd = NULLFD;
+	int error;
+	int found = 0;
+	char *fsname;
+	char buf[SMBUFSZ];
+	ssize_t len;
+	int mdonly = Mflag;
+	char *ptr;
+	xfs_ino_t startino = 0;
+	fsdesc_t *fsp;
+	struct stat64 sb, sb2;
+
+	fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
+	endtime = starttime + howlong;
+	fs = fsbase;
+
+	/* where'd we leave off last time? */
+	if (lstat64(leftofffile, &sb) == 0) {
+		if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
+			fsrprintf(_("%s: open failed\n"), leftofffile);
+		}
+		else if ( fstat64(fd, &sb2) == 0) {
+			/*
+			 * Verify that lstat & fstat point to the
+			 * same regular file (no links/no quick spoofs)
+			 */
+			if ( (sb.st_dev  != sb2.st_dev) ||
+			     (sb.st_ino  != sb2.st_ino) ||
+			     ((sb.st_mode & S_IFMT) != S_IFREG) ||
+			     ((sb2.st_mode & S_IFMT) != S_IFREG) ||
+			     (sb2.st_uid  != ROOT) ||
+			     (sb2.st_nlink != 1)
+			   ) {
+				fsrprintf(_("Can't use %s: mode=0%o own=%d"
+					" nlink=%d\n"),
+					leftofffile, (unsigned)sb.st_mode,
+					(int)sb.st_uid, (int)sb.st_nlink);
+				close(fd);
+				fd = NULLFD;
+			}
+		}
+		else {
+			close(fd);
+			fd = NULLFD;
+		}
+	}
+
+	if (fd != NULLFD) {
+		if ((len = read(fd, buf, SMBUFSZ - 1)) == -1) {
+			fs = fsbase;
+			fsrprintf(_("could not read %s, starting with %s\n"),
+				leftofffile, fs->dev);
+		} else {
+			buf[len] = '\0';	/* the parsing below needs a string */
+			for (fs = fsbase; fs < fsend; fs++) {
+				fsname = fs->dev;
+				if ((strncmp(buf,fsname,strlen(fsname)) == 0)
+				    && buf[strlen(fsname)] == ' ') {
+					found = 1;
+					break;
+				}
+			}
+			if (! found)
+				fs = fsbase;
+
+			ptr = strchr(buf, ' ');
+			if (ptr) {
+				startpass = atoi(++ptr);
+				ptr = strchr(ptr, ' ');
+				if (ptr)
+					startino = strtoull(++ptr, NULL, 10);
+			}
+			if (startpass < 0)
+				startpass = 0;
+
+			/* Init pass counts; fsend is one past the last entry */
+			for (fsp = fsbase; fsp < fs; fsp++)
+				fsp->npass = startpass + 1;
+			for (fsp = fs; fsp < fsend; fsp++)
+				fsp->npass = startpass;
+		}
+		close(fd);
+	}
+
+	if (vflag)
+		fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
+			  fs->npass, (unsigned long long)startino,
+			  fs->dev, fs->mnt);
+
+	signal(SIGABRT, aborter);
+	signal(SIGHUP, aborter);
+	signal(SIGINT, aborter);
+	signal(SIGQUIT, aborter);
+	signal(SIGTERM, aborter);
+
+	/* reorg for 'howlong' -- checked in 'fsrfs' */
+	while (endtime > time(0)) {
+		pid_t pid;
+		if (fs == fsend)
+			fs = fsbase;
+		if (fs->npass == npasses) {
+			fsrprintf(_("Completed all %d passes\n"), npasses);
+			break;
+		}
+		if (npasses > 1 && !fs->npass)
+			Mflag = 1;
+		else
+			Mflag = mdonly;
+		pid = fork();
+		switch(pid) {
+		case -1:
+			fsrprintf(_("couldn't fork sub process:"));
+			exit(1);
+			break;
+		case 0:
+			error = fsrfs(fs->mnt, startino, TARGETRANGE);
+			exit (error);
+			break;
+		default:
+			wait(&error);
+			if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
+				/* child timed out & did fsrall_cleanup */
+				exit(0);
+			}
+			break;
+		}
+		startino = 0;  /* reset after the first time through */
+		fs->npass++;
+		fs++;
+	}
+	/* fs may be one past the end here, and the cleanup dereferences it */
+	if (fs == fsend)
+		fs = fsbase;
+	fsrall_cleanup(endtime <= time(0));
+}
+
+/*
+ * fsrall_cleanup -- close files, print next starting location, etc.
+ * Recreates leftofffile; on timeout it records where we left off there
+ * (device, pass count, leftoffino) and prints a summary line.
+ */
+static void
+fsrall_cleanup(int timeout)
+{
+	int fd, ret;
+	char buf[SMBUFSZ];
+
+	unlink(leftofffile);
+	fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
+	if (fd == -1)
+		fsrprintf(_("open(%s) failed: %s\n"),
+		          leftofffile, strerror(errno));
+	else {
+		if (timeout) {
+			snprintf(buf, sizeof(buf), "%s %d %llu\n", fs->dev,
+			        fs->npass, (unsigned long long)leftoffino);
+			ret = strlen(buf);
+			if (write(fd, buf, ret) != ret)
+				fsrprintf(_("write(%s) failed: %s\n"),
+					leftofffile, strerror(errno));
+		}
+		close(fd);	/* also when !timeout; it used to leak then */
+	}
+	if (timeout)
+		fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
+			progname, startpass, fs->npass,
+			(int)(time(0) - endtime + howlong));
+}
+
+/*
+ * fsrfs -- reorganize a file system
+ *
+ * Bulkstats mntdir from startino on, GRABSZ inodes at a time, and tries
+ * to defragment the most fragmented regular files of each batch, up to
+ * targetrange percent of it.  Returns -1 when the filesystem cannot be
+ * set up and 0 otherwise; exits with status 1 once endtime has passed.
+ */
+static int
+fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
+{
+	int	fsfd, fd;
+	int	count = 0;
+	int	ret;
+	__s32	buflenout;
+	xfs_bstat_t buf[GRABSZ];
+	char	fname[64];
+	char	*tname;
+	jdm_fshandle_t	*fshandlep;
+	xfs_ino_t	lastino = startino;
+
+	fsrprintf(_("%s start inode=%llu\n"), mntdir,
+		(unsigned long long)startino);
+
+	fshandlep = jdm_getfshandle( mntdir );
+	if ( ! fshandlep ) {
+		fsrprintf(_("unable to get handle: %s: %s\n"),
+		          mntdir, strerror( errno ));
+		return -1;
+	}
+
+	if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
+		fsrprintf(_("unable to open: %s: %s\n"),
+		          mntdir, strerror( errno ));
+		return -1;
+	}
+
+	if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
+		fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
+			  mntdir);
+		close(fsfd);	/* was leaked on this path */
+		return -1;
+	}
+
+	tmp_init(mntdir);
+
+	while ((ret = xfs_bulkstat(fsfd,
+				&lastino, GRABSZ, &buf[0], &buflenout)) == 0) {
+		xfs_bstat_t *p;
+		xfs_bstat_t *endp;
+
+		if (buflenout == 0)
+			goto out0;
+
+		/* Each loop through, defrag targetrange percent of the files */
+		count = (buflenout * targetrange) / 100;
+		qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
+
+		for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
+			/* Do some obvious checks now */
+			if (((p->bs_mode & S_IFMT) != S_IFREG) ||
+			     (p->bs_extents < 2))
+				continue;
+
+			if ((fd = jdm_open(fshandlep, p, O_RDWR)) < 0) {
+				/* This probably means the file was
+				 * removed while in progress of handling
+				 * it.  Just quietly ignore this file.
+				 */
+				if (dflag)
+					fsrprintf(_("could not open: "
+						"inode %llu\n"), p->bs_ino);
+				continue;
+			}
+
+			/* Don't know the pathname, so make up something */
+			sprintf(fname, "ino=%lld", (long long)p->bs_ino);
+
+			/* Get a tmp file name */
+			tname = tmp_next(mntdir);
+
+			ret = fsrfile_common(fname, tname, mntdir, fd, p);
+			leftoffino = p->bs_ino;
+			close(fd);
+
+			if (ret == 0 && --count <= 0)
+				break;
+		}
+		if (endtime && endtime < time(0)) {
+			tmp_close(mntdir);
+			close(fsfd);
+			fsrall_cleanup(1);
+			exit(1);
+		}
+	}
+	if (ret < 0)
+		fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
+out0:
+	tmp_close(mntdir);
+	close(fsfd);
+	return 0;
+}
+
+/*
+ * qsort() comparator for xfs_bstat_t records: the result is the second
+ * entry's bs_extents minus the first's, i.e. descending extent count.
+ */
+int
+cmp(const void *s1, const void *s2)
+{
+	xfs_bstat_t	*first = (xfs_bstat_t *)s1, *second = (xfs_bstat_t *)s2;
+	return second->bs_extents - first->bs_extents;
+}
+
+/*
+ * Reorganize by directory hierarchy -- not implemented.  This stub only
+ * reports through fsrprintf() that directory defragmentation is not
+ * supported for dirname; nothing is traversed or modified.
+ */
+static void
+fsrdir(char *dirname)
+{
+	fsrprintf(_("%s: Directory defragmentation not supported\n"), dirname);
+}
+
+/*
+ * Sets up the defragmentation of a file based on the
+ * filepath.  It collects the bstat information, does
+ * an open on the file and passes this all to fsrfile_common.
+ * Returns fsrfile_common()'s result, or -1 if the setup fails.
+ */
+static int
+fsrfile(char *fname, xfs_ino_t ino)
+{
+	xfs_bstat_t	statbuf;
+	jdm_fshandle_t	*fshandlep;
+	int	fd, fsfd;
+	int	error = -1;	/* stays -1 if no tmp name can be built */
+	char	*tname;
+	char	*parent = getparent(fname);
+
+	fshandlep = jdm_getfshandle(parent);
+	if (! fshandlep) {
+		fsrprintf(_("unable to construct sys handle for %s: %s\n"),
+			fname, strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * Need to open something on the same filesystem as the
+	 * file.  Open the parent.
+	 */
+	fsfd = open(parent, O_RDONLY);
+	if (fsfd < 0) {
+		fsrprintf(_("unable to open sys handle for %s: %s\n"),
+			fname, strerror(errno));
+		return -1;
+	}
+
+	if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
+		fsrprintf(_("unable to get bstat on %s: %s\n"),
+			fname, strerror(errno));
+		close(fsfd);
+		return -1;
+	}
+
+	fd = jdm_open( fshandlep, &statbuf, O_RDWR);
+	if (fd < 0) {
+		fsrprintf(_("unable to open handle %s: %s\n"),
+			fname, strerror(errno));
+		close(fsfd);
+		return -1;
+	}
+
+	/* Get the fs geometry */
+	if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
+		fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
+		close(fsfd);
+		close(fd);	/* was leaked on this path */
+		return -1;
+	}
+
+	close(fsfd);
+	tname = gettmpname(fname);
+	if (tname)
+		error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
+	close(fd);
+
+	return error;
+}
+
+
+/*
+ * This is the common defrag code for either a full fs
+ * defragmentation or a single file.  It syncs the file and
+ * skips it when it is empty, mandatory-locked, immutable or
+ * append-only, marked no-defrag, not owned by a non-root
+ * caller, or when free space looks too small for a copy.
+ * packfile() then does the copy and the extent swap; it chowns
+ * the tmp file to the owner so the new extents count against
+ * the owner's quota.
+ *
+ * Returns 0 for deliberately ignored files, 1 when free space
+ * is insufficient, otherwise packfile()'s non-zero result or
+ * -1 -- which is also what a successful packfile() run yields.
+ */
+static int
+fsrfile_common(
+	char		*fname,
+	char		*tname,
+	char		*fsname,
+	int		fd,
+	xfs_bstat_t	*statp)
+{
+	int		error;
+	struct statvfs64 vfss;
+	struct fsxattr	fsx;
+	unsigned long	bsize;
+
+	if (vflag)
+		fsrprintf("%s\n", fname);
+
+	if (fsync(fd) < 0) {
+		fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
+		return -1;
+	}
+
+	if (statp->bs_size == 0) {
+		if (vflag)
+			fsrprintf(_("%s: zero size, ignoring\n"), fname);
+		return(0);
+	}
+
+	/* Check if a mandatory lock is set on the file to try and
+	 * avoid blocking indefinitely on the reads later. Note that
+	 * someone could still set a mandatory lock after this check
+	 * but before all reads have completed to block fsr reads.
+	 * This change just closes the window a bit.
+	 */
+	if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
+		struct flock fl;
+
+		fl.l_type = F_RDLCK;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = (off_t)0;
+		fl.l_len = 0;
+		if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
+			if (vflag)
+				fsrprintf(_("locking check failed: %s\n"),
+					fname);
+			return(-1);
+		}
+		if (fl.l_type != F_UNLCK) {
+			/* Mandatory lock is set */
+			if (vflag)
+				fsrprintf(_("mandatory lock: %s: ignoring\n"),
+					fname);
+			return(-1);
+		}
+	}
+
+	/*
+	 * Check if there is room to copy the file; minimumfree is added on
+	 * the left so the unsigned free-byte count cannot wrap below zero.
+	 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
+	 * not the optimal I/O size as struct stat.
+	 */
+	if (statvfs64(fsname ? fsname : fname, &vfss) < 0) {
+		fsrprintf(_("unable to get fs stat on %s: %s\n"),
+			fname, strerror(errno));
+		return -1;
+	}
+	bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
+	if (statp->bs_blksize * statp->bs_blocks + minimumfree >
+	    vfss.f_bfree * bsize) {
+		fsrprintf(_("insufficient freespace for: %s: "
+			    "size=%lld: ignoring\n"), fname,
+			    statp->bs_blksize * statp->bs_blocks);
+		return 1;
+	}
+
+	if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
+		fsrprintf(_("failed to get inode attrs: %s\n"), fname);
+		return(-1);
+	}
+	if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) {
+		if (vflag)
+			fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
+		return(0);
+	}
+	if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) {
+		if (vflag)
+			fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
+			    fname);
+		return(0);
+	}
+	if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
+		if (xfs_getrt(fd, &vfss) < 0) {
+			fsrprintf(_("cannot get realtime geometry for: %s\n"),
+				fname);
+			return(-1);
+		}
+		if (statp->bs_size + minimumfree > vfss.f_bfree * bsize) {
+			fsrprintf(_("low on realtime free space: %s: "
+				"ignoring file\n"), fname);
+			return(-1);
+		}
+	}
+
+	if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
+		fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
+		return -1;
+	}
+
+	/*
+	 * Previously the code forked here, & the child changed it's uid to
+	 * that of the file's owner and then called packfile(), to keep
+	 * quota counts correct.  (defragged files could use fewer blocks).
+	 *
+	 * Instead, just fchown() the temp file to the uid,gid of the
+	 * file we're defragging, in packfile().
+	 */
+
+	if ((error = packfile(fname, tname, fd, statp, &fsx)))
+		return error;
+	return -1; /* no error -- NOTE(review): callers see -1 on success */
+}
+
+
+/*
+ * Do the defragmentation of a single file.  We already are pretty
+ * sure we can and want to defragment the file.  Create the tmp file,
+ * copy the data (maintaining holes) and call the kernel extent swap
+ * routine.  Returns 0 when the extents were swapped, 1 when nothing
+ * would be gained (no change/no error) and -1 on failure.
+ */
+static int
+packfile(char *fname, char *tname, int fd,
+	 xfs_bstat_t *statp, struct fsxattr *fsxp)
+{
+	int 		tfd;
+	int		srval;
+	int		nextents, extent, cur_nextents, new_nextents;
+	unsigned	blksz_dio;
+	unsigned	dio_min;
+	struct dioattr	dio;
+	static xfs_swapext_t   sx;
+	struct xfs_flock64  space;
+	off64_t 	cnt, pos;
+	void 		*fbuf;
+	int 		ct, wc, wc_b4;
+	char		ffname[SMBUFSZ];
+	int		ffd = -1;
+
+	/*
+	 * Work out the extent map - nextents will be set to the
+	 * minimum number of extents needed for the file (taking
+	 * into account holes), cur_nextents is the current number
+	 * of extents.
+	 */
+	nextents = read_fd_bmap(fd, statp, &cur_nextents);
+	if (cur_nextents == 1 || cur_nextents <= nextents) {
+		if (vflag)
+			fsrprintf(_("%s already fully defragmented.\n"), fname);
+		return 1; /* indicates no change/no error */
+	}
+
+	if (dflag)
+		fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
+		          fname, cur_nextents, (cur_nextents - nextents),
+		          tname);
+
+	if ((tfd = open(tname, openopts, 0666)) < 0) {
+		if (vflag)
+			fsrprintf(_("could not open tmp file: %s: %s\n"),
+				   tname, strerror(errno));
+		return -1;
+	}
+	unlink(tname);
+
+	/* Setup extended attributes */
+	if (statp->bs_xflags & XFS_XFLAG_HASATTR) {
+		if (fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE) != 0) {
+			fsrprintf(_("could not set ATTR on tmp: %s:\n"), tname);
+			close(tfd);
+			return -1;
+		}
+		if (dflag)
+			fsrprintf(_("%s set temp attr\n"), tname);
+	}
+
+	/* Setup extended inode flags, project identifier, etc */
+	if (fsxp->fsx_xflags || fsxp->fsx_projid) {
+		if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) {
+			fsrprintf(_("could not set inode attrs on tmp: %s\n"),
+				tname);
+			close(tfd);
+			return -1;
+		}
+	}
+
+	if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
+		fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
+		close(tfd);
+		return -1;
+	}
+
+	dio_min = dio.d_miniosz;
+	if (statp->bs_size <= dio_min) {
+		blksz_dio = dio_min;
+	} else {
+		blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
+		if (argv_blksz_dio != 0)
+			blksz_dio = min(argv_blksz_dio, blksz_dio);
+		blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
+	}
+	if (dflag) {
+		fsrprintf(_("DEBUG: "
+			"fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
+			statp->bs_size, blksz_dio, dio.d_miniosz,
+			dio.d_maxiosz, pagesize);
+	}
+
+	if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
+		fsrprintf(_("could not allocate buf: %s\n"), tname);
+		close(tfd);
+		return -1;
+	}
+
+	if (nfrags) {
+		/* Create new tmp file in same AG as first */
+		snprintf(ffname, sizeof(ffname), "%s.frag", tname);
+
+		/* Open the new file for sync writes */
+		if ((ffd = open(ffname, openopts, 0666)) < 0) {
+			fsrprintf(_("could not open fragfile: %s : %s\n"),
+				   ffname, strerror(errno));
+			close(tfd);
+			free(fbuf);
+			return -1;
+		}
+		unlink(ffname);
+	}
+
+	/* Loop through block map allocating new extents */
+	for (extent = 0; extent < nextents; extent++) {
+		pos = outmap[extent].bmv_offset;
+		if (outmap[extent].bmv_block == -1) {
+			space.l_whence = SEEK_SET;
+			space.l_start = pos;
+			space.l_len = outmap[extent].bmv_length;
+			if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
+				fsrprintf(_("could not trunc tmp %s\n"),
+					   tname);
+			}
+			lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+			continue;
+		} else if (outmap[extent].bmv_length == 0) {
+			/* to catch holes at the beginning of the file */
+			continue;
+		}
+		if (! nfrags) {
+			space.l_whence = SEEK_CUR;
+			space.l_start = 0;
+			space.l_len = outmap[extent].bmv_length;
+
+			if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
+				fsrprintf(_("could not pre-allocate tmp space:"
+					" %s\n"), tname);
+				close(tfd);
+				free(fbuf);
+				return -1;
+			}
+			lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+		}
+	} /* end of space allocation loop */
+
+	if (lseek64(tfd, 0, SEEK_SET)) {
+		fsrprintf(_("Couldn't rewind on temporary file\n"));
+		if (ffd >= 0) close(ffd);
+		close(tfd);
+		free(fbuf);
+		return -1;
+	}
+
+	/* Check if the temporary file has fewer extents */
+	new_nextents = getnextents(tfd);
+	if (dflag)
+		fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
+	if (cur_nextents <= new_nextents) {
+		if (vflag)
+			fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
+		if (ffd >= 0) close(ffd);
+		free(fbuf);
+		close(tfd);
+		return 1; /* no change/no error */
+	}
+
+	/* Loop through block map copying the file. */
+	for (extent = 0; extent < nextents; extent++) {
+		pos = outmap[extent].bmv_offset;
+		if (outmap[extent].bmv_block == -1) {
+			lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR);
+			lseek64(fd, outmap[extent].bmv_length, SEEK_CUR);
+			continue;
+		} else if (outmap[extent].bmv_length == 0) {
+			/* to catch holes at the beginning of the file */
+			continue;
+		}
+		for (cnt = outmap[extent].bmv_length; cnt > 0;
+		     cnt -= ct, pos += ct) {
+			if (nfrags && --nfrags) {
+				ct = min(cnt, dio_min);
+			} else if (cnt % dio_min == 0) {
+				ct = min(cnt, blksz_dio);
+			} else {
+				ct = min(cnt + dio_min - (cnt % dio_min),
+					blksz_dio);
+			}
+			ct = read(fd, fbuf, ct);
+			if (ct == 0) {
+				/* EOF, stop trying to read */
+				extent = nextents;
+				break;
+			}
+			/* Ensure we do direct I/O to correct block
+			 * boundaries.
+			 */
+			if (ct % dio_min != 0) {
+				wc = ct + dio_min - (ct % dio_min);
+			} else {
+				wc = ct;
+			}
+			wc_b4 = wc;
+			if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
+				if (ct < 0)
+					fsrprintf(_("bad read of %d bytes "
+						"from %s: %s\n"), wc_b4,
+						fname, strerror(errno));
+				else if (wc < 0)
+					fsrprintf(_("bad write of %d bytes "
+						"to %s: %s\n"), wc_b4,
+						tname, strerror(errno));
+				else {
+					/*
+					 * Might be out of space
+					 *
+					 * Try to finish write
+					 */
+					int resid = ct-wc;
+
+					if ((wc = write(tfd, ((char *)fbuf)+wc,
+							resid)) == resid) {
+						/* worked on second attempt? */
+						continue;
+					}
+					else if (wc < 0) {
+						fsrprintf(_("bad write2 of %d "
+							"bytes to %s: %s\n"),
+							resid, tname,
+							strerror(errno));
+					} else {
+						fsrprintf(_("bad copy to %s\n"),
+							tname);
+					}
+				}
+				if (ffd >= 0) close(ffd);
+				free(fbuf);
+				close(tfd);
+				return -1;
+			}
+			if (nfrags) {
+				/* Do a matching write to the tmp file */
+				wc = wc_b4;
+				if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
+					fsrprintf(_("bad write of %d bytes "
+						"to %s: %s\n"),
+						wc_b4, ffname, strerror(errno));
+				}
+			}
+		}
+	}
+	ftruncate64(tfd, statp->bs_size);
+	if (ffd >= 0) close(ffd);
+	fsync(tfd);
+	free(fbuf);
+
+	sx.sx_stat     = *statp; /* struct copy */
+	sx.sx_version  = XFS_SX_VERSION;
+	sx.sx_fdtarget = fd;
+	sx.sx_fdtmp    = tfd;
+	sx.sx_offset   = 0;
+	sx.sx_length   = statp->bs_size;
+
+	/* switch to the owner's id, to keep quota in line */
+        if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
+                if (vflag)
+                        fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
+                                   tname, strerror(errno));
+		close(tfd);
+                return -1;
+        }
+
+	/* Swap the extents */
+	srval = xfs_swapext(fd, &sx);
+	if (srval < 0) {
+		if (errno == ENOTSUP) {
+			if (vflag || dflag)
+			   fsrprintf(_("%s: file type not supported\n"), fname);
+		} else if (errno == EFAULT) {
+			/* The file has changed since we started the copy */
+			if (vflag || dflag)
+			   fsrprintf(_("%s: file modified defrag aborted\n"),
+				     fname);
+		} else if (errno == EBUSY) {
+			/* Timestamp has changed or mmap'ed file */
+			if (vflag || dflag)
+			   fsrprintf(_("%s: file busy\n"), fname);
+		} else {
+			fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
+				  fname, strerror(errno));
+		}
+		close(tfd);
+		return -1;
+	}
+
+	/* Report progress */
+	if (vflag)
+		fsrprintf(_("extents before:%d after:%d %s %s\n"),
+			  cur_nextents, new_nextents,
+			  (new_nextents <= nextents ? "DONE" : "    " ),
+		          fname);
+	close(tfd);
+	return 0;
+}
+
+/* Build "<directory of fname>/.fsr<pid>" in a static buffer; NULL if too long. */
+char *
+gettmpname(char *fname)
+{
+	static char	buf[PATH_MAX+1];
+	char		sbuf[SMBUFSZ];
+	char		*ptr;
+	size_t		dirlen;
+
+	snprintf(sbuf, sizeof(sbuf), "/.fsr%d", getpid());
+
+	/* Measure the directory part first: nothing is copied before the check. */
+	ptr = strrchr(fname, '/');
+	dirlen = ptr ? (size_t)(ptr - fname) : 1;	/* 1 == strlen(".") */
+	if ((dirlen + strlen (sbuf)) > PATH_MAX) {
+		fsrprintf(_("tmp file name too long: %s\n"), fname);
+		return(NULL);
+	}
+
+	if (ptr)
+		memcpy(buf, fname, dirlen);	/* text before the last '/' */
+	else
+		buf[0] = '.';			/* no '/': current directory */
+	strcpy(buf + dirlen, sbuf);
+	return(buf);
+}
+
+/* Return the directory part of fname ("/" or "." if none) in a static buffer. */
+char *
+getparent(char *fname)
+{
+	static char	buf[PATH_MAX+1];
+	char		*ptr;
+	/* Bounded copy: an over-long fname is truncated, never overruns buf. */
+	snprintf(buf, sizeof(buf), "%s", fname);
+	ptr = strrchr(buf, '/');
+	if (ptr) {
+		if (ptr == &buf[0])
+			++ptr;		/* parent of "/x" is "/" itself */
+		*ptr = '\0';
+	} else {
+		strcpy(buf, ".");
+	}
+	return(buf);
+}
+
+/*
+ * Read the block map of fd into the global outmap[]: contiguous extents are
+ * coalesced into one entry, holes are kept, and 512 byte block units are
+ * converted to bytes; the last entry is trimmed to end at sin->bs_size.
+ * For regular files *cur_nextents receives the summed bmv_entries counts.
+ * Returns the number of outmap[] entries.  Borrowed from mv.c with minor mods.
+ */
+#define MAPSIZE	128
+#define	OUTMAP_SIZE_INCREMENT	MAPSIZE
+
+int	read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
+{
+	int		i, cnt;
+	struct getbmap	map[MAPSIZE];
+
+	/* Step cnt to the next outmap[] slot, growing the array when it is full. */
+#define	BUMP_CNT	\
+	if (++cnt >= outmap_size) { \
+		outmap_size += OUTMAP_SIZE_INCREMENT; \
+		outmap = (struct getbmap *)realloc(outmap, \
+		                           outmap_size*sizeof(*outmap)); \
+		if (outmap == NULL) { \
+			fsrprintf(_("realloc failed: %s\n"), \
+				strerror(errno)); \
+			exit(1); \
+		} \
+	}
+
+	/*	Initialize the outmap array.  It always grows - never shrinks.
+	 *	Left-over memory allocation is saved for the next files.
+	 */
+	if (outmap_size == 0) {
+		outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
+		outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
+		if (!outmap) {
+			fsrprintf(_("malloc failed: %s\n"),
+				strerror(errno));
+			exit(1);
+		}
+	}
+
+	outmap[0].bmv_block = 0;
+	outmap[0].bmv_offset = 0;
+	outmap[0].bmv_length = sin->bs_size;
+
+	/*
+	 * Not a regular file: forget holes and report the single entry set up
+	 * above (length bs_size).  *cur_nextents is left untouched on this path.
+	 */
+	if (!S_ISREG(sin->bs_mode))
+		return(1);
+
+	outmap[0].bmv_length = 0;
+
+	map[0].bmv_offset = 0;
+	map[0].bmv_block = 0;
+	map[0].bmv_entries = 0;
+	map[0].bmv_count = MAPSIZE;
+	map[0].bmv_length = -1;
+
+	cnt = 0;
+	*cur_nextents = 0;
+
+	do {
+		if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
+			fsrprintf(_("failed reading extents: "
+				"inode %llu: %s\n"),
+				(unsigned long long)sin->bs_ino,
+				strerror(errno));
+			exit(1);
+		}
+		/* Concatenate extents together and replicate holes into
+		 * the output map.
+		 */
+		*cur_nextents += map[0].bmv_entries;
+		for (i = 0; i < map[0].bmv_entries; i++) {
+			/* A hole, or the first extent after one, opens a new entry. */
+			if (map[i + 1].bmv_block == -1 ||
+			    outmap[cnt].bmv_block == -1) {
+				BUMP_CNT;
+				outmap[cnt] = map[i+1];
+			} else {
+				outmap[cnt].bmv_length += map[i + 1].bmv_length;
+			}
+		}
+	} while (map[0].bmv_entries == (MAPSIZE-1));
+	for (i = 0; i <= cnt; i++) {
+		outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
+		outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
+	}
+
+	outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
+	return(cnt+1);
+}
+
+/*
+ * Read the block map and return the number of extents.
+ * Exits the program if the XFS_IOC_GETBMAP ioctl fails.
+ */
+int
+getnextents(int fd)
+{
+	int		nextents;
+	struct getbmap	map[MAPSIZE];
+
+	/* map[0] carries the request: offset 0, length -1, MAPSIZE slots */
+	map[0].bmv_offset = 0;
+	map[0].bmv_block = 0;
+	map[0].bmv_entries = 0;
+	map[0].bmv_count = MAPSIZE;
+	map[0].bmv_length = -1;
+	nextents = 0;
+
+	do {
+		if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
+			fsrprintf(_("failed reading extents: %s\n"),
+				  strerror(errno));
+			exit(1);
+		}
+		nextents += map[0].bmv_entries;
+	} while (map[0].bmv_entries == (MAPSIZE-1));	/* every slot filled: ask again */
+	return(nextents);
+}
+
+/*
+ * Fetch the filesystem geometry for fd into *fsgeom.
+ * Returns 0 on success, -1 if xfs_fsgeometry() reports an error.
+ */
+int
+xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
+{
+	int	failed = xfs_fsgeometry(fd, fsgeom) < 0;
+
+	return failed ? -1 : 0;
+}
+
+/*
+ * Get xfs realtime space information: overwrite sfbp->f_bfree with the free
+ * realtime space.  Returns 0 on success, -1 if the filesystem has no realtime
+ * blocks or the counts cannot be read (fd is closed in the latter case).
+ */
+int
+xfs_getrt(int fd, struct statvfs64 *sfbp)
+{
+	unsigned long	blk, scale;
+	xfs_fsop_counts_t counts;
+
+	if (fsgeom.rtblocks == 0)
+		return -1;
+	if (xfs_fscounts(fd, &counts) < 0) {
+		close(fd);	/* NOTE(review): closes the caller's fd - confirm */
+		return -1;
+	}
+	blk = sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize;
+	scale = fsgeom.blocksize / blk;		/* currently this is == 1 */
+	sfbp->f_bfree = (counts.freertx * fsgeom.rtextsize) * scale;
+	return 0;
+}
+
+/* printf-style message sink: syslog when gflag is set, stdout otherwise. */
+int
+fsrprintf(const char *fmt, ...)
+{
+	static int	log_opened;	/* openlog() is called only once */
+	va_list		args;
+	va_start(args, fmt);
+	if (gflag && !log_opened) {
+		openlog("fsr", LOG_PID, LOG_USER);
+		log_opened = 1;
+	}
+	if (gflag)
+		vsyslog(LOG_INFO, fmt, args);
+	else
+		vprintf(fmt, args);
+	va_end(args);
+	return 0;
+}
+
+/*
+ * emulate getmntany: scan fp for the first entry matching the fields set in
+ * mpref (device via s->st_rdev, mount point via s->st_ino/st_dev), copy it
+ * into *mp and return 1; return 0 when no entry matches.
+ */
+static int
+getmntany(FILE *fp, struct mntent *mp, struct mntent *mpref, struct stat64 *s)
+{
+	struct mntent *ent;
+	struct stat64 sb;
+
+	for (ent = getmntent(fp); ent != NULL; ent = getmntent(fp)) {
+		/* device requested: the entry's fsname must have the same rdev */
+		if (mpref->mnt_fsname &&
+		    (stat64(ent->mnt_fsname, &sb) < 0 ||
+		     s->st_rdev != sb.st_rdev))
+			continue;
+		/* mount point requested: the entry's dir must be the same inode */
+		if (mpref->mnt_dir &&
+		    (stat64(ent->mnt_dir, &sb) < 0 ||
+		     s->st_ino != sb.st_ino || s->st_dev != sb.st_dev))
+			continue;
+		*mp = *ent;
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Initialize a directory for tmp file use.  This is used
+ * by the full filesystem defragmentation when we're walking
+ * the inodes and do not know the path for the individual
+ * files.  Multiple directories are used to spread out the
+ * tmp data around to different ag's (since file data is
+ * usually allocated to the same ag as the directory and
+ * directories allocated round robin from the same
+ * parent directory).  Creates <mnt>/.fsr plus one ag<N> subdirectory for
+ * each of fsgeom.agcount; exits on any mkdir error other than EEXIST.
+ */
+static void
+tmp_init(char *mnt)
+{
+	int 	i;
+	static char	buf[SMBUFSZ];
+	mode_t	mask;
+
+	tmp_agi = 0;
+	/* bounded: an over-long mnt is truncated instead of overrunning buf */
+	snprintf(buf, sizeof(buf), "%s/.fsr", mnt);
+
+	mask = umask(0);
+	if (mkdir(buf, 0700) < 0) {
+		if (errno == EEXIST) {
+			if (dflag)
+				fsrprintf(_("tmpdir already exists: %s\n"), buf);
+		} else {
+			fsrprintf(_("could not create tmpdir: %s: %s\n"),
+					buf, strerror(errno));
+			exit(-1);
+		}
+	}
+	for (i=0; i < fsgeom.agcount; i++) {
+		snprintf(buf, sizeof(buf), "%s/.fsr/ag%d", mnt, i);
+		if (mkdir(buf, 0777) < 0) {
+			if (errno == EEXIST) {
+				if (dflag)
+					fsrprintf(
+					_("tmpdir already exists: %s\n"), buf);
+			} else {
+				fsrprintf(_("cannot create tmpdir: %s: %s\n"),
+				       buf, strerror(errno));
+				exit(-1);
+			}
+		}
+	}
+	(void)umask(mask);
+}
+
+/* Return the next tmp file path (static buffer), cycling over the ag dirs. */
+static char *
+tmp_next(char *mnt)
+{
+	static char	buf[SMBUFSZ];
+
+	/* "/" maps to an empty prefix so the path does not start with "//" */
+	snprintf(buf, sizeof(buf), "%s/.fsr/ag%d/tmp%d",
+	        ( (strcmp(mnt, "/") == 0) ? "" : mnt), tmp_agi, getpid());
+
+	if (++tmp_agi == fsgeom.agcount)
+		tmp_agi = 0;	/* wrap around to the first directory */
+
+	return(buf);
+}
+
+/*
+ * Remove the per-ag tmp directories and then <mnt>/.fsr itself.
+ * Failures other than ENOENT are reported but do not stop the cleanup.
+ */
+static void
+tmp_close(char *mnt)
+{
+	static char	buf[SMBUFSZ];
+	int i;
+
+	/* No data is ever actually written so we can just do rmdir's */
+	for (i=0; i < fsgeom.agcount; i++) {
+		snprintf(buf, sizeof(buf), "%s/.fsr/ag%d", mnt, i);
+		if (rmdir(buf) < 0 && errno != ENOENT) {
+			fsrprintf(
+				_("could not remove tmpdir: %s: %s\n"),
+				buf, strerror(errno));
+		}
+	}
+	snprintf(buf, sizeof(buf), "%s/.fsr", mnt);
+	if (rmdir(buf) < 0 && errno != ENOENT) {
+		fsrprintf(_("could not remove tmpdir: %s: %s\n"),
+		          buf, strerror(errno));
+	}
+}
diff --git a/man/man8/xfs_fsr.8 b/man/man8/xfs_fsr.8
new file mode 100644
index 000000000..bf5e94251
--- /dev/null
+++ b/man/man8/xfs_fsr.8
@@ -0,0 +1,171 @@
+.TH xfs_fsr 8
+.SH NAME
+xfs_fsr \- filesystem reorganizer for XFS
+.SH SYNOPSIS
+.nf
+\f3xfs_fsr\f1 [\f3\-v\f1] \c
+[\f3\-t\f1 seconds] [\f3\-f\f1 leftoff] [\f3\-m\f1 mtab]
+\f3xfs_fsr\f1 [\f3\-v\f1] \c
+[xfsdev | file] ...
+.fi
+.SH DESCRIPTION
+.I xfs_fsr
+is applicable only to XFS filesystems.
+.PP
+.I xfs_fsr
+improves the organization of mounted filesystems.
+The reorganization algorithm operates on one file at a time,
+compacting or otherwise improving the layout of
+the file extents (contiguous blocks of file data).
+.PP
+The following options are accepted by
+.IR xfs_fsr .
+The
+.BR \-m ,
+.BR \-t ,
+and
+.B \-f
+options have no meaning if any filesystems
+or files are specified on the command line.
+.TP 13 
+.BI \-m " mtab"
+Use this file for the list of filesystems to reorganize.
+The default is to use
+.IR /etc/mtab .
+.TP
+.BI \-t " seconds"
+How long to reorganize.
+The default is 7200 (2 hours).
+.TP
+.BI \-f " leftoff"
+Use this file instead of
+.I /var/tmp/.fsrlast_xfs
+to read the state of where to start and as the file
+to store the state of where reorganization left off.
+.TP
+.B \-v
+Verbose.
+Print cryptic information about
+each file being reorganized.
+.PP
+When invoked with no arguments
+.I xfs_fsr
+reorganizes all regular files in all mounted filesystems.
+.I xfs_fsr
+makes many cycles over
+.I /etc/mtab
+each time making a single pass over each XFS filesystem.
+Each pass goes through and selects files
+that have the largest number of extents.  It attempts
+to defragment the top 10% of these files on each pass.
+.PP
+It runs for up to two hours after which it records the filesystem
+where it left off, so it can start there the next time.
+This information is stored in the file
+.IR /var/tmp/.fsrlast_xfs .
+If the information found here
+is somehow inconsistent or out of date
+it is ignored
+and reorganization starts at the beginning of the first
+filesystem found in
+.IR /etc/mtab .
+.PP
+.I xfs_fsr
+can be called with one or more arguments
+naming filesystems (block device name),
+and files to reorganize.
+In this mode
+.I xfs_fsr
+does not read or write
+.I /var/tmp/.fsrlast_xfs
+nor does it run for a fixed time interval.
+It makes one pass through each specified regular file and
+all regular files in each specified filesystem.
+A command line name referring to a symbolic link
+(except to a file system device),
+FIFO, or UNIX domain socket
+generates a warning message, but is otherwise ignored.
+While traversing the filesystem these types
+of files are silently skipped.
+.SH FILES
+.PD 0
+.TP 21
+/etc/mtab
+contains default list of filesystems to reorganize.
+.TP 21
+/var/tmp/.fsrlast_xfs
+records the state where reorganization left off.
+.PD
+.SH "SEE ALSO"
+xfs_io(8),
+mkfs.xfs(8),
+xfs_ncheck(8),
+xfs(5).
+.SH "NOTES"
+.I xfs_fsr
+improves the layout of extents for each file by copying the entire
+file to a temporary location and then interchanging the data extents 
+of the target and temporary files in an atomic manner.  
+This method requires that enough free disk space be available to copy 
+any given file and that the space be less fragmented than the original
+file.
+It also requires the owner of the file to have enough remaining
+filespace quota to do the copy on systems running quotas.
+.I xfs_fsr
+generates a warning message if space is not sufficient to improve
+the target file.
+.PP
+A temporary file used in improving a file given on the command line
+is created in the same directory as the target file and
+is prefixed by the string '\f3.fsr\f1'.   
+The temporary files used in improving an entire XFS device are stored
+in a directory at the root of the target device and use the same
+naming scheme.
+The temporary files are unlinked upon creation so data will not be
+readable by any other process.
+.PP
+.I xfs_fsr
+does not operate on files that are currently mapped in memory.
+A 'file busy' error can be seen for these files if the verbose
+flag (\f3\-v\f1) is set.
+.PP
+Files marked as no\-defrag will be skipped. The
+.IR xfs_io (8)
+chattr command with the f attribute can be used to set or clear 
+this flag. Files and directories created in a directory with the 
+no\-defrag flag will inherit the attribute.
+.PP
+An entry in
+.I /etc/mtab
+or the file specified using the
+.B \-m
+option must have the
+.B rw
+option specified for read and write access.
+If this option is not present, then
+.I xfs_fsr
+skips the
+filesystem described by that line.
+See the
+.IR fstab (5)
+reference page for
+more details.
+.PP
+In general we do not foresee the need to run
+.I xfs_fsr
+on system partitions such as
+.IR / ,
+.I /boot
+and
+.I /usr
+as in general these will not suffer from fragmentation.
+There are also issues with defragmenting files
+.IR lilo (8)
+uses to boot your system. It is recommended that these files
+should be flagged as no\-defrag with the
+.IR xfs_io (8)
+chattr command. Should these files be moved by
+.I xfs_fsr
+then you must rerun
+.I lilo
+before you reboot or you may have an unbootable system.