From: Christoph Hellwig Date: Sat, 24 Jan 2009 13:59:31 +0000 (+0100) Subject: move fsr over from xfsdump X-Git-Tag: v3.0.0~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c988ea914a658e68cf7cdd7d4a5ffc24d1541f9b;p=thirdparty%2Fxfsprogs-dev.git move fsr over from xfsdump There's really no reason to hide fsr in xfsdump, so move it over to xfsprogs now that we have a 3.0 release with some major shaking pending. The only code change is to replace the single attr_setf call with a fsetxattr to avoid a dependency on libattr. Signed-off-by: Christoph Hellwig Reviewed-by: Nathan Scott --- diff --git a/Makefile b/Makefile index b1a59d604..b45015096 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ LDIRT = config.log .dep config.status config.cache confdefs.h conftest* \ Logs/* built .census install.* install-dev.* *.gz LIB_SUBDIRS = include libxfs libxlog libxcmd libhandle libdisk -TOOL_SUBDIRS = copy db estimate fsck growfs io logprint mkfs quota \ +TOOL_SUBDIRS = copy db estimate fsck fsr growfs io logprint mkfs quota \ mdrestore repair rtcp m4 man doc po debian build SUBDIRS = $(LIB_SUBDIRS) $(TOOL_SUBDIRS) diff --git a/fsr/Makefile b/fsr/Makefile new file mode 100644 index 000000000..b62447ac7 --- /dev/null +++ b/fsr/Makefile @@ -0,0 +1,19 @@ +# +# Copyright (c) 2000-2001 Silicon Graphics, Inc. All Rights Reserved. +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +LTCOMMAND = xfs_fsr +CFILES = xfs_fsr.c +LLDLIBS = $(LIBHANDLE) + +default: $(LTCOMMAND) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(PKG_BIN_DIR) + $(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_BIN_DIR) +install-dev: diff --git a/fsr/xfs_fsr.c b/fsr/xfs_fsr.c new file mode 100644 index 000000000..9f2bf24c7 --- /dev/null +++ b/fsr/xfs_fsr.c @@ -0,0 +1,1600 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef XFS_XFLAG_NODEFRAG +#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */ +#endif + +#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs" + +char *progname; + +int vflag; +int gflag; +static int Mflag; +/* static int nflag; */ +int dflag = 0; +/* static int sflag; */ +int argv_blksz_dio; +extern int max_ext_size; +static int npasses = 10; +static int startpass = 0; + +struct getbmap *outmap = NULL; +int outmap_size = 0; +int RealUid; +int tmp_agi; +static __int64_t minimumfree = 2048; + +#define MNTTYPE_XFS "xfs" + +#define SMBUFSZ 1024 +#define ROOT 0 +#define NULLFD -1 +#define GRABSZ 64 +#define TARGETRANGE 10 +#define V_NONE 0 +#define V_OVERVIEW 1 +#define V_ALL 2 +#define BUFFER_SIZE (1<<16) +#define BUFFER_MAX (1<<24) +#define min(x, y) ((x) < (y) ? 
(x) : (y)) + +static time_t howlong = 7200; /* default seconds of reorganizing */ +static char *leftofffile = _PATH_FSRLAST; /* where we left off last */ +static char *mtab = MOUNTED; +static time_t endtime; +static time_t starttime; +static xfs_ino_t leftoffino = 0; +static int pagesize; + +void usage(int ret); +static int fsrfile(char *fname, xfs_ino_t ino); +static int fsrfile_common( char *fname, char *tname, char *mnt, + int fd, xfs_bstat_t *statp); +static int packfile(char *fname, char *tname, int fd, + xfs_bstat_t *statp, struct fsxattr *fsxp); +static void fsrdir(char *dirname); +static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange); +static void initallfs(char *mtab); +static void fsrallfs(int howlong, char *leftofffile); +static void fsrall_cleanup(int timeout); +static int getnextents(int); +int xfsrtextsize(int fd); +int xfs_getrt(int fd, struct statvfs64 *sfbp); +char * gettmpname(char *fname); +char * getparent(char *fname); +int fsrprintf(const char *fmt, ...); +int read_fd_bmap(int, xfs_bstat_t *, int *); +int cmp(const void *, const void *); +static void tmp_init(char *mnt); +static char * tmp_next(char *mnt); +static void tmp_close(char *mnt); +int xfs_getgeom(int , xfs_fsop_geom_v1_t * ); +static int getmntany(FILE *, struct mntent *, struct mntent *, struct stat64 *); + +xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */ + +#define NMOUNT 64 +static int numfs; + +typedef struct fsdesc { + char *dev; + char *mnt; + int npass; +} fsdesc_t; + +fsdesc_t *fs, *fsbase, *fsend; +int fsbufsize = 10; /* A starting value */ +int nfrags = 0; /* Debug option: Coerse into specific number + * of extents */ +int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT; + +int +xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom) +{ + return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom); +} + +int +xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer) +{ + xfs_fsop_bulkreq_t bulkreq; + + bulkreq.lastip = lastip; + bulkreq.icount = 1; + 
bulkreq.ubuffer = ubuffer; + bulkreq.ocount = NULL; + return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq); +} + +int +xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount, + xfs_bstat_t *ubuffer, __s32 *ocount) +{ + xfs_fsop_bulkreq_t bulkreq; + + bulkreq.lastip = lastip; + bulkreq.icount = icount; + bulkreq.ubuffer = ubuffer; + bulkreq.ocount = ocount; + return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq); +} + +int +xfs_swapext(int fd, xfs_swapext_t *sx) +{ + return ioctl(fd, XFS_IOC_SWAPEXT, sx); +} + +int +xfs_fscounts(int fd, xfs_fsop_counts_t *counts) +{ + return ioctl(fd, XFS_IOC_FSCOUNTS, counts); +} + +void +aborter(int unused) +{ + fsrall_cleanup(1); + exit(1); +} + +int +main(int argc, char **argv) +{ + struct stat64 sb, sb2; + char *argname; + char *cp; + int c; + struct mntent mntpref; + register struct mntent *mntp; + struct mntent ment; + register FILE *mtabp; + + setlinebuf(stdout); + progname = basename(argv[0]); + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + + gflag = ! 
isatty(0); + + while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1 ) + switch (c) { + case 'M': + Mflag = 1; + break; + case 'g': + gflag = 1; + break; + case 'n': + /* nflag = 1; */ + break; + case 'v': + ++vflag; + break; + case 'd': + dflag = 1; + break; + case 's': /* frag stats only */ + /* sflag = 1; */ + fprintf(stderr, + _("%s: Stats not yet supported for XFS\n"), + progname); + usage(1); + break; + case 't': + howlong = atoi(optarg); + break; + case 'f': + leftofffile = optarg; + break; + case 'm': + mtab = optarg; + break; + case 'b': + argv_blksz_dio = atoi(optarg); + break; + case 'p': + npasses = atoi(optarg); + break; + case 'C': + /* Testing opt: coerses frag count in result */ + if (getenv("FSRXFSTEST") != NULL) { + nfrags = atoi(optarg); + openopts |= O_SYNC; + } + break; + case 'V': + printf(_("%s version %s\n"), progname, VERSION); + exit(0); + default: + usage(1); + } + if (vflag) + setbuf(stdout, NULL); + + starttime = time(0); + + /* Save the caller's real uid */ + RealUid = getuid(); + + pagesize = getpagesize(); + + if (optind < argc) { + for (; optind < argc; optind++) { + argname = argv[optind]; + mntp = NULL; + if (lstat64(argname, &sb) < 0) { + fprintf(stderr, + _("%s: could not stat: %s: %s\n"), + progname, argname, strerror(errno)); + continue; + } + if (S_ISLNK(sb.st_mode) && stat64(argname, &sb2) == 0 && + (S_ISBLK(sb2.st_mode) || S_ISCHR(sb2.st_mode))) + sb = sb2; + if (S_ISBLK(sb.st_mode) || (S_ISDIR(sb.st_mode))) { + if ((mtabp = setmntent(mtab, "r")) == NULL) { + fprintf(stderr, + _("%s: cannot read %s\n"), + progname, mtab); + exit(1); + } + bzero(&mntpref, sizeof(mntpref)); + if (S_ISDIR(sb.st_mode)) + mntpref.mnt_dir = argname; + else + mntpref.mnt_fsname = argname; + + if (getmntany(mtabp, &ment, &mntpref, &sb) && + strcmp(ment.mnt_type, MNTTYPE_XFS) == 0) { + mntp = &ment; + if (S_ISBLK(sb.st_mode)) { + cp = mntp->mnt_dir; + if (cp == NULL || + stat64(cp, &sb2) < 0) { + fprintf(stderr, _( + "%s: could not 
stat: %s: %s\n"), + progname, argname, + strerror(errno)); + continue; + } + sb = sb2; + argname = cp; + } + } + } + if (mntp != NULL) { + fsrfs(mntp->mnt_dir, 0, 100); + } else if (S_ISCHR(sb.st_mode)) { + fprintf(stderr, _( + "%s: char special not supported: %s\n"), + progname, argname); + exit(1); + } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) { + if (!platform_test_xfs_path(argname)) { + fprintf(stderr, _( + "%s: cannot defragment: %s: Not XFS\n"), + progname, argname); + continue; + } + if (S_ISDIR(sb.st_mode)) + fsrdir(argname); + else + fsrfile(argname, sb.st_ino); + } else { + printf( + _("%s: not fsys dev, dir, or reg file, ignoring\n"), + argname); + } + } + } else { + initallfs(mtab); + fsrallfs(howlong, leftofffile); + } + return 0; +} + +void +usage(int ret) +{ + fprintf(stderr, _( +"Usage: %s [-d] [-v] [-n] [-s] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n" +" %s [-d] [-v] [-n] [-s] [-g] xfsdev | dir | file ...\n\n" +"Options:\n" +" -n Do nothing, only interesting with -v. 
Not\n" +" effective with in mtab mode.\n" +" -s Print statistics only.\n" +" -g Print to syslog (default if stdout not a tty).\n" +" -t time How long to run in seconds.\n" +" -p passes Number of passes before terminating global re-org.\n" +" -f leftoff Use this instead of %s.\n" +" -m mtab Use something other than /etc/mtab.\n" +" -d Debug, print even more.\n" +" -v Verbose, more -v's more verbose.\n" + ), progname, progname, _PATH_FSRLAST); + exit(ret); +} + +/* + * initallfs -- read the mount table and set up an internal form + */ +static void +initallfs(char *mtab) +{ + FILE *fp; + struct mntent *mp; + int mi; + char *cp; + struct stat64 sb; + + fp = setmntent(mtab, "r"); + if (fp == NULL) { + fsrprintf(_("could not open mtab file: %s\n"), mtab); + exit(1); + } + + /* malloc a number of descriptors, increased later if needed */ + if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) { + fsrprintf(_("out of memory: %s\n"), strerror(errno)); + exit(1); + } + fsend = (fsbase + fsbufsize - 1); + + /* find all rw xfs file systems */ + mi = 0; + fs = fsbase; + while ((mp = getmntent(fp))) { + int rw = 0; + + if (strcmp(mp->mnt_type, MNTTYPE_XFS ) != 0 || + stat64(mp->mnt_fsname, &sb) == -1 || + !S_ISBLK(sb.st_mode)) + continue; + + cp = strtok(mp->mnt_opts,","); + do { + if (strcmp("rw", cp) == 0) + rw++; + } while ((cp = strtok(NULL, ",")) != NULL); + if (rw == 0) { + if (dflag) + fsrprintf(_("Skipping %s: not mounted rw\n"), + mp->mnt_fsname); + continue; + } + + if (mi == fsbufsize) { + fsbufsize += NMOUNT; + if ((fsbase = (fsdesc_t *)realloc((char *)fsbase, + fsbufsize * sizeof(fsdesc_t))) == NULL) { + fsrprintf(_("out of memory: %s\n"), + strerror(errno)); + exit(1); + } + if (!fsbase) { + fsrprintf(_("out of memory on realloc: %s\n"), + strerror(errno)); + exit(1); + } + fs = (fsbase + mi); /* Needed ? 
*/ + } + + fs->dev = strdup(mp->mnt_fsname); + fs->mnt = strdup(mp->mnt_dir); + + if (fs->mnt == NULL || fs->mnt == NULL) { + fsrprintf(_("strdup(%s) failed\n"), mp->mnt_fsname); + exit(1); + } + mi++; + fs++; + } + numfs = mi; + fsend = (fsbase + numfs); + endmntent(fp); + if (numfs == 0) { + fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab); + exit(0); + } + if (vflag || dflag) { + fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"), + numfs); + if (dflag) + for (fs = fsbase; fs < fsend; fs++) + fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt); + } +} + +static void +fsrallfs(int howlong, char *leftofffile) +{ + int fd; + int error; + int found = 0; + char *fsname; + char buf[SMBUFSZ]; + int mdonly = Mflag; + char *ptr; + xfs_ino_t startino = 0; + fsdesc_t *fsp; + struct stat64 sb, sb2; + + fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile); + + endtime = starttime + howlong; + fs = fsbase; + + /* where'd we leave off last time? */ + if (lstat64(leftofffile, &sb) == 0) { + if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) { + fsrprintf(_("%s: open failed\n"), leftofffile); + } + else if ( fstat64(fd, &sb2) == 0) { + /* + * Verify that lstat & fstat point to the + * same regular file (no links/no quick spoofs) + */ + if ( (sb.st_dev != sb2.st_dev) || + (sb.st_ino != sb2.st_ino) || + ((sb.st_mode & S_IFMT) != S_IFREG) || + ((sb2.st_mode & S_IFMT) != S_IFREG) || + (sb2.st_uid != ROOT) || + (sb2.st_nlink != 1) + ) + { + fsrprintf(_("Can't use %s: mode=0%o own=%d" + " nlink=%d\n"), + leftofffile, sb.st_mode, + sb.st_uid, sb.st_nlink); + close(fd); + fd = NULLFD; + } + } + else { + close(fd); + fd = NULLFD; + } + } + else { + fd = NULLFD; + } + + if (fd != NULLFD) { + if (read(fd, buf, SMBUFSZ) == -1) { + fs = fsbase; + fsrprintf(_("could not read %s, starting with %s\n"), + leftofffile, *fs->dev); + } else { + for (fs = fsbase; fs < fsend; fs++) { + fsname = fs->dev; + if ((strncmp(buf,fsname,strlen(fsname)) == 0) + && 
buf[strlen(fsname)] == ' ') { + found = 1; + break; + } + } + if (! found) + fs = fsbase; + + ptr = strchr(buf, ' '); + if (ptr) { + startpass = atoi(++ptr); + ptr = strchr(ptr, ' '); + if (ptr) { + startino = strtoull(++ptr, NULL, 10); + } + } + if (startpass < 0) + startpass = 0; + + /* Init pass counts */ + for (fsp = fsbase; fsp < fs; fsp++) { + fsp->npass = startpass + 1; + } + for (fsp = fs; fsp <= fsend; fsp++) { + fsp->npass = startpass; + } + } + close(fd); + } + + if (vflag) { + fsrprintf(_("START: pass=%d ino=%llu %s %s\n"), + fs->npass, (unsigned long long)startino, + fs->dev, fs->mnt); + } + + signal(SIGABRT, aborter); + signal(SIGHUP, aborter); + signal(SIGINT, aborter); + signal(SIGQUIT, aborter); + signal(SIGTERM, aborter); + + /* reorg for 'howlong' -- checked in 'fsrfs' */ + while (endtime > time(0)) { + pid_t pid; + if (fs == fsend) + fs = fsbase; + if (fs->npass == npasses) { + fsrprintf(_("Completed all %d passes\n"), npasses); + break; + } + if (npasses > 1 && !fs->npass) + Mflag = 1; + else + Mflag = mdonly; + pid = fork(); + switch(pid) { + case -1: + fsrprintf(_("couldn't fork sub process:")); + exit(1); + break; + case 0: + error = fsrfs(fs->mnt, startino, TARGETRANGE); + exit (error); + break; + default: + wait(&error); + close(fd); + if (WIFEXITED(error) && WEXITSTATUS(error) == 1) { + /* child timed out & did fsrall_cleanup */ + exit(0); + } + break; + } + startino = 0; /* reset after the first time through */ + fs->npass++; + fs++; + } + fsrall_cleanup(endtime <= time(0)); +} + +/* + * fsrall_cleanup -- close files, print next starting location, etc. 
+ */ +static void +fsrall_cleanup(int timeout) +{ + int fd; + int ret; + char buf[SMBUFSZ]; + + /* record where we left off */ + unlink(leftofffile); + fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644); + if (fd == -1) + fsrprintf(_("open(%s) failed: %s\n"), + leftofffile, strerror(errno)); + else { + if (timeout) { + ret = sprintf(buf, "%s %d %llu\n", fs->dev, + fs->npass, (unsigned long long)leftoffino); + if (write(fd, buf, ret) < strlen(buf)) + fsrprintf(_("write(%s) failed: %s\n"), + leftofffile, strerror(errno)); + close(fd); + } + } + + if (timeout) + fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"), + progname, startpass, fs->npass, + time(0) - endtime + howlong); +} + +/* + * fsrfs -- reorganize a file system + */ +static int +fsrfs(char *mntdir, xfs_ino_t startino, int targetrange) +{ + + int fsfd, fd; + int count = 0; + int ret; + __s32 buflenout; + xfs_bstat_t buf[GRABSZ]; + char fname[64]; + char *tname; + jdm_fshandle_t *fshandlep; + xfs_ino_t lastino = startino; + + fsrprintf(_("%s start inode=%llu\n"), mntdir, + (unsigned long long)startino); + + fshandlep = jdm_getfshandle( mntdir ); + if ( ! 
fshandlep ) { + fsrprintf(_("unable to get handle: %s: %s\n"), + mntdir, strerror( errno )); + return -1; + } + + if ((fsfd = open(mntdir, O_RDONLY)) < 0) { + fsrprintf(_("unable to open: %s: %s\n"), + mntdir, strerror( errno )); + return -1; + } + + if (xfs_getgeom(fsfd, &fsgeom) < 0 ) { + fsrprintf(_("Skipping %s: could not get XFS geometry\n"), + mntdir); + return -1; + } + + tmp_init(mntdir); + + while ((ret = xfs_bulkstat(fsfd, + &lastino, GRABSZ, &buf[0], &buflenout) == 0)) { + xfs_bstat_t *p; + xfs_bstat_t *endp; + + if (buflenout == 0) + goto out0; + + /* Each loop through, defrag targetrange percent of the files */ + count = (buflenout * targetrange) / 100; + + qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp); + + for (p = buf, endp = (buf + buflenout); p < endp ; p++) { + /* Do some obvious checks now */ + if (((p->bs_mode & S_IFMT) != S_IFREG) || + (p->bs_extents < 2)) + continue; + + if ((fd = jdm_open(fshandlep, p, O_RDWR)) < 0) { + /* This probably means the file was + * removed while in progress of handling + * it. Just quietly ignore this file. + */ + if (dflag) + fsrprintf(_("could not open: " + "inode %llu\n"), p->bs_ino); + continue; + } + + /* Don't know the pathname, so make up something */ + sprintf(fname, "ino=%lld", (long long)p->bs_ino); + + /* Get a tmp file name */ + tname = tmp_next(mntdir); + + ret = fsrfile_common(fname, tname, mntdir, fd, p); + + leftoffino = p->bs_ino; + + close(fd); + + if (ret == 0) { + if (--count <= 0) + break; + } + } + if (endtime && endtime < time(0)) { + tmp_close(mntdir); + close(fsfd); + fsrall_cleanup(1); + exit(1); + } + } + if (ret < 0) + fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno)); +out0: + tmp_close(mntdir); + close(fsfd); + return 0; +} + +/* + * To compare bstat structs for qsort. + */ +int +cmp(const void *s1, const void *s2) +{ + return( ((xfs_bstat_t *)s2)->bs_extents - + ((xfs_bstat_t *)s1)->bs_extents); + +} + +/* + * reorganize by directory hierarchy. 
+ * Stay in dev (a restriction based on structure of this program -- either + * call efs_{n,u}mount() around each file, something smarter or this) + */ +static void +fsrdir(char *dirname) +{ + fsrprintf(_("%s: Directory defragmentation not supported\n"), dirname); +} + +/* + * Sets up the defragmentation of a file based on the + * filepath. It collects the bstat information, does + * an open on the file and passes this all to fsrfile_common. + */ +static int +fsrfile(char *fname, xfs_ino_t ino) +{ + xfs_bstat_t statbuf; + jdm_fshandle_t *fshandlep; + int fd, fsfd; + int error = 0; + char *tname; + + fshandlep = jdm_getfshandle(getparent (fname) ); + if (! fshandlep) { + fsrprintf(_("unable to construct sys handle for %s: %s\n"), + fname, strerror(errno)); + return -1; + } + + /* + * Need to open something on the same filesystem as the + * file. Open the parent. + */ + fsfd = open(getparent(fname), O_RDONLY); + if (fsfd < 0) { + fsrprintf(_("unable to open sys handle for %s: %s\n"), + fname, strerror(errno)); + return -1; + } + + if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) { + fsrprintf(_("unable to get bstat on %s: %s\n"), + fname, strerror(errno)); + close(fsfd); + return -1; + } + + fd = jdm_open( fshandlep, &statbuf, O_RDWR); + if (fd < 0) { + fsrprintf(_("unable to open handle %s: %s\n"), + fname, strerror(errno)); + close(fsfd); + return -1; + } + + /* Get the fs geometry */ + if (xfs_getgeom(fsfd, &fsgeom) < 0 ) { + fsrprintf(_("Unable to get geom on fs for: %s\n"), fname); + close(fsfd); + return -1; + } + + close(fsfd); + + tname = gettmpname(fname); + + if (tname) + error = fsrfile_common(fname, tname, NULL, fd, &statbuf); + + close(fd); + + return error; +} + + +/* + * This is the common defrag code for either a full fs + * defragmentation or a single file. Check as much as + * possible with the file, fork a process to setuid to the + * target file owner's uid and defragment the file. 
+ * This is done so the new extents created in a tmp file are + * reflected in the owners' quota without having to do any + * special code in the kernel. When the existing extents + * are removed, the quotas will be correct. It's ugly but + * it saves us from doing some quota re-construction in + * the extent swap. The price is that the defragmentation + * will fail if the owner of the target file is already at + * their quota limit. + */ +static int +fsrfile_common( + char *fname, + char *tname, + char *fsname, + int fd, + xfs_bstat_t *statp) +{ + int error; + struct statvfs64 vfss; + struct fsxattr fsx; + unsigned long bsize; + + if (vflag) + fsrprintf("%s\n", fname); + + if (fsync(fd) < 0) { + fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno)); + return -1; + } + + if (statp->bs_size == 0) { + if (vflag) + fsrprintf(_("%s: zero size, ignoring\n"), fname); + return(0); + } + + /* Check if a mandatory lock is set on the file to try and + * avoid blocking indefinitely on the reads later. Note that + * someone could still set a mandatory lock after this check + * but before all reads have completed to block fsr reads. + * This change just closes the window a bit. + */ + if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) { + struct flock fl; + + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = (off_t)0; + fl.l_len = 0; + if ((fcntl(fd, F_GETLK, &fl)) < 0 ) { + if (vflag) + fsrprintf(_("locking check failed: %s\n"), + fname); + return(-1); + } + if (fl.l_type != F_UNLCK) { + /* Mandatory lock is set */ + if (vflag) + fsrprintf(_("mandatory lock: %s: ignoring\n"), + fname); + return(-1); + } + } + + /* + * Check if there is room to copy the file. + * + * Note that xfs_bstat.bs_blksize returns the filesystem blocksize, + * not the optimal I/O size as struct stat. + */ + if (statvfs64(fsname ? 
fsname : fname, &vfss) < 0) { + fsrprintf(_("unable to get fs stat on %s: %s\n"), + fname, strerror(errno)); + return -1; + } + bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize; + if (statp->bs_blksize * statp->bs_blocks > + vfss.f_bfree * bsize - minimumfree) { + fsrprintf(_("insufficient freespace for: %s: " + "size=%lld: ignoring\n"), fname, + statp->bs_blksize * statp->bs_blocks); + return 1; + } + + if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) { + fsrprintf(_("failed to get inode attrs: %s\n"), fname); + return(-1); + } + if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) { + if (vflag) + fsrprintf(_("%s: immutable/append, ignoring\n"), fname); + return(0); + } + if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) { + if (vflag) + fsrprintf(_("%s: marked as don't defrag, ignoring\n"), + fname); + return(0); + } + if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) { + if (xfs_getrt(fd, &vfss) < 0) { + fsrprintf(_("cannot get realtime geometry for: %s\n"), + fname); + return(-1); + } + if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) { + fsrprintf(_("low on realtime free space: %s: " + "ignoring file\n"), fname); + return(-1); + } + } + + if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) { + fsrprintf(_("cannot open: %s: Permission denied\n"), fname); + return -1; + } + + /* + * Previously the code forked here, & the child changed it's uid to + * that of the file's owner and then called packfile(), to keep + * quota counts correct. (defragged files could use fewer blocks). + * + * Instead, just fchown() the temp file to the uid,gid of the + * file we're defragging, in packfile(). + */ + + if ((error = packfile(fname, tname, fd, statp, &fsx))) + return error; + return -1; /* no error */ +} + + +/* + * Do the defragmentation of a single file. + * We already are pretty sure we can and want to + * defragment the file. Create the tmp file, copy + * the data (maintaining holes) and call the kernel + * extent swap routinte. 
+ */ +static int +packfile(char *fname, char *tname, int fd, + xfs_bstat_t *statp, struct fsxattr *fsxp) +{ + int tfd; + int srval; + int nextents, extent, cur_nextents, new_nextents; + unsigned blksz_dio; + unsigned dio_min; + struct dioattr dio; + static xfs_swapext_t sx; + struct xfs_flock64 space; + off64_t cnt, pos; + void *fbuf; + int ct, wc, wc_b4; + char ffname[SMBUFSZ]; + int ffd = -1; + + /* + * Work out the extent map - nextents will be set to the + * minimum number of extents needed for the file (taking + * into account holes), cur_nextents is the current number + * of extents. + */ + nextents = read_fd_bmap(fd, statp, &cur_nextents); + + if (cur_nextents == 1 || cur_nextents <= nextents) { + if (vflag) + fsrprintf(_("%s already fully defragmented.\n"), fname); + return 1; /* indicates no change/no error */ + } + + if (dflag) + fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"), + fname, cur_nextents, (cur_nextents - nextents), + tname); + + if ((tfd = open(tname, openopts, 0666)) < 0) { + if (vflag) + fsrprintf(_("could not open tmp file: %s: %s\n"), + tname, strerror(errno)); + return -1; + } + unlink(tname); + + /* Setup extended attributes */ + if (statp->bs_xflags & XFS_XFLAG_HASATTR) { + if (fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE) != 0) { + fsrprintf(_("could not set ATTR on tmp: %s:\n"), tname); + close(tfd); + return -1; + } + if (dflag) + fsrprintf(_("%s set temp attr\n"), tname); + } + + /* Setup extended inode flags, project identifier, etc */ + if (fsxp->fsx_xflags || fsxp->fsx_projid) { + if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) { + fsrprintf(_("could not set inode attrs on tmp: %s\n"), + tname); + close(tfd); + return -1; + } + } + + if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) { + fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname); + close(tfd); + return -1; + } + + dio_min = dio.d_miniosz; + if (statp->bs_size <= dio_min) { + blksz_dio = dio_min; + } else { + blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize); 
+ if (argv_blksz_dio != 0) + blksz_dio = min(argv_blksz_dio, blksz_dio); + blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min; + } + + if (dflag) { + fsrprintf(_("DEBUG: " + "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"), + statp->bs_size, blksz_dio, dio.d_miniosz, + dio.d_maxiosz, pagesize); + } + + if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) { + fsrprintf(_("could not allocate buf: %s\n"), tname); + close(tfd); + return -1; + } + + if (nfrags) { + /* Create new tmp file in same AG as first */ + sprintf(ffname, "%s.frag", tname); + + /* Open the new file for sync writes */ + if ((ffd = open(ffname, openopts, 0666)) < 0) { + fsrprintf(_("could not open fragfile: %s : %s\n"), + ffname, strerror(errno)); + close(tfd); + free(fbuf); + return -1; + } + unlink(ffname); + } + + /* Loop through block map allocating new extents */ + for (extent = 0; extent < nextents; extent++) { + pos = outmap[extent].bmv_offset; + if (outmap[extent].bmv_block == -1) { + space.l_whence = SEEK_SET; + space.l_start = pos; + space.l_len = outmap[extent].bmv_length; + if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) { + fsrprintf(_("could not trunc tmp %s\n"), + tname); + } + lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR); + continue; + } else if (outmap[extent].bmv_length == 0) { + /* to catch holes at the beginning of the file */ + continue; + } + if (! 
nfrags) { + space.l_whence = SEEK_CUR; + space.l_start = 0; + space.l_len = outmap[extent].bmv_length; + + if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) { + fsrprintf(_("could not pre-allocate tmp space:" + " %s\n"), tname); + close(tfd); + free(fbuf); + return -1; + } + lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR); + } + } /* end of space allocation loop */ + + if (lseek64(tfd, 0, SEEK_SET)) { + fsrprintf(_("Couldn't rewind on temporary file\n")); + close(tfd); + free(fbuf); + return -1; + } + + /* Check if the temporary file has fewer extents */ + new_nextents = getnextents(tfd); + if (dflag) + fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents); + if (cur_nextents <= new_nextents) { + if (vflag) + fsrprintf(_("No improvement will be made (skipping): %s\n"), fname); + free(fbuf); + close(tfd); + return 1; /* no change/no error */ + } + + /* Loop through block map copying the file. */ + for (extent = 0; extent < nextents; extent++) { + pos = outmap[extent].bmv_offset; + if (outmap[extent].bmv_block == -1) { + lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR); + lseek64(fd, outmap[extent].bmv_length, SEEK_CUR); + continue; + } else if (outmap[extent].bmv_length == 0) { + /* to catch holes at the beginning of the file */ + continue; + } + for (cnt = outmap[extent].bmv_length; cnt > 0; + cnt -= ct, pos += ct) { + if (nfrags && --nfrags) { + ct = min(cnt, dio_min); + } else if (cnt % dio_min == 0) { + ct = min(cnt, blksz_dio); + } else { + ct = min(cnt + dio_min - (cnt % dio_min), + blksz_dio); + } + ct = read(fd, fbuf, ct); + if (ct == 0) { + /* EOF, stop trying to read */ + extent = nextents; + break; + } + /* Ensure we do direct I/O to correct block + * boundaries. 
+ */ + if (ct % dio_min != 0) { + wc = ct + dio_min - (ct % dio_min); + } else { + wc = ct; + } + wc_b4 = wc; + if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) { + if (ct < 0) + fsrprintf(_("bad read of %d bytes " + "from %s: %s\n"), wc_b4, + fname, strerror(errno)); + else if (wc < 0) + fsrprintf(_("bad write of %d bytes " + "to %s: %s\n"), wc_b4, + tname, strerror(errno)); + else { + /* + * Might be out of space + * + * Try to finish write + */ + int resid = ct-wc; + + if ((wc = write(tfd, ((char *)fbuf)+wc, + resid)) == resid) { + /* worked on second attempt? */ + continue; + } + else if (wc < 0) { + fsrprintf(_("bad write2 of %d " + "bytes to %s: %s\n"), + resid, tname, + strerror(errno)); + } else { + fsrprintf(_("bad copy to %s\n"), + tname); + } + } + free(fbuf); + close(tfd); + return -1; + } + if (nfrags) { + /* Do a matching write to the tmp file */ + wc = wc_b4; + if (((wc = write(ffd, fbuf, wc)) != wc_b4)) { + fsrprintf(_("bad write of %d bytes " + "to %s: %s\n"), + wc_b4, ffname, strerror(errno)); + } + } + } + } + ftruncate64(tfd, statp->bs_size); + if (ffd > 0) close(ffd); + fsync(tfd); + + free(fbuf); + + sx.sx_stat = *statp; /* struct copy */ + sx.sx_version = XFS_SX_VERSION; + sx.sx_fdtarget = fd; + sx.sx_fdtmp = tfd; + sx.sx_offset = 0; + sx.sx_length = statp->bs_size; + + /* switch to the owner's id, to keep quota in line */ + if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) { + if (vflag) + fsrprintf(_("failed to fchown tmpfile %s: %s\n"), + tname, strerror(errno)); + close(tfd); + return -1; + } + + /* Swap the extents */ + srval = xfs_swapext(fd, &sx); + if (srval < 0) { + if (errno == ENOTSUP) { + if (vflag || dflag) + fsrprintf(_("%s: file type not supported\n"), fname); + } else if (errno == EFAULT) { + /* The file has changed since we started the copy */ + if (vflag || dflag) + fsrprintf(_("%s: file modified defrag aborted\n"), + fname); + } else if (errno == EBUSY) { + /* Timestamp has changed or mmap'ed file */ + if (vflag || 
dflag) + fsrprintf(_("%s: file busy\n"), fname); + } else { + fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"), + fname, strerror(errno)); + } + close(tfd); + return -1; + } + + /* Report progress */ + if (vflag) + fsrprintf(_("extents before:%d after:%d %s %s\n"), + cur_nextents, new_nextents, + (new_nextents <= nextents ? "DONE" : " " ), + fname); + close(tfd); + return 0; +} + +char * +gettmpname(char *fname) +{ + static char buf[PATH_MAX+1]; + char sbuf[SMBUFSZ]; + char *ptr; + + sprintf(sbuf, "/.fsr%d", getpid()); + + strcpy(buf, fname); + ptr = strrchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + } else { + strcpy(buf, "."); + } + + if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) { + fsrprintf(_("tmp file name too long: %s\n"), fname); + return(NULL); + } + + strcat(buf, sbuf); + + return(buf); +} + +char * +getparent(char *fname) +{ + static char buf[PATH_MAX+1]; + char *ptr; + + strcpy(buf, fname); + ptr = strrchr(buf, '/'); + if (ptr) { + if (ptr == &buf[0]) + ++ptr; + *ptr = '\0'; + } else { + strcpy(buf, "."); + } + + return(buf); +} + +/* + * Read in block map of the input file, coalesce contiguous + * extents into a single range, keep all holes. Convert from 512 byte + * blocks to bytes. + * + * This code was borrowed from mv.c with some minor mods. + */ +#define MAPSIZE 128 +#define OUTMAP_SIZE_INCREMENT MAPSIZE + +int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents) +{ + int i, cnt; + struct getbmap map[MAPSIZE]; + +#define BUMP_CNT \ + if (++cnt >= outmap_size) { \ + outmap_size += OUTMAP_SIZE_INCREMENT; \ + outmap = (struct getbmap *)realloc(outmap, \ + outmap_size*sizeof(*outmap)); \ + if (outmap == NULL) { \ + fsrprintf(_("realloc failed: %s\n"), \ + strerror(errno)); \ + exit(1); \ + } \ + } + + /* Initialize the outmap array. It always grows - never shrinks. + * Left-over memory allocation is saved for the next files. 
+ */ + if (outmap_size == 0) { + outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */ + outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap)); + if (!outmap) { + fsrprintf(_("malloc failed: %s\n"), + strerror(errno)); + exit(1); + } + } + + outmap[0].bmv_block = 0; + outmap[0].bmv_offset = 0; + outmap[0].bmv_length = sin->bs_size; + + /* + * If a non regular file is involved then forget holes + */ + + if (!S_ISREG(sin->bs_mode)) + return(1); + + outmap[0].bmv_length = 0; + + map[0].bmv_offset = 0; + map[0].bmv_block = 0; + map[0].bmv_entries = 0; + map[0].bmv_count = MAPSIZE; + map[0].bmv_length = -1; + + cnt = 0; + *cur_nextents = 0; + + do { + if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) { + fsrprintf(_("failed reading extents: inode %llu"), + (unsigned long long)sin->bs_ino); + exit(1); + } + + /* Concatenate extents together and replicate holes into + * the output map. + */ + *cur_nextents += map[0].bmv_entries; + for (i = 0; i < map[0].bmv_entries; i++) { + if (map[i + 1].bmv_block == -1) { + BUMP_CNT; + outmap[cnt] = map[i+1]; + } else if (outmap[cnt].bmv_block == -1) { + BUMP_CNT; + outmap[cnt] = map[i+1]; + } else { + outmap[cnt].bmv_length += map[i + 1].bmv_length; + } + } + } while (map[0].bmv_entries == (MAPSIZE-1)); + for (i = 0; i <= cnt; i++) { + outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset); + outmap[i].bmv_length = BBTOB(outmap[i].bmv_length); + } + + outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset; + + return(cnt+1); +} + +/* + * Read the block map and return the number of extents. 
+ */ +int +getnextents(int fd) +{ + int nextents; + struct getbmap map[MAPSIZE]; + + map[0].bmv_offset = 0; + map[0].bmv_block = 0; + map[0].bmv_entries = 0; + map[0].bmv_count = MAPSIZE; + map[0].bmv_length = -1; + + nextents = 0; + + do { + if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) { + fsrprintf(_("failed reading extents")); + exit(1); + } + + nextents += map[0].bmv_entries; + } while (map[0].bmv_entries == (MAPSIZE-1)); + + return(nextents); +} + +/* + * Get the fs geometry + */ +int +xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom) +{ + if (xfs_fsgeometry(fd, fsgeom) < 0) { + return -1; + } + return 0; +} + +/* + * Get xfs realtime space information + */ +int +xfs_getrt(int fd, struct statvfs64 *sfbp) +{ + unsigned long bsize; + unsigned long factor; + xfs_fsop_counts_t cnt; + + if (!fsgeom.rtblocks) + return -1; + + if (xfs_fscounts(fd, &cnt) < 0) { + close(fd); + return -1; + } + bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize); + factor = fsgeom.blocksize / bsize; /* currently this is == 1 */ + sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor; + return 0; +} + +int +fsrprintf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (gflag) { + static int didopenlog; + if (!didopenlog) { + openlog("fsr", LOG_PID, LOG_USER); + didopenlog = 1; + } + vsyslog(LOG_INFO, fmt, ap); + } else + vprintf(fmt, ap); + va_end(ap); + return 0; +} + +/* + * emulate getmntany + */ +static int +getmntany(FILE *fp, struct mntent *mp, struct mntent *mpref, struct stat64 *s) +{ + struct mntent *t; + struct stat64 ms; + + while ((t = getmntent(fp))) { + if (mpref->mnt_fsname) { /* device */ + if (stat64(t->mnt_fsname, &ms) < 0) + continue; + if (s->st_rdev != ms.st_rdev) + continue; + } + if (mpref->mnt_dir) { /* mount point */ + if (stat64(t->mnt_dir, &ms) < 0) + continue; + if (s->st_ino != ms.st_ino || s->st_dev != ms.st_dev) + continue; + } + *mp = *t; + break; + } + return (t != NULL); +} + + +/* + * Initialize a directory for tmp file use. 
This is used + * by the full filesystem defragmentation when we're walking + * the inodes and do not know the path for the individual + * files. Multiple directories are used to spread out the + * tmp data around to different ag's (since file data is + * usually allocated to the same ag as the directory and + * directories allocated round robin from the same + * parent directory). + */ +static void +tmp_init(char *mnt) +{ + int i; + static char buf[SMBUFSZ]; + mode_t mask; + + tmp_agi = 0; + sprintf(buf, "%s/.fsr", mnt); + + mask = umask(0); + if (mkdir(buf, 0700) < 0) { + if (errno == EEXIST) { + if (dflag) + fsrprintf(_("tmpdir already exists: %s\n"), + buf); + } else { + fsrprintf(_("could not create tmpdir: %s: %s\n"), + buf, strerror(errno)); + exit(-1); + } + } + for (i=0; i < fsgeom.agcount; i++) { + sprintf(buf, "%s/.fsr/ag%d", mnt, i); + if (mkdir(buf, 0777) < 0) { + if (errno == EEXIST) { + if (dflag) + fsrprintf( + _("tmpdir already exists: %s\n"), buf); + } else { + fsrprintf(_("cannot create tmpdir: %s: %s\n"), + buf, strerror(errno)); + exit(-1); + } + } + } + (void)umask(mask); + return; +} + +static char * +tmp_next(char *mnt) +{ + static char buf[SMBUFSZ]; + + sprintf(buf, "%s/.fsr/ag%d/tmp%d", + ( (strcmp(mnt, "/") == 0) ? 
"" : mnt), + tmp_agi, + getpid()); + + if (++tmp_agi == fsgeom.agcount) + tmp_agi = 0; + + return(buf); +} + +static void +tmp_close(char *mnt) +{ + static char buf[SMBUFSZ]; + int i; + + /* No data is ever actually written so we can just do rmdir's */ + for (i=0; i < fsgeom.agcount; i++) { + sprintf(buf, "%s/.fsr/ag%d", mnt, i); + if (rmdir(buf) < 0) { + if (errno != ENOENT) { + fsrprintf( + _("could not remove tmpdir: %s: %s\n"), + buf, strerror(errno)); + } + } + } + sprintf(buf, "%s/.fsr", mnt); + if (rmdir(buf) < 0) { + if (errno != ENOENT) { + fsrprintf(_("could not remove tmpdir: %s: %s\n"), + buf, strerror(errno)); + } + } +} diff --git a/man/man8/xfs_fsr.8 b/man/man8/xfs_fsr.8 new file mode 100644 index 000000000..bf5e94251 --- /dev/null +++ b/man/man8/xfs_fsr.8 @@ -0,0 +1,171 @@ +.TH xfs_fsr 8 +.SH NAME +xfs_fsr \- filesystem reorganizer for XFS +.SH SYNOPSIS +.nf +\f3xfs_fsr\f1 [\f3\-v\f1] \c +[\f3\-t\f1 seconds] [\f3\-f\f1 leftoff] [\f3\-m\f1 mtab] +\f3xfs_fsr\f1 [\f3\-v\f1] \c +[xfsdev | file] ... +.fi +.SH DESCRIPTION +.I xfs_fsr +is applicable only to XFS filesystems. +.PP +.I xfs_fsr +improves the organization of mounted filesystems. +The reorganization algorithm operates on one file at a time, +compacting or otherwise improving the layout of +the file extents (contiguous blocks of file data). +.PP +The following options are accepted by +.IR xfs_fsr . +The +.BR \-m , +.BR \-t , +and +.B \-f +options have no meaning if any filesystems +or files are specified on the command line. +.TP 13 +.BI \-m " mtab" +Use this file for the list of filesystems to reorganize. +The default is to use +.IR /etc/mtab . +.TP +.BI \-t " seconds" +How long to reorganize. +The default is 7200 (2 hours). +.TP +.BI \-f " leftoff" +Use this file instead of +.I /var/tmp/.fsrlast +to read the state of where to start and as the file +to store the state of where reorganization left off. +.TP +.B \-v +Verbose. +Print cryptic information about +each file being reorganized. 
+.PP +When invoked with no arguments +.I xfs_fsr +reorganizes all regular files in all mounted filesystems. +.I xfs_fsr +makes many cycles over +.I /etc/mtab +each time making a single pass over each XFS filesystem. +Each pass goes through and selects files +that have the largest number of extents. It attempts +to defragment the top 10% of these files on each pass. +.PP +It runs for up to two hours after which it records the filesystem +where it left off, so it can start there the next time. +This information is stored in the file +.IR /var/tmp/.fsrlast_xfs . +If the information found here +is somehow inconsistent or out of date +it is ignored +and reorganization starts at the beginning of the first +filesystem found in +.IR /etc/mtab . +.PP +.I xfs_fsr +can be called with one or more arguments +naming filesystems (block device name), +and files to reorganize. +In this mode +.I xfs_fsr +does not read or write +.I /var/tmp/.fsrlast_xfs +nor does it run for a fixed time interval. +It makes one pass through each specified regular file and +all regular files in each specified filesystem. +A command line name referring to a symbolic link +(except to a file system device), +FIFO, or UNIX domain socket +generates a warning message, but is otherwise ignored. +While traversing the filesystem these types +of files are silently skipped. +.SH FILES +.PD 0 +.TP 21 +/etc/mtab +contains default list of filesystems to reorganize. +.TP 21 +/var/tmp/.fsrlast_xfs +records the state where reorganization left off. +.PD +.SH "SEE ALSO" +xfs_io(8), +mkfs.xfs(8), +xfs_ncheck(8), +xfs(5). +.SH "NOTES" +.I xfs_fsr +improves the layout of extents for each file by copying the entire +file to a temporary location and then interchanging the data extents +of the target and temporary files in an atomic manner. +This method requires that enough free disk space be available to copy +any given file and that the space be less fragmented than the original +file. 
+It also requires the owner of the file to have enough remaining +filespace quota to do the copy on systems running quotas. +.I xfs_fsr +generates a warning message if space is not sufficient to improve +the target file. +.PP +A temporary file used in improving a file given on the command line +is created in the same parent directory as the target file and +its name is prefixed by the string '\f3.fsr\f1'. +The temporary files used in improving an entire XFS device are stored +in a directory at the root of the target device and use the same +naming scheme. +The temporary files are unlinked upon creation so data will not be +readable by any other process. +.PP +.I xfs_fsr +does not operate on files that are currently mapped in memory. +A 'file busy' error can be seen for these files if the verbose +flag (\f3\-v\f1) is set. +.PP +Files marked as no\-defrag will be skipped. The +.IR xfs_io (8) +chattr command with the f attribute can be used to set or clear +this flag. Files and directories created in a directory with the +no\-defrag flag will inherit the attribute. +.PP +An entry in +.I /etc/mtab +or the file specified using the +.B \-m +option must have the +.B rw +option specified for read and write access. +If this option is not present, then +.I xfs_fsr +skips the +filesystem described by that line. +See the +.IR fstab (5) +reference page for +more details. +.PP +In general we do not foresee the need to run +.I xfs_fsr +on system partitions such as +.IR / , +.I /boot +and +.I /usr +as in general these will not suffer from fragmentation. +There are also issues with defragmenting files +.IR lilo (8) +uses to boot your system. It is recommended that these files +be flagged as no\-defrag with the +.IR xfs_io (8) +chattr command. Should these files be moved by +.I xfs_fsr +then you must rerun +.I lilo +before you reboot or you may have an unbootable system.