2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <xfs/xfs_types.h>
23 #include <xfs/xfs_dfrag.h>
24 #include <xfs/xfs_bmap_btree.h>
25 #include <xfs/xfs_dinode.h>
26 #include <xfs/xfs_attr_sf.h>
34 #include <sys/ioctl.h>
37 #include <sys/statvfs.h>
38 #include <sys/xattr.h>
41 #ifndef XFS_XFLAG_NODEFRAG
42 #define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */
45 #define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
46 #define _PATH_PROC_MOUNTS "/proc/mounts"
54 /* static int nflag; */
56 /* static int sflag; */
58 extern int max_ext_size
;
59 static int npasses
= 10;
60 static int startpass
= 0;
62 struct getbmap
*outmap
= NULL
;
66 static __int64_t minimumfree
= 2048;
68 #define MNTTYPE_XFS "xfs"
74 #define TARGETRANGE 10
78 #define BUFFER_SIZE (1<<16)
79 #define BUFFER_MAX (1<<24)
80 #define min(x, y) ((x) < (y) ? (x) : (y))
82 static time_t howlong
= 7200; /* default seconds of reorganizing */
83 static char *leftofffile
= _PATH_FSRLAST
; /* where we left off last */
84 static time_t endtime
;
85 static time_t starttime
;
86 static xfs_ino_t leftoffino
= 0;
90 static int fsrfile(char *fname
, xfs_ino_t ino
);
91 static int fsrfile_common( char *fname
, char *tname
, char *mnt
,
92 int fd
, xfs_bstat_t
*statp
);
93 static int packfile(char *fname
, char *tname
, int fd
,
94 xfs_bstat_t
*statp
, struct fsxattr
*fsxp
);
95 static void fsrdir(char *dirname
);
96 static int fsrfs(char *mntdir
, xfs_ino_t ino
, int targetrange
);
97 static void initallfs(char *mtab
);
98 static void fsrallfs(char *mtab
, int howlong
, char *leftofffile
);
99 static void fsrall_cleanup(int timeout
);
100 static int getnextents(int);
101 int xfsrtextsize(int fd
);
102 int xfs_getrt(int fd
, struct statvfs64
*sfbp
);
103 char * gettmpname(char *fname
);
104 char * getparent(char *fname
);
105 int fsrprintf(const char *fmt
, ...);
106 int read_fd_bmap(int, xfs_bstat_t
*, int *);
107 int cmp(const void *, const void *);
108 static void tmp_init(char *mnt
);
109 static char * tmp_next(char *mnt
);
110 static void tmp_close(char *mnt
);
111 int xfs_getgeom(int , xfs_fsop_geom_v1_t
* );
113 xfs_fsop_geom_v1_t fsgeom
; /* geometry of active mounted system */
118 typedef struct fsdesc
{
124 fsdesc_t
*fs
, *fsbase
, *fsend
;
125 int fsbufsize
= 10; /* A starting value */
126 int nfrags
= 0; /* Debug option: Coerse into specific number
128 int openopts
= O_CREAT
|O_EXCL
|O_RDWR
|O_DIRECT
;
131 xfs_fsgeometry(int fd
, xfs_fsop_geom_v1_t
*geom
)
133 return ioctl(fd
, XFS_IOC_FSGEOMETRY_V1
, geom
);
137 xfs_bulkstat_single(int fd
, xfs_ino_t
*lastip
, xfs_bstat_t
*ubuffer
)
139 xfs_fsop_bulkreq_t bulkreq
;
141 bulkreq
.lastip
= (__u64
*)lastip
;
143 bulkreq
.ubuffer
= ubuffer
;
144 bulkreq
.ocount
= NULL
;
145 return ioctl(fd
, XFS_IOC_FSBULKSTAT_SINGLE
, &bulkreq
);
149 xfs_bulkstat(int fd
, xfs_ino_t
*lastip
, int icount
,
150 xfs_bstat_t
*ubuffer
, __s32
*ocount
)
152 xfs_fsop_bulkreq_t bulkreq
;
154 bulkreq
.lastip
= (__u64
*)lastip
;
155 bulkreq
.icount
= icount
;
156 bulkreq
.ubuffer
= ubuffer
;
157 bulkreq
.ocount
= ocount
;
158 return ioctl(fd
, XFS_IOC_FSBULKSTAT
, &bulkreq
);
162 xfs_swapext(int fd
, xfs_swapext_t
*sx
)
164 return ioctl(fd
, XFS_IOC_SWAPEXT
, sx
);
168 xfs_fscounts(int fd
, xfs_fsop_counts_t
*counts
)
170 return ioctl(fd
, XFS_IOC_FSCOUNTS
, counts
);
181 * Check if the argument is either the device name or mountpoint of an XFS
182 * filesystem. Note that we do not care about bind mounted regular files
183 * here - the code that handles defragmentation of invidual files takes care
187 find_mountpoint(char *mtab
, char *argname
, struct stat64
*sb
)
194 mtabp
= setmntent(mtab
, "r");
196 fprintf(stderr
, _("%s: cannot read %s\n"),
201 while ((t
= getmntent(mtabp
))) {
202 if (S_ISDIR(sb
->st_mode
)) { /* mount point */
203 if (stat64(t
->mnt_dir
, &ms
) < 0)
205 if (sb
->st_ino
!= ms
.st_ino
)
207 if (sb
->st_dev
!= ms
.st_dev
)
209 if (strcmp(t
->mnt_type
, MNTTYPE_XFS
) != 0)
211 } else { /* device */
214 if (stat64(t
->mnt_fsname
, &ms
) < 0)
216 if (sb
->st_rdev
!= ms
.st_rdev
)
218 if (strcmp(t
->mnt_type
, MNTTYPE_XFS
) != 0)
222 * Make sure the mountpoint given by mtab is accessible
225 if (stat64(t
->mnt_dir
, &sb2
) < 0)
238 main(int argc
, char **argv
)
247 progname
= basename(argv
[0]);
249 setlocale(LC_ALL
, "");
250 bindtextdomain(PACKAGE
, LOCALEDIR
);
255 while ((c
= getopt(argc
, argv
, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
272 case 's': /* frag stats only */
275 _("%s: Stats not yet supported for XFS\n"),
280 howlong
= atoi(optarg
);
283 leftofffile
= optarg
;
289 argv_blksz_dio
= atoi(optarg
);
292 npasses
= atoi(optarg
);
295 /* Testing opt: coerses frag count in result */
296 if (getenv("FSRXFSTEST") != NULL
) {
297 nfrags
= atoi(optarg
);
302 printf(_("%s version %s\n"), progname
, VERSION
);
310 * If the user did not specify an explicit mount table, try to use
311 * /proc/mounts if it is available, else /etc/mtab. We prefer
312 * /proc/mounts because it is kernel controlled, while /etc/mtab
313 * may contain garbage that userspace tools like pam_mounts wrote
317 if (access(_PATH_PROC_MOUNTS
, R_OK
) == 0)
318 mtab
= _PATH_PROC_MOUNTS
;
320 mtab
= _PATH_MOUNTED
;
324 setbuf(stdout
, NULL
);
328 /* Save the caller's real uid */
331 pagesize
= getpagesize();
334 for (; optind
< argc
; optind
++) {
335 argname
= argv
[optind
];
337 if (lstat64(argname
, &sb
) < 0) {
339 _("%s: could not stat: %s: %s\n"),
340 progname
, argname
, strerror(errno
));
344 if (S_ISLNK(sb
.st_mode
)) {
347 if (stat64(argname
, &sb2
) == 0 &&
348 (S_ISBLK(sb2
.st_mode
) ||
349 S_ISCHR(sb2
.st_mode
)))
353 mntp
= find_mountpoint(mtab
, argname
, &sb
);
356 } else if (S_ISCHR(sb
.st_mode
)) {
358 "%s: char special not supported: %s\n"),
361 } else if (S_ISDIR(sb
.st_mode
) || S_ISREG(sb
.st_mode
)) {
362 if (!platform_test_xfs_path(argname
)) {
364 "%s: cannot defragment: %s: Not XFS\n"),
368 if (S_ISDIR(sb
.st_mode
))
371 fsrfile(argname
, sb
.st_ino
);
374 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
380 fsrallfs(mtab
, howlong
, leftofffile
);
389 "Usage: %s [-d] [-v] [-n] [-s] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
390 " %s [-d] [-v] [-n] [-s] [-g] xfsdev | dir | file ...\n\n"
392 " -n Do nothing, only interesting with -v. Not\n"
393 " effective with in mtab mode.\n"
394 " -s Print statistics only.\n"
395 " -g Print to syslog (default if stdout not a tty).\n"
396 " -t time How long to run in seconds.\n"
397 " -p passes Number of passes before terminating global re-org.\n"
398 " -f leftoff Use this instead of %s.\n"
399 " -m mtab Use something other than /etc/mtab.\n"
400 " -d Debug, print even more.\n"
401 " -v Verbose, more -v's more verbose.\n"
402 ), progname
, progname
, _PATH_FSRLAST
);
407 * initallfs -- read the mount table and set up an internal form
410 initallfs(char *mtab
)
418 fp
= setmntent(mtab
, "r");
420 fsrprintf(_("could not open mtab file: %s\n"), mtab
);
424 /* malloc a number of descriptors, increased later if needed */
425 if (!(fsbase
= (fsdesc_t
*)malloc(fsbufsize
* sizeof(fsdesc_t
)))) {
426 fsrprintf(_("out of memory: %s\n"), strerror(errno
));
429 fsend
= (fsbase
+ fsbufsize
- 1);
431 /* find all rw xfs file systems */
434 while ((mp
= getmntent(fp
))) {
437 if (strcmp(mp
->mnt_type
, MNTTYPE_XFS
) != 0 ||
438 stat64(mp
->mnt_fsname
, &sb
) == -1 ||
439 !S_ISBLK(sb
.st_mode
))
442 cp
= strtok(mp
->mnt_opts
,",");
444 if (strcmp("rw", cp
) == 0)
446 } while ((cp
= strtok(NULL
, ",")) != NULL
);
449 fsrprintf(_("Skipping %s: not mounted rw\n"),
454 if (mi
== fsbufsize
) {
456 if ((fsbase
= (fsdesc_t
*)realloc((char *)fsbase
,
457 fsbufsize
* sizeof(fsdesc_t
))) == NULL
) {
458 fsrprintf(_("out of memory: %s\n"),
463 fsrprintf(_("out of memory on realloc: %s\n"),
467 fs
= (fsbase
+ mi
); /* Needed ? */
470 fs
->dev
= strdup(mp
->mnt_fsname
);
471 fs
->mnt
= strdup(mp
->mnt_dir
);
473 if (fs
->dev
== NULL
) {
474 fsrprintf(_("strdup(%s) failed\n"), mp
->mnt_fsname
);
477 if (fs
->mnt
== NULL
) {
478 fsrprintf(_("strdup(%s) failed\n"), mp
->mnt_dir
);
485 fsend
= (fsbase
+ numfs
);
488 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab
);
491 if (vflag
|| dflag
) {
492 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
495 for (fs
= fsbase
; fs
< fsend
; fs
++)
496 fsrprintf("\t%-30.30s%-30.30s\n", fs
->dev
, fs
->mnt
);
501 fsrallfs(char *mtab
, int howlong
, char *leftofffile
)
510 xfs_ino_t startino
= 0;
512 struct stat64 sb
, sb2
;
514 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab
, howlong
, leftofffile
);
516 endtime
= starttime
+ howlong
;
519 /* where'd we leave off last time? */
520 if (lstat64(leftofffile
, &sb
) == 0) {
521 if ( (fd
= open(leftofffile
, O_RDONLY
)) == -1 ) {
522 fsrprintf(_("%s: open failed\n"), leftofffile
);
524 else if ( fstat64(fd
, &sb2
) == 0) {
526 * Verify that lstat & fstat point to the
527 * same regular file (no links/no quick spoofs)
529 if ( (sb
.st_dev
!= sb2
.st_dev
) ||
530 (sb
.st_ino
!= sb2
.st_ino
) ||
531 ((sb
.st_mode
& S_IFMT
) != S_IFREG
) ||
532 ((sb2
.st_mode
& S_IFMT
) != S_IFREG
) ||
533 (sb2
.st_uid
!= ROOT
) ||
537 fsrprintf(_("Can't use %s: mode=0%o own=%d"
539 leftofffile
, sb
.st_mode
,
540 sb
.st_uid
, sb
.st_nlink
);
555 if (read(fd
, buf
, SMBUFSZ
) == -1) {
557 fsrprintf(_("could not read %s, starting with %s\n"),
558 leftofffile
, *fs
->dev
);
560 for (fs
= fsbase
; fs
< fsend
; fs
++) {
562 if ((strncmp(buf
,fsname
,strlen(fsname
)) == 0)
563 && buf
[strlen(fsname
)] == ' ') {
571 ptr
= strchr(buf
, ' ');
573 startpass
= atoi(++ptr
);
574 ptr
= strchr(ptr
, ' ');
576 startino
= strtoull(++ptr
, NULL
, 10);
582 /* Init pass counts */
583 for (fsp
= fsbase
; fsp
< fs
; fsp
++) {
584 fsp
->npass
= startpass
+ 1;
586 for (fsp
= fs
; fsp
<= fsend
; fsp
++) {
587 fsp
->npass
= startpass
;
594 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
595 fs
->npass
, (unsigned long long)startino
,
599 signal(SIGABRT
, aborter
);
600 signal(SIGHUP
, aborter
);
601 signal(SIGINT
, aborter
);
602 signal(SIGQUIT
, aborter
);
603 signal(SIGTERM
, aborter
);
605 /* reorg for 'howlong' -- checked in 'fsrfs' */
606 while (endtime
> time(0)) {
610 if (fs
->npass
== npasses
) {
611 fsrprintf(_("Completed all %d passes\n"), npasses
);
614 if (npasses
> 1 && !fs
->npass
)
621 fsrprintf(_("couldn't fork sub process:"));
625 error
= fsrfs(fs
->mnt
, startino
, TARGETRANGE
);
631 if (WIFEXITED(error
) && WEXITSTATUS(error
) == 1) {
632 /* child timed out & did fsrall_cleanup */
637 startino
= 0; /* reset after the first time through */
641 fsrall_cleanup(endtime
<= time(0));
645 * fsrall_cleanup -- close files, print next starting location, etc.
648 fsrall_cleanup(int timeout
)
654 /* record where we left off */
656 fd
= open(leftofffile
, O_WRONLY
|O_CREAT
|O_EXCL
, 0644);
658 fsrprintf(_("open(%s) failed: %s\n"),
659 leftofffile
, strerror(errno
));
662 ret
= sprintf(buf
, "%s %d %llu\n", fs
->dev
,
663 fs
->npass
, (unsigned long long)leftoffino
);
664 if (write(fd
, buf
, ret
) < strlen(buf
))
665 fsrprintf(_("write(%s) failed: %s\n"),
666 leftofffile
, strerror(errno
));
672 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
673 progname
, startpass
, fs
->npass
,
674 time(0) - endtime
+ howlong
);
678 * fsrfs -- reorganize a file system
681 fsrfs(char *mntdir
, xfs_ino_t startino
, int targetrange
)
688 xfs_bstat_t buf
[GRABSZ
];
691 jdm_fshandle_t
*fshandlep
;
692 xfs_ino_t lastino
= startino
;
694 fsrprintf(_("%s start inode=%llu\n"), mntdir
,
695 (unsigned long long)startino
);
697 fshandlep
= jdm_getfshandle( mntdir
);
699 fsrprintf(_("unable to get handle: %s: %s\n"),
700 mntdir
, strerror( errno
));
704 if ((fsfd
= open(mntdir
, O_RDONLY
)) < 0) {
705 fsrprintf(_("unable to open: %s: %s\n"),
706 mntdir
, strerror( errno
));
710 if (xfs_getgeom(fsfd
, &fsgeom
) < 0 ) {
711 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
718 while ((ret
= xfs_bulkstat(fsfd
,
719 &lastino
, GRABSZ
, &buf
[0], &buflenout
) == 0)) {
726 /* Each loop through, defrag targetrange percent of the files */
727 count
= (buflenout
* targetrange
) / 100;
729 qsort((char *)buf
, buflenout
, sizeof(struct xfs_bstat
), cmp
);
731 for (p
= buf
, endp
= (buf
+ buflenout
); p
< endp
; p
++) {
732 /* Do some obvious checks now */
733 if (((p
->bs_mode
& S_IFMT
) != S_IFREG
) ||
737 if ((fd
= jdm_open(fshandlep
, p
, O_RDWR
)) < 0) {
738 /* This probably means the file was
739 * removed while in progress of handling
740 * it. Just quietly ignore this file.
743 fsrprintf(_("could not open: "
744 "inode %llu\n"), p
->bs_ino
);
748 /* Don't know the pathname, so make up something */
749 sprintf(fname
, "ino=%lld", (long long)p
->bs_ino
);
751 /* Get a tmp file name */
752 tname
= tmp_next(mntdir
);
754 ret
= fsrfile_common(fname
, tname
, mntdir
, fd
, p
);
756 leftoffino
= p
->bs_ino
;
765 if (endtime
&& endtime
< time(0)) {
773 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname
, strerror(errno
));
781 * To compare bstat structs for qsort.
784 cmp(const void *s1
, const void *s2
)
786 return( ((xfs_bstat_t
*)s2
)->bs_extents
-
787 ((xfs_bstat_t
*)s1
)->bs_extents
);
792 * reorganize by directory hierarchy.
793 * Stay in dev (a restriction based on structure of this program -- either
794 * call efs_{n,u}mount() around each file, something smarter or this)
797 fsrdir(char *dirname
)
799 fsrprintf(_("%s: Directory defragmentation not supported\n"), dirname
);
803 * Sets up the defragmentation of a file based on the
804 * filepath. It collects the bstat information, does
805 * an open on the file and passes this all to fsrfile_common.
808 fsrfile(char *fname
, xfs_ino_t ino
)
811 jdm_fshandle_t
*fshandlep
;
816 fshandlep
= jdm_getfshandle(getparent (fname
) );
818 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
819 fname
, strerror(errno
));
824 * Need to open something on the same filesystem as the
825 * file. Open the parent.
827 fsfd
= open(getparent(fname
), O_RDONLY
);
829 fsrprintf(_("unable to open sys handle for %s: %s\n"),
830 fname
, strerror(errno
));
834 if ((xfs_bulkstat_single(fsfd
, &ino
, &statbuf
)) < 0) {
835 fsrprintf(_("unable to get bstat on %s: %s\n"),
836 fname
, strerror(errno
));
841 fd
= jdm_open( fshandlep
, &statbuf
, O_RDWR
);
843 fsrprintf(_("unable to open handle %s: %s\n"),
844 fname
, strerror(errno
));
849 /* Get the fs geometry */
850 if (xfs_getgeom(fsfd
, &fsgeom
) < 0 ) {
851 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname
);
858 tname
= gettmpname(fname
);
861 error
= fsrfile_common(fname
, tname
, NULL
, fd
, &statbuf
);
870 * This is the common defrag code for either a full fs
871 * defragmentation or a single file. Check as much as
872 * possible with the file, fork a process to setuid to the
873 * target file owner's uid and defragment the file.
874 * This is done so the new extents created in a tmp file are
875 * reflected in the owners' quota without having to do any
876 * special code in the kernel. When the existing extents
877 * are removed, the quotas will be correct. It's ugly but
878 * it saves us from doing some quota re-construction in
879 * the extent swap. The price is that the defragmentation
880 * will fail if the owner of the target file is already at
892 struct statvfs64 vfss
;
897 fsrprintf("%s\n", fname
);
900 fsrprintf(_("sync failed: %s: %s\n"), fname
, strerror(errno
));
904 if (statp
->bs_size
== 0) {
906 fsrprintf(_("%s: zero size, ignoring\n"), fname
);
910 /* Check if a mandatory lock is set on the file to try and
911 * avoid blocking indefinitely on the reads later. Note that
912 * someone could still set a mandatory lock after this check
913 * but before all reads have completed to block fsr reads.
914 * This change just closes the window a bit.
916 if ( (statp
->bs_mode
& S_ISGID
) && ( ! (statp
->bs_mode
&S_IXGRP
) ) ) {
920 fl
.l_whence
= SEEK_SET
;
921 fl
.l_start
= (off_t
)0;
923 if ((fcntl(fd
, F_GETLK
, &fl
)) < 0 ) {
925 fsrprintf(_("locking check failed: %s\n"),
929 if (fl
.l_type
!= F_UNLCK
) {
930 /* Mandatory lock is set */
932 fsrprintf(_("mandatory lock: %s: ignoring\n"),
939 * Check if there is room to copy the file.
941 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
942 * not the optimal I/O size as struct stat.
944 if (statvfs64(fsname
? fsname
: fname
, &vfss
) < 0) {
945 fsrprintf(_("unable to get fs stat on %s: %s\n"),
946 fname
, strerror(errno
));
949 bsize
= vfss
.f_frsize
? vfss
.f_frsize
: vfss
.f_bsize
;
950 if (statp
->bs_blksize
* statp
->bs_blocks
>
951 vfss
.f_bfree
* bsize
- minimumfree
) {
952 fsrprintf(_("insufficient freespace for: %s: "
953 "size=%lld: ignoring\n"), fname
,
954 statp
->bs_blksize
* statp
->bs_blocks
);
958 if ((ioctl(fd
, XFS_IOC_FSGETXATTR
, &fsx
)) < 0) {
959 fsrprintf(_("failed to get inode attrs: %s\n"), fname
);
962 if (fsx
.fsx_xflags
& (XFS_XFLAG_IMMUTABLE
|XFS_XFLAG_APPEND
)) {
964 fsrprintf(_("%s: immutable/append, ignoring\n"), fname
);
967 if (fsx
.fsx_xflags
& XFS_XFLAG_NODEFRAG
) {
969 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
973 if (fsx
.fsx_xflags
& XFS_XFLAG_REALTIME
) {
974 if (xfs_getrt(fd
, &vfss
) < 0) {
975 fsrprintf(_("cannot get realtime geometry for: %s\n"),
979 if (statp
->bs_size
> ((vfss
.f_bfree
* bsize
) - minimumfree
)) {
980 fsrprintf(_("low on realtime free space: %s: "
981 "ignoring file\n"), fname
);
986 if ((RealUid
!= ROOT
) && (RealUid
!= statp
->bs_uid
)) {
987 fsrprintf(_("cannot open: %s: Permission denied\n"), fname
);
992 * Previously the code forked here, & the child changed it's uid to
993 * that of the file's owner and then called packfile(), to keep
994 * quota counts correct. (defragged files could use fewer blocks).
996 * Instead, just fchown() the temp file to the uid,gid of the
997 * file we're defragging, in packfile().
1000 if ((error
= packfile(fname
, tname
, fd
, statp
, &fsx
)))
1002 return -1; /* no error */
1006 * Attempt to set the attr fork up correctly. This is simple for attr1
1007 * filesystems as they have a fixed inode fork offset. In that case
1008 * just create an attribute and that's all we need to do.
1010 * For attr2 filesystems, see if we have the actual fork offset in
1011 * the bstat structure. If so, just create additional attributes on
1012 * the temporary inode until the offset matches.
1014 * If it doesn't exist, we can only do best effort. Add an attribute at a time
1015 * to move the inode fork around, but take into account that the attribute
1016 * might be too small to move the fork every time we add one. This should
1017 * hopefully put the fork offset in the right place. It's not a big deal if we
1018 * don't get it right - the kernel will reject it when we try to swap extents.
1021 fsr_setup_attr_fork(
1024 xfs_bstat_t
*bstatp
)
1026 struct stat64 tstatbuf
;
1028 int last_forkoff
= 0;
1029 int no_change_cnt
= 0;
1032 if (!(bstatp
->bs_xflags
& XFS_XFLAG_HASATTR
))
1036 * use the old method if we have attr1 or the kernel does not yet
1037 * support passing the fork offset in the bulkstat data.
1039 if (!(fsgeom
.flags
& XFS_FSOP_GEOM_FLAGS_ATTR2
) ||
1040 bstatp
->bs_forkoff
== 0) {
1042 ret
= fsetxattr(tfd
, "user.X", "X", 1, XATTR_CREATE
);
1044 fsrprintf(_("could not set ATTR\n"));
1050 /* attr2 w/ fork offsets */
1052 if (fstat64(tfd
, &tstatbuf
) < 0) {
1053 fsrprintf(_("unable to stat temp file: %s\n"),
1066 * bulkstat the temp inode to see what the forkoff is. Use
1067 * this to compare against the target and determine what we
1070 ino
= tstatbuf
.st_ino
;
1071 if ((xfs_bulkstat_single(tfd
, &ino
, &tbstat
)) < 0) {
1072 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1077 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1078 bstatp
->bs_forkoff
, tbstat
.bs_forkoff
);
1080 snprintf(name
, sizeof(name
), "user.%d", i
);
1083 * If there is no attribute, then we need to create one to get
1084 * an attribute fork at the default location.
1086 if (!tbstat
.bs_forkoff
) {
1087 ret
= fsetxattr(tfd
, name
, "XX", 2, XATTR_CREATE
);
1089 fsrprintf(_("could not set ATTR\n"));
1096 * make a progress check so we don't get stuck trying to extend
1097 * a large btree form attribute fork.
1099 if (last_forkoff
== tbstat
.bs_forkoff
) {
1100 if (no_change_cnt
++ > 10)
1102 } else /* progress! */
1104 last_forkoff
= tbstat
.bs_forkoff
;
1106 /* work out which way to grow the fork */
1107 diff
= tbstat
.bs_forkoff
- bstatp
->bs_forkoff
;
1108 if (abs(diff
) > fsgeom
.inodesize
- sizeof(struct xfs_dinode
)) {
1109 fsrprintf(_("forkoff diff %d too large!\n"), diff
);
1113 /* if they are equal, we are done */
1118 * if the temp inode fork offset is smaller then we have to
1119 * grow the data fork
1123 * create some temporary extents in the inode to move
1124 * the fork in the direction we need. This can be done
1125 * by preallocating some single block extents at
1126 * non-contiguous offsets.
1128 /* XXX: unimplemented! */
1132 /* we need to grow the attr fork, so create another attr */
1133 ret
= fsetxattr(tfd
, name
, "XX", 2, XATTR_CREATE
);
1135 fsrprintf(_("could not set ATTR\n"));
1139 } while (++i
< 100); /* don't go forever */
1143 fsrprintf(_("set temp attr\n"));
1148 * Do the defragmentation of a single file.
1149 * We already are pretty sure we can and want to
1150 * defragment the file. Create the tmp file, copy
1151 * the data (maintaining holes) and call the kernel
1152 * extent swap routinte.
1155 packfile(char *fname
, char *tname
, int fd
,
1156 xfs_bstat_t
*statp
, struct fsxattr
*fsxp
)
1160 int nextents
, extent
, cur_nextents
, new_nextents
;
1164 static xfs_swapext_t sx
;
1165 struct xfs_flock64 space
;
1169 char ffname
[SMBUFSZ
];
1173 * Work out the extent map - nextents will be set to the
1174 * minimum number of extents needed for the file (taking
1175 * into account holes), cur_nextents is the current number
1178 nextents
= read_fd_bmap(fd
, statp
, &cur_nextents
);
1180 if (cur_nextents
== 1 || cur_nextents
<= nextents
) {
1182 fsrprintf(_("%s already fully defragmented.\n"), fname
);
1183 return 1; /* indicates no change/no error */
1187 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1188 fname
, cur_nextents
, (cur_nextents
- nextents
),
1191 if ((tfd
= open(tname
, openopts
, 0666)) < 0) {
1193 fsrprintf(_("could not open tmp file: %s: %s\n"),
1194 tname
, strerror(errno
));
1199 /* Setup extended attributes */
1200 if (fsr_setup_attr_fork(fd
, tfd
, statp
) != 0) {
1201 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname
);
1206 /* Setup extended inode flags, project identifier, etc */
1207 if (fsxp
->fsx_xflags
|| fsxp
->fsx_projid
) {
1208 if (ioctl(tfd
, XFS_IOC_FSSETXATTR
, fsxp
) < 0) {
1209 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1216 if ((ioctl(tfd
, XFS_IOC_DIOINFO
, &dio
)) < 0 ) {
1217 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname
);
1222 dio_min
= dio
.d_miniosz
;
1223 if (statp
->bs_size
<= dio_min
) {
1224 blksz_dio
= dio_min
;
1226 blksz_dio
= min(dio
.d_maxiosz
, BUFFER_MAX
- pagesize
);
1227 if (argv_blksz_dio
!= 0)
1228 blksz_dio
= min(argv_blksz_dio
, blksz_dio
);
1229 blksz_dio
= (min(statp
->bs_size
, blksz_dio
) / dio_min
) * dio_min
;
1233 fsrprintf(_("DEBUG: "
1234 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1235 statp
->bs_size
, blksz_dio
, dio
.d_miniosz
,
1236 dio
.d_maxiosz
, pagesize
);
1239 if (!(fbuf
= (char *)memalign(dio
.d_mem
, blksz_dio
))) {
1240 fsrprintf(_("could not allocate buf: %s\n"), tname
);
1246 /* Create new tmp file in same AG as first */
1247 sprintf(ffname
, "%s.frag", tname
);
1249 /* Open the new file for sync writes */
1250 if ((ffd
= open(ffname
, openopts
, 0666)) < 0) {
1251 fsrprintf(_("could not open fragfile: %s : %s\n"),
1252 ffname
, strerror(errno
));
1260 /* Loop through block map allocating new extents */
1261 for (extent
= 0; extent
< nextents
; extent
++) {
1262 pos
= outmap
[extent
].bmv_offset
;
1263 if (outmap
[extent
].bmv_block
== -1) {
1264 space
.l_whence
= SEEK_SET
;
1265 space
.l_start
= pos
;
1266 space
.l_len
= outmap
[extent
].bmv_length
;
1267 if (ioctl(tfd
, XFS_IOC_UNRESVSP64
, &space
) < 0) {
1268 fsrprintf(_("could not trunc tmp %s\n"),
1271 lseek64(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
);
1273 } else if (outmap
[extent
].bmv_length
== 0) {
1274 /* to catch holes at the beginning of the file */
1278 space
.l_whence
= SEEK_CUR
;
1280 space
.l_len
= outmap
[extent
].bmv_length
;
1282 if (ioctl(tfd
, XFS_IOC_RESVSP64
, &space
) < 0) {
1283 fsrprintf(_("could not pre-allocate tmp space:"
1289 lseek64(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
);
1291 } /* end of space allocation loop */
1293 if (lseek64(tfd
, 0, SEEK_SET
)) {
1294 fsrprintf(_("Couldn't rewind on temporary file\n"));
1300 /* Check if the temporary file has fewer extents */
1301 new_nextents
= getnextents(tfd
);
1303 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents
, cur_nextents
);
1304 if (cur_nextents
<= new_nextents
) {
1306 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname
);
1309 return 1; /* no change/no error */
1312 /* Loop through block map copying the file. */
1313 for (extent
= 0; extent
< nextents
; extent
++) {
1314 pos
= outmap
[extent
].bmv_offset
;
1315 if (outmap
[extent
].bmv_block
== -1) {
1316 lseek64(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
);
1317 lseek64(fd
, outmap
[extent
].bmv_length
, SEEK_CUR
);
1319 } else if (outmap
[extent
].bmv_length
== 0) {
1320 /* to catch holes at the beginning of the file */
1323 for (cnt
= outmap
[extent
].bmv_length
; cnt
> 0;
1324 cnt
-= ct
, pos
+= ct
) {
1325 if (nfrags
&& --nfrags
) {
1326 ct
= min(cnt
, dio_min
);
1327 } else if (cnt
% dio_min
== 0) {
1328 ct
= min(cnt
, blksz_dio
);
1330 ct
= min(cnt
+ dio_min
- (cnt
% dio_min
),
1333 ct
= read(fd
, fbuf
, ct
);
1335 /* EOF, stop trying to read */
1339 /* Ensure we do direct I/O to correct block
1342 if (ct
% dio_min
!= 0) {
1343 wc
= ct
+ dio_min
- (ct
% dio_min
);
1348 if (ct
< 0 || ((wc
= write(tfd
, fbuf
, wc
)) != wc_b4
)) {
1350 fsrprintf(_("bad read of %d bytes "
1351 "from %s: %s\n"), wc_b4
,
1352 fname
, strerror(errno
));
1354 fsrprintf(_("bad write of %d bytes "
1355 "to %s: %s\n"), wc_b4
,
1356 tname
, strerror(errno
));
1359 * Might be out of space
1361 * Try to finish write
1365 if ((wc
= write(tfd
, ((char *)fbuf
)+wc
,
1367 /* worked on second attempt? */
1371 fsrprintf(_("bad write2 of %d "
1372 "bytes to %s: %s\n"),
1376 fsrprintf(_("bad copy to %s\n"),
1385 /* Do a matching write to the tmp file */
1387 if (((wc
= write(ffd
, fbuf
, wc
)) != wc_b4
)) {
1388 fsrprintf(_("bad write of %d bytes "
1390 wc_b4
, ffname
, strerror(errno
));
1395 ftruncate64(tfd
, statp
->bs_size
);
1396 if (ffd
> 0) close(ffd
);
1401 sx
.sx_stat
= *statp
; /* struct copy */
1402 sx
.sx_version
= XFS_SX_VERSION
;
1403 sx
.sx_fdtarget
= fd
;
1406 sx
.sx_length
= statp
->bs_size
;
1408 /* switch to the owner's id, to keep quota in line */
1409 if (fchown(tfd
, statp
->bs_uid
, statp
->bs_gid
) < 0) {
1411 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1412 tname
, strerror(errno
));
1417 /* Swap the extents */
1418 srval
= xfs_swapext(fd
, &sx
);
1420 if (errno
== ENOTSUP
) {
1422 fsrprintf(_("%s: file type not supported\n"), fname
);
1423 } else if (errno
== EFAULT
) {
1424 /* The file has changed since we started the copy */
1426 fsrprintf(_("%s: file modified defrag aborted\n"),
1428 } else if (errno
== EBUSY
) {
1429 /* Timestamp has changed or mmap'ed file */
1431 fsrprintf(_("%s: file busy\n"), fname
);
1433 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1434 fname
, strerror(errno
));
1440 /* Report progress */
1442 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1443 cur_nextents
, new_nextents
,
1444 (new_nextents
<= nextents
? "DONE" : " " ),
1451 gettmpname(char *fname
)
1453 static char buf
[PATH_MAX
+1];
1457 sprintf(sbuf
, "/.fsr%d", getpid());
1459 strncpy(buf
, fname
, PATH_MAX
);
1460 buf
[PATH_MAX
] = '\0';
1461 ptr
= strrchr(buf
, '/');
1468 if ((strlen(buf
) + strlen (sbuf
)) > PATH_MAX
) {
1469 fsrprintf(_("tmp file name too long: %s\n"), fname
);
1479 getparent(char *fname
)
1481 static char buf
[PATH_MAX
+1];
1484 strncpy(buf
, fname
, PATH_MAX
);
1485 buf
[PATH_MAX
] = '\0';
1486 ptr
= strrchr(buf
, '/');
1499 * Read in block map of the input file, coalesce contiguous
1500 * extents into a single range, keep all holes. Convert from 512 byte
1503 * This code was borrowed from mv.c with some minor mods.
1506 #define OUTMAP_SIZE_INCREMENT MAPSIZE
1508 int read_fd_bmap(int fd
, xfs_bstat_t
*sin
, int *cur_nextents
)
1511 struct getbmap map
[MAPSIZE
];
1514 if (++cnt >= outmap_size) { \
1515 outmap_size += OUTMAP_SIZE_INCREMENT; \
1516 outmap = (struct getbmap *)realloc(outmap, \
1517 outmap_size*sizeof(*outmap)); \
1518 if (outmap == NULL) { \
1519 fsrprintf(_("realloc failed: %s\n"), \
1525 /* Initialize the outmap array. It always grows - never shrinks.
1526 * Left-over memory allocation is saved for the next files.
1528 if (outmap_size
== 0) {
1529 outmap_size
= OUTMAP_SIZE_INCREMENT
; /* Initial size */
1530 outmap
= (struct getbmap
*)malloc(outmap_size
*sizeof(*outmap
));
1532 fsrprintf(_("malloc failed: %s\n"),
1538 outmap
[0].bmv_block
= 0;
1539 outmap
[0].bmv_offset
= 0;
1540 outmap
[0].bmv_length
= sin
->bs_size
;
1543 * If a non regular file is involved then forget holes
1546 if (!S_ISREG(sin
->bs_mode
))
1549 outmap
[0].bmv_length
= 0;
1551 map
[0].bmv_offset
= 0;
1552 map
[0].bmv_block
= 0;
1553 map
[0].bmv_entries
= 0;
1554 map
[0].bmv_count
= MAPSIZE
;
1555 map
[0].bmv_length
= -1;
1561 if (ioctl(fd
, XFS_IOC_GETBMAP
, map
) < 0) {
1562 fsrprintf(_("failed reading extents: inode %llu"),
1563 (unsigned long long)sin
->bs_ino
);
1567 /* Concatenate extents together and replicate holes into
1570 *cur_nextents
+= map
[0].bmv_entries
;
1571 for (i
= 0; i
< map
[0].bmv_entries
; i
++) {
1572 if (map
[i
+ 1].bmv_block
== -1) {
1574 outmap
[cnt
] = map
[i
+1];
1575 } else if (outmap
[cnt
].bmv_block
== -1) {
1577 outmap
[cnt
] = map
[i
+1];
1579 outmap
[cnt
].bmv_length
+= map
[i
+ 1].bmv_length
;
1582 } while (map
[0].bmv_entries
== (MAPSIZE
-1));
1583 for (i
= 0; i
<= cnt
; i
++) {
1584 outmap
[i
].bmv_offset
= BBTOB(outmap
[i
].bmv_offset
);
1585 outmap
[i
].bmv_length
= BBTOB(outmap
[i
].bmv_length
);
1588 outmap
[cnt
].bmv_length
= sin
->bs_size
- outmap
[cnt
].bmv_offset
;
1594 * Read the block map and return the number of extents.
1600 struct getbmap map
[MAPSIZE
];
1602 map
[0].bmv_offset
= 0;
1603 map
[0].bmv_block
= 0;
1604 map
[0].bmv_entries
= 0;
1605 map
[0].bmv_count
= MAPSIZE
;
1606 map
[0].bmv_length
= -1;
1611 if (ioctl(fd
,XFS_IOC_GETBMAP
, map
) < 0) {
1612 fsrprintf(_("failed reading extents"));
1616 nextents
+= map
[0].bmv_entries
;
1617 } while (map
[0].bmv_entries
== (MAPSIZE
-1));
1623 * Get the fs geometry
1626 xfs_getgeom(int fd
, xfs_fsop_geom_v1_t
* fsgeom
)
1628 if (xfs_fsgeometry(fd
, fsgeom
) < 0) {
1635 * Get xfs realtime space information
1638 xfs_getrt(int fd
, struct statvfs64
*sfbp
)
1640 unsigned long bsize
;
1641 unsigned long factor
;
1642 xfs_fsop_counts_t cnt
;
1644 if (!fsgeom
.rtblocks
)
1647 if (xfs_fscounts(fd
, &cnt
) < 0) {
1651 bsize
= (sfbp
->f_frsize
? sfbp
->f_frsize
: sfbp
->f_bsize
);
1652 factor
= fsgeom
.blocksize
/ bsize
; /* currently this is == 1 */
1653 sfbp
->f_bfree
= (cnt
.freertx
* fsgeom
.rtextsize
) * factor
;
1658 fsrprintf(const char *fmt
, ...)
1664 static int didopenlog
;
1666 openlog("fsr", LOG_PID
, LOG_USER
);
1669 vsyslog(LOG_INFO
, fmt
, ap
);
1677 * Initialize a directory for tmp file use. This is used
1678 * by the full filesystem defragmentation when we're walking
1679 * the inodes and do not know the path for the individual
1680 * files. Multiple directories are used to spread out the
1681 * tmp data around to different ag's (since file data is
1682 * usually allocated to the same ag as the directory and
1683 * directories allocated round robin from the same
1684 * parent directory).
1690 static char buf
[SMBUFSZ
];
1694 sprintf(buf
, "%s/.fsr", mnt
);
1697 if (mkdir(buf
, 0700) < 0) {
1698 if (errno
== EEXIST
) {
1700 fsrprintf(_("tmpdir already exists: %s\n"),
1703 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1704 buf
, strerror(errno
));
1708 for (i
=0; i
< fsgeom
.agcount
; i
++) {
1709 sprintf(buf
, "%s/.fsr/ag%d", mnt
, i
);
1710 if (mkdir(buf
, 0700) < 0) {
1711 if (errno
== EEXIST
) {
1714 _("tmpdir already exists: %s\n"), buf
);
1716 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1717 buf
, strerror(errno
));
1729 static char buf
[SMBUFSZ
];
1731 sprintf(buf
, "%s/.fsr/ag%d/tmp%d",
1732 ( (strcmp(mnt
, "/") == 0) ? "" : mnt
),
1736 if (++tmp_agi
== fsgeom
.agcount
)
1743 tmp_close(char *mnt
)
1745 static char buf
[SMBUFSZ
];
1748 /* No data is ever actually written so we can just do rmdir's */
1749 for (i
=0; i
< fsgeom
.agcount
; i
++) {
1750 sprintf(buf
, "%s/.fsr/ag%d", mnt
, i
);
1751 if (rmdir(buf
) < 0) {
1752 if (errno
!= ENOENT
) {
1754 _("could not remove tmpdir: %s: %s\n"),
1755 buf
, strerror(errno
));
1759 sprintf(buf
, "%s/.fsr", mnt
);
1760 if (rmdir(buf
) < 0) {
1761 if (errno
!= ENOENT
) {
1762 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1763 buf
, strerror(errno
));