1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
11 #include "xfs_bmap_btree.h"
12 #include "xfs_attr_sf.h"
13 #include "libfrog/paths.h"
14 #include "libfrog/fsgeom.h"
15 #include "libfrog/bulkstat.h"
21 #include <sys/ioctl.h>
23 #include <sys/statvfs.h>
24 #include <sys/xattr.h>
27 #define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
28 #define _PATH_PROC_MOUNTS "/proc/mounts"
36 /* static int nflag; */
38 /* static int sflag; */
39 static int argv_blksz_dio
;
40 extern int max_ext_size
;
41 static int npasses
= 10;
42 static int startpass
= 0;
44 static struct getbmap
*outmap
= NULL
;
45 static int outmap_size
= 0;
48 static int64_t minimumfree
= 2048;
50 #define MNTTYPE_XFS "xfs"
56 #define TARGETRANGE 10
57 #define BUFFER_MAX (1<<24)
59 static time_t howlong
= 7200; /* default seconds of reorganizing */
60 static char *leftofffile
= _PATH_FSRLAST
; /* where we left off last */
61 static time_t endtime
;
62 static time_t starttime
;
63 static xfs_ino_t leftoffino
= 0;
67 static int fsrfile(char *fname
, xfs_ino_t ino
);
68 static int fsrfile_common( char *fname
, char *tname
, char *mnt
,
69 int fd
, struct xfs_bstat
*statp
);
70 static int packfile(char *fname
, char *tname
, int fd
,
71 struct xfs_bstat
*statp
, struct fsxattr
*fsxp
);
72 static void fsrdir(char *dirname
);
73 static int fsrfs(char *mntdir
, xfs_ino_t ino
, int targetrange
);
74 static void initallfs(char *mtab
);
75 static void fsrallfs(char *mtab
, int howlong
, char *leftofffile
);
76 static void fsrall_cleanup(int timeout
);
77 static int getnextents(int);
78 int xfsrtextsize(int fd
);
79 int xfs_getrt(int fd
, struct statvfs
*sfbp
);
80 char * gettmpname(char *fname
);
81 char * getparent(char *fname
);
82 int fsrprintf(const char *fmt
, ...);
83 int read_fd_bmap(int, struct xfs_bstat
*, int *);
84 int cmp(const void *, const void *);
85 static void tmp_init(char *mnt
);
86 static char * tmp_next(char *mnt
);
87 static void tmp_close(char *mnt
);
89 static struct xfs_fsop_geom fsgeom
; /* geometry of active mounted system */
94 typedef struct fsdesc
{
100 static fsdesc_t
*fs
, *fsbase
, *fsend
;
101 static int fsbufsize
= 10; /* A starting value */
102 static int nfrags
= 0; /* Debug option: Coerce into specific number
104 static int openopts
= O_CREAT
|O_EXCL
|O_RDWR
|O_DIRECT
;
107 xfs_swapext(int fd
, xfs_swapext_t
*sx
)
109 return ioctl(fd
, XFS_IOC_SWAPEXT
, sx
);
113 xfs_fscounts(int fd
, xfs_fsop_counts_t
*counts
)
115 return ioctl(fd
, XFS_IOC_FSCOUNTS
, counts
);
126 main(int argc
, char **argv
)
135 progname
= basename(argv
[0]);
137 setlocale(LC_ALL
, "");
138 bindtextdomain(PACKAGE
, LOCALEDIR
);
143 while ((c
= getopt(argc
, argv
, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
160 case 's': /* frag stats only */
163 _("%s: Stats not yet supported for XFS\n"),
168 howlong
= atoi(optarg
);
171 leftofffile
= optarg
;
177 argv_blksz_dio
= atoi(optarg
);
180 npasses
= atoi(optarg
);
183 /* Testing opt: coerces frag count in result */
184 if (getenv("FSRXFSTEST") != NULL
) {
185 nfrags
= atoi(optarg
);
190 printf(_("%s version %s\n"), progname
, VERSION
);
198 * If the user did not specify an explicit mount table, try to use
199 * /proc/mounts if it is available, else /etc/mtab. We prefer
200 * /proc/mounts because it is kernel controlled, while /etc/mtab
201 * may contain garbage that userspace tools like pam_mounts wrote
205 if (access(_PATH_PROC_MOUNTS
, R_OK
) == 0)
206 mtab
= _PATH_PROC_MOUNTS
;
208 mtab
= _PATH_MOUNTED
;
212 setbuf(stdout
, NULL
);
214 starttime
= time(NULL
);
216 /* Save the caller's real uid */
219 pagesize
= getpagesize();
220 fs_table_initialise(0, NULL
, 0, NULL
);
222 for (; optind
< argc
; optind
++) {
223 argname
= argv
[optind
];
225 if (lstat(argname
, &sb
) < 0) {
227 _("%s: could not stat: %s: %s\n"),
228 progname
, argname
, strerror(errno
));
232 if (S_ISLNK(sb
.st_mode
)) {
235 if (stat(argname
, &sb2
) == 0 &&
236 (S_ISBLK(sb2
.st_mode
) ||
237 S_ISCHR(sb2
.st_mode
)))
241 fsp
= fs_table_lookup_mount(argname
);
243 fsp
= fs_table_lookup_blkdev(argname
);
245 fsrfs(fsp
->fs_dir
, 0, 100);
246 } else if (S_ISCHR(sb
.st_mode
)) {
248 "%s: char special not supported: %s\n"),
251 } else if (S_ISDIR(sb
.st_mode
) || S_ISREG(sb
.st_mode
)) {
252 if (!platform_test_xfs_path(argname
)) {
254 "%s: cannot defragment: %s: Not XFS\n"),
258 if (S_ISDIR(sb
.st_mode
))
261 fsrfile(argname
, sb
.st_ino
);
264 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
270 fsrallfs(mtab
, howlong
, leftofffile
);
279 "Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
280 " %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
283 " -g Print to syslog (default if stdout not a tty).\n"
284 " -t time How long to run in seconds.\n"
285 " -p passes Number of passes before terminating global re-org.\n"
286 " -f leftoff Use this instead of %s.\n"
287 " -m mtab Use something other than /etc/mtab.\n"
288 " -d Debug, print even more.\n"
289 " -v Verbose, more -v's more verbose.\n"
290 " -V Print version number and exit.\n"
291 ), progname
, progname
, progname
, _PATH_FSRLAST
);
296 * initallfs -- read the mount table and set up an internal form
299 initallfs(char *mtab
)
301 struct mntent_cursor cursor
;
302 struct mntent
*mnt
= NULL
;
307 /* malloc a number of descriptors, increased later if needed */
308 if (!(fsbase
= (fsdesc_t
*)malloc(fsbufsize
* sizeof(fsdesc_t
)))) {
309 fsrprintf(_("out of memory: %s\n"), strerror(errno
));
312 fsend
= (fsbase
+ fsbufsize
- 1);
314 /* find all rw xfs file systems */
318 if (platform_mntent_open(&cursor
, mtab
) != 0){
319 fprintf(stderr
, "Error: can't get mntent entries.\n");
323 while ((mnt
= platform_mntent_next(&cursor
)) != NULL
) {
326 if (strcmp(mnt
->mnt_type
, MNTTYPE_XFS
) != 0 ||
327 stat(mnt
->mnt_fsname
, &sb
) == -1 ||
328 !S_ISBLK(sb
.st_mode
))
331 cp
= strtok(mnt
->mnt_opts
,",");
333 if (strcmp("rw", cp
) == 0)
335 } while ((cp
= strtok(NULL
, ",")) != NULL
);
338 fsrprintf(_("Skipping %s: not mounted rw\n"),
343 if (mi
== fsbufsize
) {
345 if ((fsbase
= (fsdesc_t
*)realloc((char *)fsbase
,
346 fsbufsize
* sizeof(fsdesc_t
))) == NULL
) {
347 fsrprintf(_("out of memory: %s\n"),
352 fsrprintf(_("out of memory on realloc: %s\n"),
356 fs
= (fsbase
+ mi
); /* Needed ? */
359 fs
->dev
= strdup(mnt
->mnt_fsname
);
360 fs
->mnt
= strdup(mnt
->mnt_dir
);
362 if (fs
->dev
== NULL
) {
363 fsrprintf(_("strdup(%s) failed\n"), mnt
->mnt_fsname
);
366 if (fs
->mnt
== NULL
) {
367 fsrprintf(_("strdup(%s) failed\n"), mnt
->mnt_dir
);
373 platform_mntent_close(&cursor
);
376 fsend
= (fsbase
+ numfs
);
378 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab
);
381 if (vflag
|| dflag
) {
382 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
385 for (fs
= fsbase
; fs
< fsend
; fs
++)
386 fsrprintf("\t%-30.30s%-30.30s\n", fs
->dev
, fs
->mnt
);
391 fsrallfs(char *mtab
, int howlong
, char *leftofffile
)
400 xfs_ino_t startino
= 0;
404 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab
, howlong
, leftofffile
);
406 endtime
= starttime
+ howlong
;
409 /* where'd we leave off last time? */
410 if (lstat(leftofffile
, &sb
) == 0) {
411 if ( (fd
= open(leftofffile
, O_RDONLY
)) == -1 ) {
412 fsrprintf(_("%s: open failed\n"), leftofffile
);
414 else if ( fstat(fd
, &sb2
) == 0) {
416 * Verify that lstat & fstat point to the
417 * same regular file (no links/no quick spoofs)
419 if ( (sb
.st_dev
!= sb2
.st_dev
) ||
420 (sb
.st_ino
!= sb2
.st_ino
) ||
421 ((sb
.st_mode
& S_IFMT
) != S_IFREG
) ||
422 ((sb2
.st_mode
& S_IFMT
) != S_IFREG
) ||
423 (sb2
.st_uid
!= ROOT
) ||
427 fsrprintf(_("Can't use %s: mode=0%o own=%d"
429 leftofffile
, sb
.st_mode
,
430 sb
.st_uid
, sb
.st_nlink
);
445 if (read(fd
, buf
, SMBUFSZ
) == -1) {
447 fsrprintf(_("could not read %s, starting with %s\n"),
448 leftofffile
, *fs
->dev
);
450 /* Ensure the buffer we read is null terminated */
451 buf
[SMBUFSZ
-1] = '\0';
452 for (fs
= fsbase
; fs
< fsend
; fs
++) {
454 if ((strncmp(buf
,fsname
,strlen(fsname
)) == 0)
455 && buf
[strlen(fsname
)] == ' ') {
463 ptr
= strchr(buf
, ' ');
465 startpass
= atoi(++ptr
);
466 ptr
= strchr(ptr
, ' ');
468 startino
= strtoull(++ptr
, NULL
, 10);
470 * NOTE: The inode number read in from
471 * the leftoff file is the last inode
472 * to have been fsr'd. Since the v5
473 * xfrog_bulkstat function wants to be
474 * passed the first inode that we want
475 * to examine, increment the value that
476 * we read in. The debug message below
477 * prints the lastoff value.
485 /* Init pass counts */
486 for (fsp
= fsbase
; fsp
< fs
; fsp
++) {
487 fsp
->npass
= startpass
+ 1;
489 for (fsp
= fs
; fsp
<= fsend
; fsp
++) {
490 fsp
->npass
= startpass
;
497 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
498 fs
->npass
, (unsigned long long)startino
- 1,
502 signal(SIGABRT
, aborter
);
503 signal(SIGHUP
, aborter
);
504 signal(SIGINT
, aborter
);
505 signal(SIGQUIT
, aborter
);
506 signal(SIGTERM
, aborter
);
508 /* reorg for 'howlong' -- checked in 'fsrfs' */
509 while (endtime
> time(NULL
)) {
512 if (npasses
> 1 && !fs
->npass
)
519 fsrprintf(_("couldn't fork sub process:"));
523 error
= fsrfs(fs
->mnt
, startino
, TARGETRANGE
);
528 if (WIFEXITED(error
) && WEXITSTATUS(error
) == 1) {
529 /* child timed out & did fsrall_cleanup */
534 startino
= 0; /* reset after the first time through */
539 if (fs
->npass
== npasses
) {
540 fsrprintf(_("Completed all %d passes\n"), npasses
);
544 fsrall_cleanup(endtime
<= time(NULL
));
548 * fsrall_cleanup -- close files, print next starting location, etc.
551 fsrall_cleanup(int timeout
)
560 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
561 progname
, startpass
, fs
->npass
,
562 time(NULL
) - endtime
+ howlong
);
564 /* record where we left off */
565 fd
= open(leftofffile
, O_WRONLY
|O_CREAT
|O_EXCL
, 0644);
567 fsrprintf(_("open(%s) failed: %s\n"),
568 leftofffile
, strerror(errno
));
570 ret
= sprintf(buf
, "%s %d %llu\n", fs
->dev
,
571 fs
->npass
, (unsigned long long)leftoffino
);
572 if (write(fd
, buf
, ret
) < strlen(buf
))
573 fsrprintf(_("write(%s) failed: %s\n"),
574 leftofffile
, strerror(errno
));
581 * fsrfs -- reorganize a file system
584 fsrfs(char *mntdir
, xfs_ino_t startino
, int targetrange
)
586 struct xfs_fd fsxfd
= XFS_FD_INIT_EMPTY
;
592 jdm_fshandle_t
*fshandlep
;
593 struct xfs_bulkstat_req
*breq
;
595 fsrprintf(_("%s start inode=%llu\n"), mntdir
,
596 (unsigned long long)startino
);
598 fshandlep
= jdm_getfshandle( mntdir
);
600 fsrprintf(_("unable to get handle: %s: %s\n"),
601 mntdir
, strerror( errno
));
605 ret
= -xfd_open(&fsxfd
, mntdir
, O_RDONLY
);
607 fsrprintf(_("unable to open XFS file: %s: %s\n"),
608 mntdir
, strerror(ret
));
612 memcpy(&fsgeom
, &fsxfd
.fsgeom
, sizeof(fsgeom
));
616 breq
= xfrog_bulkstat_alloc_req(GRABSZ
, startino
);
618 fsrprintf(_("Skipping %s: not enough memory\n"),
625 while ((ret
= xfrog_bulkstat(&fsxfd
, breq
) == 0)) {
626 struct xfs_bstat bs1
;
627 struct xfs_bulkstat
*buf
= breq
->bulkstat
;
628 struct xfs_bulkstat
*p
;
629 struct xfs_bulkstat
*endp
;
630 uint32_t buflenout
= breq
->hdr
.ocount
;
635 /* Each loop through, defrag targetrange percent of the files */
636 count
= (buflenout
* targetrange
) / 100;
638 qsort((char *)buf
, buflenout
, sizeof(struct xfs_bulkstat
), cmp
);
640 for (p
= buf
, endp
= (buf
+ buflenout
); p
< endp
; p
++) {
641 /* Do some obvious checks now */
642 if (((p
->bs_mode
& S_IFMT
) != S_IFREG
) ||
646 ret
= xfrog_bulkstat_v5_to_v1(&fsxfd
, &bs1
, p
);
648 fsrprintf(_("bstat conversion error: %s\n"),
653 fd
= jdm_open(fshandlep
, &bs1
, O_RDWR
| O_DIRECT
);
655 /* This probably means the file was
656 * removed while in progress of handling
657 * it. Just quietly ignore this file.
660 fsrprintf(_("could not open: "
661 "inode %llu\n"), p
->bs_ino
);
665 /* Don't know the pathname, so make up something */
666 sprintf(fname
, "ino=%lld", (long long)p
->bs_ino
);
668 /* Get a tmp file name */
669 tname
= tmp_next(mntdir
);
671 ret
= fsrfile_common(fname
, tname
, mntdir
, fd
, &bs1
);
673 leftoffino
= p
->bs_ino
;
682 if (endtime
&& endtime
< time(NULL
)) {
691 fsrprintf(_("%s: bulkstat: %s\n"), progname
, strerror(ret
));
701 * To compare bstat structs for qsort.
704 cmp(const void *s1
, const void *s2
)
706 return( ((struct xfs_bstat
*)s2
)->bs_extents
-
707 ((struct xfs_bstat
*)s1
)->bs_extents
);
712 * reorganize by directory hierarchy.
713 * Stay in dev (a restriction based on structure of this program -- either
714 * call efs_{n,u}mount() around each file, something smarter or this)
717 fsrdir(char *dirname
)
719 fsrprintf(_("%s: Directory defragmentation not supported\n"), dirname
);
723 * Sets up the defragmentation of a file based on the
724 * filepath. It collects the bstat information, does
725 * an open on the file and passes this all to fsrfile_common.
732 struct xfs_fd fsxfd
= XFS_FD_INIT_EMPTY
;
733 struct xfs_bulkstat bulkstat
;
734 struct xfs_bstat statbuf
;
735 jdm_fshandle_t
*fshandlep
;
740 fshandlep
= jdm_getfshandle(getparent (fname
) );
742 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
743 fname
, strerror(errno
));
748 * Need to open something on the same filesystem as the
749 * file. Open the parent.
751 error
= -xfd_open(&fsxfd
, getparent(fname
), O_RDONLY
);
753 fsrprintf(_("unable to open sys handle for XFS file %s: %s\n"),
754 fname
, strerror(error
));
758 error
= xfrog_bulkstat_single(&fsxfd
, ino
, 0, &bulkstat
);
760 fsrprintf(_("unable to get bstat on %s: %s\n"),
761 fname
, strerror(error
));
764 error
= xfrog_bulkstat_v5_to_v1(&fsxfd
, &statbuf
, &bulkstat
);
766 fsrprintf(_("bstat conversion error on %s: %s\n"),
767 fname
, strerror(error
));
771 fd
= jdm_open(fshandlep
, &statbuf
, O_RDWR
|O_DIRECT
);
773 fsrprintf(_("unable to open handle %s: %s\n"),
774 fname
, strerror(errno
));
778 /* Stash the fs geometry for general use. */
779 memcpy(&fsgeom
, &fsxfd
.fsgeom
, sizeof(fsgeom
));
781 tname
= gettmpname(fname
);
784 error
= fsrfile_common(fname
, tname
, NULL
, fd
, &statbuf
);
797 * This is the common defrag code for either a full fs
798 * defragmentation or a single file. Check as much as
799 * possible with the file, fork a process to setuid to the
800 * target file owner's uid and defragment the file.
801 * This is done so the new extents created in a tmp file are
802 * reflected in the owners' quota without having to do any
803 * special code in the kernel. When the existing extents
804 * are removed, the quotas will be correct. It's ugly but
805 * it saves us from doing some quota re-construction in
806 * the extent swap. The price is that the defragmentation
807 * will fail if the owner of the target file is already at
816 struct xfs_bstat
*statp
)
824 fsrprintf("%s\n", fname
);
827 fsrprintf(_("sync failed: %s: %s\n"), fname
, strerror(errno
));
831 if (statp
->bs_size
== 0) {
833 fsrprintf(_("%s: zero size, ignoring\n"), fname
);
837 /* Check if a mandatory lock is set on the file to try and
838 * avoid blocking indefinitely on the reads later. Note that
839 * someone could still set a mandatory lock after this check
840 * but before all reads have completed to block fsr reads.
841 * This change just closes the window a bit.
843 if ( (statp
->bs_mode
& S_ISGID
) && ( ! (statp
->bs_mode
&S_IXGRP
) ) ) {
847 fl
.l_whence
= SEEK_SET
;
848 fl
.l_start
= (off_t
)0;
850 if ((fcntl(fd
, F_GETLK
, &fl
)) < 0 ) {
852 fsrprintf(_("locking check failed: %s\n"),
856 if (fl
.l_type
!= F_UNLCK
) {
857 /* Mandatory lock is set */
859 fsrprintf(_("mandatory lock: %s: ignoring\n"),
866 * Check if there is room to copy the file.
868 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
869 * not the optimal I/O size as struct stat.
871 if (statvfs(fsname
? fsname
: fname
, &vfss
) < 0) {
872 fsrprintf(_("unable to get fs stat on %s: %s\n"),
873 fname
, strerror(errno
));
876 bsize
= vfss
.f_frsize
? vfss
.f_frsize
: vfss
.f_bsize
;
877 if (statp
->bs_blksize
* statp
->bs_blocks
>
878 vfss
.f_bfree
* bsize
- minimumfree
) {
879 fsrprintf(_("insufficient freespace for: %s: "
880 "size=%lld: ignoring\n"), fname
,
881 statp
->bs_blksize
* statp
->bs_blocks
);
885 if ((ioctl(fd
, FS_IOC_FSGETXATTR
, &fsx
)) < 0) {
886 fsrprintf(_("failed to get inode attrs: %s\n"), fname
);
889 if (fsx
.fsx_xflags
& (FS_XFLAG_IMMUTABLE
|FS_XFLAG_APPEND
)) {
891 fsrprintf(_("%s: immutable/append, ignoring\n"), fname
);
894 if (fsx
.fsx_xflags
& FS_XFLAG_NODEFRAG
) {
896 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
900 if (fsx
.fsx_xflags
& FS_XFLAG_REALTIME
) {
901 if (xfs_getrt(fd
, &vfss
) < 0) {
902 fsrprintf(_("cannot get realtime geometry for: %s\n"),
906 if (statp
->bs_size
> ((vfss
.f_bfree
* bsize
) - minimumfree
)) {
907 fsrprintf(_("low on realtime free space: %s: "
908 "ignoring file\n"), fname
);
913 if ((RealUid
!= ROOT
) && (RealUid
!= statp
->bs_uid
)) {
914 fsrprintf(_("cannot open: %s: Permission denied\n"), fname
);
919 * Previously the code forked here, & the child changed its uid to
920 * that of the file's owner and then called packfile(), to keep
921 * quota counts correct. (defragged files could use fewer blocks).
923 * Instead, just fchown() the temp file to the uid,gid of the
924 * file we're defragging, in packfile().
927 if ((error
= packfile(fname
, tname
, fd
, statp
, &fsx
)))
929 return -1; /* no error */
933 * Attempt to set the attr fork up correctly. This is simple for attr1
934 * filesystems as they have a fixed inode fork offset. In that case
935 * just create an attribute and that's all we need to do.
937 * For attr2 filesystems, see if we have the actual fork offset in
938 * the bstat structure. If so, just create additional attributes on
939 * the temporary inode until the offset matches.
941 * If it doesn't exist, we can only do best effort. Add an attribute at a time
942 * to move the inode fork around, but take into account that the attribute
943 * might be too small to move the fork every time we add one. This should
944 * hopefully put the fork offset in the right place. It's not a big deal if we
945 * don't get it right - the kernel will reject it when we try to swap extents.
951 struct xfs_bstat
*bstatp
)
953 #ifdef HAVE_FSETXATTR
954 struct xfs_fd txfd
= XFS_FD_INIT(tfd
);
955 struct stat tstatbuf
;
958 int last_forkoff
= 0;
959 int no_change_cnt
= 0;
962 if (!(bstatp
->bs_xflags
& FS_XFLAG_HASATTR
))
966 * use the old method if we have attr1 or the kernel does not yet
967 * support passing the fork offset in the bulkstat data.
969 if (!(fsgeom
.flags
& XFS_FSOP_GEOM_FLAGS_ATTR2
) ||
970 bstatp
->bs_forkoff
== 0) {
972 ret
= fsetxattr(txfd
.fd
, "user.X", "X", 1, XATTR_CREATE
);
974 fsrprintf(_("could not set ATTR\n"));
980 /* attr2 w/ fork offsets */
982 if (fstat(txfd
.fd
, &tstatbuf
) < 0) {
983 fsrprintf(_("unable to stat temp file: %s\n"),
990 struct xfs_bulkstat tbstat
;
995 * bulkstat the temp inode to see what the forkoff is. Use
996 * this to compare against the target and determine what we
999 ret
= xfrog_bulkstat_single(&txfd
, tstatbuf
.st_ino
, 0, &tbstat
);
1001 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1006 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1007 bstatp
->bs_forkoff
, tbstat
.bs_forkoff
);
1008 diff
= tbstat
.bs_forkoff
- bstatp
->bs_forkoff
;
1010 /* if they are equal, we are done */
1014 snprintf(name
, sizeof(name
), "user.%d", i
);
1017 * If there is no attribute, then we need to create one to get
1018 * an attribute fork at the default location.
1020 if (!tbstat
.bs_forkoff
) {
1022 ret
= fsetxattr(txfd
.fd
, name
, "XX", 2, XATTR_CREATE
);
1024 fsrprintf(_("could not set ATTR\n"));
1028 } else if (i
== 0) {
1030 * First pass, and temp file already has an inline
1031 * xattr, probably due to selinux.
1033 * It's *possible* that the temp file attr area
1034 * is larger than the target file's:
1037 * +-------+ 0 +-------+ 0
1041 * | | v-------v forkoff
1043 * v-------v forkoff | Attr | local
1045 * +-------+ +-------+
1049 * If target attr area is less than the temp's
1050 * (diff < 0) write a big attr to the temp file to knock
1051 * the attr out of local format.
1052 * (This should actually *increase* the temp file's
1053 * forkoffset when the attr moves out of the inode)
1057 memset(val
, 'X', 2048);
1058 if (fsetxattr(txfd
.fd
, name
, val
, 2048, 0)) {
1059 fsrprintf(_("big ATTR set failed\n"));
1062 /* Go back & see where we're at now */
1068 * make a progress check so we don't get stuck trying to extend
1069 * a large btree form attribute fork.
1071 if (last_forkoff
== tbstat
.bs_forkoff
) {
1072 if (no_change_cnt
++ > 10)
1074 } else /* progress! */
1076 last_forkoff
= tbstat
.bs_forkoff
;
1078 /* work out which way to grow the fork */
1079 if (abs(diff
) > fsgeom
.inodesize
- sizeof(struct xfs_dinode
)) {
1080 fsrprintf(_("forkoff diff %d too large!\n"), diff
);
1085 * if the temp inode fork offset is still smaller then we have
1086 * to grow the data fork
1090 * create some temporary extents in the inode to move
1091 * the fork in the direction we need. This can be done
1092 * by preallocating some single block extents at
1093 * non-contiguous offsets.
1095 /* XXX: unimplemented! */
1097 printf(_("data fork growth unimplemented\n"));
1101 /* we need to grow the attr fork, so create another attr */
1102 ret
= fsetxattr(txfd
.fd
, name
, "XX", 2, XATTR_CREATE
);
1104 fsrprintf(_("could not set ATTR\n"));
1108 } while (++i
< 100); /* don't go forever */
1112 fsrprintf(_("set temp attr\n"));
1113 /* We failed to resolve the fork difference */
1115 fsrprintf(_("failed to match fork offset\n"));;
1117 #endif /* HAVE_FSETXATTR */
1122 * Do the defragmentation of a single file.
1123 * We already are pretty sure we can and want to
1124 * defragment the file. Create the tmp file, copy
1125 * the data (maintaining holes) and call the kernel
1126 * extent swap routine.
1129 * -1: Some error was encountered
1130 * 0: Successfully defragmented the file
1131 * 1: No change / No Error
1134 packfile(char *fname
, char *tname
, int fd
,
1135 struct xfs_bstat
*statp
, struct fsxattr
*fsxp
)
1139 int retval
= -1; /* Failure is the default */
1140 int nextents
, extent
, cur_nextents
, new_nextents
;
1144 static xfs_swapext_t sx
;
1145 struct xfs_flock64 space
;
1149 char ffname
[SMBUFSZ
];
1153 * Work out the extent map - nextents will be set to the
1154 * minimum number of extents needed for the file (taking
1155 * into account holes), cur_nextents is the current number
1158 nextents
= read_fd_bmap(fd
, statp
, &cur_nextents
);
1160 if (cur_nextents
== 1 || cur_nextents
<= nextents
) {
1162 fsrprintf(_("%s already fully defragmented.\n"), fname
);
1163 retval
= 1; /* indicates no change/no error */
1168 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1169 fname
, cur_nextents
, (cur_nextents
- nextents
),
1172 if ((tfd
= open(tname
, openopts
, 0666)) < 0) {
1174 fsrprintf(_("could not open tmp file: %s: %s\n"),
1175 tname
, strerror(errno
));
1180 /* Setup extended attributes */
1181 if (fsr_setup_attr_fork(fd
, tfd
, statp
) != 0) {
1182 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname
);
1186 /* Setup extended inode flags, project identifier, etc */
1187 if (fsxp
->fsx_xflags
|| fsxp
->fsx_projid
) {
1188 if (ioctl(tfd
, FS_IOC_FSSETXATTR
, fsxp
) < 0) {
1189 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1195 if ((ioctl(tfd
, XFS_IOC_DIOINFO
, &dio
)) < 0 ) {
1196 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname
);
1200 dio_min
= dio
.d_miniosz
;
1201 if (statp
->bs_size
<= dio_min
) {
1202 blksz_dio
= dio_min
;
1204 blksz_dio
= min(dio
.d_maxiosz
, BUFFER_MAX
- pagesize
);
1205 if (argv_blksz_dio
!= 0)
1206 blksz_dio
= min(argv_blksz_dio
, blksz_dio
);
1207 blksz_dio
= (min(statp
->bs_size
, blksz_dio
) / dio_min
) * dio_min
;
1211 fsrprintf(_("DEBUG: "
1212 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1213 statp
->bs_size
, blksz_dio
, dio
.d_miniosz
,
1214 dio
.d_maxiosz
, pagesize
);
1217 if (!(fbuf
= (char *)memalign(dio
.d_mem
, blksz_dio
))) {
1218 fsrprintf(_("could not allocate buf: %s\n"), tname
);
1223 /* Create new tmp file in same AG as first */
1224 sprintf(ffname
, "%s.frag", tname
);
1226 /* Open the new file for sync writes */
1227 if ((ffd
= open(ffname
, openopts
, 0666)) < 0) {
1228 fsrprintf(_("could not open fragfile: %s : %s\n"),
1229 ffname
, strerror(errno
));
1235 /* Loop through block map allocating new extents */
1236 for (extent
= 0; extent
< nextents
; extent
++) {
1237 pos
= outmap
[extent
].bmv_offset
;
1238 if (outmap
[extent
].bmv_block
== -1) {
1239 space
.l_whence
= SEEK_SET
;
1240 space
.l_start
= pos
;
1241 space
.l_len
= outmap
[extent
].bmv_length
;
1242 if (ioctl(tfd
, XFS_IOC_UNRESVSP64
, &space
) < 0) {
1243 fsrprintf(_("could not trunc tmp %s\n"),
1246 if (lseek(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
) < 0) {
1247 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1248 tname
, strerror(errno
));
1252 } else if (outmap
[extent
].bmv_length
== 0) {
1253 /* to catch holes at the beginning of the file */
1257 space
.l_whence
= SEEK_CUR
;
1259 space
.l_len
= outmap
[extent
].bmv_length
;
1261 if (ioctl(tfd
, XFS_IOC_RESVSP64
, &space
) < 0) {
1262 fsrprintf(_("could not pre-allocate tmp space:"
1266 if (lseek(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
) < 0) {
1267 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1268 tname
, strerror(errno
));
1272 } /* end of space allocation loop */
1274 if (lseek(tfd
, 0, SEEK_SET
)) {
1275 fsrprintf(_("Couldn't rewind on temporary file\n"));
1279 /* Check if the temporary file has fewer extents */
1280 new_nextents
= getnextents(tfd
);
1282 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents
, cur_nextents
);
1283 if (cur_nextents
<= new_nextents
) {
1285 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname
);
1286 retval
= 1; /* no change/no error */
1290 /* Loop through block map copying the file. */
1291 for (extent
= 0; extent
< nextents
; extent
++) {
1292 pos
= outmap
[extent
].bmv_offset
;
1293 if (outmap
[extent
].bmv_block
== -1) {
1294 if (lseek(tfd
, outmap
[extent
].bmv_length
, SEEK_CUR
) < 0) {
1295 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1296 tname
, strerror(errno
));
1299 if (lseek(fd
, outmap
[extent
].bmv_length
, SEEK_CUR
) < 0) {
1300 fsrprintf(_("could not lseek in file: %s : %s\n"),
1301 fname
, strerror(errno
));
1305 } else if (outmap
[extent
].bmv_length
== 0) {
1306 /* to catch holes at the beginning of the file */
1309 for (cnt
= outmap
[extent
].bmv_length
; cnt
> 0;
1310 cnt
-= ct
, pos
+= ct
) {
1311 if (nfrags
&& --nfrags
) {
1312 ct
= min(cnt
, dio_min
);
1313 } else if (cnt
% dio_min
== 0) {
1314 ct
= min(cnt
, blksz_dio
);
1316 ct
= min(cnt
+ dio_min
- (cnt
% dio_min
),
1319 ct
= read(fd
, fbuf
, ct
);
1321 /* EOF, stop trying to read */
1325 /* Ensure we do direct I/O to correct block
1328 if (ct
% dio_min
!= 0) {
1329 wc
= ct
+ dio_min
- (ct
% dio_min
);
1334 if (ct
< 0 || ((wc
= write(tfd
, fbuf
, wc
)) != wc_b4
)) {
1336 fsrprintf(_("bad read of %d bytes "
1337 "from %s: %s\n"), wc_b4
,
1338 fname
, strerror(errno
));
1340 fsrprintf(_("bad write of %d bytes "
1341 "to %s: %s\n"), wc_b4
,
1342 tname
, strerror(errno
));
1345 * Might be out of space
1347 * Try to finish write
1351 if ((wc
= write(tfd
, ((char *)fbuf
)+wc
,
1353 /* worked on second attempt? */
1357 fsrprintf(_("bad write2 of %d "
1358 "bytes to %s: %s\n"),
1362 fsrprintf(_("bad copy to %s\n"),
1369 /* Do a matching write to the tmp file */
1371 if (((wc
= write(ffd
, fbuf
, wc
)) != wc_b4
)) {
1372 fsrprintf(_("bad write of %d bytes "
1374 wc_b4
, ffname
, strerror(errno
));
1379 if (ftruncate(tfd
, statp
->bs_size
) < 0) {
1380 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1381 fname
, strerror(errno
));
1384 if (fsync(tfd
) < 0) {
1385 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1386 fname
, strerror(errno
));
1390 sx
.sx_stat
= *statp
; /* struct copy */
1391 sx
.sx_version
= XFS_SX_VERSION
;
1392 sx
.sx_fdtarget
= fd
;
1395 sx
.sx_length
= statp
->bs_size
;
1397 /* switch to the owner's id, to keep quota in line */
1398 if (fchown(tfd
, statp
->bs_uid
, statp
->bs_gid
) < 0) {
1400 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1401 tname
, strerror(errno
));
1405 /* Swap the extents */
1406 srval
= xfs_swapext(fd
, &sx
);
1408 if (errno
== ENOTSUP
) {
1410 fsrprintf(_("%s: file type not supported\n"), fname
);
1411 } else if (errno
== EFAULT
) {
1412 /* The file has changed since we started the copy */
1414 fsrprintf(_("%s: file modified defrag aborted\n"),
1416 } else if (errno
== EBUSY
) {
1417 /* Timestamp has changed or mmap'ed file */
1419 fsrprintf(_("%s: file busy\n"), fname
);
1421 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1422 fname
, strerror(errno
));
1427 /* Report progress */
1429 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1430 cur_nextents
, new_nextents
,
1431 (new_nextents
<= nextents
? "DONE" : " " ),
1445 gettmpname(char *fname
)
1447 static char buf
[PATH_MAX
+1];
1451 sprintf(sbuf
, "/.fsr%d", getpid());
1453 strncpy(buf
, fname
, PATH_MAX
);
1454 buf
[PATH_MAX
] = '\0';
1455 ptr
= strrchr(buf
, '/');
1462 if ((strlen(buf
) + strlen (sbuf
)) > PATH_MAX
) {
1463 fsrprintf(_("tmp file name too long: %s\n"), fname
);
1473 getparent(char *fname
)
1475 static char buf
[PATH_MAX
+1];
1478 strncpy(buf
, fname
, PATH_MAX
);
1479 buf
[PATH_MAX
] = '\0';
1480 ptr
= strrchr(buf
, '/');
1493 * Read in block map of the input file, coalesce contiguous
1494 * extents into a single range, keep all holes. Convert from 512 byte
1497 * This code was borrowed from mv.c with some minor mods.
1500 #define OUTMAP_SIZE_INCREMENT MAPSIZE
1502 int read_fd_bmap(int fd
, struct xfs_bstat
*sin
, int *cur_nextents
)
1505 struct getbmap map
[MAPSIZE
];
1508 if (++cnt >= outmap_size) { \
1509 outmap_size += OUTMAP_SIZE_INCREMENT; \
1510 outmap = (struct getbmap *)realloc(outmap, \
1511 outmap_size*sizeof(*outmap)); \
1512 if (outmap == NULL) { \
1513 fsrprintf(_("realloc failed: %s\n"), \
1519 /* Initialize the outmap array. It always grows - never shrinks.
1520 * Left-over memory allocation is saved for the next files.
1522 if (outmap_size
== 0) {
1523 outmap_size
= OUTMAP_SIZE_INCREMENT
; /* Initial size */
1524 outmap
= (struct getbmap
*)malloc(outmap_size
*sizeof(*outmap
));
1526 fsrprintf(_("malloc failed: %s\n"),
1532 outmap
[0].bmv_block
= 0;
1533 outmap
[0].bmv_offset
= 0;
1534 outmap
[0].bmv_length
= sin
->bs_size
;
1537 * If a non regular file is involved then forget holes
1540 if (!S_ISREG(sin
->bs_mode
))
1543 outmap
[0].bmv_length
= 0;
1545 map
[0].bmv_offset
= 0;
1546 map
[0].bmv_block
= 0;
1547 map
[0].bmv_entries
= 0;
1548 map
[0].bmv_count
= MAPSIZE
;
1549 map
[0].bmv_length
= -1;
1555 if (ioctl(fd
, XFS_IOC_GETBMAP
, map
) < 0) {
1556 fsrprintf(_("failed reading extents: inode %llu"),
1557 (unsigned long long)sin
->bs_ino
);
1561 /* Concatenate extents together and replicate holes into
1564 *cur_nextents
+= map
[0].bmv_entries
;
1565 for (i
= 0; i
< map
[0].bmv_entries
; i
++) {
1566 if (map
[i
+ 1].bmv_block
== -1) {
1568 outmap
[cnt
] = map
[i
+1];
1569 } else if (outmap
[cnt
].bmv_block
== -1) {
1571 outmap
[cnt
] = map
[i
+1];
1573 outmap
[cnt
].bmv_length
+= map
[i
+ 1].bmv_length
;
1576 } while (map
[0].bmv_entries
== (MAPSIZE
-1));
1577 for (i
= 0; i
<= cnt
; i
++) {
1578 outmap
[i
].bmv_offset
= BBTOB(outmap
[i
].bmv_offset
);
1579 outmap
[i
].bmv_length
= BBTOB(outmap
[i
].bmv_length
);
1582 outmap
[cnt
].bmv_length
= sin
->bs_size
- outmap
[cnt
].bmv_offset
;
1588 * Read the block map and return the number of extents.
1594 struct getbmap map
[MAPSIZE
];
1596 map
[0].bmv_offset
= 0;
1597 map
[0].bmv_block
= 0;
1598 map
[0].bmv_entries
= 0;
1599 map
[0].bmv_count
= MAPSIZE
;
1600 map
[0].bmv_length
= -1;
1605 if (ioctl(fd
,XFS_IOC_GETBMAP
, map
) < 0) {
1606 fsrprintf(_("failed reading extents"));
1610 nextents
+= map
[0].bmv_entries
;
1611 } while (map
[0].bmv_entries
== (MAPSIZE
-1));
1617 * Get xfs realtime space information
1620 xfs_getrt(int fd
, struct statvfs
*sfbp
)
1622 unsigned long bsize
;
1623 unsigned long factor
;
1624 xfs_fsop_counts_t cnt
;
1626 if (!fsgeom
.rtblocks
)
1629 if (xfs_fscounts(fd
, &cnt
) < 0) {
1633 bsize
= (sfbp
->f_frsize
? sfbp
->f_frsize
: sfbp
->f_bsize
);
1634 factor
= fsgeom
.blocksize
/ bsize
; /* currently this is == 1 */
1635 sfbp
->f_bfree
= (cnt
.freertx
* fsgeom
.rtextsize
) * factor
;
1640 fsrprintf(const char *fmt
, ...)
1646 static int didopenlog
;
1648 openlog("fsr", LOG_PID
, LOG_USER
);
1651 vsyslog(LOG_INFO
, fmt
, ap
);
1659 * Initialize a directory for tmp file use. This is used
1660 * by the full filesystem defragmentation when we're walking
1661 * the inodes and do not know the path for the individual
1662 * files. Multiple directories are used to spread out the
1663 * tmp data around to different ag's (since file data is
1664 * usually allocated to the same ag as the directory and
1665 * directories allocated round robin from the same
1666 * parent directory).
1672 static char buf
[SMBUFSZ
];
1676 sprintf(buf
, "%s/.fsr", mnt
);
1679 if (mkdir(buf
, 0700) < 0) {
1680 if (errno
== EEXIST
) {
1682 fsrprintf(_("tmpdir already exists: %s\n"),
1685 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1686 buf
, strerror(errno
));
1690 for (i
=0; i
< fsgeom
.agcount
; i
++) {
1691 sprintf(buf
, "%s/.fsr/ag%d", mnt
, i
);
1692 if (mkdir(buf
, 0700) < 0) {
1693 if (errno
== EEXIST
) {
1696 _("tmpdir already exists: %s\n"), buf
);
1698 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1699 buf
, strerror(errno
));
1711 static char buf
[SMBUFSZ
];
1713 sprintf(buf
, "%s/.fsr/ag%d/tmp%d",
1714 ( (strcmp(mnt
, "/") == 0) ? "" : mnt
),
1718 if (++tmp_agi
== fsgeom
.agcount
)
1725 tmp_close(char *mnt
)
1727 static char buf
[SMBUFSZ
];
1730 /* No data is ever actually written so we can just do rmdir's */
1731 for (i
=0; i
< fsgeom
.agcount
; i
++) {
1732 sprintf(buf
, "%s/.fsr/ag%d", mnt
, i
);
1733 if (rmdir(buf
) < 0) {
1734 if (errno
!= ENOENT
) {
1736 _("could not remove tmpdir: %s: %s\n"),
1737 buf
, strerror(errno
));
1741 sprintf(buf
, "%s/.fsr", mnt
);
1742 if (rmdir(buf
) < 0) {
1743 if (errno
!= ENOENT
) {
1744 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1745 buf
, strerror(errno
));