]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
build: Add includes required for OS X
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
c988ea91
CH
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "xfs.h"
21#include "xfs_types.h"
22#include "jdm.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_attr_sf.h"
c988ea91
CH
25
26#include <fcntl.h>
27#include <errno.h>
c988ea91
CH
28#include <syslog.h>
29#include <signal.h>
30#include <sys/ioctl.h>
31#include <sys/wait.h>
c988ea91
CH
32#include <sys/statvfs.h>
33#include <sys/xattr.h>
34
5ae294ea
JT
35#ifdef HAVE_MNTENT
36# include <mntent.h>
37#endif
c988ea91
CH
38
39#ifndef XFS_XFLAG_NODEFRAG
40#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */
41#endif
42
89e4b5bd
CH
43#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
44#define _PATH_PROC_MOUNTS "/proc/mounts"
45
c988ea91
CH
46
47char *progname;
48
49int vflag;
50int gflag;
51static int Mflag;
52/* static int nflag; */
53int dflag = 0;
54/* static int sflag; */
55int argv_blksz_dio;
56extern int max_ext_size;
57static int npasses = 10;
58static int startpass = 0;
59
60struct getbmap *outmap = NULL;
61int outmap_size = 0;
62int RealUid;
63int tmp_agi;
64static __int64_t minimumfree = 2048;
65
66#define MNTTYPE_XFS "xfs"
67
68#define SMBUFSZ 1024
69#define ROOT 0
70#define NULLFD -1
71#define GRABSZ 64
72#define TARGETRANGE 10
73#define V_NONE 0
74#define V_OVERVIEW 1
75#define V_ALL 2
76#define BUFFER_SIZE (1<<16)
77#define BUFFER_MAX (1<<24)
c988ea91
CH
78
79static time_t howlong = 7200; /* default seconds of reorganizing */
80static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
81static time_t endtime;
82static time_t starttime;
83static xfs_ino_t leftoffino = 0;
84static int pagesize;
85
86void usage(int ret);
87static int fsrfile(char *fname, xfs_ino_t ino);
88static int fsrfile_common( char *fname, char *tname, char *mnt,
89 int fd, xfs_bstat_t *statp);
90static int packfile(char *fname, char *tname, int fd,
91 xfs_bstat_t *statp, struct fsxattr *fsxp);
92static void fsrdir(char *dirname);
93static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
94static void initallfs(char *mtab);
89e4b5bd 95static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
96static void fsrall_cleanup(int timeout);
97static int getnextents(int);
98int xfsrtextsize(int fd);
99int xfs_getrt(int fd, struct statvfs64 *sfbp);
100char * gettmpname(char *fname);
101char * getparent(char *fname);
102int fsrprintf(const char *fmt, ...);
103int read_fd_bmap(int, xfs_bstat_t *, int *);
104int cmp(const void *, const void *);
105static void tmp_init(char *mnt);
106static char * tmp_next(char *mnt);
107static void tmp_close(char *mnt);
108int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
c988ea91
CH
109
110xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */
111
112#define NMOUNT 64
113static int numfs;
114
/*
 * One entry per filesystem picked up from the mount table.
 */
typedef struct fsdesc {
	char	*dev;	/* strdup'ed copy of the mount entry's mnt_fsname */
	char	*mnt;	/* strdup'ed copy of the mount entry's mnt_dir */
	int	npass;	/* pass counter maintained by fsrallfs() */
} fsdesc_t;
120
121fsdesc_t *fs, *fsbase, *fsend;
122int fsbufsize = 10; /* A starting value */
123int nfrags = 0; /* Debug option: Coerse into specific number
124 * of extents */
125int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
126
127int
128xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
129{
130 return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
131}
132
133int
134xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
135{
136 xfs_fsop_bulkreq_t bulkreq;
137
cad114df 138 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
139 bulkreq.icount = 1;
140 bulkreq.ubuffer = ubuffer;
141 bulkreq.ocount = NULL;
142 return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
143}
144
145int
146xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
147 xfs_bstat_t *ubuffer, __s32 *ocount)
148{
149 xfs_fsop_bulkreq_t bulkreq;
150
cad114df 151 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
152 bulkreq.icount = icount;
153 bulkreq.ubuffer = ubuffer;
154 bulkreq.ocount = ocount;
155 return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
156}
157
158int
159xfs_swapext(int fd, xfs_swapext_t *sx)
160{
161 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
162}
163
164int
165xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
166{
167 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
168}
169
/*
 * Signal handler installed by fsrallfs(): run the "timed out" flavour of
 * fsrall_cleanup() and terminate with status 1.  The signal number is
 * not used.
 */
void
aborter(int unused)
{
	(void)unused;

	fsrall_cleanup(1);
	exit(1);
}
176
3e50d888
CH
177/*
178 * Check if the argument is either the device name or mountpoint of an XFS
179 * filesystem. Note that we do not care about bind mounted regular files
180 * here - the code that handles defragmentation of invidual files takes care
181 * of that.
182 */
183static char *
184find_mountpoint(char *mtab, char *argname, struct stat64 *sb)
185{
186 struct mntent *t;
187 struct stat64 ms;
188 FILE *mtabp;
189 char *mntp = NULL;
190
191 mtabp = setmntent(mtab, "r");
192 if (!mtabp) {
193 fprintf(stderr, _("%s: cannot read %s\n"),
194 progname, mtab);
195 exit(1);
196 }
197
198 while ((t = getmntent(mtabp))) {
199 if (S_ISDIR(sb->st_mode)) { /* mount point */
200 if (stat64(t->mnt_dir, &ms) < 0)
201 continue;
202 if (sb->st_ino != ms.st_ino)
203 continue;
204 if (sb->st_dev != ms.st_dev)
205 continue;
206 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
207 continue;
208 } else { /* device */
209 struct stat64 sb2;
210
211 if (stat64(t->mnt_fsname, &ms) < 0)
212 continue;
213 if (sb->st_rdev != ms.st_rdev)
214 continue;
215 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
216 continue;
217
218 /*
219 * Make sure the mountpoint given by mtab is accessible
220 * before using it.
221 */
222 if (stat64(t->mnt_dir, &sb2) < 0)
223 continue;
224 }
225
226 mntp = t->mnt_dir;
227 break;
228 }
229
230 endmntent(mtabp);
231 return mntp;
232}
233
c988ea91
CH
234int
235main(int argc, char **argv)
236{
3e50d888 237 struct stat64 sb;
c988ea91 238 char *argname;
c988ea91 239 int c;
3e50d888 240 char *mntp;
89e4b5bd 241 char *mtab = NULL;
c988ea91
CH
242
243 setlinebuf(stdout);
244 progname = basename(argv[0]);
245
246 setlocale(LC_ALL, "");
247 bindtextdomain(PACKAGE, LOCALEDIR);
248 textdomain(PACKAGE);
249
250 gflag = ! isatty(0);
251
89e4b5bd 252 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
253 switch (c) {
254 case 'M':
255 Mflag = 1;
256 break;
257 case 'g':
258 gflag = 1;
259 break;
260 case 'n':
261 /* nflag = 1; */
262 break;
263 case 'v':
264 ++vflag;
265 break;
266 case 'd':
267 dflag = 1;
268 break;
269 case 's': /* frag stats only */
270 /* sflag = 1; */
271 fprintf(stderr,
272 _("%s: Stats not yet supported for XFS\n"),
273 progname);
274 usage(1);
275 break;
276 case 't':
277 howlong = atoi(optarg);
278 break;
279 case 'f':
280 leftofffile = optarg;
281 break;
282 case 'm':
283 mtab = optarg;
284 break;
285 case 'b':
286 argv_blksz_dio = atoi(optarg);
287 break;
288 case 'p':
289 npasses = atoi(optarg);
290 break;
291 case 'C':
292 /* Testing opt: coerses frag count in result */
293 if (getenv("FSRXFSTEST") != NULL) {
294 nfrags = atoi(optarg);
295 openopts |= O_SYNC;
296 }
297 break;
298 case 'V':
299 printf(_("%s version %s\n"), progname, VERSION);
300 exit(0);
301 default:
302 usage(1);
303 }
89e4b5bd
CH
304 }
305
306 /*
307 * If the user did not specify an explicit mount table, try to use
308 * /proc/mounts if it is available, else /etc/mtab. We prefer
309 * /proc/mounts because it is kernel controlled, while /etc/mtab
310 * may contain garbage that userspace tools like pam_mounts wrote
311 * into it.
312 */
313 if (!mtab) {
314 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
315 mtab = _PATH_PROC_MOUNTS;
316 else
317 mtab = _PATH_MOUNTED;
318 }
319
c988ea91
CH
320 if (vflag)
321 setbuf(stdout, NULL);
322
323 starttime = time(0);
324
325 /* Save the caller's real uid */
326 RealUid = getuid();
327
328 pagesize = getpagesize();
329
330 if (optind < argc) {
331 for (; optind < argc; optind++) {
332 argname = argv[optind];
3e50d888 333
c988ea91
CH
334 if (lstat64(argname, &sb) < 0) {
335 fprintf(stderr,
336 _("%s: could not stat: %s: %s\n"),
337 progname, argname, strerror(errno));
338 continue;
339 }
3e50d888
CH
340
341 if (S_ISLNK(sb.st_mode)) {
342 struct stat64 sb2;
343
344 if (stat64(argname, &sb2) == 0 &&
345 (S_ISBLK(sb2.st_mode) ||
346 S_ISCHR(sb2.st_mode)))
c988ea91 347 sb = sb2;
c988ea91 348 }
3e50d888
CH
349
350 mntp = find_mountpoint(mtab, argname, &sb);
c988ea91 351 if (mntp != NULL) {
3e50d888 352 fsrfs(mntp, 0, 100);
c988ea91
CH
353 } else if (S_ISCHR(sb.st_mode)) {
354 fprintf(stderr, _(
355 "%s: char special not supported: %s\n"),
356 progname, argname);
357 exit(1);
358 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
359 if (!platform_test_xfs_path(argname)) {
360 fprintf(stderr, _(
361 "%s: cannot defragment: %s: Not XFS\n"),
362 progname, argname);
363 continue;
364 }
365 if (S_ISDIR(sb.st_mode))
366 fsrdir(argname);
367 else
368 fsrfile(argname, sb.st_ino);
369 } else {
370 printf(
371 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
372 argname);
373 }
374 }
375 } else {
376 initallfs(mtab);
89e4b5bd 377 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
378 }
379 return 0;
380}
381
382void
383usage(int ret)
384{
385 fprintf(stderr, _(
30626ef6
ES
386"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
387" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
388" %s -V\n\n"
c988ea91 389"Options:\n"
c988ea91
CH
390" -g Print to syslog (default if stdout not a tty).\n"
391" -t time How long to run in seconds.\n"
30626ef6 392" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
393" -f leftoff Use this instead of %s.\n"
394" -m mtab Use something other than /etc/mtab.\n"
395" -d Debug, print even more.\n"
30626ef6
ES
396" -v Verbose, more -v's more verbose.\n"
397" -V Print version number and exit.\n"
398 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
399 exit(ret);
400}
401
402/*
403 * initallfs -- read the mount table and set up an internal form
404 */
405static void
406initallfs(char *mtab)
407{
408 FILE *fp;
409 struct mntent *mp;
410 int mi;
411 char *cp;
412 struct stat64 sb;
413
414 fp = setmntent(mtab, "r");
415 if (fp == NULL) {
416 fsrprintf(_("could not open mtab file: %s\n"), mtab);
417 exit(1);
418 }
419
420 /* malloc a number of descriptors, increased later if needed */
421 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
422 fsrprintf(_("out of memory: %s\n"), strerror(errno));
423 exit(1);
424 }
425 fsend = (fsbase + fsbufsize - 1);
426
427 /* find all rw xfs file systems */
428 mi = 0;
429 fs = fsbase;
430 while ((mp = getmntent(fp))) {
431 int rw = 0;
432
433 if (strcmp(mp->mnt_type, MNTTYPE_XFS ) != 0 ||
434 stat64(mp->mnt_fsname, &sb) == -1 ||
435 !S_ISBLK(sb.st_mode))
436 continue;
437
438 cp = strtok(mp->mnt_opts,",");
439 do {
440 if (strcmp("rw", cp) == 0)
441 rw++;
442 } while ((cp = strtok(NULL, ",")) != NULL);
443 if (rw == 0) {
444 if (dflag)
445 fsrprintf(_("Skipping %s: not mounted rw\n"),
446 mp->mnt_fsname);
447 continue;
448 }
449
450 if (mi == fsbufsize) {
451 fsbufsize += NMOUNT;
452 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
453 fsbufsize * sizeof(fsdesc_t))) == NULL) {
454 fsrprintf(_("out of memory: %s\n"),
455 strerror(errno));
456 exit(1);
457 }
458 if (!fsbase) {
459 fsrprintf(_("out of memory on realloc: %s\n"),
460 strerror(errno));
461 exit(1);
462 }
463 fs = (fsbase + mi); /* Needed ? */
464 }
465
466 fs->dev = strdup(mp->mnt_fsname);
467 fs->mnt = strdup(mp->mnt_dir);
468
758bcc92 469 if (fs->dev == NULL) {
c988ea91
CH
470 fsrprintf(_("strdup(%s) failed\n"), mp->mnt_fsname);
471 exit(1);
472 }
758bcc92
ES
473 if (fs->mnt == NULL) {
474 fsrprintf(_("strdup(%s) failed\n"), mp->mnt_dir);
475 exit(1);
476 }
c988ea91
CH
477 mi++;
478 fs++;
479 }
480 numfs = mi;
481 fsend = (fsbase + numfs);
482 endmntent(fp);
483 if (numfs == 0) {
484 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
485 exit(0);
486 }
487 if (vflag || dflag) {
488 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
489 numfs);
490 if (dflag)
491 for (fs = fsbase; fs < fsend; fs++)
492 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
493 }
494}
495
496static void
89e4b5bd 497fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
498{
499 int fd;
500 int error;
501 int found = 0;
502 char *fsname;
503 char buf[SMBUFSZ];
504 int mdonly = Mflag;
505 char *ptr;
506 xfs_ino_t startino = 0;
507 fsdesc_t *fsp;
508 struct stat64 sb, sb2;
509
510 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
511
512 endtime = starttime + howlong;
513 fs = fsbase;
514
515 /* where'd we leave off last time? */
516 if (lstat64(leftofffile, &sb) == 0) {
517 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
518 fsrprintf(_("%s: open failed\n"), leftofffile);
519 }
520 else if ( fstat64(fd, &sb2) == 0) {
521 /*
522 * Verify that lstat & fstat point to the
523 * same regular file (no links/no quick spoofs)
524 */
525 if ( (sb.st_dev != sb2.st_dev) ||
526 (sb.st_ino != sb2.st_ino) ||
527 ((sb.st_mode & S_IFMT) != S_IFREG) ||
528 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
529 (sb2.st_uid != ROOT) ||
530 (sb2.st_nlink != 1)
531 )
532 {
533 fsrprintf(_("Can't use %s: mode=0%o own=%d"
534 " nlink=%d\n"),
535 leftofffile, sb.st_mode,
536 sb.st_uid, sb.st_nlink);
537 close(fd);
538 fd = NULLFD;
539 }
540 }
541 else {
542 close(fd);
543 fd = NULLFD;
544 }
545 }
546 else {
547 fd = NULLFD;
548 }
549
550 if (fd != NULLFD) {
551 if (read(fd, buf, SMBUFSZ) == -1) {
552 fs = fsbase;
553 fsrprintf(_("could not read %s, starting with %s\n"),
554 leftofffile, *fs->dev);
555 } else {
eef20df0
ES
556 /* Ensure the buffer we read is null terminated */
557 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
558 for (fs = fsbase; fs < fsend; fs++) {
559 fsname = fs->dev;
560 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
561 && buf[strlen(fsname)] == ' ') {
562 found = 1;
563 break;
564 }
565 }
566 if (! found)
567 fs = fsbase;
568
569 ptr = strchr(buf, ' ');
570 if (ptr) {
571 startpass = atoi(++ptr);
572 ptr = strchr(ptr, ' ');
573 if (ptr) {
574 startino = strtoull(++ptr, NULL, 10);
575 }
576 }
577 if (startpass < 0)
578 startpass = 0;
579
580 /* Init pass counts */
581 for (fsp = fsbase; fsp < fs; fsp++) {
582 fsp->npass = startpass + 1;
583 }
584 for (fsp = fs; fsp <= fsend; fsp++) {
585 fsp->npass = startpass;
586 }
587 }
588 close(fd);
589 }
590
591 if (vflag) {
592 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
593 fs->npass, (unsigned long long)startino,
594 fs->dev, fs->mnt);
595 }
596
597 signal(SIGABRT, aborter);
598 signal(SIGHUP, aborter);
599 signal(SIGINT, aborter);
600 signal(SIGQUIT, aborter);
601 signal(SIGTERM, aborter);
602
603 /* reorg for 'howlong' -- checked in 'fsrfs' */
604 while (endtime > time(0)) {
605 pid_t pid;
606 if (fs == fsend)
607 fs = fsbase;
608 if (fs->npass == npasses) {
609 fsrprintf(_("Completed all %d passes\n"), npasses);
610 break;
611 }
612 if (npasses > 1 && !fs->npass)
613 Mflag = 1;
614 else
615 Mflag = mdonly;
616 pid = fork();
617 switch(pid) {
618 case -1:
619 fsrprintf(_("couldn't fork sub process:"));
620 exit(1);
621 break;
622 case 0:
623 error = fsrfs(fs->mnt, startino, TARGETRANGE);
624 exit (error);
625 break;
626 default:
627 wait(&error);
c988ea91
CH
628 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
629 /* child timed out & did fsrall_cleanup */
630 exit(0);
631 }
632 break;
633 }
634 startino = 0; /* reset after the first time through */
635 fs->npass++;
636 fs++;
637 }
638 fsrall_cleanup(endtime <= time(0));
639}
640
641/*
642 * fsrall_cleanup -- close files, print next starting location, etc.
643 */
644static void
645fsrall_cleanup(int timeout)
646{
647 int fd;
648 int ret;
649 char buf[SMBUFSZ];
650
c988ea91 651 unlink(leftofffile);
d0e82db1
ES
652
653 if (timeout) {
654 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
655 progname, startpass, fs->npass,
656 time(0) - endtime + howlong);
657
658 /* record where we left off */
659 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
660 if (fd == -1) {
661 fsrprintf(_("open(%s) failed: %s\n"),
662 leftofffile, strerror(errno));
663 } else {
c988ea91
CH
664 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
665 fs->npass, (unsigned long long)leftoffino);
666 if (write(fd, buf, ret) < strlen(buf))
667 fsrprintf(_("write(%s) failed: %s\n"),
668 leftofffile, strerror(errno));
669 close(fd);
670 }
671 }
c988ea91
CH
672}
673
674/*
675 * fsrfs -- reorganize a file system
676 */
677static int
678fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
679{
680
681 int fsfd, fd;
682 int count = 0;
683 int ret;
684 __s32 buflenout;
685 xfs_bstat_t buf[GRABSZ];
686 char fname[64];
687 char *tname;
688 jdm_fshandle_t *fshandlep;
689 xfs_ino_t lastino = startino;
690
691 fsrprintf(_("%s start inode=%llu\n"), mntdir,
692 (unsigned long long)startino);
693
694 fshandlep = jdm_getfshandle( mntdir );
695 if ( ! fshandlep ) {
696 fsrprintf(_("unable to get handle: %s: %s\n"),
697 mntdir, strerror( errno ));
698 return -1;
699 }
700
701 if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
702 fsrprintf(_("unable to open: %s: %s\n"),
703 mntdir, strerror( errno ));
e3e2793d 704 free(fshandlep);
c988ea91
CH
705 return -1;
706 }
707
708 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
709 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
710 mntdir);
11e06961 711 close(fsfd);
e3e2793d 712 free(fshandlep);
c988ea91
CH
713 return -1;
714 }
715
716 tmp_init(mntdir);
717
718 while ((ret = xfs_bulkstat(fsfd,
98166c91 719 &lastino, GRABSZ, &buf[0], &buflenout)) == 0) {
c988ea91
CH
720 xfs_bstat_t *p;
721 xfs_bstat_t *endp;
722
723 if (buflenout == 0)
724 goto out0;
725
726 /* Each loop through, defrag targetrange percent of the files */
727 count = (buflenout * targetrange) / 100;
728
729 qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
730
731 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
732 /* Do some obvious checks now */
733 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
734 (p->bs_extents < 2))
735 continue;
736
108e985b
DC
737 fd = jdm_open(fshandlep, p, O_RDWR|O_DIRECT);
738 if (fd < 0) {
c988ea91
CH
739 /* This probably means the file was
740 * removed while in progress of handling
741 * it. Just quietly ignore this file.
742 */
743 if (dflag)
744 fsrprintf(_("could not open: "
745 "inode %llu\n"), p->bs_ino);
746 continue;
747 }
748
749 /* Don't know the pathname, so make up something */
750 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
751
752 /* Get a tmp file name */
753 tname = tmp_next(mntdir);
754
755 ret = fsrfile_common(fname, tname, mntdir, fd, p);
756
757 leftoffino = p->bs_ino;
758
759 close(fd);
760
761 if (ret == 0) {
762 if (--count <= 0)
763 break;
764 }
765 }
766 if (endtime && endtime < time(0)) {
767 tmp_close(mntdir);
768 close(fsfd);
769 fsrall_cleanup(1);
770 exit(1);
771 }
772 }
773 if (ret < 0)
774 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
775out0:
776 tmp_close(mntdir);
777 close(fsfd);
e3e2793d 778 free(fshandlep);
c988ea91
CH
779 return 0;
780}
781
782/*
783 * To compare bstat structs for qsort.
784 */
785int
786cmp(const void *s1, const void *s2)
787{
788 return( ((xfs_bstat_t *)s2)->bs_extents -
789 ((xfs_bstat_t *)s1)->bs_extents);
790
791}
792
/*
 * Placeholder for reorganising a directory hierarchy: it only reports
 * that directory defragmentation is not supported and does nothing else.
 */
static void
fsrdir(char *dirname)
{
	const char	*msg = _("%s: Directory defragmentation not supported\n");

	fsrprintf(msg, dirname);
}
803
804/*
805 * Sets up the defragmentation of a file based on the
806 * filepath. It collects the bstat information, does
807 * an open on the file and passes this all to fsrfile_common.
808 */
809static int
810fsrfile(char *fname, xfs_ino_t ino)
811{
812 xfs_bstat_t statbuf;
813 jdm_fshandle_t *fshandlep;
4f10a2fb
ES
814 int fd = -1, fsfd = -1;
815 int error = -1;
c988ea91
CH
816 char *tname;
817
818 fshandlep = jdm_getfshandle(getparent (fname) );
4f10a2fb 819 if (!fshandlep) {
c988ea91
CH
820 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
821 fname, strerror(errno));
4f10a2fb 822 goto out;
c988ea91
CH
823 }
824
825 /*
826 * Need to open something on the same filesystem as the
827 * file. Open the parent.
828 */
829 fsfd = open(getparent(fname), O_RDONLY);
830 if (fsfd < 0) {
831 fsrprintf(_("unable to open sys handle for %s: %s\n"),
832 fname, strerror(errno));
4f10a2fb 833 goto out;
c988ea91
CH
834 }
835
836 if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
837 fsrprintf(_("unable to get bstat on %s: %s\n"),
838 fname, strerror(errno));
4f10a2fb 839 goto out;
c988ea91
CH
840 }
841
108e985b 842 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
843 if (fd < 0) {
844 fsrprintf(_("unable to open handle %s: %s\n"),
845 fname, strerror(errno));
4f10a2fb 846 goto out;
c988ea91
CH
847 }
848
849 /* Get the fs geometry */
850 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
851 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
4f10a2fb 852 goto out;
c988ea91
CH
853 }
854
c988ea91
CH
855 tname = gettmpname(fname);
856
857 if (tname)
858 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
859
4f10a2fb
ES
860out:
861 if (fsfd >= 0)
862 close(fsfd);
863 if (fd >= 0)
864 close(fd);
865 free(fshandlep);
c988ea91
CH
866
867 return error;
868}
869
870
871/*
872 * This is the common defrag code for either a full fs
873 * defragmentation or a single file. Check as much as
874 * possible with the file, fork a process to setuid to the
875 * target file owner's uid and defragment the file.
876 * This is done so the new extents created in a tmp file are
877 * reflected in the owners' quota without having to do any
878 * special code in the kernel. When the existing extents
879 * are removed, the quotas will be correct. It's ugly but
880 * it saves us from doing some quota re-construction in
881 * the extent swap. The price is that the defragmentation
882 * will fail if the owner of the target file is already at
883 * their quota limit.
884 */
885static int
886fsrfile_common(
887 char *fname,
888 char *tname,
889 char *fsname,
890 int fd,
891 xfs_bstat_t *statp)
892{
893 int error;
894 struct statvfs64 vfss;
895 struct fsxattr fsx;
896 unsigned long bsize;
897
898 if (vflag)
899 fsrprintf("%s\n", fname);
900
901 if (fsync(fd) < 0) {
902 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
903 return -1;
904 }
905
906 if (statp->bs_size == 0) {
907 if (vflag)
908 fsrprintf(_("%s: zero size, ignoring\n"), fname);
909 return(0);
910 }
911
912 /* Check if a mandatory lock is set on the file to try and
913 * avoid blocking indefinitely on the reads later. Note that
914 * someone could still set a mandatory lock after this check
915 * but before all reads have completed to block fsr reads.
916 * This change just closes the window a bit.
917 */
918 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
919 struct flock fl;
920
921 fl.l_type = F_RDLCK;
922 fl.l_whence = SEEK_SET;
923 fl.l_start = (off_t)0;
924 fl.l_len = 0;
925 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
926 if (vflag)
927 fsrprintf(_("locking check failed: %s\n"),
928 fname);
929 return(-1);
930 }
931 if (fl.l_type != F_UNLCK) {
932 /* Mandatory lock is set */
933 if (vflag)
934 fsrprintf(_("mandatory lock: %s: ignoring\n"),
935 fname);
936 return(-1);
937 }
938 }
939
940 /*
941 * Check if there is room to copy the file.
942 *
943 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
944 * not the optimal I/O size as struct stat.
945 */
946 if (statvfs64(fsname ? fsname : fname, &vfss) < 0) {
947 fsrprintf(_("unable to get fs stat on %s: %s\n"),
948 fname, strerror(errno));
949 return -1;
950 }
951 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
952 if (statp->bs_blksize * statp->bs_blocks >
953 vfss.f_bfree * bsize - minimumfree) {
954 fsrprintf(_("insufficient freespace for: %s: "
955 "size=%lld: ignoring\n"), fname,
956 statp->bs_blksize * statp->bs_blocks);
957 return 1;
958 }
959
960 if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
961 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
962 return(-1);
963 }
964 if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) {
965 if (vflag)
966 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
967 return(0);
968 }
969 if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) {
970 if (vflag)
971 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
972 fname);
973 return(0);
974 }
975 if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
976 if (xfs_getrt(fd, &vfss) < 0) {
977 fsrprintf(_("cannot get realtime geometry for: %s\n"),
978 fname);
979 return(-1);
980 }
981 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
982 fsrprintf(_("low on realtime free space: %s: "
983 "ignoring file\n"), fname);
984 return(-1);
985 }
986 }
987
988 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
989 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
990 return -1;
991 }
992
993 /*
994 * Previously the code forked here, & the child changed it's uid to
995 * that of the file's owner and then called packfile(), to keep
996 * quota counts correct. (defragged files could use fewer blocks).
997 *
998 * Instead, just fchown() the temp file to the uid,gid of the
999 * file we're defragging, in packfile().
1000 */
1001
1002 if ((error = packfile(fname, tname, fd, statp, &fsx)))
1003 return error;
1004 return -1; /* no error */
1005}
1006
bdb041f5
DC
1007/*
1008 * Attempt to set the attr fork up correctly. This is simple for attr1
1009 * filesystems as they have a fixed inode fork offset. In that case
1010 * just create an attribute and that's all we need to do.
1011 *
1012 * For attr2 filesystems, see if we have the actual fork offset in
1013 * the bstat structure. If so, just create additional attributes on
1014 * the temporary inode until the offset matches.
1015 *
1016 * If it doesn't exist, we can only do best effort. Add an attribute at a time
1017 * to move the inode fork around, but take into account that the attribute
1018 * might be too small to move the fork every time we add one. This should
1019 * hopefully put the fork offset in the right place. It's not a big deal if we
1020 * don't get it right - the kernel will reject it when we try to swap extents.
1021 */
1022static int
1023fsr_setup_attr_fork(
1024 int fd,
1025 int tfd,
1026 xfs_bstat_t *bstatp)
1027{
1028 struct stat64 tstatbuf;
1029 int i;
27507775 1030 int diff = 0;
bdb041f5
DC
1031 int last_forkoff = 0;
1032 int no_change_cnt = 0;
1033 int ret;
1034
1035 if (!(bstatp->bs_xflags & XFS_XFLAG_HASATTR))
1036 return 0;
1037
1038 /*
1039 * use the old method if we have attr1 or the kernel does not yet
1040 * support passing the fork offset in the bulkstat data.
1041 */
1042 if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
1043 bstatp->bs_forkoff == 0) {
1044 /* attr1 */
1045 ret = fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE);
1046 if (ret) {
1047 fsrprintf(_("could not set ATTR\n"));
1048 return -1;
1049 }
1050 goto out;
1051 }
1052
1053 /* attr2 w/ fork offsets */
1054
1055 if (fstat64(tfd, &tstatbuf) < 0) {
1056 fsrprintf(_("unable to stat temp file: %s\n"),
1057 strerror(errno));
1058 return -1;
1059 }
1060
1061 i = 0;
1062 do {
1063 xfs_bstat_t tbstat;
1064 xfs_ino_t ino;
1065 char name[64];
bdb041f5
DC
1066
1067 /*
1adfe5c6 1068 * bulkstat the temp inode to see what the forkoff is. Use
bdb041f5
DC
1069 * this to compare against the target and determine what we
1070 * need to do.
1071 */
1072 ino = tstatbuf.st_ino;
1073 if ((xfs_bulkstat_single(tfd, &ino, &tbstat)) < 0) {
1074 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1075 strerror(errno));
1076 return -1;
1077 }
1078 if (dflag)
1079 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1080 bstatp->bs_forkoff, tbstat.bs_forkoff);
1adfe5c6
ES
1081 diff = tbstat.bs_forkoff - bstatp->bs_forkoff;
1082
1083 /* if they are equal, we are done */
1084 if (!diff)
1085 goto out;
bdb041f5
DC
1086
1087 snprintf(name, sizeof(name), "user.%d", i);
1088
1089 /*
1090 * If there is no attribute, then we need to create one to get
1091 * an attribute fork at the default location.
1092 */
1093 if (!tbstat.bs_forkoff) {
1adfe5c6 1094 ASSERT(i == 0);
bdb041f5
DC
1095 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1096 if (ret) {
1097 fsrprintf(_("could not set ATTR\n"));
1098 return -1;
1099 }
1100 continue;
1adfe5c6
ES
1101 } else if (i == 0) {
1102 struct fsxattr fsx;
1103 /*
1104 * First pass, and temp file already has an inline
1105 * xattr, probably due to selinux.
1106 *
1107 * It's *possible* that the temp file attr area
1108 * is larger than the target file's, if the
1109 * target file's attrs are not inline:
1110 *
1111 * Target Temp
1112 * +-------+ 0 +-------+ 0
1113 * | | | |
1114 * | | | Data |
1115 * | Data | | |
1116 * | | v-------v forkoff
1117 * | | | |
1118 * v-------v forkoff | Attr | local
1119 * | Attr | ext/btree | |
1120 * +-------+ +-------+
1121 *
1122 * FSGETXATTRA will tell us nr of attr extents in
1123 * target, if any. If none, it's local:
1124 */
1125
1126 memset(&fsx, 0, sizeof(fsx));
1127 if (ioctl(fd, XFS_IOC_FSGETXATTRA, &fsx)) {
1128 fsrprintf(_("FSGETXATTRA failed on target\n"));
1129 return -1;
1130 }
1131
1132 /*
1133 * If target attr area is less than the temp's (diff < 0)
1134 * and the target is not local, write a big attr to
1135 * the temp file to knock the attr out of local format,
1136 * to match the target. (This should actually *increase*
1137 * the temp file's forkoffset when the attr moves out
1138 * of the inode)
1139 */
1140 if (diff < 0 && fsx.fsx_nextents > 0) {
1141 char val[2048];
1142 memset(val, 'X', 2048);
1143 if (fsetxattr(tfd, name, val, 2048, 0)) {
1144 fsrprintf(_("big ATTR set failed\n"));
1145 return -1;
1146 }
1147 /* Go back & see where we're at now */
1148 continue;
1149 }
bdb041f5
DC
1150 }
1151
1152 /*
1153 * make a progress check so we don't get stuck trying to extend
1154 * a large btree form attribute fork.
1155 */
1156 if (last_forkoff == tbstat.bs_forkoff) {
1157 if (no_change_cnt++ > 10)
1158 break;
ff85ea3f
ES
1159 } else /* progress! */
1160 no_change_cnt = 0;
bdb041f5
DC
1161 last_forkoff = tbstat.bs_forkoff;
1162
1163 /* work out which way to grow the fork */
bdb041f5
DC
1164 if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
1165 fsrprintf(_("forkoff diff %d too large!\n"), diff);
1166 return -1;
1167 }
1168
bdb041f5 1169 /*
1adfe5c6
ES
1170 * if the temp inode fork offset is still smaller then we have
1171 * to grow the data fork
bdb041f5
DC
1172 */
1173 if (diff < 0) {
1174 /*
1175 * create some temporary extents in the inode to move
1176 * the fork in the direction we need. This can be done
1177 * by preallocating some single block extents at
1178 * non-contiguous offsets.
1179 */
1180 /* XXX: unimplemented! */
27507775
ES
1181 if (dflag)
1182 printf(_("data fork growth unimplemented\n"));
bdb041f5
DC
1183 goto out;
1184 }
1185
1186 /* we need to grow the attr fork, so create another attr */
1187 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1188 if (ret) {
1189 fsrprintf(_("could not set ATTR\n"));
1190 return -1;
1191 }
1192
1193 } while (++i < 100); /* don't go forever */
1194
1195out:
1196 if (dflag)
1197 fsrprintf(_("set temp attr\n"));
27507775
ES
1198 /* We failed to resolve the fork difference */
1199 if (dflag && diff)
1200 fsrprintf(_("failed to match fork offset\n"));;
1201
bdb041f5
DC
1202 return 0;
1203}
c988ea91
CH
1204
1205/*
1206 * Do the defragmentation of a single file.
1207 * We already are pretty sure we can and want to
1208 * defragment the file. Create the tmp file, copy
1209 * the data (maintaining holes) and call the kernel
671632c6
ES
1210 * extent swap routine.
1211 *
1212 * Return values:
1213 * -1: Some error was encountered
1214 * 0: Successfully defragmented the file
1215 * 1: No change / No Error
c988ea91
CH
1216 */
1217static int
1218packfile(char *fname, char *tname, int fd,
1219 xfs_bstat_t *statp, struct fsxattr *fsxp)
1220{
671632c6 1221 int tfd = -1;
c988ea91 1222 int srval;
671632c6 1223 int retval = -1; /* Failure is the default */
c988ea91
CH
1224 int nextents, extent, cur_nextents, new_nextents;
1225 unsigned blksz_dio;
1226 unsigned dio_min;
1227 struct dioattr dio;
1228 static xfs_swapext_t sx;
1229 struct xfs_flock64 space;
1230 off64_t cnt, pos;
671632c6 1231 void *fbuf = NULL;
c988ea91
CH
1232 int ct, wc, wc_b4;
1233 char ffname[SMBUFSZ];
1234 int ffd = -1;
1235
1236 /*
1237 * Work out the extent map - nextents will be set to the
1238 * minimum number of extents needed for the file (taking
1239 * into account holes), cur_nextents is the current number
1240 * of extents.
1241 */
1242 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1243
1244 if (cur_nextents == 1 || cur_nextents <= nextents) {
1245 if (vflag)
1246 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1247 retval = 1; /* indicates no change/no error */
1248 goto out;
c988ea91
CH
1249 }
1250
1251 if (dflag)
1252 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1253 fname, cur_nextents, (cur_nextents - nextents),
1254 tname);
1255
1256 if ((tfd = open(tname, openopts, 0666)) < 0) {
1257 if (vflag)
1258 fsrprintf(_("could not open tmp file: %s: %s\n"),
1259 tname, strerror(errno));
671632c6 1260 goto out;
c988ea91
CH
1261 }
1262 unlink(tname);
1263
1264 /* Setup extended attributes */
bdb041f5
DC
1265 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1266 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1267 goto out;
c988ea91
CH
1268 }
1269
1270 /* Setup extended inode flags, project identifier, etc */
1271 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
1272 if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) {
1273 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1274 tname);
671632c6 1275 goto out;
c988ea91
CH
1276 }
1277 }
1278
1279 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1280 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1281 goto out;
c988ea91
CH
1282 }
1283
1284 dio_min = dio.d_miniosz;
1285 if (statp->bs_size <= dio_min) {
1286 blksz_dio = dio_min;
1287 } else {
1288 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1289 if (argv_blksz_dio != 0)
1290 blksz_dio = min(argv_blksz_dio, blksz_dio);
1291 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1292 }
1293
1294 if (dflag) {
1295 fsrprintf(_("DEBUG: "
1296 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1297 statp->bs_size, blksz_dio, dio.d_miniosz,
1298 dio.d_maxiosz, pagesize);
1299 }
1300
1301 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1302 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1303 goto out;
c988ea91
CH
1304 }
1305
1306 if (nfrags) {
1307 /* Create new tmp file in same AG as first */
1308 sprintf(ffname, "%s.frag", tname);
1309
1310 /* Open the new file for sync writes */
1311 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1312 fsrprintf(_("could not open fragfile: %s : %s\n"),
1313 ffname, strerror(errno));
671632c6 1314 goto out;
c988ea91
CH
1315 }
1316 unlink(ffname);
1317 }
1318
1319 /* Loop through block map allocating new extents */
1320 for (extent = 0; extent < nextents; extent++) {
1321 pos = outmap[extent].bmv_offset;
1322 if (outmap[extent].bmv_block == -1) {
1323 space.l_whence = SEEK_SET;
1324 space.l_start = pos;
1325 space.l_len = outmap[extent].bmv_length;
1326 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1327 fsrprintf(_("could not trunc tmp %s\n"),
1328 tname);
1329 }
3d303baa
ES
1330 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1331 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1332 tname, strerror(errno));
1333 goto out;
1334 }
c988ea91
CH
1335 continue;
1336 } else if (outmap[extent].bmv_length == 0) {
1337 /* to catch holes at the beginning of the file */
1338 continue;
1339 }
1340 if (! nfrags) {
1341 space.l_whence = SEEK_CUR;
1342 space.l_start = 0;
1343 space.l_len = outmap[extent].bmv_length;
1344
1345 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1346 fsrprintf(_("could not pre-allocate tmp space:"
1347 " %s\n"), tname);
671632c6 1348 goto out;
c988ea91 1349 }
3d303baa
ES
1350 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1351 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1352 tname, strerror(errno));
1353 goto out;
1354 }
c988ea91
CH
1355 }
1356 } /* end of space allocation loop */
1357
1358 if (lseek64(tfd, 0, SEEK_SET)) {
1359 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1360 goto out;
c988ea91
CH
1361 }
1362
1363 /* Check if the temporary file has fewer extents */
1364 new_nextents = getnextents(tfd);
1365 if (dflag)
1366 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1367 if (cur_nextents <= new_nextents) {
1368 if (vflag)
1369 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1370 retval = 1; /* no change/no error */
1371 goto out;
c988ea91
CH
1372 }
1373
1374 /* Loop through block map copying the file. */
1375 for (extent = 0; extent < nextents; extent++) {
1376 pos = outmap[extent].bmv_offset;
1377 if (outmap[extent].bmv_block == -1) {
3d303baa
ES
1378 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1379 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1380 tname, strerror(errno));
1381 goto out;
1382 }
1383 if (lseek64(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1384 fsrprintf(_("could not lseek in file: %s : %s\n"),
1385 fname, strerror(errno));
1386 goto out;
1387 }
c988ea91
CH
1388 continue;
1389 } else if (outmap[extent].bmv_length == 0) {
1390 /* to catch holes at the beginning of the file */
1391 continue;
1392 }
1393 for (cnt = outmap[extent].bmv_length; cnt > 0;
1394 cnt -= ct, pos += ct) {
1395 if (nfrags && --nfrags) {
1396 ct = min(cnt, dio_min);
1397 } else if (cnt % dio_min == 0) {
1398 ct = min(cnt, blksz_dio);
1399 } else {
1400 ct = min(cnt + dio_min - (cnt % dio_min),
1401 blksz_dio);
1402 }
1403 ct = read(fd, fbuf, ct);
1404 if (ct == 0) {
1405 /* EOF, stop trying to read */
1406 extent = nextents;
1407 break;
1408 }
1409 /* Ensure we do direct I/O to correct block
1410 * boundaries.
1411 */
1412 if (ct % dio_min != 0) {
1413 wc = ct + dio_min - (ct % dio_min);
1414 } else {
1415 wc = ct;
1416 }
1417 wc_b4 = wc;
1418 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1419 if (ct < 0)
1420 fsrprintf(_("bad read of %d bytes "
1421 "from %s: %s\n"), wc_b4,
1422 fname, strerror(errno));
1423 else if (wc < 0)
1424 fsrprintf(_("bad write of %d bytes "
1425 "to %s: %s\n"), wc_b4,
1426 tname, strerror(errno));
1427 else {
1428 /*
1429 * Might be out of space
1430 *
1431 * Try to finish write
1432 */
1433 int resid = ct-wc;
1434
1435 if ((wc = write(tfd, ((char *)fbuf)+wc,
1436 resid)) == resid) {
1437 /* worked on second attempt? */
1438 continue;
1439 }
1440 else if (wc < 0) {
1441 fsrprintf(_("bad write2 of %d "
1442 "bytes to %s: %s\n"),
1443 resid, tname,
1444 strerror(errno));
1445 } else {
1446 fsrprintf(_("bad copy to %s\n"),
1447 tname);
1448 }
1449 }
671632c6 1450 goto out;
c988ea91
CH
1451 }
1452 if (nfrags) {
1453 /* Do a matching write to the tmp file */
431ec4e6 1454 wc_b4 = wc;
c988ea91
CH
1455 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1456 fsrprintf(_("bad write of %d bytes "
1457 "to %s: %s\n"),
1458 wc_b4, ffname, strerror(errno));
1459 }
1460 }
1461 }
1462 }
3d303baa
ES
1463 if (ftruncate64(tfd, statp->bs_size) < 0) {
1464 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1465 fname, strerror(errno));
1466 goto out;
1467 }
1468 if (fsync(tfd) < 0) {
1469 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1470 fname, strerror(errno));
1471 goto out;
1472 }
c988ea91 1473
c988ea91
CH
1474 sx.sx_stat = *statp; /* struct copy */
1475 sx.sx_version = XFS_SX_VERSION;
1476 sx.sx_fdtarget = fd;
1477 sx.sx_fdtmp = tfd;
1478 sx.sx_offset = 0;
1479 sx.sx_length = statp->bs_size;
1480
1481 /* switch to the owner's id, to keep quota in line */
1482 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1483 if (vflag)
1484 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1485 tname, strerror(errno));
671632c6 1486 goto out;
c988ea91
CH
1487 }
1488
1489 /* Swap the extents */
1490 srval = xfs_swapext(fd, &sx);
1491 if (srval < 0) {
1492 if (errno == ENOTSUP) {
1493 if (vflag || dflag)
1494 fsrprintf(_("%s: file type not supported\n"), fname);
1495 } else if (errno == EFAULT) {
1496 /* The file has changed since we started the copy */
1497 if (vflag || dflag)
1498 fsrprintf(_("%s: file modified defrag aborted\n"),
1499 fname);
1500 } else if (errno == EBUSY) {
1501 /* Timestamp has changed or mmap'ed file */
1502 if (vflag || dflag)
1503 fsrprintf(_("%s: file busy\n"), fname);
1504 } else {
1505 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1506 fname, strerror(errno));
1507 }
671632c6 1508 goto out;
c988ea91
CH
1509 }
1510
1511 /* Report progress */
1512 if (vflag)
1513 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1514 cur_nextents, new_nextents,
1515 (new_nextents <= nextents ? "DONE" : " " ),
1516 fname);
671632c6
ES
1517 retval = 0;
1518
1519out:
1520 free(fbuf);
1521 if (tfd != -1)
1522 close(tfd);
1523 if (ffd != -1)
1524 close(ffd);
1525 return retval;
c988ea91
CH
1526}
1527
1528char *
1529gettmpname(char *fname)
1530{
1531 static char buf[PATH_MAX+1];
1532 char sbuf[SMBUFSZ];
1533 char *ptr;
1534
1535 sprintf(sbuf, "/.fsr%d", getpid());
1536
6063feca
ES
1537 strncpy(buf, fname, PATH_MAX);
1538 buf[PATH_MAX] = '\0';
c988ea91
CH
1539 ptr = strrchr(buf, '/');
1540 if (ptr) {
1541 *ptr = '\0';
1542 } else {
1543 strcpy(buf, ".");
1544 }
1545
1546 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1547 fsrprintf(_("tmp file name too long: %s\n"), fname);
1548 return(NULL);
1549 }
1550
1551 strcat(buf, sbuf);
1552
1553 return(buf);
1554}
1555
/*
 * Return the directory part of fname: everything before the last '/'.
 * A name whose only '/' is the leading one yields "/", and a name
 * without any '/' yields ".".
 *
 * The result lives in a static buffer that is overwritten by the
 * next call; at most PATH_MAX characters of fname are considered.
 */
char *
getparent(char *fname)
{
	static char	buf[PATH_MAX+1];
	char		*slash;

	strncpy(buf, fname, PATH_MAX);
	buf[PATH_MAX] = '\0';

	slash = strrchr(buf, '/');
	if (slash == NULL) {
		/* No directory component at all */
		strcpy(buf, ".");
		return buf;
	}

	/* Keep the slash itself when it is the root directory */
	if (slash == buf)
		slash++;
	*slash = '\0';

	return buf;
}
1575
1576/*
1577 * Read in block map of the input file, coalesce contiguous
1578 * extents into a single range, keep all holes. Convert from 512 byte
1579 * blocks to bytes.
1580 *
1581 * This code was borrowed from mv.c with some minor mods.
1582 */
1583#define MAPSIZE 128
1584#define OUTMAP_SIZE_INCREMENT MAPSIZE
1585
1586int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
1587{
1588 int i, cnt;
1589 struct getbmap map[MAPSIZE];
1590
1591#define BUMP_CNT \
1592 if (++cnt >= outmap_size) { \
1593 outmap_size += OUTMAP_SIZE_INCREMENT; \
1594 outmap = (struct getbmap *)realloc(outmap, \
1595 outmap_size*sizeof(*outmap)); \
1596 if (outmap == NULL) { \
1597 fsrprintf(_("realloc failed: %s\n"), \
1598 strerror(errno)); \
1599 exit(1); \
1600 } \
1601 }
1602
1603 /* Initialize the outmap array. It always grows - never shrinks.
1604 * Left-over memory allocation is saved for the next files.
1605 */
1606 if (outmap_size == 0) {
1607 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1608 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1609 if (!outmap) {
1610 fsrprintf(_("malloc failed: %s\n"),
1611 strerror(errno));
1612 exit(1);
1613 }
1614 }
1615
1616 outmap[0].bmv_block = 0;
1617 outmap[0].bmv_offset = 0;
1618 outmap[0].bmv_length = sin->bs_size;
1619
1620 /*
1621 * If a non regular file is involved then forget holes
1622 */
1623
1624 if (!S_ISREG(sin->bs_mode))
1625 return(1);
1626
1627 outmap[0].bmv_length = 0;
1628
1629 map[0].bmv_offset = 0;
1630 map[0].bmv_block = 0;
1631 map[0].bmv_entries = 0;
1632 map[0].bmv_count = MAPSIZE;
1633 map[0].bmv_length = -1;
1634
1635 cnt = 0;
1636 *cur_nextents = 0;
1637
1638 do {
1639 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1640 fsrprintf(_("failed reading extents: inode %llu"),
1641 (unsigned long long)sin->bs_ino);
1642 exit(1);
1643 }
1644
1645 /* Concatenate extents together and replicate holes into
1646 * the output map.
1647 */
1648 *cur_nextents += map[0].bmv_entries;
1649 for (i = 0; i < map[0].bmv_entries; i++) {
1650 if (map[i + 1].bmv_block == -1) {
1651 BUMP_CNT;
1652 outmap[cnt] = map[i+1];
1653 } else if (outmap[cnt].bmv_block == -1) {
1654 BUMP_CNT;
1655 outmap[cnt] = map[i+1];
1656 } else {
1657 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1658 }
1659 }
1660 } while (map[0].bmv_entries == (MAPSIZE-1));
1661 for (i = 0; i <= cnt; i++) {
1662 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1663 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1664 }
1665
1666 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1667
1668 return(cnt+1);
1669}
1670
1671/*
1672 * Read the block map and return the number of extents.
1673 */
1674int
1675getnextents(int fd)
1676{
1677 int nextents;
1678 struct getbmap map[MAPSIZE];
1679
1680 map[0].bmv_offset = 0;
1681 map[0].bmv_block = 0;
1682 map[0].bmv_entries = 0;
1683 map[0].bmv_count = MAPSIZE;
1684 map[0].bmv_length = -1;
1685
1686 nextents = 0;
1687
1688 do {
1689 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1690 fsrprintf(_("failed reading extents"));
1691 exit(1);
1692 }
1693
1694 nextents += map[0].bmv_entries;
1695 } while (map[0].bmv_entries == (MAPSIZE-1));
1696
1697 return(nextents);
1698}
1699
1700/*
1701 * Get the fs geometry
1702 */
1703int
1704xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
1705{
1706 if (xfs_fsgeometry(fd, fsgeom) < 0) {
1707 return -1;
1708 }
1709 return 0;
1710}
1711
1712/*
1713 * Get xfs realtime space information
1714 */
1715int
1716xfs_getrt(int fd, struct statvfs64 *sfbp)
1717{
1718 unsigned long bsize;
1719 unsigned long factor;
1720 xfs_fsop_counts_t cnt;
1721
1722 if (!fsgeom.rtblocks)
1723 return -1;
1724
1725 if (xfs_fscounts(fd, &cnt) < 0) {
1726 close(fd);
1727 return -1;
1728 }
1729 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
1730 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1731 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1732 return 0;
1733}
1734
1735int
1736fsrprintf(const char *fmt, ...)
1737{
1738 va_list ap;
1739
1740 va_start(ap, fmt);
1741 if (gflag) {
1742 static int didopenlog;
1743 if (!didopenlog) {
1744 openlog("fsr", LOG_PID, LOG_USER);
1745 didopenlog = 1;
1746 }
1747 vsyslog(LOG_INFO, fmt, ap);
1748 } else
1749 vprintf(fmt, ap);
1750 va_end(ap);
1751 return 0;
1752}
1753
c988ea91
CH
1754/*
1755 * Initialize a directory for tmp file use. This is used
1756 * by the full filesystem defragmentation when we're walking
1757 * the inodes and do not know the path for the individual
1758 * files. Multiple directories are used to spread out the
1759 * tmp data around to different ag's (since file data is
1760 * usually allocated to the same ag as the directory and
1761 * directories allocated round robin from the same
1762 * parent directory).
1763 */
1764static void
1765tmp_init(char *mnt)
1766{
1767 int i;
1768 static char buf[SMBUFSZ];
1769 mode_t mask;
1770
1771 tmp_agi = 0;
1772 sprintf(buf, "%s/.fsr", mnt);
1773
1774 mask = umask(0);
1775 if (mkdir(buf, 0700) < 0) {
1776 if (errno == EEXIST) {
1777 if (dflag)
1778 fsrprintf(_("tmpdir already exists: %s\n"),
1779 buf);
1780 } else {
1781 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1782 buf, strerror(errno));
1783 exit(-1);
1784 }
1785 }
1786 for (i=0; i < fsgeom.agcount; i++) {
1787 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1788 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1789 if (errno == EEXIST) {
1790 if (dflag)
1791 fsrprintf(
1792 _("tmpdir already exists: %s\n"), buf);
1793 } else {
1794 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1795 buf, strerror(errno));
1796 exit(-1);
1797 }
1798 }
1799 }
1800 (void)umask(mask);
1801 return;
1802}
1803
1804static char *
1805tmp_next(char *mnt)
1806{
1807 static char buf[SMBUFSZ];
1808
1809 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1810 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1811 tmp_agi,
1812 getpid());
1813
1814 if (++tmp_agi == fsgeom.agcount)
1815 tmp_agi = 0;
1816
1817 return(buf);
1818}
1819
1820static void
1821tmp_close(char *mnt)
1822{
1823 static char buf[SMBUFSZ];
1824 int i;
1825
1826 /* No data is ever actually written so we can just do rmdir's */
1827 for (i=0; i < fsgeom.agcount; i++) {
1828 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1829 if (rmdir(buf) < 0) {
1830 if (errno != ENOENT) {
1831 fsrprintf(
1832 _("could not remove tmpdir: %s: %s\n"),
1833 buf, strerror(errno));
1834 }
1835 }
1836 }
1837 sprintf(buf, "%s/.fsr", mnt);
1838 if (rmdir(buf) < 0) {
1839 if (errno != ENOENT) {
1840 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1841 buf, strerror(errno));
1842 }
1843 }
1844}