]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
libxfs: refactor short btree block verification
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
c988ea91
CH
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "xfs.h"
21#include "xfs_types.h"
22#include "jdm.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_attr_sf.h"
c988ea91
CH
25
26#include <fcntl.h>
27#include <errno.h>
c988ea91
CH
28#include <syslog.h>
29#include <signal.h>
30#include <sys/ioctl.h>
31#include <sys/wait.h>
c988ea91
CH
32#include <sys/statvfs.h>
33#include <sys/xattr.h>
34
c988ea91
CH
35#ifndef XFS_XFLAG_NODEFRAG
36#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */
37#endif
38
89e4b5bd
CH
39#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
40#define _PATH_PROC_MOUNTS "/proc/mounts"
41
c988ea91
CH
42
43char *progname;
44
45int vflag;
46int gflag;
47static int Mflag;
48/* static int nflag; */
49int dflag = 0;
50/* static int sflag; */
51int argv_blksz_dio;
52extern int max_ext_size;
53static int npasses = 10;
54static int startpass = 0;
55
56struct getbmap *outmap = NULL;
57int outmap_size = 0;
58int RealUid;
59int tmp_agi;
60static __int64_t minimumfree = 2048;
61
62#define MNTTYPE_XFS "xfs"
63
64#define SMBUFSZ 1024
65#define ROOT 0
66#define NULLFD -1
67#define GRABSZ 64
68#define TARGETRANGE 10
69#define V_NONE 0
70#define V_OVERVIEW 1
71#define V_ALL 2
72#define BUFFER_SIZE (1<<16)
73#define BUFFER_MAX (1<<24)
c988ea91
CH
74
75static time_t howlong = 7200; /* default seconds of reorganizing */
76static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
77static time_t endtime;
78static time_t starttime;
79static xfs_ino_t leftoffino = 0;
80static int pagesize;
81
82void usage(int ret);
83static int fsrfile(char *fname, xfs_ino_t ino);
84static int fsrfile_common( char *fname, char *tname, char *mnt,
85 int fd, xfs_bstat_t *statp);
86static int packfile(char *fname, char *tname, int fd,
87 xfs_bstat_t *statp, struct fsxattr *fsxp);
88static void fsrdir(char *dirname);
89static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
90static void initallfs(char *mtab);
89e4b5bd 91static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
92static void fsrall_cleanup(int timeout);
93static int getnextents(int);
94int xfsrtextsize(int fd);
95int xfs_getrt(int fd, struct statvfs64 *sfbp);
96char * gettmpname(char *fname);
97char * getparent(char *fname);
98int fsrprintf(const char *fmt, ...);
99int read_fd_bmap(int, xfs_bstat_t *, int *);
100int cmp(const void *, const void *);
101static void tmp_init(char *mnt);
102static char * tmp_next(char *mnt);
103static void tmp_close(char *mnt);
104int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
c988ea91
CH
105
106xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */
107
108#define NMOUNT 64
109static int numfs;
110
111typedef struct fsdesc {
112 char *dev;
113 char *mnt;
114 int npass;
115} fsdesc_t;
116
117fsdesc_t *fs, *fsbase, *fsend;
118int fsbufsize = 10; /* A starting value */
119int nfrags = 0; /* Debug option: Coerse into specific number
120 * of extents */
121int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
122
123int
124xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
125{
126 return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
127}
128
129int
130xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
131{
132 xfs_fsop_bulkreq_t bulkreq;
133
cad114df 134 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
135 bulkreq.icount = 1;
136 bulkreq.ubuffer = ubuffer;
137 bulkreq.ocount = NULL;
138 return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
139}
140
141int
142xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
143 xfs_bstat_t *ubuffer, __s32 *ocount)
144{
145 xfs_fsop_bulkreq_t bulkreq;
146
cad114df 147 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
148 bulkreq.icount = icount;
149 bulkreq.ubuffer = ubuffer;
150 bulkreq.ocount = ocount;
151 return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
152}
153
154int
155xfs_swapext(int fd, xfs_swapext_t *sx)
156{
157 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
158}
159
160int
161xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
162{
163 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
164}
165
/*
 * Signal handler installed by fsrallfs(): record where we left off as if
 * we had timed out, then exit with status 1.  The signal number is ignored.
 */
void
aborter(int unused)
{
	(void)unused;

	fsrall_cleanup(1);
	exit(1);
}
172
3e50d888
CH
173/*
174 * Check if the argument is either the device name or mountpoint of an XFS
175 * filesystem. Note that we do not care about bind mounted regular files
176 * here - the code that handles defragmentation of invidual files takes care
177 * of that.
178 */
7141fc5b 179static char *
7849d55d 180find_mountpoint_check(struct stat64 *sb, struct mntent *t)
7141fc5b 181{
7849d55d
ES
182 struct stat64 ms;
183
7141fc5b 184 if (S_ISDIR(sb->st_mode)) { /* mount point */
7849d55d 185 if (stat64(t->mnt_dir, &ms) < 0)
7141fc5b 186 return NULL;
7849d55d 187 if (sb->st_ino != ms.st_ino)
7141fc5b 188 return NULL;
7849d55d 189 if (sb->st_dev != ms.st_dev)
7141fc5b
JT
190 return NULL;
191 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
192 return NULL;
193 } else { /* device */
7849d55d 194 if (stat64(t->mnt_fsname, &ms) < 0)
7141fc5b 195 return NULL;
7849d55d 196 if (sb->st_rdev != ms.st_rdev)
7141fc5b
JT
197 return NULL;
198 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
199 return NULL;
7141fc5b 200 /*
a32546b0
DC
201 * Make sure the mountpoint given by mtab is accessible
202 * before using it.
203 */
7849d55d 204 if (stat64(t->mnt_dir, &ms) < 0)
7141fc5b
JT
205 return NULL;
206 }
207
208 return t->mnt_dir;
7141fc5b
JT
209}
210
3e50d888
CH
211static char *
212find_mountpoint(char *mtab, char *argname, struct stat64 *sb)
213{
7141fc5b 214 struct mntent_cursor cursor;
7141fc5b 215 struct mntent *t = NULL;
3e50d888
CH
216 char *mntp = NULL;
217
7141fc5b
JT
218 if (platform_mntent_open(&cursor, mtab) != 0){
219 fprintf(stderr, "Error: can't get mntent entries.\n");
3e50d888
CH
220 exit(1);
221 }
222
a32546b0 223 while ((t = platform_mntent_next(&cursor)) != NULL) {
7849d55d 224 mntp = find_mountpoint_check(sb, t);
7141fc5b
JT
225 if (mntp == NULL)
226 continue;
3e50d888
CH
227 break;
228 }
7141fc5b 229 platform_mntent_close(&cursor);
3e50d888
CH
230 return mntp;
231}
232
c988ea91
CH
233int
234main(int argc, char **argv)
235{
3e50d888 236 struct stat64 sb;
c988ea91 237 char *argname;
c988ea91 238 int c;
3e50d888 239 char *mntp;
89e4b5bd 240 char *mtab = NULL;
c988ea91
CH
241
242 setlinebuf(stdout);
243 progname = basename(argv[0]);
244
245 setlocale(LC_ALL, "");
246 bindtextdomain(PACKAGE, LOCALEDIR);
247 textdomain(PACKAGE);
248
249 gflag = ! isatty(0);
250
89e4b5bd 251 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
252 switch (c) {
253 case 'M':
254 Mflag = 1;
255 break;
256 case 'g':
257 gflag = 1;
258 break;
259 case 'n':
260 /* nflag = 1; */
261 break;
262 case 'v':
263 ++vflag;
264 break;
265 case 'd':
266 dflag = 1;
267 break;
268 case 's': /* frag stats only */
269 /* sflag = 1; */
270 fprintf(stderr,
271 _("%s: Stats not yet supported for XFS\n"),
272 progname);
273 usage(1);
274 break;
275 case 't':
276 howlong = atoi(optarg);
277 break;
278 case 'f':
279 leftofffile = optarg;
280 break;
281 case 'm':
282 mtab = optarg;
283 break;
284 case 'b':
285 argv_blksz_dio = atoi(optarg);
286 break;
287 case 'p':
288 npasses = atoi(optarg);
289 break;
290 case 'C':
291 /* Testing opt: coerses frag count in result */
292 if (getenv("FSRXFSTEST") != NULL) {
293 nfrags = atoi(optarg);
294 openopts |= O_SYNC;
295 }
296 break;
297 case 'V':
298 printf(_("%s version %s\n"), progname, VERSION);
299 exit(0);
300 default:
301 usage(1);
302 }
89e4b5bd
CH
303 }
304
305 /*
306 * If the user did not specify an explicit mount table, try to use
307 * /proc/mounts if it is available, else /etc/mtab. We prefer
308 * /proc/mounts because it is kernel controlled, while /etc/mtab
309 * may contain garbage that userspace tools like pam_mounts wrote
310 * into it.
311 */
312 if (!mtab) {
313 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
314 mtab = _PATH_PROC_MOUNTS;
315 else
316 mtab = _PATH_MOUNTED;
317 }
318
c988ea91
CH
319 if (vflag)
320 setbuf(stdout, NULL);
321
322 starttime = time(0);
323
324 /* Save the caller's real uid */
325 RealUid = getuid();
326
327 pagesize = getpagesize();
328
329 if (optind < argc) {
330 for (; optind < argc; optind++) {
331 argname = argv[optind];
3e50d888 332
c988ea91
CH
333 if (lstat64(argname, &sb) < 0) {
334 fprintf(stderr,
335 _("%s: could not stat: %s: %s\n"),
336 progname, argname, strerror(errno));
337 continue;
338 }
3e50d888
CH
339
340 if (S_ISLNK(sb.st_mode)) {
341 struct stat64 sb2;
342
343 if (stat64(argname, &sb2) == 0 &&
344 (S_ISBLK(sb2.st_mode) ||
345 S_ISCHR(sb2.st_mode)))
c988ea91 346 sb = sb2;
c988ea91 347 }
3e50d888
CH
348
349 mntp = find_mountpoint(mtab, argname, &sb);
c988ea91 350 if (mntp != NULL) {
3e50d888 351 fsrfs(mntp, 0, 100);
c988ea91
CH
352 } else if (S_ISCHR(sb.st_mode)) {
353 fprintf(stderr, _(
354 "%s: char special not supported: %s\n"),
355 progname, argname);
356 exit(1);
357 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
358 if (!platform_test_xfs_path(argname)) {
359 fprintf(stderr, _(
360 "%s: cannot defragment: %s: Not XFS\n"),
361 progname, argname);
362 continue;
363 }
364 if (S_ISDIR(sb.st_mode))
365 fsrdir(argname);
366 else
367 fsrfile(argname, sb.st_ino);
368 } else {
369 printf(
370 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
371 argname);
372 }
373 }
374 } else {
375 initallfs(mtab);
89e4b5bd 376 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
377 }
378 return 0;
379}
380
381void
382usage(int ret)
383{
384 fprintf(stderr, _(
30626ef6
ES
385"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
386" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
387" %s -V\n\n"
c988ea91 388"Options:\n"
c988ea91
CH
389" -g Print to syslog (default if stdout not a tty).\n"
390" -t time How long to run in seconds.\n"
30626ef6 391" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
392" -f leftoff Use this instead of %s.\n"
393" -m mtab Use something other than /etc/mtab.\n"
394" -d Debug, print even more.\n"
30626ef6
ES
395" -v Verbose, more -v's more verbose.\n"
396" -V Print version number and exit.\n"
397 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
398 exit(ret);
399}
400
401/*
402 * initallfs -- read the mount table and set up an internal form
403 */
404static void
405initallfs(char *mtab)
406{
7141fc5b 407 struct mntent_cursor cursor;
7849d55d 408 struct mntent *mnt= NULL;
c988ea91
CH
409 int mi;
410 char *cp;
411 struct stat64 sb;
c988ea91
CH
412
413 /* malloc a number of descriptors, increased later if needed */
414 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
415 fsrprintf(_("out of memory: %s\n"), strerror(errno));
416 exit(1);
417 }
418 fsend = (fsbase + fsbufsize - 1);
419
420 /* find all rw xfs file systems */
421 mi = 0;
422 fs = fsbase;
7141fc5b
JT
423
424 if (platform_mntent_open(&cursor, mtab) != 0){
425 fprintf(stderr, "Error: can't get mntent entries.\n");
426 exit(1);
427 }
428
7849d55d 429 while ((mnt = platform_mntent_next(&cursor)) != NULL) {
c988ea91
CH
430 int rw = 0;
431
7849d55d
ES
432 if (strcmp(mnt->mnt_type, MNTTYPE_XFS ) != 0 ||
433 stat64(mnt->mnt_fsname, &sb) == -1 ||
c988ea91
CH
434 !S_ISBLK(sb.st_mode))
435 continue;
436
7849d55d 437 cp = strtok(mnt->mnt_opts,",");
c988ea91
CH
438 do {
439 if (strcmp("rw", cp) == 0)
440 rw++;
441 } while ((cp = strtok(NULL, ",")) != NULL);
442 if (rw == 0) {
443 if (dflag)
444 fsrprintf(_("Skipping %s: not mounted rw\n"),
7849d55d 445 mnt->mnt_fsname);
c988ea91
CH
446 continue;
447 }
448
449 if (mi == fsbufsize) {
450 fsbufsize += NMOUNT;
451 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
452 fsbufsize * sizeof(fsdesc_t))) == NULL) {
453 fsrprintf(_("out of memory: %s\n"),
454 strerror(errno));
455 exit(1);
456 }
457 if (!fsbase) {
458 fsrprintf(_("out of memory on realloc: %s\n"),
459 strerror(errno));
460 exit(1);
461 }
462 fs = (fsbase + mi); /* Needed ? */
463 }
464
7849d55d
ES
465 fs->dev = strdup(mnt->mnt_fsname);
466 fs->mnt = strdup(mnt->mnt_dir);
c988ea91 467
758bcc92 468 if (fs->dev == NULL) {
7849d55d 469 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_fsname);
c988ea91
CH
470 exit(1);
471 }
758bcc92 472 if (fs->mnt == NULL) {
7849d55d 473 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_dir);
758bcc92
ES
474 exit(1);
475 }
c988ea91
CH
476 mi++;
477 fs++;
478 }
7141fc5b
JT
479 platform_mntent_close(&cursor);
480
c988ea91
CH
481 numfs = mi;
482 fsend = (fsbase + numfs);
c988ea91
CH
483 if (numfs == 0) {
484 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
485 exit(0);
486 }
487 if (vflag || dflag) {
488 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
489 numfs);
490 if (dflag)
491 for (fs = fsbase; fs < fsend; fs++)
492 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
493 }
494}
495
496static void
89e4b5bd 497fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
498{
499 int fd;
500 int error;
501 int found = 0;
502 char *fsname;
503 char buf[SMBUFSZ];
504 int mdonly = Mflag;
505 char *ptr;
506 xfs_ino_t startino = 0;
507 fsdesc_t *fsp;
508 struct stat64 sb, sb2;
509
510 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
511
512 endtime = starttime + howlong;
513 fs = fsbase;
514
515 /* where'd we leave off last time? */
516 if (lstat64(leftofffile, &sb) == 0) {
517 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
518 fsrprintf(_("%s: open failed\n"), leftofffile);
519 }
520 else if ( fstat64(fd, &sb2) == 0) {
521 /*
522 * Verify that lstat & fstat point to the
523 * same regular file (no links/no quick spoofs)
524 */
525 if ( (sb.st_dev != sb2.st_dev) ||
526 (sb.st_ino != sb2.st_ino) ||
527 ((sb.st_mode & S_IFMT) != S_IFREG) ||
528 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
529 (sb2.st_uid != ROOT) ||
530 (sb2.st_nlink != 1)
531 )
532 {
533 fsrprintf(_("Can't use %s: mode=0%o own=%d"
534 " nlink=%d\n"),
535 leftofffile, sb.st_mode,
536 sb.st_uid, sb.st_nlink);
537 close(fd);
538 fd = NULLFD;
539 }
540 }
541 else {
542 close(fd);
543 fd = NULLFD;
544 }
545 }
546 else {
547 fd = NULLFD;
548 }
549
550 if (fd != NULLFD) {
551 if (read(fd, buf, SMBUFSZ) == -1) {
552 fs = fsbase;
553 fsrprintf(_("could not read %s, starting with %s\n"),
554 leftofffile, *fs->dev);
555 } else {
eef20df0
ES
556 /* Ensure the buffer we read is null terminated */
557 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
558 for (fs = fsbase; fs < fsend; fs++) {
559 fsname = fs->dev;
560 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
561 && buf[strlen(fsname)] == ' ') {
562 found = 1;
563 break;
564 }
565 }
566 if (! found)
567 fs = fsbase;
568
569 ptr = strchr(buf, ' ');
570 if (ptr) {
571 startpass = atoi(++ptr);
572 ptr = strchr(ptr, ' ');
573 if (ptr) {
574 startino = strtoull(++ptr, NULL, 10);
575 }
576 }
577 if (startpass < 0)
578 startpass = 0;
579
580 /* Init pass counts */
581 for (fsp = fsbase; fsp < fs; fsp++) {
582 fsp->npass = startpass + 1;
583 }
584 for (fsp = fs; fsp <= fsend; fsp++) {
585 fsp->npass = startpass;
586 }
587 }
588 close(fd);
589 }
590
591 if (vflag) {
592 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
593 fs->npass, (unsigned long long)startino,
594 fs->dev, fs->mnt);
595 }
596
597 signal(SIGABRT, aborter);
598 signal(SIGHUP, aborter);
599 signal(SIGINT, aborter);
600 signal(SIGQUIT, aborter);
601 signal(SIGTERM, aborter);
602
603 /* reorg for 'howlong' -- checked in 'fsrfs' */
604 while (endtime > time(0)) {
605 pid_t pid;
606 if (fs == fsend)
607 fs = fsbase;
608 if (fs->npass == npasses) {
609 fsrprintf(_("Completed all %d passes\n"), npasses);
610 break;
611 }
612 if (npasses > 1 && !fs->npass)
613 Mflag = 1;
614 else
615 Mflag = mdonly;
616 pid = fork();
617 switch(pid) {
618 case -1:
619 fsrprintf(_("couldn't fork sub process:"));
620 exit(1);
621 break;
622 case 0:
623 error = fsrfs(fs->mnt, startino, TARGETRANGE);
624 exit (error);
625 break;
626 default:
627 wait(&error);
c988ea91
CH
628 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
629 /* child timed out & did fsrall_cleanup */
630 exit(0);
631 }
632 break;
633 }
634 startino = 0; /* reset after the first time through */
635 fs->npass++;
636 fs++;
637 }
638 fsrall_cleanup(endtime <= time(0));
639}
640
641/*
642 * fsrall_cleanup -- close files, print next starting location, etc.
643 */
644static void
645fsrall_cleanup(int timeout)
646{
647 int fd;
648 int ret;
649 char buf[SMBUFSZ];
650
c988ea91 651 unlink(leftofffile);
d0e82db1
ES
652
653 if (timeout) {
654 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
655 progname, startpass, fs->npass,
656 time(0) - endtime + howlong);
657
658 /* record where we left off */
659 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
660 if (fd == -1) {
661 fsrprintf(_("open(%s) failed: %s\n"),
662 leftofffile, strerror(errno));
663 } else {
c988ea91
CH
664 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
665 fs->npass, (unsigned long long)leftoffino);
666 if (write(fd, buf, ret) < strlen(buf))
667 fsrprintf(_("write(%s) failed: %s\n"),
668 leftofffile, strerror(errno));
669 close(fd);
670 }
671 }
c988ea91
CH
672}
673
674/*
675 * fsrfs -- reorganize a file system
676 */
677static int
678fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
679{
680
681 int fsfd, fd;
682 int count = 0;
683 int ret;
684 __s32 buflenout;
685 xfs_bstat_t buf[GRABSZ];
686 char fname[64];
687 char *tname;
688 jdm_fshandle_t *fshandlep;
689 xfs_ino_t lastino = startino;
690
691 fsrprintf(_("%s start inode=%llu\n"), mntdir,
692 (unsigned long long)startino);
693
694 fshandlep = jdm_getfshandle( mntdir );
695 if ( ! fshandlep ) {
696 fsrprintf(_("unable to get handle: %s: %s\n"),
697 mntdir, strerror( errno ));
698 return -1;
699 }
700
701 if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
702 fsrprintf(_("unable to open: %s: %s\n"),
703 mntdir, strerror( errno ));
e3e2793d 704 free(fshandlep);
c988ea91
CH
705 return -1;
706 }
707
708 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
709 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
710 mntdir);
11e06961 711 close(fsfd);
e3e2793d 712 free(fshandlep);
c988ea91
CH
713 return -1;
714 }
715
716 tmp_init(mntdir);
717
718 while ((ret = xfs_bulkstat(fsfd,
98166c91 719 &lastino, GRABSZ, &buf[0], &buflenout)) == 0) {
c988ea91
CH
720 xfs_bstat_t *p;
721 xfs_bstat_t *endp;
722
723 if (buflenout == 0)
724 goto out0;
725
726 /* Each loop through, defrag targetrange percent of the files */
727 count = (buflenout * targetrange) / 100;
728
729 qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
730
731 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
732 /* Do some obvious checks now */
733 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
734 (p->bs_extents < 2))
735 continue;
736
108e985b
DC
737 fd = jdm_open(fshandlep, p, O_RDWR|O_DIRECT);
738 if (fd < 0) {
c988ea91
CH
739 /* This probably means the file was
740 * removed while in progress of handling
741 * it. Just quietly ignore this file.
742 */
743 if (dflag)
744 fsrprintf(_("could not open: "
745 "inode %llu\n"), p->bs_ino);
746 continue;
747 }
748
749 /* Don't know the pathname, so make up something */
750 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
751
752 /* Get a tmp file name */
753 tname = tmp_next(mntdir);
754
755 ret = fsrfile_common(fname, tname, mntdir, fd, p);
756
757 leftoffino = p->bs_ino;
758
759 close(fd);
760
761 if (ret == 0) {
762 if (--count <= 0)
763 break;
764 }
765 }
766 if (endtime && endtime < time(0)) {
767 tmp_close(mntdir);
768 close(fsfd);
769 fsrall_cleanup(1);
770 exit(1);
771 }
772 }
773 if (ret < 0)
774 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
775out0:
776 tmp_close(mntdir);
777 close(fsfd);
e3e2793d 778 free(fshandlep);
c988ea91
CH
779 return 0;
780}
781
782/*
783 * To compare bstat structs for qsort.
784 */
785int
786cmp(const void *s1, const void *s2)
787{
788 return( ((xfs_bstat_t *)s2)->bs_extents -
789 ((xfs_bstat_t *)s1)->bs_extents);
790
791}
792
/*
 * Reorganize by directory hierarchy.
 *
 * Not implemented: this only reports that directory defragmentation is
 * unsupported for `dirname'.
 */
static void
fsrdir(char *dirname)
{
	fsrprintf(_("%s: Directory defragmentation not supported\n"),
		  dirname);
}
803
804/*
805 * Sets up the defragmentation of a file based on the
806 * filepath. It collects the bstat information, does
807 * an open on the file and passes this all to fsrfile_common.
808 */
809static int
810fsrfile(char *fname, xfs_ino_t ino)
811{
812 xfs_bstat_t statbuf;
813 jdm_fshandle_t *fshandlep;
4f10a2fb
ES
814 int fd = -1, fsfd = -1;
815 int error = -1;
c988ea91
CH
816 char *tname;
817
818 fshandlep = jdm_getfshandle(getparent (fname) );
4f10a2fb 819 if (!fshandlep) {
c988ea91
CH
820 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
821 fname, strerror(errno));
4f10a2fb 822 goto out;
c988ea91
CH
823 }
824
825 /*
826 * Need to open something on the same filesystem as the
827 * file. Open the parent.
828 */
829 fsfd = open(getparent(fname), O_RDONLY);
830 if (fsfd < 0) {
831 fsrprintf(_("unable to open sys handle for %s: %s\n"),
832 fname, strerror(errno));
4f10a2fb 833 goto out;
c988ea91
CH
834 }
835
836 if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
837 fsrprintf(_("unable to get bstat on %s: %s\n"),
838 fname, strerror(errno));
4f10a2fb 839 goto out;
c988ea91
CH
840 }
841
108e985b 842 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
843 if (fd < 0) {
844 fsrprintf(_("unable to open handle %s: %s\n"),
845 fname, strerror(errno));
4f10a2fb 846 goto out;
c988ea91
CH
847 }
848
849 /* Get the fs geometry */
850 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
851 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
4f10a2fb 852 goto out;
c988ea91
CH
853 }
854
c988ea91
CH
855 tname = gettmpname(fname);
856
857 if (tname)
858 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
859
4f10a2fb
ES
860out:
861 if (fsfd >= 0)
862 close(fsfd);
863 if (fd >= 0)
864 close(fd);
865 free(fshandlep);
c988ea91
CH
866
867 return error;
868}
869
870
871/*
872 * This is the common defrag code for either a full fs
873 * defragmentation or a single file. Check as much as
874 * possible with the file, fork a process to setuid to the
875 * target file owner's uid and defragment the file.
876 * This is done so the new extents created in a tmp file are
877 * reflected in the owners' quota without having to do any
878 * special code in the kernel. When the existing extents
879 * are removed, the quotas will be correct. It's ugly but
880 * it saves us from doing some quota re-construction in
881 * the extent swap. The price is that the defragmentation
882 * will fail if the owner of the target file is already at
883 * their quota limit.
884 */
885static int
886fsrfile_common(
887 char *fname,
888 char *tname,
889 char *fsname,
890 int fd,
891 xfs_bstat_t *statp)
892{
893 int error;
894 struct statvfs64 vfss;
895 struct fsxattr fsx;
896 unsigned long bsize;
897
898 if (vflag)
899 fsrprintf("%s\n", fname);
900
901 if (fsync(fd) < 0) {
902 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
903 return -1;
904 }
905
906 if (statp->bs_size == 0) {
907 if (vflag)
908 fsrprintf(_("%s: zero size, ignoring\n"), fname);
909 return(0);
910 }
911
912 /* Check if a mandatory lock is set on the file to try and
913 * avoid blocking indefinitely on the reads later. Note that
914 * someone could still set a mandatory lock after this check
915 * but before all reads have completed to block fsr reads.
916 * This change just closes the window a bit.
917 */
918 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
919 struct flock fl;
920
921 fl.l_type = F_RDLCK;
922 fl.l_whence = SEEK_SET;
923 fl.l_start = (off_t)0;
924 fl.l_len = 0;
925 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
926 if (vflag)
927 fsrprintf(_("locking check failed: %s\n"),
928 fname);
929 return(-1);
930 }
931 if (fl.l_type != F_UNLCK) {
932 /* Mandatory lock is set */
933 if (vflag)
934 fsrprintf(_("mandatory lock: %s: ignoring\n"),
935 fname);
936 return(-1);
937 }
938 }
939
940 /*
941 * Check if there is room to copy the file.
942 *
943 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
944 * not the optimal I/O size as struct stat.
945 */
946 if (statvfs64(fsname ? fsname : fname, &vfss) < 0) {
947 fsrprintf(_("unable to get fs stat on %s: %s\n"),
948 fname, strerror(errno));
949 return -1;
950 }
b35b4eb8 951#ifndef statvfs64
c988ea91 952 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
b35b4eb8
JT
953#else
954 bsize = vfss.f_bsize;
955#endif
c988ea91
CH
956 if (statp->bs_blksize * statp->bs_blocks >
957 vfss.f_bfree * bsize - minimumfree) {
958 fsrprintf(_("insufficient freespace for: %s: "
959 "size=%lld: ignoring\n"), fname,
960 statp->bs_blksize * statp->bs_blocks);
961 return 1;
962 }
963
964 if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
965 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
966 return(-1);
967 }
968 if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) {
969 if (vflag)
970 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
971 return(0);
972 }
973 if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) {
974 if (vflag)
975 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
976 fname);
977 return(0);
978 }
979 if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
980 if (xfs_getrt(fd, &vfss) < 0) {
981 fsrprintf(_("cannot get realtime geometry for: %s\n"),
982 fname);
983 return(-1);
984 }
985 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
986 fsrprintf(_("low on realtime free space: %s: "
987 "ignoring file\n"), fname);
988 return(-1);
989 }
990 }
991
992 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
993 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
994 return -1;
995 }
996
997 /*
998 * Previously the code forked here, & the child changed it's uid to
999 * that of the file's owner and then called packfile(), to keep
1000 * quota counts correct. (defragged files could use fewer blocks).
1001 *
1002 * Instead, just fchown() the temp file to the uid,gid of the
1003 * file we're defragging, in packfile().
1004 */
1005
1006 if ((error = packfile(fname, tname, fd, statp, &fsx)))
1007 return error;
1008 return -1; /* no error */
1009}
1010
bdb041f5
DC
1011/*
1012 * Attempt to set the attr fork up correctly. This is simple for attr1
1013 * filesystems as they have a fixed inode fork offset. In that case
1014 * just create an attribute and that's all we need to do.
1015 *
1016 * For attr2 filesystems, see if we have the actual fork offset in
1017 * the bstat structure. If so, just create additional attributes on
1018 * the temporary inode until the offset matches.
1019 *
1020 * If it doesn't exist, we can only do best effort. Add an attribute at a time
1021 * to move the inode fork around, but take into account that the attribute
1022 * might be too small to move the fork every time we add one. This should
1023 * hopefully put the fork offset in the right place. It's not a big deal if we
1024 * don't get it right - the kernel will reject it when we try to swap extents.
1025 */
1026static int
1027fsr_setup_attr_fork(
1028 int fd,
1029 int tfd,
1030 xfs_bstat_t *bstatp)
1031{
c14c7b79 1032#ifdef HAVE_FSETXATTR
bdb041f5
DC
1033 struct stat64 tstatbuf;
1034 int i;
27507775 1035 int diff = 0;
bdb041f5
DC
1036 int last_forkoff = 0;
1037 int no_change_cnt = 0;
1038 int ret;
1039
1040 if (!(bstatp->bs_xflags & XFS_XFLAG_HASATTR))
1041 return 0;
1042
1043 /*
1044 * use the old method if we have attr1 or the kernel does not yet
1045 * support passing the fork offset in the bulkstat data.
1046 */
1047 if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
1048 bstatp->bs_forkoff == 0) {
1049 /* attr1 */
1050 ret = fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE);
1051 if (ret) {
1052 fsrprintf(_("could not set ATTR\n"));
1053 return -1;
1054 }
1055 goto out;
1056 }
1057
1058 /* attr2 w/ fork offsets */
1059
1060 if (fstat64(tfd, &tstatbuf) < 0) {
1061 fsrprintf(_("unable to stat temp file: %s\n"),
1062 strerror(errno));
1063 return -1;
1064 }
1065
1066 i = 0;
1067 do {
1068 xfs_bstat_t tbstat;
1069 xfs_ino_t ino;
1070 char name[64];
bdb041f5
DC
1071
1072 /*
1adfe5c6 1073 * bulkstat the temp inode to see what the forkoff is. Use
bdb041f5
DC
1074 * this to compare against the target and determine what we
1075 * need to do.
1076 */
1077 ino = tstatbuf.st_ino;
1078 if ((xfs_bulkstat_single(tfd, &ino, &tbstat)) < 0) {
1079 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1080 strerror(errno));
1081 return -1;
1082 }
1083 if (dflag)
1084 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1085 bstatp->bs_forkoff, tbstat.bs_forkoff);
1adfe5c6
ES
1086 diff = tbstat.bs_forkoff - bstatp->bs_forkoff;
1087
1088 /* if they are equal, we are done */
1089 if (!diff)
1090 goto out;
bdb041f5
DC
1091
1092 snprintf(name, sizeof(name), "user.%d", i);
1093
1094 /*
1095 * If there is no attribute, then we need to create one to get
1096 * an attribute fork at the default location.
1097 */
1098 if (!tbstat.bs_forkoff) {
1adfe5c6 1099 ASSERT(i == 0);
bdb041f5
DC
1100 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1101 if (ret) {
1102 fsrprintf(_("could not set ATTR\n"));
1103 return -1;
1104 }
1105 continue;
1adfe5c6 1106 } else if (i == 0) {
1adfe5c6
ES
1107 /*
1108 * First pass, and temp file already has an inline
1109 * xattr, probably due to selinux.
1110 *
1111 * It's *possible* that the temp file attr area
e7e3152c 1112 * is larger than the target file's:
1adfe5c6
ES
1113 *
1114 * Target Temp
1115 * +-------+ 0 +-------+ 0
1116 * | | | |
1117 * | | | Data |
1118 * | Data | | |
1119 * | | v-------v forkoff
1120 * | | | |
1121 * v-------v forkoff | Attr | local
e7e3152c 1122 * | Attr | | |
1adfe5c6 1123 * +-------+ +-------+
1adfe5c6
ES
1124 */
1125
1adfe5c6 1126 /*
e7e3152c
ES
1127 * If target attr area is less than the temp's
1128 * (diff < 0) write a big attr to the temp file to knock
1129 * the attr out of local format.
1130 * (This should actually *increase* the temp file's
1131 * forkoffset when the attr moves out of the inode)
1adfe5c6 1132 */
e7e3152c 1133 if (diff < 0) {
1adfe5c6
ES
1134 char val[2048];
1135 memset(val, 'X', 2048);
1136 if (fsetxattr(tfd, name, val, 2048, 0)) {
1137 fsrprintf(_("big ATTR set failed\n"));
1138 return -1;
1139 }
1140 /* Go back & see where we're at now */
1141 continue;
1142 }
bdb041f5
DC
1143 }
1144
1145 /*
1146 * make a progress check so we don't get stuck trying to extend
1147 * a large btree form attribute fork.
1148 */
1149 if (last_forkoff == tbstat.bs_forkoff) {
1150 if (no_change_cnt++ > 10)
1151 break;
ff85ea3f
ES
1152 } else /* progress! */
1153 no_change_cnt = 0;
bdb041f5
DC
1154 last_forkoff = tbstat.bs_forkoff;
1155
1156 /* work out which way to grow the fork */
bdb041f5
DC
1157 if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
1158 fsrprintf(_("forkoff diff %d too large!\n"), diff);
1159 return -1;
1160 }
1161
bdb041f5 1162 /*
1adfe5c6
ES
1163 * if the temp inode fork offset is still smaller then we have
1164 * to grow the data fork
bdb041f5
DC
1165 */
1166 if (diff < 0) {
1167 /*
1168 * create some temporary extents in the inode to move
1169 * the fork in the direction we need. This can be done
1170 * by preallocating some single block extents at
1171 * non-contiguous offsets.
1172 */
1173 /* XXX: unimplemented! */
27507775
ES
1174 if (dflag)
1175 printf(_("data fork growth unimplemented\n"));
bdb041f5
DC
1176 goto out;
1177 }
1178
1179 /* we need to grow the attr fork, so create another attr */
1180 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1181 if (ret) {
1182 fsrprintf(_("could not set ATTR\n"));
1183 return -1;
1184 }
1185
1186 } while (++i < 100); /* don't go forever */
1187
1188out:
1189 if (dflag)
1190 fsrprintf(_("set temp attr\n"));
27507775
ES
1191 /* We failed to resolve the fork difference */
1192 if (dflag && diff)
1193 fsrprintf(_("failed to match fork offset\n"));;
1194
c14c7b79 1195#endif /* HAVE_FSETXATTR */
bdb041f5
DC
1196 return 0;
1197}
c988ea91
CH
1198
1199/*
1200 * Do the defragmentation of a single file.
1201 * We already are pretty sure we can and want to
1202 * defragment the file. Create the tmp file, copy
1203 * the data (maintaining holes) and call the kernel
671632c6
ES
1204 * extent swap routine.
1205 *
1206 * Return values:
1207 * -1: Some error was encountered
1208 * 0: Successfully defragmented the file
1209 * 1: No change / No Error
c988ea91
CH
1210 */
1211static int
1212packfile(char *fname, char *tname, int fd,
1213 xfs_bstat_t *statp, struct fsxattr *fsxp)
1214{
671632c6 1215 int tfd = -1;
c988ea91 1216 int srval;
671632c6 1217 int retval = -1; /* Failure is the default */
c988ea91
CH
1218 int nextents, extent, cur_nextents, new_nextents;
1219 unsigned blksz_dio;
1220 unsigned dio_min;
1221 struct dioattr dio;
1222 static xfs_swapext_t sx;
1223 struct xfs_flock64 space;
1224 off64_t cnt, pos;
671632c6 1225 void *fbuf = NULL;
c988ea91
CH
1226 int ct, wc, wc_b4;
1227 char ffname[SMBUFSZ];
1228 int ffd = -1;
1229
1230 /*
1231 * Work out the extent map - nextents will be set to the
1232 * minimum number of extents needed for the file (taking
1233 * into account holes), cur_nextents is the current number
1234 * of extents.
1235 */
1236 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1237
1238 if (cur_nextents == 1 || cur_nextents <= nextents) {
1239 if (vflag)
1240 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1241 retval = 1; /* indicates no change/no error */
1242 goto out;
c988ea91
CH
1243 }
1244
1245 if (dflag)
1246 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1247 fname, cur_nextents, (cur_nextents - nextents),
1248 tname);
1249
1250 if ((tfd = open(tname, openopts, 0666)) < 0) {
1251 if (vflag)
1252 fsrprintf(_("could not open tmp file: %s: %s\n"),
1253 tname, strerror(errno));
671632c6 1254 goto out;
c988ea91
CH
1255 }
1256 unlink(tname);
1257
1258 /* Setup extended attributes */
bdb041f5
DC
1259 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1260 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1261 goto out;
c988ea91
CH
1262 }
1263
1264 /* Setup extended inode flags, project identifier, etc */
1265 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
1266 if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) {
1267 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1268 tname);
671632c6 1269 goto out;
c988ea91
CH
1270 }
1271 }
1272
1273 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1274 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1275 goto out;
c988ea91
CH
1276 }
1277
1278 dio_min = dio.d_miniosz;
1279 if (statp->bs_size <= dio_min) {
1280 blksz_dio = dio_min;
1281 } else {
1282 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1283 if (argv_blksz_dio != 0)
1284 blksz_dio = min(argv_blksz_dio, blksz_dio);
1285 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1286 }
1287
1288 if (dflag) {
1289 fsrprintf(_("DEBUG: "
1290 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1291 statp->bs_size, blksz_dio, dio.d_miniosz,
1292 dio.d_maxiosz, pagesize);
1293 }
1294
1295 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1296 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1297 goto out;
c988ea91
CH
1298 }
1299
1300 if (nfrags) {
1301 /* Create new tmp file in same AG as first */
1302 sprintf(ffname, "%s.frag", tname);
1303
1304 /* Open the new file for sync writes */
1305 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1306 fsrprintf(_("could not open fragfile: %s : %s\n"),
1307 ffname, strerror(errno));
671632c6 1308 goto out;
c988ea91
CH
1309 }
1310 unlink(ffname);
1311 }
1312
1313 /* Loop through block map allocating new extents */
1314 for (extent = 0; extent < nextents; extent++) {
1315 pos = outmap[extent].bmv_offset;
1316 if (outmap[extent].bmv_block == -1) {
1317 space.l_whence = SEEK_SET;
1318 space.l_start = pos;
1319 space.l_len = outmap[extent].bmv_length;
1320 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1321 fsrprintf(_("could not trunc tmp %s\n"),
1322 tname);
1323 }
3d303baa
ES
1324 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1325 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1326 tname, strerror(errno));
1327 goto out;
1328 }
c988ea91
CH
1329 continue;
1330 } else if (outmap[extent].bmv_length == 0) {
1331 /* to catch holes at the beginning of the file */
1332 continue;
1333 }
1334 if (! nfrags) {
1335 space.l_whence = SEEK_CUR;
1336 space.l_start = 0;
1337 space.l_len = outmap[extent].bmv_length;
1338
1339 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1340 fsrprintf(_("could not pre-allocate tmp space:"
1341 " %s\n"), tname);
671632c6 1342 goto out;
c988ea91 1343 }
3d303baa
ES
1344 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1345 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1346 tname, strerror(errno));
1347 goto out;
1348 }
c988ea91
CH
1349 }
1350 } /* end of space allocation loop */
1351
1352 if (lseek64(tfd, 0, SEEK_SET)) {
1353 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1354 goto out;
c988ea91
CH
1355 }
1356
1357 /* Check if the temporary file has fewer extents */
1358 new_nextents = getnextents(tfd);
1359 if (dflag)
1360 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1361 if (cur_nextents <= new_nextents) {
1362 if (vflag)
1363 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1364 retval = 1; /* no change/no error */
1365 goto out;
c988ea91
CH
1366 }
1367
1368 /* Loop through block map copying the file. */
1369 for (extent = 0; extent < nextents; extent++) {
1370 pos = outmap[extent].bmv_offset;
1371 if (outmap[extent].bmv_block == -1) {
3d303baa
ES
1372 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1373 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1374 tname, strerror(errno));
1375 goto out;
1376 }
1377 if (lseek64(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1378 fsrprintf(_("could not lseek in file: %s : %s\n"),
1379 fname, strerror(errno));
1380 goto out;
1381 }
c988ea91
CH
1382 continue;
1383 } else if (outmap[extent].bmv_length == 0) {
1384 /* to catch holes at the beginning of the file */
1385 continue;
1386 }
1387 for (cnt = outmap[extent].bmv_length; cnt > 0;
1388 cnt -= ct, pos += ct) {
1389 if (nfrags && --nfrags) {
1390 ct = min(cnt, dio_min);
1391 } else if (cnt % dio_min == 0) {
1392 ct = min(cnt, blksz_dio);
1393 } else {
1394 ct = min(cnt + dio_min - (cnt % dio_min),
1395 blksz_dio);
1396 }
1397 ct = read(fd, fbuf, ct);
1398 if (ct == 0) {
1399 /* EOF, stop trying to read */
1400 extent = nextents;
1401 break;
1402 }
1403 /* Ensure we do direct I/O to correct block
1404 * boundaries.
1405 */
1406 if (ct % dio_min != 0) {
1407 wc = ct + dio_min - (ct % dio_min);
1408 } else {
1409 wc = ct;
1410 }
1411 wc_b4 = wc;
1412 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1413 if (ct < 0)
1414 fsrprintf(_("bad read of %d bytes "
1415 "from %s: %s\n"), wc_b4,
1416 fname, strerror(errno));
1417 else if (wc < 0)
1418 fsrprintf(_("bad write of %d bytes "
1419 "to %s: %s\n"), wc_b4,
1420 tname, strerror(errno));
1421 else {
1422 /*
1423 * Might be out of space
1424 *
1425 * Try to finish write
1426 */
1427 int resid = ct-wc;
1428
1429 if ((wc = write(tfd, ((char *)fbuf)+wc,
1430 resid)) == resid) {
1431 /* worked on second attempt? */
1432 continue;
1433 }
1434 else if (wc < 0) {
1435 fsrprintf(_("bad write2 of %d "
1436 "bytes to %s: %s\n"),
1437 resid, tname,
1438 strerror(errno));
1439 } else {
1440 fsrprintf(_("bad copy to %s\n"),
1441 tname);
1442 }
1443 }
671632c6 1444 goto out;
c988ea91
CH
1445 }
1446 if (nfrags) {
1447 /* Do a matching write to the tmp file */
431ec4e6 1448 wc_b4 = wc;
c988ea91
CH
1449 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1450 fsrprintf(_("bad write of %d bytes "
1451 "to %s: %s\n"),
1452 wc_b4, ffname, strerror(errno));
1453 }
1454 }
1455 }
1456 }
3d303baa
ES
1457 if (ftruncate64(tfd, statp->bs_size) < 0) {
1458 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1459 fname, strerror(errno));
1460 goto out;
1461 }
1462 if (fsync(tfd) < 0) {
1463 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1464 fname, strerror(errno));
1465 goto out;
1466 }
c988ea91 1467
c988ea91
CH
1468 sx.sx_stat = *statp; /* struct copy */
1469 sx.sx_version = XFS_SX_VERSION;
1470 sx.sx_fdtarget = fd;
1471 sx.sx_fdtmp = tfd;
1472 sx.sx_offset = 0;
1473 sx.sx_length = statp->bs_size;
1474
1475 /* switch to the owner's id, to keep quota in line */
1476 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1477 if (vflag)
1478 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1479 tname, strerror(errno));
671632c6 1480 goto out;
c988ea91
CH
1481 }
1482
1483 /* Swap the extents */
1484 srval = xfs_swapext(fd, &sx);
1485 if (srval < 0) {
1486 if (errno == ENOTSUP) {
1487 if (vflag || dflag)
1488 fsrprintf(_("%s: file type not supported\n"), fname);
1489 } else if (errno == EFAULT) {
1490 /* The file has changed since we started the copy */
1491 if (vflag || dflag)
1492 fsrprintf(_("%s: file modified defrag aborted\n"),
1493 fname);
1494 } else if (errno == EBUSY) {
1495 /* Timestamp has changed or mmap'ed file */
1496 if (vflag || dflag)
1497 fsrprintf(_("%s: file busy\n"), fname);
1498 } else {
1499 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1500 fname, strerror(errno));
1501 }
671632c6 1502 goto out;
c988ea91
CH
1503 }
1504
1505 /* Report progress */
1506 if (vflag)
1507 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1508 cur_nextents, new_nextents,
1509 (new_nextents <= nextents ? "DONE" : " " ),
1510 fname);
671632c6
ES
1511 retval = 0;
1512
1513out:
1514 free(fbuf);
1515 if (tfd != -1)
1516 close(tfd);
1517 if (ffd != -1)
1518 close(ffd);
1519 return retval;
c988ea91
CH
1520}
1521
1522char *
1523gettmpname(char *fname)
1524{
1525 static char buf[PATH_MAX+1];
1526 char sbuf[SMBUFSZ];
1527 char *ptr;
1528
1529 sprintf(sbuf, "/.fsr%d", getpid());
1530
6063feca
ES
1531 strncpy(buf, fname, PATH_MAX);
1532 buf[PATH_MAX] = '\0';
c988ea91
CH
1533 ptr = strrchr(buf, '/');
1534 if (ptr) {
1535 *ptr = '\0';
1536 } else {
1537 strcpy(buf, ".");
1538 }
1539
1540 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1541 fsrprintf(_("tmp file name too long: %s\n"), fname);
1542 return(NULL);
1543 }
1544
1545 strcat(buf, sbuf);
1546
1547 return(buf);
1548}
1549
/*
 * Return the directory part of fname: everything before the last '/'.
 * A file directly under the root yields "/", and a name without any
 * '/' yields ".".
 *
 * The result is kept in a static buffer that is overwritten by the
 * next call; names longer than PATH_MAX are cut at PATH_MAX first.
 */
char *
getparent(char *fname)
{
	static char	buf[PATH_MAX+1];
	char		*slash;

	strncpy(buf, fname, PATH_MAX);
	buf[PATH_MAX] = '\0';

	slash = strrchr(buf, '/');
	if (slash == NULL) {
		strcpy(buf, ".");
		return buf;
	}

	/* keep the leading '/' when the parent is the root directory */
	if (slash == buf)
		slash++;
	*slash = '\0';

	return buf;
}
1569
1570/*
1571 * Read in block map of the input file, coalesce contiguous
1572 * extents into a single range, keep all holes. Convert from 512 byte
1573 * blocks to bytes.
1574 *
1575 * This code was borrowed from mv.c with some minor mods.
1576 */
1577#define MAPSIZE 128
1578#define OUTMAP_SIZE_INCREMENT MAPSIZE
1579
1580int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
1581{
1582 int i, cnt;
1583 struct getbmap map[MAPSIZE];
1584
1585#define BUMP_CNT \
1586 if (++cnt >= outmap_size) { \
1587 outmap_size += OUTMAP_SIZE_INCREMENT; \
1588 outmap = (struct getbmap *)realloc(outmap, \
1589 outmap_size*sizeof(*outmap)); \
1590 if (outmap == NULL) { \
1591 fsrprintf(_("realloc failed: %s\n"), \
1592 strerror(errno)); \
1593 exit(1); \
1594 } \
1595 }
1596
1597 /* Initialize the outmap array. It always grows - never shrinks.
1598 * Left-over memory allocation is saved for the next files.
1599 */
1600 if (outmap_size == 0) {
1601 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1602 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1603 if (!outmap) {
1604 fsrprintf(_("malloc failed: %s\n"),
1605 strerror(errno));
1606 exit(1);
1607 }
1608 }
1609
1610 outmap[0].bmv_block = 0;
1611 outmap[0].bmv_offset = 0;
1612 outmap[0].bmv_length = sin->bs_size;
1613
1614 /*
1615 * If a non regular file is involved then forget holes
1616 */
1617
1618 if (!S_ISREG(sin->bs_mode))
1619 return(1);
1620
1621 outmap[0].bmv_length = 0;
1622
1623 map[0].bmv_offset = 0;
1624 map[0].bmv_block = 0;
1625 map[0].bmv_entries = 0;
1626 map[0].bmv_count = MAPSIZE;
1627 map[0].bmv_length = -1;
1628
1629 cnt = 0;
1630 *cur_nextents = 0;
1631
1632 do {
1633 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1634 fsrprintf(_("failed reading extents: inode %llu"),
1635 (unsigned long long)sin->bs_ino);
1636 exit(1);
1637 }
1638
1639 /* Concatenate extents together and replicate holes into
1640 * the output map.
1641 */
1642 *cur_nextents += map[0].bmv_entries;
1643 for (i = 0; i < map[0].bmv_entries; i++) {
1644 if (map[i + 1].bmv_block == -1) {
1645 BUMP_CNT;
1646 outmap[cnt] = map[i+1];
1647 } else if (outmap[cnt].bmv_block == -1) {
1648 BUMP_CNT;
1649 outmap[cnt] = map[i+1];
1650 } else {
1651 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1652 }
1653 }
1654 } while (map[0].bmv_entries == (MAPSIZE-1));
1655 for (i = 0; i <= cnt; i++) {
1656 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1657 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1658 }
1659
1660 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1661
1662 return(cnt+1);
1663}
1664
1665/*
1666 * Read the block map and return the number of extents.
1667 */
1668int
1669getnextents(int fd)
1670{
1671 int nextents;
1672 struct getbmap map[MAPSIZE];
1673
1674 map[0].bmv_offset = 0;
1675 map[0].bmv_block = 0;
1676 map[0].bmv_entries = 0;
1677 map[0].bmv_count = MAPSIZE;
1678 map[0].bmv_length = -1;
1679
1680 nextents = 0;
1681
1682 do {
1683 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1684 fsrprintf(_("failed reading extents"));
1685 exit(1);
1686 }
1687
1688 nextents += map[0].bmv_entries;
1689 } while (map[0].bmv_entries == (MAPSIZE-1));
1690
1691 return(nextents);
1692}
1693
1694/*
1695 * Get the fs geometry
1696 */
1697int
1698xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
1699{
1700 if (xfs_fsgeometry(fd, fsgeom) < 0) {
1701 return -1;
1702 }
1703 return 0;
1704}
1705
1706/*
1707 * Get xfs realtime space information
1708 */
1709int
1710xfs_getrt(int fd, struct statvfs64 *sfbp)
1711{
1712 unsigned long bsize;
1713 unsigned long factor;
1714 xfs_fsop_counts_t cnt;
1715
1716 if (!fsgeom.rtblocks)
1717 return -1;
1718
1719 if (xfs_fscounts(fd, &cnt) < 0) {
1720 close(fd);
1721 return -1;
1722 }
b35b4eb8 1723#ifndef statvfs64
c988ea91 1724 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
b35b4eb8
JT
1725#else
1726 bsize = sfbp->f_bsize;
1727#endif
c988ea91
CH
1728 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1729 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1730 return 0;
1731}
1732
1733int
1734fsrprintf(const char *fmt, ...)
1735{
1736 va_list ap;
1737
1738 va_start(ap, fmt);
1739 if (gflag) {
1740 static int didopenlog;
1741 if (!didopenlog) {
1742 openlog("fsr", LOG_PID, LOG_USER);
1743 didopenlog = 1;
1744 }
1745 vsyslog(LOG_INFO, fmt, ap);
1746 } else
1747 vprintf(fmt, ap);
1748 va_end(ap);
1749 return 0;
1750}
1751
c988ea91
CH
1752/*
1753 * Initialize a directory for tmp file use. This is used
1754 * by the full filesystem defragmentation when we're walking
1755 * the inodes and do not know the path for the individual
1756 * files. Multiple directories are used to spread out the
1757 * tmp data around to different ag's (since file data is
1758 * usually allocated to the same ag as the directory and
1759 * directories allocated round robin from the same
1760 * parent directory).
1761 */
1762static void
1763tmp_init(char *mnt)
1764{
1765 int i;
1766 static char buf[SMBUFSZ];
1767 mode_t mask;
1768
1769 tmp_agi = 0;
1770 sprintf(buf, "%s/.fsr", mnt);
1771
1772 mask = umask(0);
1773 if (mkdir(buf, 0700) < 0) {
1774 if (errno == EEXIST) {
1775 if (dflag)
1776 fsrprintf(_("tmpdir already exists: %s\n"),
1777 buf);
1778 } else {
1779 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1780 buf, strerror(errno));
1781 exit(-1);
1782 }
1783 }
1784 for (i=0; i < fsgeom.agcount; i++) {
1785 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1786 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1787 if (errno == EEXIST) {
1788 if (dflag)
1789 fsrprintf(
1790 _("tmpdir already exists: %s\n"), buf);
1791 } else {
1792 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1793 buf, strerror(errno));
1794 exit(-1);
1795 }
1796 }
1797 }
1798 (void)umask(mask);
1799 return;
1800}
1801
1802static char *
1803tmp_next(char *mnt)
1804{
1805 static char buf[SMBUFSZ];
1806
1807 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1808 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1809 tmp_agi,
1810 getpid());
1811
1812 if (++tmp_agi == fsgeom.agcount)
1813 tmp_agi = 0;
1814
1815 return(buf);
1816}
1817
1818static void
1819tmp_close(char *mnt)
1820{
1821 static char buf[SMBUFSZ];
1822 int i;
1823
1824 /* No data is ever actually written so we can just do rmdir's */
1825 for (i=0; i < fsgeom.agcount; i++) {
1826 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1827 if (rmdir(buf) < 0) {
1828 if (errno != ENOENT) {
1829 fsrprintf(
1830 _("could not remove tmpdir: %s: %s\n"),
1831 buf, strerror(errno));
1832 }
1833 }
1834 }
1835 sprintf(buf, "%s/.fsr", mnt);
1836 if (rmdir(buf) < 0) {
1837 if (errno != ENOENT) {
1838 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1839 buf, strerror(errno));
1840 }
1841 }
1842}