]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
fsr: fix uninitialized fs usage after timeout
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
c988ea91
CH
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "xfs.h"
21#include "xfs_types.h"
22#include "jdm.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_attr_sf.h"
c988ea91
CH
25
26#include <fcntl.h>
27#include <errno.h>
c988ea91
CH
28#include <syslog.h>
29#include <signal.h>
30#include <sys/ioctl.h>
31#include <sys/wait.h>
c988ea91
CH
32#include <sys/statvfs.h>
33#include <sys/xattr.h>
0e2fb84a 34#include <paths.h>
c988ea91 35
89e4b5bd
CH
36#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
37#define _PATH_PROC_MOUNTS "/proc/mounts"
38
c988ea91
CH
39
char		*progname;		/* basename of argv[0], set in main() */

/* Command line state. */
int		vflag;			/* bumped once per -v */
int		gflag;			/* -g, or on by default when fd 0 is not a tty */
static int	Mflag;			/* -M; also toggled per pass by fsrallfs() */
int		dflag = 0;		/* -d: debug output */
int		argv_blksz_dio;		/* value given with -b */
extern int	max_ext_size;
static int	npasses = 10;		/* -p: passes before a global run stops */
static int	startpass = 0;		/* pass number recovered from the leftoff file */

struct getbmap	*outmap = NULL;
int		outmap_size = 0;
int		RealUid;		/* getuid() of the caller, saved in main() */
int		tmp_agi;
static int64_t	minimumfree = 2048;	/* bytes held back in the free space checks */
c988ea91
CH
58
#define MNTTYPE_XFS	"xfs"		/* mnt_type string of an XFS mount entry */

#define SMBUFSZ		1024		/* size of the small scratch buffers */
#define ROOT		0		/* uid compared against file owners */
#define NULLFD		-1		/* "no descriptor" marker */
#define GRABSZ		64		/* inodes requested per bulkstat call */
#define TARGETRANGE	10		/* percent of each batch handled per pass */
#define V_NONE		0
#define V_OVERVIEW	1
#define V_ALL		2
#define BUFFER_SIZE	(1<<16)
#define BUFFER_MAX	(1<<24)
c988ea91
CH
71
72static time_t howlong = 7200; /* default seconds of reorganizing */
73static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
74static time_t endtime;
75static time_t starttime;
76static xfs_ino_t leftoffino = 0;
77static int pagesize;
78
79void usage(int ret);
80static int fsrfile(char *fname, xfs_ino_t ino);
81static int fsrfile_common( char *fname, char *tname, char *mnt,
82 int fd, xfs_bstat_t *statp);
83static int packfile(char *fname, char *tname, int fd,
84 xfs_bstat_t *statp, struct fsxattr *fsxp);
85static void fsrdir(char *dirname);
86static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
87static void initallfs(char *mtab);
89e4b5bd 88static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
89static void fsrall_cleanup(int timeout);
90static int getnextents(int);
91int xfsrtextsize(int fd);
09d38d96 92int xfs_getrt(int fd, struct statvfs *sfbp);
c988ea91
CH
93char * gettmpname(char *fname);
94char * getparent(char *fname);
95int fsrprintf(const char *fmt, ...);
96int read_fd_bmap(int, xfs_bstat_t *, int *);
97int cmp(const void *, const void *);
98static void tmp_init(char *mnt);
99static char * tmp_next(char *mnt);
100static void tmp_close(char *mnt);
101int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
c988ea91
CH
102
103xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */
104
105#define NMOUNT 64
106static int numfs;
107
108typedef struct fsdesc {
109 char *dev;
110 char *mnt;
111 int npass;
112} fsdesc_t;
113
114fsdesc_t *fs, *fsbase, *fsend;
115int fsbufsize = 10; /* A starting value */
116int nfrags = 0; /* Debug option: Coerse into specific number
117 * of extents */
118int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
119
120int
121xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
122{
123 return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
124}
125
126int
127xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
128{
129 xfs_fsop_bulkreq_t bulkreq;
130
cad114df 131 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
132 bulkreq.icount = 1;
133 bulkreq.ubuffer = ubuffer;
134 bulkreq.ocount = NULL;
135 return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
136}
137
138int
139xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
140 xfs_bstat_t *ubuffer, __s32 *ocount)
141{
142 xfs_fsop_bulkreq_t bulkreq;
143
cad114df 144 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
145 bulkreq.icount = icount;
146 bulkreq.ubuffer = ubuffer;
147 bulkreq.ocount = ocount;
148 return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
149}
150
151int
152xfs_swapext(int fd, xfs_swapext_t *sx)
153{
154 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
155}
156
157int
158xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
159{
160 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
161}
162
/*
 * Signal handler installed by fsrallfs(): run the timeout flavour of
 * fsrall_cleanup() so the restart position is recorded, then exit with
 * status 1.  The signal number is not used.
 */
void
aborter(int unused)
{
	(void)unused;

	fsrall_cleanup(1);
	exit(1);
}
169
3e50d888
CH
170/*
171 * Check if the argument is either the device name or mountpoint of an XFS
172 * filesystem. Note that we do not care about bind mounted regular files
173 * here - the code that handles defragmentation of invidual files takes care
174 * of that.
175 */
7141fc5b 176static char *
f594a0d1 177find_mountpoint_check(struct stat *sb, struct mntent *t)
7141fc5b 178{
f594a0d1 179 struct stat ms;
7849d55d 180
7141fc5b 181 if (S_ISDIR(sb->st_mode)) { /* mount point */
f594a0d1 182 if (stat(t->mnt_dir, &ms) < 0)
7141fc5b 183 return NULL;
7849d55d 184 if (sb->st_ino != ms.st_ino)
7141fc5b 185 return NULL;
7849d55d 186 if (sb->st_dev != ms.st_dev)
7141fc5b
JT
187 return NULL;
188 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
189 return NULL;
190 } else { /* device */
f594a0d1 191 if (stat(t->mnt_fsname, &ms) < 0)
7141fc5b 192 return NULL;
7849d55d 193 if (sb->st_rdev != ms.st_rdev)
7141fc5b
JT
194 return NULL;
195 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
196 return NULL;
7141fc5b 197 /*
a32546b0
DC
198 * Make sure the mountpoint given by mtab is accessible
199 * before using it.
200 */
f594a0d1 201 if (stat(t->mnt_dir, &ms) < 0)
7141fc5b
JT
202 return NULL;
203 }
204
205 return t->mnt_dir;
7141fc5b
JT
206}
207
3e50d888 208static char *
f594a0d1 209find_mountpoint(char *mtab, char *argname, struct stat *sb)
3e50d888 210{
7141fc5b 211 struct mntent_cursor cursor;
7141fc5b 212 struct mntent *t = NULL;
3e50d888
CH
213 char *mntp = NULL;
214
7141fc5b
JT
215 if (platform_mntent_open(&cursor, mtab) != 0){
216 fprintf(stderr, "Error: can't get mntent entries.\n");
3e50d888
CH
217 exit(1);
218 }
219
a32546b0 220 while ((t = platform_mntent_next(&cursor)) != NULL) {
7849d55d 221 mntp = find_mountpoint_check(sb, t);
7141fc5b
JT
222 if (mntp == NULL)
223 continue;
3e50d888
CH
224 break;
225 }
7141fc5b 226 platform_mntent_close(&cursor);
3e50d888
CH
227 return mntp;
228}
229
c988ea91
CH
230int
231main(int argc, char **argv)
232{
f594a0d1 233 struct stat sb;
c988ea91 234 char *argname;
c988ea91 235 int c;
3e50d888 236 char *mntp;
89e4b5bd 237 char *mtab = NULL;
c988ea91
CH
238
239 setlinebuf(stdout);
240 progname = basename(argv[0]);
241
242 setlocale(LC_ALL, "");
243 bindtextdomain(PACKAGE, LOCALEDIR);
244 textdomain(PACKAGE);
245
246 gflag = ! isatty(0);
247
89e4b5bd 248 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
249 switch (c) {
250 case 'M':
251 Mflag = 1;
252 break;
253 case 'g':
254 gflag = 1;
255 break;
256 case 'n':
257 /* nflag = 1; */
258 break;
259 case 'v':
260 ++vflag;
261 break;
262 case 'd':
263 dflag = 1;
264 break;
265 case 's': /* frag stats only */
266 /* sflag = 1; */
267 fprintf(stderr,
268 _("%s: Stats not yet supported for XFS\n"),
269 progname);
270 usage(1);
271 break;
272 case 't':
273 howlong = atoi(optarg);
274 break;
275 case 'f':
276 leftofffile = optarg;
277 break;
278 case 'm':
279 mtab = optarg;
280 break;
281 case 'b':
282 argv_blksz_dio = atoi(optarg);
283 break;
284 case 'p':
285 npasses = atoi(optarg);
286 break;
287 case 'C':
288 /* Testing opt: coerses frag count in result */
289 if (getenv("FSRXFSTEST") != NULL) {
290 nfrags = atoi(optarg);
291 openopts |= O_SYNC;
292 }
293 break;
294 case 'V':
295 printf(_("%s version %s\n"), progname, VERSION);
296 exit(0);
297 default:
298 usage(1);
299 }
89e4b5bd
CH
300 }
301
302 /*
303 * If the user did not specify an explicit mount table, try to use
304 * /proc/mounts if it is available, else /etc/mtab. We prefer
305 * /proc/mounts because it is kernel controlled, while /etc/mtab
306 * may contain garbage that userspace tools like pam_mounts wrote
307 * into it.
308 */
309 if (!mtab) {
310 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
311 mtab = _PATH_PROC_MOUNTS;
312 else
313 mtab = _PATH_MOUNTED;
314 }
315
c988ea91
CH
316 if (vflag)
317 setbuf(stdout, NULL);
318
319 starttime = time(0);
320
321 /* Save the caller's real uid */
322 RealUid = getuid();
323
324 pagesize = getpagesize();
325
326 if (optind < argc) {
327 for (; optind < argc; optind++) {
328 argname = argv[optind];
3e50d888 329
f594a0d1 330 if (lstat(argname, &sb) < 0) {
c988ea91
CH
331 fprintf(stderr,
332 _("%s: could not stat: %s: %s\n"),
333 progname, argname, strerror(errno));
334 continue;
335 }
3e50d888
CH
336
337 if (S_ISLNK(sb.st_mode)) {
f594a0d1 338 struct stat sb2;
3e50d888 339
f594a0d1 340 if (stat(argname, &sb2) == 0 &&
3e50d888
CH
341 (S_ISBLK(sb2.st_mode) ||
342 S_ISCHR(sb2.st_mode)))
c988ea91 343 sb = sb2;
c988ea91 344 }
3e50d888
CH
345
346 mntp = find_mountpoint(mtab, argname, &sb);
c988ea91 347 if (mntp != NULL) {
3e50d888 348 fsrfs(mntp, 0, 100);
c988ea91
CH
349 } else if (S_ISCHR(sb.st_mode)) {
350 fprintf(stderr, _(
351 "%s: char special not supported: %s\n"),
352 progname, argname);
353 exit(1);
354 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
355 if (!platform_test_xfs_path(argname)) {
356 fprintf(stderr, _(
357 "%s: cannot defragment: %s: Not XFS\n"),
358 progname, argname);
359 continue;
360 }
361 if (S_ISDIR(sb.st_mode))
362 fsrdir(argname);
363 else
364 fsrfile(argname, sb.st_ino);
365 } else {
366 printf(
367 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
368 argname);
369 }
370 }
371 } else {
372 initallfs(mtab);
89e4b5bd 373 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
374 }
375 return 0;
376}
377
378void
379usage(int ret)
380{
381 fprintf(stderr, _(
30626ef6
ES
382"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
383" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
384" %s -V\n\n"
c988ea91 385"Options:\n"
c988ea91
CH
386" -g Print to syslog (default if stdout not a tty).\n"
387" -t time How long to run in seconds.\n"
30626ef6 388" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
389" -f leftoff Use this instead of %s.\n"
390" -m mtab Use something other than /etc/mtab.\n"
391" -d Debug, print even more.\n"
30626ef6
ES
392" -v Verbose, more -v's more verbose.\n"
393" -V Print version number and exit.\n"
394 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
395 exit(ret);
396}
397
398/*
399 * initallfs -- read the mount table and set up an internal form
400 */
401static void
402initallfs(char *mtab)
403{
7141fc5b 404 struct mntent_cursor cursor;
7849d55d 405 struct mntent *mnt= NULL;
c988ea91
CH
406 int mi;
407 char *cp;
f594a0d1 408 struct stat sb;
c988ea91
CH
409
410 /* malloc a number of descriptors, increased later if needed */
411 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
412 fsrprintf(_("out of memory: %s\n"), strerror(errno));
413 exit(1);
414 }
415 fsend = (fsbase + fsbufsize - 1);
416
417 /* find all rw xfs file systems */
418 mi = 0;
419 fs = fsbase;
7141fc5b
JT
420
421 if (platform_mntent_open(&cursor, mtab) != 0){
422 fprintf(stderr, "Error: can't get mntent entries.\n");
423 exit(1);
424 }
425
7849d55d 426 while ((mnt = platform_mntent_next(&cursor)) != NULL) {
c988ea91
CH
427 int rw = 0;
428
7849d55d 429 if (strcmp(mnt->mnt_type, MNTTYPE_XFS ) != 0 ||
f594a0d1 430 stat(mnt->mnt_fsname, &sb) == -1 ||
c988ea91
CH
431 !S_ISBLK(sb.st_mode))
432 continue;
433
7849d55d 434 cp = strtok(mnt->mnt_opts,",");
c988ea91
CH
435 do {
436 if (strcmp("rw", cp) == 0)
437 rw++;
438 } while ((cp = strtok(NULL, ",")) != NULL);
439 if (rw == 0) {
440 if (dflag)
441 fsrprintf(_("Skipping %s: not mounted rw\n"),
7849d55d 442 mnt->mnt_fsname);
c988ea91
CH
443 continue;
444 }
445
446 if (mi == fsbufsize) {
447 fsbufsize += NMOUNT;
448 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
449 fsbufsize * sizeof(fsdesc_t))) == NULL) {
450 fsrprintf(_("out of memory: %s\n"),
451 strerror(errno));
452 exit(1);
453 }
454 if (!fsbase) {
455 fsrprintf(_("out of memory on realloc: %s\n"),
456 strerror(errno));
457 exit(1);
458 }
459 fs = (fsbase + mi); /* Needed ? */
460 }
461
7849d55d
ES
462 fs->dev = strdup(mnt->mnt_fsname);
463 fs->mnt = strdup(mnt->mnt_dir);
c988ea91 464
758bcc92 465 if (fs->dev == NULL) {
7849d55d 466 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_fsname);
c988ea91
CH
467 exit(1);
468 }
758bcc92 469 if (fs->mnt == NULL) {
7849d55d 470 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_dir);
758bcc92
ES
471 exit(1);
472 }
c988ea91
CH
473 mi++;
474 fs++;
475 }
7141fc5b
JT
476 platform_mntent_close(&cursor);
477
c988ea91
CH
478 numfs = mi;
479 fsend = (fsbase + numfs);
c988ea91
CH
480 if (numfs == 0) {
481 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
482 exit(0);
483 }
484 if (vflag || dflag) {
485 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
486 numfs);
487 if (dflag)
488 for (fs = fsbase; fs < fsend; fs++)
489 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
490 }
491}
492
493static void
89e4b5bd 494fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
495{
496 int fd;
497 int error;
498 int found = 0;
499 char *fsname;
500 char buf[SMBUFSZ];
501 int mdonly = Mflag;
502 char *ptr;
503 xfs_ino_t startino = 0;
504 fsdesc_t *fsp;
f594a0d1 505 struct stat sb, sb2;
c988ea91
CH
506
507 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
508
509 endtime = starttime + howlong;
510 fs = fsbase;
511
512 /* where'd we leave off last time? */
f594a0d1 513 if (lstat(leftofffile, &sb) == 0) {
c988ea91
CH
514 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
515 fsrprintf(_("%s: open failed\n"), leftofffile);
516 }
f594a0d1 517 else if ( fstat(fd, &sb2) == 0) {
c988ea91
CH
518 /*
519 * Verify that lstat & fstat point to the
520 * same regular file (no links/no quick spoofs)
521 */
522 if ( (sb.st_dev != sb2.st_dev) ||
523 (sb.st_ino != sb2.st_ino) ||
524 ((sb.st_mode & S_IFMT) != S_IFREG) ||
525 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
526 (sb2.st_uid != ROOT) ||
527 (sb2.st_nlink != 1)
528 )
529 {
530 fsrprintf(_("Can't use %s: mode=0%o own=%d"
531 " nlink=%d\n"),
532 leftofffile, sb.st_mode,
533 sb.st_uid, sb.st_nlink);
534 close(fd);
535 fd = NULLFD;
536 }
537 }
538 else {
539 close(fd);
540 fd = NULLFD;
541 }
542 }
543 else {
544 fd = NULLFD;
545 }
546
547 if (fd != NULLFD) {
548 if (read(fd, buf, SMBUFSZ) == -1) {
549 fs = fsbase;
550 fsrprintf(_("could not read %s, starting with %s\n"),
551 leftofffile, *fs->dev);
552 } else {
eef20df0
ES
553 /* Ensure the buffer we read is null terminated */
554 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
555 for (fs = fsbase; fs < fsend; fs++) {
556 fsname = fs->dev;
557 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
558 && buf[strlen(fsname)] == ' ') {
559 found = 1;
560 break;
561 }
562 }
563 if (! found)
564 fs = fsbase;
565
566 ptr = strchr(buf, ' ');
567 if (ptr) {
568 startpass = atoi(++ptr);
569 ptr = strchr(ptr, ' ');
570 if (ptr) {
571 startino = strtoull(++ptr, NULL, 10);
572 }
573 }
574 if (startpass < 0)
575 startpass = 0;
576
577 /* Init pass counts */
578 for (fsp = fsbase; fsp < fs; fsp++) {
579 fsp->npass = startpass + 1;
580 }
581 for (fsp = fs; fsp <= fsend; fsp++) {
582 fsp->npass = startpass;
583 }
584 }
585 close(fd);
586 }
587
588 if (vflag) {
589 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
590 fs->npass, (unsigned long long)startino,
591 fs->dev, fs->mnt);
592 }
593
594 signal(SIGABRT, aborter);
595 signal(SIGHUP, aborter);
596 signal(SIGINT, aborter);
597 signal(SIGQUIT, aborter);
598 signal(SIGTERM, aborter);
599
600 /* reorg for 'howlong' -- checked in 'fsrfs' */
601 while (endtime > time(0)) {
602 pid_t pid;
8d2666e3 603
c988ea91
CH
604 if (npasses > 1 && !fs->npass)
605 Mflag = 1;
606 else
607 Mflag = mdonly;
608 pid = fork();
609 switch(pid) {
610 case -1:
611 fsrprintf(_("couldn't fork sub process:"));
612 exit(1);
613 break;
614 case 0:
615 error = fsrfs(fs->mnt, startino, TARGETRANGE);
616 exit (error);
617 break;
618 default:
619 wait(&error);
c988ea91
CH
620 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
621 /* child timed out & did fsrall_cleanup */
622 exit(0);
623 }
624 break;
625 }
626 startino = 0; /* reset after the first time through */
627 fs->npass++;
628 fs++;
8d2666e3
JM
629 if (fs == fsend)
630 fs = fsbase;
631 if (fs->npass == npasses) {
632 fsrprintf(_("Completed all %d passes\n"), npasses);
633 break;
634 }
c988ea91
CH
635 }
636 fsrall_cleanup(endtime <= time(0));
637}
638
639/*
640 * fsrall_cleanup -- close files, print next starting location, etc.
641 */
642static void
643fsrall_cleanup(int timeout)
644{
645 int fd;
646 int ret;
647 char buf[SMBUFSZ];
648
c988ea91 649 unlink(leftofffile);
d0e82db1
ES
650
651 if (timeout) {
652 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
653 progname, startpass, fs->npass,
654 time(0) - endtime + howlong);
655
656 /* record where we left off */
657 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
658 if (fd == -1) {
659 fsrprintf(_("open(%s) failed: %s\n"),
660 leftofffile, strerror(errno));
661 } else {
c988ea91
CH
662 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
663 fs->npass, (unsigned long long)leftoffino);
664 if (write(fd, buf, ret) < strlen(buf))
665 fsrprintf(_("write(%s) failed: %s\n"),
666 leftofffile, strerror(errno));
667 close(fd);
668 }
669 }
c988ea91
CH
670}
671
672/*
673 * fsrfs -- reorganize a file system
674 */
675static int
676fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
677{
678
679 int fsfd, fd;
680 int count = 0;
681 int ret;
682 __s32 buflenout;
683 xfs_bstat_t buf[GRABSZ];
684 char fname[64];
685 char *tname;
686 jdm_fshandle_t *fshandlep;
687 xfs_ino_t lastino = startino;
688
689 fsrprintf(_("%s start inode=%llu\n"), mntdir,
690 (unsigned long long)startino);
691
692 fshandlep = jdm_getfshandle( mntdir );
693 if ( ! fshandlep ) {
694 fsrprintf(_("unable to get handle: %s: %s\n"),
695 mntdir, strerror( errno ));
696 return -1;
697 }
698
699 if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
700 fsrprintf(_("unable to open: %s: %s\n"),
701 mntdir, strerror( errno ));
e3e2793d 702 free(fshandlep);
c988ea91
CH
703 return -1;
704 }
705
706 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
707 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
708 mntdir);
11e06961 709 close(fsfd);
e3e2793d 710 free(fshandlep);
c988ea91
CH
711 return -1;
712 }
713
714 tmp_init(mntdir);
715
716 while ((ret = xfs_bulkstat(fsfd,
98166c91 717 &lastino, GRABSZ, &buf[0], &buflenout)) == 0) {
c988ea91
CH
718 xfs_bstat_t *p;
719 xfs_bstat_t *endp;
720
721 if (buflenout == 0)
722 goto out0;
723
724 /* Each loop through, defrag targetrange percent of the files */
725 count = (buflenout * targetrange) / 100;
726
727 qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
728
729 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
730 /* Do some obvious checks now */
731 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
732 (p->bs_extents < 2))
733 continue;
734
108e985b
DC
735 fd = jdm_open(fshandlep, p, O_RDWR|O_DIRECT);
736 if (fd < 0) {
c988ea91
CH
737 /* This probably means the file was
738 * removed while in progress of handling
739 * it. Just quietly ignore this file.
740 */
741 if (dflag)
742 fsrprintf(_("could not open: "
743 "inode %llu\n"), p->bs_ino);
744 continue;
745 }
746
747 /* Don't know the pathname, so make up something */
748 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
749
750 /* Get a tmp file name */
751 tname = tmp_next(mntdir);
752
753 ret = fsrfile_common(fname, tname, mntdir, fd, p);
754
755 leftoffino = p->bs_ino;
756
757 close(fd);
758
759 if (ret == 0) {
760 if (--count <= 0)
761 break;
762 }
763 }
764 if (endtime && endtime < time(0)) {
765 tmp_close(mntdir);
766 close(fsfd);
767 fsrall_cleanup(1);
768 exit(1);
769 }
770 }
771 if (ret < 0)
772 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
773out0:
774 tmp_close(mntdir);
775 close(fsfd);
e3e2793d 776 free(fshandlep);
c988ea91
CH
777 return 0;
778}
779
780/*
781 * To compare bstat structs for qsort.
782 */
783int
784cmp(const void *s1, const void *s2)
785{
786 return( ((xfs_bstat_t *)s2)->bs_extents -
787 ((xfs_bstat_t *)s1)->bs_extents);
788
789}
790
/*
 * Placeholder for reorganizing by directory hierarchy.  Nothing is
 * reorganized: the only effect is a message saying that directory
 * defragmentation is not supported for 'dirname'.
 */
static void
fsrdir(char *dirname)
{
	const char	*msg = _("%s: Directory defragmentation not supported\n");

	fsrprintf(msg, dirname);
}
801
802/*
803 * Sets up the defragmentation of a file based on the
804 * filepath. It collects the bstat information, does
805 * an open on the file and passes this all to fsrfile_common.
806 */
807static int
808fsrfile(char *fname, xfs_ino_t ino)
809{
810 xfs_bstat_t statbuf;
811 jdm_fshandle_t *fshandlep;
4f10a2fb
ES
812 int fd = -1, fsfd = -1;
813 int error = -1;
c988ea91
CH
814 char *tname;
815
816 fshandlep = jdm_getfshandle(getparent (fname) );
4f10a2fb 817 if (!fshandlep) {
c988ea91
CH
818 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
819 fname, strerror(errno));
4f10a2fb 820 goto out;
c988ea91
CH
821 }
822
823 /*
824 * Need to open something on the same filesystem as the
825 * file. Open the parent.
826 */
827 fsfd = open(getparent(fname), O_RDONLY);
828 if (fsfd < 0) {
829 fsrprintf(_("unable to open sys handle for %s: %s\n"),
830 fname, strerror(errno));
4f10a2fb 831 goto out;
c988ea91
CH
832 }
833
834 if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
835 fsrprintf(_("unable to get bstat on %s: %s\n"),
836 fname, strerror(errno));
4f10a2fb 837 goto out;
c988ea91
CH
838 }
839
108e985b 840 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
841 if (fd < 0) {
842 fsrprintf(_("unable to open handle %s: %s\n"),
843 fname, strerror(errno));
4f10a2fb 844 goto out;
c988ea91
CH
845 }
846
847 /* Get the fs geometry */
848 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
849 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
4f10a2fb 850 goto out;
c988ea91
CH
851 }
852
c988ea91
CH
853 tname = gettmpname(fname);
854
855 if (tname)
856 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
857
4f10a2fb
ES
858out:
859 if (fsfd >= 0)
860 close(fsfd);
861 if (fd >= 0)
862 close(fd);
863 free(fshandlep);
c988ea91
CH
864
865 return error;
866}
867
868
869/*
870 * This is the common defrag code for either a full fs
871 * defragmentation or a single file. Check as much as
872 * possible with the file, fork a process to setuid to the
873 * target file owner's uid and defragment the file.
874 * This is done so the new extents created in a tmp file are
875 * reflected in the owners' quota without having to do any
876 * special code in the kernel. When the existing extents
877 * are removed, the quotas will be correct. It's ugly but
878 * it saves us from doing some quota re-construction in
879 * the extent swap. The price is that the defragmentation
880 * will fail if the owner of the target file is already at
881 * their quota limit.
882 */
883static int
884fsrfile_common(
885 char *fname,
886 char *tname,
887 char *fsname,
888 int fd,
889 xfs_bstat_t *statp)
890{
891 int error;
09d38d96 892 struct statvfs vfss;
c988ea91
CH
893 struct fsxattr fsx;
894 unsigned long bsize;
895
896 if (vflag)
897 fsrprintf("%s\n", fname);
898
899 if (fsync(fd) < 0) {
900 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
901 return -1;
902 }
903
904 if (statp->bs_size == 0) {
905 if (vflag)
906 fsrprintf(_("%s: zero size, ignoring\n"), fname);
907 return(0);
908 }
909
910 /* Check if a mandatory lock is set on the file to try and
911 * avoid blocking indefinitely on the reads later. Note that
912 * someone could still set a mandatory lock after this check
913 * but before all reads have completed to block fsr reads.
914 * This change just closes the window a bit.
915 */
916 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
917 struct flock fl;
918
919 fl.l_type = F_RDLCK;
920 fl.l_whence = SEEK_SET;
921 fl.l_start = (off_t)0;
922 fl.l_len = 0;
923 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
924 if (vflag)
925 fsrprintf(_("locking check failed: %s\n"),
926 fname);
927 return(-1);
928 }
929 if (fl.l_type != F_UNLCK) {
930 /* Mandatory lock is set */
931 if (vflag)
932 fsrprintf(_("mandatory lock: %s: ignoring\n"),
933 fname);
934 return(-1);
935 }
936 }
937
938 /*
939 * Check if there is room to copy the file.
940 *
941 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
942 * not the optimal I/O size as struct stat.
943 */
09d38d96 944 if (statvfs(fsname ? fsname : fname, &vfss) < 0) {
c988ea91
CH
945 fsrprintf(_("unable to get fs stat on %s: %s\n"),
946 fname, strerror(errno));
947 return -1;
948 }
949 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
950 if (statp->bs_blksize * statp->bs_blocks >
951 vfss.f_bfree * bsize - minimumfree) {
952 fsrprintf(_("insufficient freespace for: %s: "
953 "size=%lld: ignoring\n"), fname,
954 statp->bs_blksize * statp->bs_blocks);
955 return 1;
956 }
957
83f4b5ac 958 if ((ioctl(fd, FS_IOC_FSGETXATTR, &fsx)) < 0) {
c988ea91
CH
959 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
960 return(-1);
961 }
83f4b5ac 962 if (fsx.fsx_xflags & (FS_XFLAG_IMMUTABLE|FS_XFLAG_APPEND)) {
c988ea91
CH
963 if (vflag)
964 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
965 return(0);
966 }
83f4b5ac 967 if (fsx.fsx_xflags & FS_XFLAG_NODEFRAG) {
c988ea91
CH
968 if (vflag)
969 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
970 fname);
971 return(0);
972 }
83f4b5ac 973 if (fsx.fsx_xflags & FS_XFLAG_REALTIME) {
c988ea91
CH
974 if (xfs_getrt(fd, &vfss) < 0) {
975 fsrprintf(_("cannot get realtime geometry for: %s\n"),
976 fname);
977 return(-1);
978 }
979 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
980 fsrprintf(_("low on realtime free space: %s: "
981 "ignoring file\n"), fname);
982 return(-1);
983 }
984 }
985
986 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
987 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
988 return -1;
989 }
990
991 /*
992 * Previously the code forked here, & the child changed it's uid to
993 * that of the file's owner and then called packfile(), to keep
994 * quota counts correct. (defragged files could use fewer blocks).
995 *
996 * Instead, just fchown() the temp file to the uid,gid of the
997 * file we're defragging, in packfile().
998 */
999
1000 if ((error = packfile(fname, tname, fd, statp, &fsx)))
1001 return error;
1002 return -1; /* no error */
1003}
1004
bdb041f5
DC
1005/*
1006 * Attempt to set the attr fork up correctly. This is simple for attr1
1007 * filesystems as they have a fixed inode fork offset. In that case
1008 * just create an attribute and that's all we need to do.
1009 *
1010 * For attr2 filesystems, see if we have the actual fork offset in
1011 * the bstat structure. If so, just create additional attributes on
1012 * the temporary inode until the offset matches.
1013 *
1014 * If it doesn't exist, we can only do best effort. Add an attribute at a time
1015 * to move the inode fork around, but take into account that the attribute
1016 * might be too small to move the fork every time we add one. This should
1017 * hopefully put the fork offset in the right place. It's not a big deal if we
1018 * don't get it right - the kernel will reject it when we try to swap extents.
1019 */
1020static int
1021fsr_setup_attr_fork(
1022 int fd,
1023 int tfd,
1024 xfs_bstat_t *bstatp)
1025{
c14c7b79 1026#ifdef HAVE_FSETXATTR
f594a0d1 1027 struct stat tstatbuf;
bdb041f5 1028 int i;
27507775 1029 int diff = 0;
bdb041f5
DC
1030 int last_forkoff = 0;
1031 int no_change_cnt = 0;
1032 int ret;
1033
83f4b5ac 1034 if (!(bstatp->bs_xflags & FS_XFLAG_HASATTR))
bdb041f5
DC
1035 return 0;
1036
1037 /*
1038 * use the old method if we have attr1 or the kernel does not yet
1039 * support passing the fork offset in the bulkstat data.
1040 */
1041 if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
1042 bstatp->bs_forkoff == 0) {
1043 /* attr1 */
1044 ret = fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE);
1045 if (ret) {
1046 fsrprintf(_("could not set ATTR\n"));
1047 return -1;
1048 }
1049 goto out;
1050 }
1051
1052 /* attr2 w/ fork offsets */
1053
f594a0d1 1054 if (fstat(tfd, &tstatbuf) < 0) {
bdb041f5
DC
1055 fsrprintf(_("unable to stat temp file: %s\n"),
1056 strerror(errno));
1057 return -1;
1058 }
1059
1060 i = 0;
1061 do {
1062 xfs_bstat_t tbstat;
1063 xfs_ino_t ino;
1064 char name[64];
bdb041f5
DC
1065
1066 /*
1adfe5c6 1067 * bulkstat the temp inode to see what the forkoff is. Use
bdb041f5
DC
1068 * this to compare against the target and determine what we
1069 * need to do.
1070 */
1071 ino = tstatbuf.st_ino;
1072 if ((xfs_bulkstat_single(tfd, &ino, &tbstat)) < 0) {
1073 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1074 strerror(errno));
1075 return -1;
1076 }
1077 if (dflag)
1078 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1079 bstatp->bs_forkoff, tbstat.bs_forkoff);
1adfe5c6
ES
1080 diff = tbstat.bs_forkoff - bstatp->bs_forkoff;
1081
1082 /* if they are equal, we are done */
1083 if (!diff)
1084 goto out;
bdb041f5
DC
1085
1086 snprintf(name, sizeof(name), "user.%d", i);
1087
1088 /*
1089 * If there is no attribute, then we need to create one to get
1090 * an attribute fork at the default location.
1091 */
1092 if (!tbstat.bs_forkoff) {
1adfe5c6 1093 ASSERT(i == 0);
bdb041f5
DC
1094 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1095 if (ret) {
1096 fsrprintf(_("could not set ATTR\n"));
1097 return -1;
1098 }
1099 continue;
1adfe5c6 1100 } else if (i == 0) {
1adfe5c6
ES
1101 /*
1102 * First pass, and temp file already has an inline
1103 * xattr, probably due to selinux.
1104 *
1105 * It's *possible* that the temp file attr area
e7e3152c 1106 * is larger than the target file's:
1adfe5c6
ES
1107 *
1108 * Target Temp
1109 * +-------+ 0 +-------+ 0
1110 * | | | |
1111 * | | | Data |
1112 * | Data | | |
1113 * | | v-------v forkoff
1114 * | | | |
1115 * v-------v forkoff | Attr | local
e7e3152c 1116 * | Attr | | |
1adfe5c6 1117 * +-------+ +-------+
1adfe5c6
ES
1118 */
1119
1adfe5c6 1120 /*
e7e3152c
ES
1121 * If target attr area is less than the temp's
1122 * (diff < 0) write a big attr to the temp file to knock
1123 * the attr out of local format.
1124 * (This should actually *increase* the temp file's
1125 * forkoffset when the attr moves out of the inode)
1adfe5c6 1126 */
e7e3152c 1127 if (diff < 0) {
1adfe5c6
ES
1128 char val[2048];
1129 memset(val, 'X', 2048);
1130 if (fsetxattr(tfd, name, val, 2048, 0)) {
1131 fsrprintf(_("big ATTR set failed\n"));
1132 return -1;
1133 }
1134 /* Go back & see where we're at now */
1135 continue;
1136 }
bdb041f5
DC
1137 }
1138
1139 /*
1140 * make a progress check so we don't get stuck trying to extend
1141 * a large btree form attribute fork.
1142 */
1143 if (last_forkoff == tbstat.bs_forkoff) {
1144 if (no_change_cnt++ > 10)
1145 break;
ff85ea3f
ES
1146 } else /* progress! */
1147 no_change_cnt = 0;
bdb041f5
DC
1148 last_forkoff = tbstat.bs_forkoff;
1149
1150 /* work out which way to grow the fork */
bdb041f5
DC
1151 if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
1152 fsrprintf(_("forkoff diff %d too large!\n"), diff);
1153 return -1;
1154 }
1155
bdb041f5 1156 /*
1adfe5c6
ES
1157 * if the temp inode fork offset is still smaller then we have
1158 * to grow the data fork
bdb041f5
DC
1159 */
1160 if (diff < 0) {
1161 /*
1162 * create some temporary extents in the inode to move
1163 * the fork in the direction we need. This can be done
1164 * by preallocating some single block extents at
1165 * non-contiguous offsets.
1166 */
1167 /* XXX: unimplemented! */
27507775
ES
1168 if (dflag)
1169 printf(_("data fork growth unimplemented\n"));
bdb041f5
DC
1170 goto out;
1171 }
1172
1173 /* we need to grow the attr fork, so create another attr */
1174 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1175 if (ret) {
1176 fsrprintf(_("could not set ATTR\n"));
1177 return -1;
1178 }
1179
1180 } while (++i < 100); /* don't go forever */
1181
1182out:
1183 if (dflag)
1184 fsrprintf(_("set temp attr\n"));
27507775
ES
1185 /* We failed to resolve the fork difference */
1186 if (dflag && diff)
1187 fsrprintf(_("failed to match fork offset\n"));;
1188
c14c7b79 1189#endif /* HAVE_FSETXATTR */
bdb041f5
DC
1190 return 0;
1191}
c988ea91
CH
1192
1193/*
1194 * Do the defragmentation of a single file.
1195 * We already are pretty sure we can and want to
1196 * defragment the file. Create the tmp file, copy
1197 * the data (maintaining holes) and call the kernel
671632c6
ES
1198 * extent swap routine.
1199 *
1200 * Return values:
1201 * -1: Some error was encountered
1202 * 0: Successfully defragmented the file
1203 * 1: No change / No Error
c988ea91
CH
1204 */
1205static int
1206packfile(char *fname, char *tname, int fd,
1207 xfs_bstat_t *statp, struct fsxattr *fsxp)
1208{
671632c6 1209 int tfd = -1;
c988ea91 1210 int srval;
671632c6 1211 int retval = -1; /* Failure is the default */
c988ea91
CH
1212 int nextents, extent, cur_nextents, new_nextents;
1213 unsigned blksz_dio;
1214 unsigned dio_min;
1215 struct dioattr dio;
1216 static xfs_swapext_t sx;
1217 struct xfs_flock64 space;
1218 off64_t cnt, pos;
671632c6 1219 void *fbuf = NULL;
c988ea91
CH
1220 int ct, wc, wc_b4;
1221 char ffname[SMBUFSZ];
1222 int ffd = -1;
1223
1224 /*
1225 * Work out the extent map - nextents will be set to the
1226 * minimum number of extents needed for the file (taking
1227 * into account holes), cur_nextents is the current number
1228 * of extents.
1229 */
1230 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1231
1232 if (cur_nextents == 1 || cur_nextents <= nextents) {
1233 if (vflag)
1234 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1235 retval = 1; /* indicates no change/no error */
1236 goto out;
c988ea91
CH
1237 }
1238
1239 if (dflag)
1240 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1241 fname, cur_nextents, (cur_nextents - nextents),
1242 tname);
1243
1244 if ((tfd = open(tname, openopts, 0666)) < 0) {
1245 if (vflag)
1246 fsrprintf(_("could not open tmp file: %s: %s\n"),
1247 tname, strerror(errno));
671632c6 1248 goto out;
c988ea91
CH
1249 }
1250 unlink(tname);
1251
1252 /* Setup extended attributes */
bdb041f5
DC
1253 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1254 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1255 goto out;
c988ea91
CH
1256 }
1257
1258 /* Setup extended inode flags, project identifier, etc */
1259 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
83f4b5ac 1260 if (ioctl(tfd, FS_IOC_FSSETXATTR, fsxp) < 0) {
c988ea91
CH
1261 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1262 tname);
671632c6 1263 goto out;
c988ea91
CH
1264 }
1265 }
1266
1267 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1268 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1269 goto out;
c988ea91
CH
1270 }
1271
1272 dio_min = dio.d_miniosz;
1273 if (statp->bs_size <= dio_min) {
1274 blksz_dio = dio_min;
1275 } else {
1276 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1277 if (argv_blksz_dio != 0)
1278 blksz_dio = min(argv_blksz_dio, blksz_dio);
1279 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1280 }
1281
1282 if (dflag) {
1283 fsrprintf(_("DEBUG: "
1284 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1285 statp->bs_size, blksz_dio, dio.d_miniosz,
1286 dio.d_maxiosz, pagesize);
1287 }
1288
1289 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1290 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1291 goto out;
c988ea91
CH
1292 }
1293
1294 if (nfrags) {
1295 /* Create new tmp file in same AG as first */
1296 sprintf(ffname, "%s.frag", tname);
1297
1298 /* Open the new file for sync writes */
1299 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1300 fsrprintf(_("could not open fragfile: %s : %s\n"),
1301 ffname, strerror(errno));
671632c6 1302 goto out;
c988ea91
CH
1303 }
1304 unlink(ffname);
1305 }
1306
1307 /* Loop through block map allocating new extents */
1308 for (extent = 0; extent < nextents; extent++) {
1309 pos = outmap[extent].bmv_offset;
1310 if (outmap[extent].bmv_block == -1) {
1311 space.l_whence = SEEK_SET;
1312 space.l_start = pos;
1313 space.l_len = outmap[extent].bmv_length;
1314 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1315 fsrprintf(_("could not trunc tmp %s\n"),
1316 tname);
1317 }
dc8878f4 1318 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1319 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1320 tname, strerror(errno));
1321 goto out;
1322 }
c988ea91
CH
1323 continue;
1324 } else if (outmap[extent].bmv_length == 0) {
1325 /* to catch holes at the beginning of the file */
1326 continue;
1327 }
1328 if (! nfrags) {
1329 space.l_whence = SEEK_CUR;
1330 space.l_start = 0;
1331 space.l_len = outmap[extent].bmv_length;
1332
1333 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1334 fsrprintf(_("could not pre-allocate tmp space:"
1335 " %s\n"), tname);
671632c6 1336 goto out;
c988ea91 1337 }
dc8878f4 1338 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1339 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1340 tname, strerror(errno));
1341 goto out;
1342 }
c988ea91
CH
1343 }
1344 } /* end of space allocation loop */
1345
dc8878f4 1346 if (lseek(tfd, 0, SEEK_SET)) {
c988ea91 1347 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1348 goto out;
c988ea91
CH
1349 }
1350
1351 /* Check if the temporary file has fewer extents */
1352 new_nextents = getnextents(tfd);
1353 if (dflag)
1354 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1355 if (cur_nextents <= new_nextents) {
1356 if (vflag)
1357 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1358 retval = 1; /* no change/no error */
1359 goto out;
c988ea91
CH
1360 }
1361
1362 /* Loop through block map copying the file. */
1363 for (extent = 0; extent < nextents; extent++) {
1364 pos = outmap[extent].bmv_offset;
1365 if (outmap[extent].bmv_block == -1) {
dc8878f4 1366 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1367 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1368 tname, strerror(errno));
1369 goto out;
1370 }
dc8878f4 1371 if (lseek(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1372 fsrprintf(_("could not lseek in file: %s : %s\n"),
1373 fname, strerror(errno));
1374 goto out;
1375 }
c988ea91
CH
1376 continue;
1377 } else if (outmap[extent].bmv_length == 0) {
1378 /* to catch holes at the beginning of the file */
1379 continue;
1380 }
1381 for (cnt = outmap[extent].bmv_length; cnt > 0;
1382 cnt -= ct, pos += ct) {
1383 if (nfrags && --nfrags) {
1384 ct = min(cnt, dio_min);
1385 } else if (cnt % dio_min == 0) {
1386 ct = min(cnt, blksz_dio);
1387 } else {
1388 ct = min(cnt + dio_min - (cnt % dio_min),
1389 blksz_dio);
1390 }
1391 ct = read(fd, fbuf, ct);
1392 if (ct == 0) {
1393 /* EOF, stop trying to read */
1394 extent = nextents;
1395 break;
1396 }
1397 /* Ensure we do direct I/O to correct block
1398 * boundaries.
1399 */
1400 if (ct % dio_min != 0) {
1401 wc = ct + dio_min - (ct % dio_min);
1402 } else {
1403 wc = ct;
1404 }
1405 wc_b4 = wc;
1406 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1407 if (ct < 0)
1408 fsrprintf(_("bad read of %d bytes "
1409 "from %s: %s\n"), wc_b4,
1410 fname, strerror(errno));
1411 else if (wc < 0)
1412 fsrprintf(_("bad write of %d bytes "
1413 "to %s: %s\n"), wc_b4,
1414 tname, strerror(errno));
1415 else {
1416 /*
1417 * Might be out of space
1418 *
1419 * Try to finish write
1420 */
1421 int resid = ct-wc;
1422
1423 if ((wc = write(tfd, ((char *)fbuf)+wc,
1424 resid)) == resid) {
1425 /* worked on second attempt? */
1426 continue;
1427 }
1428 else if (wc < 0) {
1429 fsrprintf(_("bad write2 of %d "
1430 "bytes to %s: %s\n"),
1431 resid, tname,
1432 strerror(errno));
1433 } else {
1434 fsrprintf(_("bad copy to %s\n"),
1435 tname);
1436 }
1437 }
671632c6 1438 goto out;
c988ea91
CH
1439 }
1440 if (nfrags) {
1441 /* Do a matching write to the tmp file */
431ec4e6 1442 wc_b4 = wc;
c988ea91
CH
1443 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1444 fsrprintf(_("bad write of %d bytes "
1445 "to %s: %s\n"),
1446 wc_b4, ffname, strerror(errno));
1447 }
1448 }
1449 }
1450 }
dde67673 1451 if (ftruncate(tfd, statp->bs_size) < 0) {
3d303baa
ES
1452 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1453 fname, strerror(errno));
1454 goto out;
1455 }
1456 if (fsync(tfd) < 0) {
1457 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1458 fname, strerror(errno));
1459 goto out;
1460 }
c988ea91 1461
c988ea91
CH
1462 sx.sx_stat = *statp; /* struct copy */
1463 sx.sx_version = XFS_SX_VERSION;
1464 sx.sx_fdtarget = fd;
1465 sx.sx_fdtmp = tfd;
1466 sx.sx_offset = 0;
1467 sx.sx_length = statp->bs_size;
1468
1469 /* switch to the owner's id, to keep quota in line */
1470 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1471 if (vflag)
1472 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1473 tname, strerror(errno));
671632c6 1474 goto out;
c988ea91
CH
1475 }
1476
1477 /* Swap the extents */
1478 srval = xfs_swapext(fd, &sx);
1479 if (srval < 0) {
1480 if (errno == ENOTSUP) {
1481 if (vflag || dflag)
1482 fsrprintf(_("%s: file type not supported\n"), fname);
1483 } else if (errno == EFAULT) {
1484 /* The file has changed since we started the copy */
1485 if (vflag || dflag)
1486 fsrprintf(_("%s: file modified defrag aborted\n"),
1487 fname);
1488 } else if (errno == EBUSY) {
1489 /* Timestamp has changed or mmap'ed file */
1490 if (vflag || dflag)
1491 fsrprintf(_("%s: file busy\n"), fname);
1492 } else {
1493 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1494 fname, strerror(errno));
1495 }
671632c6 1496 goto out;
c988ea91
CH
1497 }
1498
1499 /* Report progress */
1500 if (vflag)
1501 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1502 cur_nextents, new_nextents,
1503 (new_nextents <= nextents ? "DONE" : " " ),
1504 fname);
671632c6
ES
1505 retval = 0;
1506
1507out:
1508 free(fbuf);
1509 if (tfd != -1)
1510 close(tfd);
1511 if (ffd != -1)
1512 close(ffd);
1513 return retval;
c988ea91
CH
1514}
1515
1516char *
1517gettmpname(char *fname)
1518{
1519 static char buf[PATH_MAX+1];
1520 char sbuf[SMBUFSZ];
1521 char *ptr;
1522
1523 sprintf(sbuf, "/.fsr%d", getpid());
1524
6063feca
ES
1525 strncpy(buf, fname, PATH_MAX);
1526 buf[PATH_MAX] = '\0';
c988ea91
CH
1527 ptr = strrchr(buf, '/');
1528 if (ptr) {
1529 *ptr = '\0';
1530 } else {
1531 strcpy(buf, ".");
1532 }
1533
1534 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1535 fsrprintf(_("tmp file name too long: %s\n"), fname);
1536 return(NULL);
1537 }
1538
1539 strcat(buf, sbuf);
1540
1541 return(buf);
1542}
1543
/*
 * Return the directory part of fname.
 *
 * Everything from the last '/' onwards is cut off; if that '/' is the
 * very first character the result is "/", and a name without any '/'
 * yields ".".  The result lives in a static buffer that is overwritten
 * by the next call; names longer than PATH_MAX are truncated first.
 */
char *
getparent(char *fname)
{
	static char	parent[PATH_MAX+1];
	char		*slash;

	strncpy(parent, fname, PATH_MAX);
	parent[PATH_MAX] = '\0';

	slash = strrchr(parent, '/');
	if (slash == NULL) {
		/* no directory component at all */
		strcpy(parent, ".");
		return parent;
	}

	/* keep the leading slash when the parent is the root directory */
	if (slash == parent)
		slash++;
	*slash = '\0';

	return parent;
}
1563
1564/*
1565 * Read in block map of the input file, coalesce contiguous
1566 * extents into a single range, keep all holes. Convert from 512 byte
1567 * blocks to bytes.
1568 *
1569 * This code was borrowed from mv.c with some minor mods.
1570 */
1571#define MAPSIZE 128
1572#define OUTMAP_SIZE_INCREMENT MAPSIZE
1573
1574int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
1575{
1576 int i, cnt;
1577 struct getbmap map[MAPSIZE];
1578
1579#define BUMP_CNT \
1580 if (++cnt >= outmap_size) { \
1581 outmap_size += OUTMAP_SIZE_INCREMENT; \
1582 outmap = (struct getbmap *)realloc(outmap, \
1583 outmap_size*sizeof(*outmap)); \
1584 if (outmap == NULL) { \
1585 fsrprintf(_("realloc failed: %s\n"), \
1586 strerror(errno)); \
1587 exit(1); \
1588 } \
1589 }
1590
1591 /* Initialize the outmap array. It always grows - never shrinks.
1592 * Left-over memory allocation is saved for the next files.
1593 */
1594 if (outmap_size == 0) {
1595 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1596 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1597 if (!outmap) {
1598 fsrprintf(_("malloc failed: %s\n"),
1599 strerror(errno));
1600 exit(1);
1601 }
1602 }
1603
1604 outmap[0].bmv_block = 0;
1605 outmap[0].bmv_offset = 0;
1606 outmap[0].bmv_length = sin->bs_size;
1607
1608 /*
1609 * If a non regular file is involved then forget holes
1610 */
1611
1612 if (!S_ISREG(sin->bs_mode))
1613 return(1);
1614
1615 outmap[0].bmv_length = 0;
1616
1617 map[0].bmv_offset = 0;
1618 map[0].bmv_block = 0;
1619 map[0].bmv_entries = 0;
1620 map[0].bmv_count = MAPSIZE;
1621 map[0].bmv_length = -1;
1622
1623 cnt = 0;
1624 *cur_nextents = 0;
1625
1626 do {
1627 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1628 fsrprintf(_("failed reading extents: inode %llu"),
1629 (unsigned long long)sin->bs_ino);
1630 exit(1);
1631 }
1632
1633 /* Concatenate extents together and replicate holes into
1634 * the output map.
1635 */
1636 *cur_nextents += map[0].bmv_entries;
1637 for (i = 0; i < map[0].bmv_entries; i++) {
1638 if (map[i + 1].bmv_block == -1) {
1639 BUMP_CNT;
1640 outmap[cnt] = map[i+1];
1641 } else if (outmap[cnt].bmv_block == -1) {
1642 BUMP_CNT;
1643 outmap[cnt] = map[i+1];
1644 } else {
1645 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1646 }
1647 }
1648 } while (map[0].bmv_entries == (MAPSIZE-1));
1649 for (i = 0; i <= cnt; i++) {
1650 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1651 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1652 }
1653
1654 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1655
1656 return(cnt+1);
1657}
1658
1659/*
1660 * Read the block map and return the number of extents.
1661 */
1662int
1663getnextents(int fd)
1664{
1665 int nextents;
1666 struct getbmap map[MAPSIZE];
1667
1668 map[0].bmv_offset = 0;
1669 map[0].bmv_block = 0;
1670 map[0].bmv_entries = 0;
1671 map[0].bmv_count = MAPSIZE;
1672 map[0].bmv_length = -1;
1673
1674 nextents = 0;
1675
1676 do {
1677 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1678 fsrprintf(_("failed reading extents"));
1679 exit(1);
1680 }
1681
1682 nextents += map[0].bmv_entries;
1683 } while (map[0].bmv_entries == (MAPSIZE-1));
1684
1685 return(nextents);
1686}
1687
1688/*
1689 * Get the fs geometry
1690 */
1691int
1692xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
1693{
1694 if (xfs_fsgeometry(fd, fsgeom) < 0) {
1695 return -1;
1696 }
1697 return 0;
1698}
1699
1700/*
1701 * Get xfs realtime space information
1702 */
1703int
09d38d96 1704xfs_getrt(int fd, struct statvfs *sfbp)
c988ea91
CH
1705{
1706 unsigned long bsize;
1707 unsigned long factor;
1708 xfs_fsop_counts_t cnt;
1709
1710 if (!fsgeom.rtblocks)
1711 return -1;
1712
1713 if (xfs_fscounts(fd, &cnt) < 0) {
1714 close(fd);
1715 return -1;
1716 }
1717 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
1718 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1719 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1720 return 0;
1721}
1722
1723int
1724fsrprintf(const char *fmt, ...)
1725{
1726 va_list ap;
1727
1728 va_start(ap, fmt);
1729 if (gflag) {
1730 static int didopenlog;
1731 if (!didopenlog) {
1732 openlog("fsr", LOG_PID, LOG_USER);
1733 didopenlog = 1;
1734 }
1735 vsyslog(LOG_INFO, fmt, ap);
1736 } else
1737 vprintf(fmt, ap);
1738 va_end(ap);
1739 return 0;
1740}
1741
c988ea91
CH
1742/*
1743 * Initialize a directory for tmp file use. This is used
1744 * by the full filesystem defragmentation when we're walking
1745 * the inodes and do not know the path for the individual
1746 * files. Multiple directories are used to spread out the
1747 * tmp data around to different ag's (since file data is
1748 * usually allocated to the same ag as the directory and
1749 * directories allocated round robin from the same
1750 * parent directory).
1751 */
1752static void
1753tmp_init(char *mnt)
1754{
1755 int i;
1756 static char buf[SMBUFSZ];
1757 mode_t mask;
1758
1759 tmp_agi = 0;
1760 sprintf(buf, "%s/.fsr", mnt);
1761
1762 mask = umask(0);
1763 if (mkdir(buf, 0700) < 0) {
1764 if (errno == EEXIST) {
1765 if (dflag)
1766 fsrprintf(_("tmpdir already exists: %s\n"),
1767 buf);
1768 } else {
1769 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1770 buf, strerror(errno));
1771 exit(-1);
1772 }
1773 }
1774 for (i=0; i < fsgeom.agcount; i++) {
1775 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1776 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1777 if (errno == EEXIST) {
1778 if (dflag)
1779 fsrprintf(
1780 _("tmpdir already exists: %s\n"), buf);
1781 } else {
1782 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1783 buf, strerror(errno));
1784 exit(-1);
1785 }
1786 }
1787 }
1788 (void)umask(mask);
1789 return;
1790}
1791
1792static char *
1793tmp_next(char *mnt)
1794{
1795 static char buf[SMBUFSZ];
1796
1797 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1798 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1799 tmp_agi,
1800 getpid());
1801
1802 if (++tmp_agi == fsgeom.agcount)
1803 tmp_agi = 0;
1804
1805 return(buf);
1806}
1807
1808static void
1809tmp_close(char *mnt)
1810{
1811 static char buf[SMBUFSZ];
1812 int i;
1813
1814 /* No data is ever actually written so we can just do rmdir's */
1815 for (i=0; i < fsgeom.agcount; i++) {
1816 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1817 if (rmdir(buf) < 0) {
1818 if (errno != ENOENT) {
1819 fsrprintf(
1820 _("could not remove tmpdir: %s: %s\n"),
1821 buf, strerror(errno));
1822 }
1823 }
1824 }
1825 sprintf(buf, "%s/.fsr", mnt);
1826 if (rmdir(buf) < 0) {
1827 if (errno != ENOENT) {
1828 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1829 buf, strerror(errno));
1830 }
1831 }
1832}