]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
xfsprogs: xfs_fsr: Verify bulkstat version information in qsort's cmp()
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
c988ea91
CH
2/*
3 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
4 * All Rights Reserved.
c988ea91
CH
5 */
6
6b803e5a
CH
7#include "libxfs.h"
8#include "xfs.h"
9#include "xfs_types.h"
10#include "jdm.h"
11#include "xfs_bmap_btree.h"
12#include "xfs_attr_sf.h"
42b4c8e8 13#include "libfrog/paths.h"
fee68490 14#include "libfrog/fsgeom.h"
f31b5e12 15#include "libfrog/bulkstat.h"
c988ea91
CH
16
17#include <fcntl.h>
18#include <errno.h>
c988ea91
CH
19#include <syslog.h>
20#include <signal.h>
21#include <sys/ioctl.h>
22#include <sys/wait.h>
c988ea91
CH
23#include <sys/statvfs.h>
24#include <sys/xattr.h>
0e2fb84a 25#include <paths.h>
c988ea91 26
89e4b5bd
CH
27#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
28#define _PATH_PROC_MOUNTS "/proc/mounts"
29
c988ea91
CH
30
31char *progname;
32
00ff2b10
ES
33static int vflag;
34static int gflag;
c988ea91
CH
35static int Mflag;
36/* static int nflag; */
00ff2b10 37static int dflag = 0;
c988ea91 38/* static int sflag; */
00ff2b10 39static int argv_blksz_dio;
c988ea91
CH
40extern int max_ext_size;
41static int npasses = 10;
42static int startpass = 0;
43
00ff2b10
ES
44static struct getbmap *outmap = NULL;
45static int outmap_size = 0;
46static int RealUid;
47static int tmp_agi;
14f8b681 48static int64_t minimumfree = 2048;
c988ea91
CH
49
50#define MNTTYPE_XFS "xfs"
51
52#define SMBUFSZ 1024
53#define ROOT 0
54#define NULLFD -1
55#define GRABSZ 64
56#define TARGETRANGE 10
c988ea91 57#define BUFFER_MAX (1<<24)
c988ea91
CH
58
59static time_t howlong = 7200; /* default seconds of reorganizing */
60static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
61static time_t endtime;
62static time_t starttime;
63static xfs_ino_t leftoffino = 0;
64static int pagesize;
65
66void usage(int ret);
67static int fsrfile(char *fname, xfs_ino_t ino);
68static int fsrfile_common( char *fname, char *tname, char *mnt,
b46789e2 69 int fd, struct xfs_bstat *statp);
c988ea91 70static int packfile(char *fname, char *tname, int fd,
b46789e2 71 struct xfs_bstat *statp, struct fsxattr *fsxp);
c988ea91
CH
72static void fsrdir(char *dirname);
73static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
74static void initallfs(char *mtab);
89e4b5bd 75static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
76static void fsrall_cleanup(int timeout);
77static int getnextents(int);
78int xfsrtextsize(int fd);
09d38d96 79int xfs_getrt(int fd, struct statvfs *sfbp);
c988ea91
CH
80char * gettmpname(char *fname);
81char * getparent(char *fname);
82int fsrprintf(const char *fmt, ...);
b46789e2 83int read_fd_bmap(int, struct xfs_bstat *, int *);
c988ea91
CH
84static void tmp_init(char *mnt);
85static char * tmp_next(char *mnt);
86static void tmp_close(char *mnt);
c988ea91 87
9612817d 88static struct xfs_fsop_geom fsgeom; /* geometry of active mounted system */
c988ea91
CH
89
90#define NMOUNT 64
91static int numfs;
92
/*
 * One entry of the table of filesystems built by initallfs() and walked
 * by fsrallfs().
 */
typedef struct fsdesc {
	char *dev;	/* strdup()ed copy of the mount entry's mnt_fsname */
	char *mnt;	/* strdup()ed copy of the mount entry's mnt_dir */
	int npass;	/* pass counter; fsrallfs() bumps it after each fsrfs() run */
} fsdesc_t;
98
00ff2b10
ES
99static fsdesc_t *fs, *fsbase, *fsend;
100static int fsbufsize = 10; /* A starting value */
101static int nfrags = 0; /* Debug option: Coerse into specific number
c988ea91 102 * of extents */
00ff2b10 103static int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
c988ea91 104
00ff2b10 105static int
c988ea91
CH
106xfs_swapext(int fd, xfs_swapext_t *sx)
107{
108 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
109}
110
00ff2b10 111static int
c988ea91
CH
112xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
113{
114 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
115}
116
/*
 * Signal handler installed by fsrallfs(): run the timeout-style cleanup
 * (which records where we left off) and terminate with status 1.
 * The signal number is not used.
 */
static void
aborter(int unused)
{
	(void)unused;

	fsrall_cleanup(1);
	exit(1);
}
123
124int
125main(int argc, char **argv)
126{
f594a0d1 127 struct stat sb;
c988ea91 128 char *argname;
c988ea91 129 int c;
938f7b70 130 struct fs_path *fsp;
89e4b5bd 131 char *mtab = NULL;
c988ea91
CH
132
133 setlinebuf(stdout);
134 progname = basename(argv[0]);
135
136 setlocale(LC_ALL, "");
137 bindtextdomain(PACKAGE, LOCALEDIR);
138 textdomain(PACKAGE);
139
140 gflag = ! isatty(0);
141
89e4b5bd 142 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
143 switch (c) {
144 case 'M':
145 Mflag = 1;
146 break;
147 case 'g':
148 gflag = 1;
149 break;
150 case 'n':
151 /* nflag = 1; */
152 break;
153 case 'v':
154 ++vflag;
155 break;
156 case 'd':
157 dflag = 1;
158 break;
159 case 's': /* frag stats only */
160 /* sflag = 1; */
161 fprintf(stderr,
162 _("%s: Stats not yet supported for XFS\n"),
163 progname);
164 usage(1);
165 break;
166 case 't':
167 howlong = atoi(optarg);
168 break;
169 case 'f':
170 leftofffile = optarg;
171 break;
172 case 'm':
173 mtab = optarg;
174 break;
175 case 'b':
176 argv_blksz_dio = atoi(optarg);
177 break;
178 case 'p':
179 npasses = atoi(optarg);
180 break;
181 case 'C':
182 /* Testing opt: coerses frag count in result */
183 if (getenv("FSRXFSTEST") != NULL) {
184 nfrags = atoi(optarg);
185 openopts |= O_SYNC;
186 }
187 break;
188 case 'V':
189 printf(_("%s version %s\n"), progname, VERSION);
190 exit(0);
191 default:
192 usage(1);
193 }
89e4b5bd
CH
194 }
195
196 /*
197 * If the user did not specify an explicit mount table, try to use
198 * /proc/mounts if it is available, else /etc/mtab. We prefer
199 * /proc/mounts because it is kernel controlled, while /etc/mtab
200 * may contain garbage that userspace tools like pam_mounts wrote
201 * into it.
202 */
203 if (!mtab) {
204 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
205 mtab = _PATH_PROC_MOUNTS;
206 else
207 mtab = _PATH_MOUNTED;
208 }
209
c988ea91
CH
210 if (vflag)
211 setbuf(stdout, NULL);
212
3dd2705a 213 starttime = time(NULL);
c988ea91
CH
214
215 /* Save the caller's real uid */
216 RealUid = getuid();
217
218 pagesize = getpagesize();
938f7b70 219 fs_table_initialise(0, NULL, 0, NULL);
c988ea91
CH
220 if (optind < argc) {
221 for (; optind < argc; optind++) {
222 argname = argv[optind];
3e50d888 223
f594a0d1 224 if (lstat(argname, &sb) < 0) {
c988ea91
CH
225 fprintf(stderr,
226 _("%s: could not stat: %s: %s\n"),
227 progname, argname, strerror(errno));
228 continue;
229 }
3e50d888
CH
230
231 if (S_ISLNK(sb.st_mode)) {
f594a0d1 232 struct stat sb2;
3e50d888 233
f594a0d1 234 if (stat(argname, &sb2) == 0 &&
3e50d888
CH
235 (S_ISBLK(sb2.st_mode) ||
236 S_ISCHR(sb2.st_mode)))
c988ea91 237 sb = sb2;
c988ea91 238 }
3e50d888 239
938f7b70
DW
240 fsp = fs_table_lookup_mount(argname);
241 if (!fsp)
242 fsp = fs_table_lookup_blkdev(argname);
243 if (fsp != NULL) {
244 fsrfs(fsp->fs_dir, 0, 100);
c988ea91
CH
245 } else if (S_ISCHR(sb.st_mode)) {
246 fprintf(stderr, _(
247 "%s: char special not supported: %s\n"),
248 progname, argname);
249 exit(1);
250 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
251 if (!platform_test_xfs_path(argname)) {
252 fprintf(stderr, _(
253 "%s: cannot defragment: %s: Not XFS\n"),
254 progname, argname);
255 continue;
256 }
257 if (S_ISDIR(sb.st_mode))
258 fsrdir(argname);
259 else
260 fsrfile(argname, sb.st_ino);
261 } else {
262 printf(
263 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
264 argname);
265 }
266 }
267 } else {
268 initallfs(mtab);
89e4b5bd 269 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
270 }
271 return 0;
272}
273
274void
275usage(int ret)
276{
277 fprintf(stderr, _(
30626ef6
ES
278"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
279" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
280" %s -V\n\n"
c988ea91 281"Options:\n"
c988ea91
CH
282" -g Print to syslog (default if stdout not a tty).\n"
283" -t time How long to run in seconds.\n"
30626ef6 284" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
285" -f leftoff Use this instead of %s.\n"
286" -m mtab Use something other than /etc/mtab.\n"
287" -d Debug, print even more.\n"
30626ef6
ES
288" -v Verbose, more -v's more verbose.\n"
289" -V Print version number and exit.\n"
290 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
291 exit(ret);
292}
293
294/*
295 * initallfs -- read the mount table and set up an internal form
296 */
297static void
298initallfs(char *mtab)
299{
7141fc5b 300 struct mntent_cursor cursor;
7849d55d 301 struct mntent *mnt= NULL;
c988ea91
CH
302 int mi;
303 char *cp;
f594a0d1 304 struct stat sb;
c988ea91
CH
305
306 /* malloc a number of descriptors, increased later if needed */
307 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
308 fsrprintf(_("out of memory: %s\n"), strerror(errno));
309 exit(1);
310 }
311 fsend = (fsbase + fsbufsize - 1);
312
313 /* find all rw xfs file systems */
314 mi = 0;
315 fs = fsbase;
7141fc5b
JT
316
317 if (platform_mntent_open(&cursor, mtab) != 0){
318 fprintf(stderr, "Error: can't get mntent entries.\n");
319 exit(1);
320 }
321
7849d55d 322 while ((mnt = platform_mntent_next(&cursor)) != NULL) {
c988ea91
CH
323 int rw = 0;
324
7849d55d 325 if (strcmp(mnt->mnt_type, MNTTYPE_XFS ) != 0 ||
f594a0d1 326 stat(mnt->mnt_fsname, &sb) == -1 ||
c988ea91
CH
327 !S_ISBLK(sb.st_mode))
328 continue;
329
7849d55d 330 cp = strtok(mnt->mnt_opts,",");
c988ea91
CH
331 do {
332 if (strcmp("rw", cp) == 0)
333 rw++;
334 } while ((cp = strtok(NULL, ",")) != NULL);
335 if (rw == 0) {
336 if (dflag)
337 fsrprintf(_("Skipping %s: not mounted rw\n"),
7849d55d 338 mnt->mnt_fsname);
c988ea91
CH
339 continue;
340 }
341
342 if (mi == fsbufsize) {
343 fsbufsize += NMOUNT;
344 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
345 fsbufsize * sizeof(fsdesc_t))) == NULL) {
346 fsrprintf(_("out of memory: %s\n"),
347 strerror(errno));
348 exit(1);
349 }
350 if (!fsbase) {
351 fsrprintf(_("out of memory on realloc: %s\n"),
352 strerror(errno));
353 exit(1);
354 }
355 fs = (fsbase + mi); /* Needed ? */
356 }
357
7849d55d
ES
358 fs->dev = strdup(mnt->mnt_fsname);
359 fs->mnt = strdup(mnt->mnt_dir);
c988ea91 360
758bcc92 361 if (fs->dev == NULL) {
7849d55d 362 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_fsname);
c988ea91
CH
363 exit(1);
364 }
758bcc92 365 if (fs->mnt == NULL) {
7849d55d 366 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_dir);
758bcc92
ES
367 exit(1);
368 }
c988ea91
CH
369 mi++;
370 fs++;
371 }
7141fc5b
JT
372 platform_mntent_close(&cursor);
373
c988ea91
CH
374 numfs = mi;
375 fsend = (fsbase + numfs);
c988ea91
CH
376 if (numfs == 0) {
377 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
378 exit(0);
379 }
380 if (vflag || dflag) {
381 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
382 numfs);
383 if (dflag)
384 for (fs = fsbase; fs < fsend; fs++)
385 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
386 }
387}
388
389static void
89e4b5bd 390fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
391{
392 int fd;
393 int error;
394 int found = 0;
395 char *fsname;
396 char buf[SMBUFSZ];
397 int mdonly = Mflag;
398 char *ptr;
399 xfs_ino_t startino = 0;
400 fsdesc_t *fsp;
f594a0d1 401 struct stat sb, sb2;
c988ea91
CH
402
403 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
404
405 endtime = starttime + howlong;
406 fs = fsbase;
407
408 /* where'd we leave off last time? */
f594a0d1 409 if (lstat(leftofffile, &sb) == 0) {
c988ea91
CH
410 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
411 fsrprintf(_("%s: open failed\n"), leftofffile);
412 }
f594a0d1 413 else if ( fstat(fd, &sb2) == 0) {
c988ea91
CH
414 /*
415 * Verify that lstat & fstat point to the
416 * same regular file (no links/no quick spoofs)
417 */
418 if ( (sb.st_dev != sb2.st_dev) ||
419 (sb.st_ino != sb2.st_ino) ||
420 ((sb.st_mode & S_IFMT) != S_IFREG) ||
421 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
422 (sb2.st_uid != ROOT) ||
423 (sb2.st_nlink != 1)
424 )
425 {
426 fsrprintf(_("Can't use %s: mode=0%o own=%d"
427 " nlink=%d\n"),
428 leftofffile, sb.st_mode,
429 sb.st_uid, sb.st_nlink);
430 close(fd);
431 fd = NULLFD;
432 }
433 }
434 else {
435 close(fd);
436 fd = NULLFD;
437 }
438 }
439 else {
440 fd = NULLFD;
441 }
442
443 if (fd != NULLFD) {
444 if (read(fd, buf, SMBUFSZ) == -1) {
445 fs = fsbase;
446 fsrprintf(_("could not read %s, starting with %s\n"),
447 leftofffile, *fs->dev);
448 } else {
eef20df0
ES
449 /* Ensure the buffer we read is null terminated */
450 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
451 for (fs = fsbase; fs < fsend; fs++) {
452 fsname = fs->dev;
453 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
454 && buf[strlen(fsname)] == ' ') {
455 found = 1;
456 break;
457 }
458 }
459 if (! found)
460 fs = fsbase;
461
462 ptr = strchr(buf, ' ');
463 if (ptr) {
464 startpass = atoi(++ptr);
465 ptr = strchr(ptr, ' ');
466 if (ptr) {
467 startino = strtoull(++ptr, NULL, 10);
4cca629d
DW
468 /*
469 * NOTE: The inode number read in from
470 * the leftoff file is the last inode
471 * to have been fsr'd. Since the v5
472 * xfrog_bulkstat function wants to be
473 * passed the first inode that we want
474 * to examine, increment the value that
475 * we read in. The debug message below
476 * prints the lastoff value.
477 */
478 startino++;
c988ea91
CH
479 }
480 }
481 if (startpass < 0)
482 startpass = 0;
483
484 /* Init pass counts */
485 for (fsp = fsbase; fsp < fs; fsp++) {
486 fsp->npass = startpass + 1;
487 }
488 for (fsp = fs; fsp <= fsend; fsp++) {
489 fsp->npass = startpass;
490 }
491 }
492 close(fd);
493 }
494
495 if (vflag) {
496 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
4cca629d 497 fs->npass, (unsigned long long)startino - 1,
c988ea91
CH
498 fs->dev, fs->mnt);
499 }
500
501 signal(SIGABRT, aborter);
502 signal(SIGHUP, aborter);
503 signal(SIGINT, aborter);
504 signal(SIGQUIT, aborter);
505 signal(SIGTERM, aborter);
506
507 /* reorg for 'howlong' -- checked in 'fsrfs' */
3dd2705a 508 while (endtime > time(NULL)) {
c988ea91 509 pid_t pid;
8d2666e3 510
c988ea91
CH
511 if (npasses > 1 && !fs->npass)
512 Mflag = 1;
513 else
514 Mflag = mdonly;
515 pid = fork();
516 switch(pid) {
517 case -1:
518 fsrprintf(_("couldn't fork sub process:"));
519 exit(1);
520 break;
521 case 0:
522 error = fsrfs(fs->mnt, startino, TARGETRANGE);
523 exit (error);
524 break;
525 default:
526 wait(&error);
c988ea91
CH
527 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
528 /* child timed out & did fsrall_cleanup */
529 exit(0);
530 }
531 break;
532 }
533 startino = 0; /* reset after the first time through */
534 fs->npass++;
535 fs++;
8d2666e3
JM
536 if (fs == fsend)
537 fs = fsbase;
538 if (fs->npass == npasses) {
539 fsrprintf(_("Completed all %d passes\n"), npasses);
540 break;
541 }
c988ea91 542 }
3dd2705a 543 fsrall_cleanup(endtime <= time(NULL));
c988ea91
CH
544}
545
546/*
547 * fsrall_cleanup -- close files, print next starting location, etc.
548 */
549static void
550fsrall_cleanup(int timeout)
551{
552 int fd;
553 int ret;
554 char buf[SMBUFSZ];
555
c988ea91 556 unlink(leftofffile);
d0e82db1
ES
557
558 if (timeout) {
559 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
560 progname, startpass, fs->npass,
3dd2705a 561 time(NULL) - endtime + howlong);
d0e82db1
ES
562
563 /* record where we left off */
564 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
565 if (fd == -1) {
566 fsrprintf(_("open(%s) failed: %s\n"),
567 leftofffile, strerror(errno));
568 } else {
c988ea91
CH
569 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
570 fs->npass, (unsigned long long)leftoffino);
571 if (write(fd, buf, ret) < strlen(buf))
572 fsrprintf(_("write(%s) failed: %s\n"),
573 leftofffile, strerror(errno));
574 close(fd);
575 }
576 }
c988ea91
CH
577}
578
937e3dd1
CB
579/*
580 * To compare bstat structs for qsort.
581 */
582static int
583cmp(const void *s1, const void *s2)
584{
4d4ea220
CB
585 const struct xfs_bulkstat *bs1 = s1;
586 const struct xfs_bulkstat *bs2 = s2;
587
588 ASSERT((bs1->bs_version == XFS_BULKSTAT_VERSION_V1 &&
589 bs2->bs_version == XFS_BULKSTAT_VERSION_V1) ||
590 (bs1->bs_version == XFS_BULKSTAT_VERSION_V5 &&
591 bs2->bs_version == XFS_BULKSTAT_VERSION_V5));
592
593 return (bs2->bs_extents - bs1->bs_extents);
937e3dd1
CB
594}
595
c988ea91
CH
596/*
597 * fsrfs -- reorganize a file system
598 */
599static int
600fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
601{
f31b5e12
DW
602 struct xfs_fd fsxfd = XFS_FD_INIT_EMPTY;
603 int fd;
c988ea91
CH
604 int count = 0;
605 int ret;
c988ea91
CH
606 char fname[64];
607 char *tname;
608 jdm_fshandle_t *fshandlep;
4cca629d 609 struct xfs_bulkstat_req *breq;
c988ea91
CH
610
611 fsrprintf(_("%s start inode=%llu\n"), mntdir,
612 (unsigned long long)startino);
613
614 fshandlep = jdm_getfshandle( mntdir );
615 if ( ! fshandlep ) {
616 fsrprintf(_("unable to get handle: %s: %s\n"),
617 mntdir, strerror( errno ));
618 return -1;
619 }
620
03d96c64 621 ret = -xfd_open(&fsxfd, mntdir, O_RDONLY);
9612817d 622 if (ret) {
248af7cb
DW
623 fsrprintf(_("unable to open XFS file: %s: %s\n"),
624 mntdir, strerror(ret));
e3e2793d 625 free(fshandlep);
c988ea91
CH
626 return -1;
627 }
f31b5e12 628 memcpy(&fsgeom, &fsxfd.fsgeom, sizeof(fsgeom));
c988ea91
CH
629
630 tmp_init(mntdir);
631
e6542132
DW
632 ret = -xfrog_bulkstat_alloc_req(GRABSZ, startino, &breq);
633 if (ret) {
634 fsrprintf(_("Skipping %s: %s\n"), mntdir, strerror(ret));
4cca629d
DW
635 xfd_close(&fsxfd);
636 free(fshandlep);
637 return -1;
638 }
639
e6542132 640 while ((ret = -xfrog_bulkstat(&fsxfd, breq) == 0)) {
4cca629d
DW
641 struct xfs_bstat bs1;
642 struct xfs_bulkstat *buf = breq->bulkstat;
643 struct xfs_bulkstat *p;
644 struct xfs_bulkstat *endp;
645 uint32_t buflenout = breq->hdr.ocount;
c988ea91
CH
646
647 if (buflenout == 0)
648 goto out0;
649
650 /* Each loop through, defrag targetrange percent of the files */
651 count = (buflenout * targetrange) / 100;
652
4cca629d 653 qsort((char *)buf, buflenout, sizeof(struct xfs_bulkstat), cmp);
c988ea91
CH
654
655 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
656 /* Do some obvious checks now */
657 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
658 (p->bs_extents < 2))
659 continue;
660
e6542132 661 ret = -xfrog_bulkstat_v5_to_v1(&fsxfd, &bs1, p);
4cca629d
DW
662 if (ret) {
663 fsrprintf(_("bstat conversion error: %s\n"),
664 strerror(ret));
665 continue;
666 }
667
668 fd = jdm_open(fshandlep, &bs1, O_RDWR | O_DIRECT);
108e985b 669 if (fd < 0) {
c988ea91
CH
670 /* This probably means the file was
671 * removed while in progress of handling
672 * it. Just quietly ignore this file.
673 */
674 if (dflag)
675 fsrprintf(_("could not open: "
676 "inode %llu\n"), p->bs_ino);
677 continue;
678 }
679
680 /* Don't know the pathname, so make up something */
681 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
682
683 /* Get a tmp file name */
684 tname = tmp_next(mntdir);
685
4cca629d 686 ret = fsrfile_common(fname, tname, mntdir, fd, &bs1);
c988ea91
CH
687
688 leftoffino = p->bs_ino;
689
690 close(fd);
691
692 if (ret == 0) {
693 if (--count <= 0)
694 break;
695 }
696 }
3dd2705a 697 if (endtime && endtime < time(NULL)) {
4cca629d 698 free(breq);
c988ea91 699 tmp_close(mntdir);
f31b5e12 700 xfd_close(&fsxfd);
c988ea91
CH
701 fsrall_cleanup(1);
702 exit(1);
703 }
704 }
f31b5e12
DW
705 if (ret)
706 fsrprintf(_("%s: bulkstat: %s\n"), progname, strerror(ret));
c988ea91 707out0:
4cca629d 708 free(breq);
c988ea91 709 tmp_close(mntdir);
f31b5e12 710 xfd_close(&fsxfd);
e3e2793d 711 free(fshandlep);
c988ea91
CH
712 return 0;
713}
714
c988ea91
CH
/*
 * Placeholder for reorganizing by directory hierarchy: nothing is
 * defragmented, the function only reports that directories are not
 * supported.
 */
static void
fsrdir(char *dirname)
{
	const char	*msg = _("%s: Directory defragmentation not supported\n");

	fsrprintf(msg, dirname);
}
725
726/*
727 * Sets up the defragmentation of a file based on the
728 * filepath. It collects the bstat information, does
729 * an open on the file and passes this all to fsrfile_common.
730 */
731static int
f31b5e12
DW
732fsrfile(
733 char *fname,
734 xfs_ino_t ino)
c988ea91 735{
f31b5e12 736 struct xfs_fd fsxfd = XFS_FD_INIT_EMPTY;
4cca629d 737 struct xfs_bulkstat bulkstat;
f31b5e12
DW
738 struct xfs_bstat statbuf;
739 jdm_fshandle_t *fshandlep;
740 int fd = -1;
741 int error = -1;
742 char *tname;
c988ea91
CH
743
744 fshandlep = jdm_getfshandle(getparent (fname) );
4f10a2fb 745 if (!fshandlep) {
c988ea91
CH
746 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
747 fname, strerror(errno));
4f10a2fb 748 goto out;
c988ea91
CH
749 }
750
751 /*
752 * Need to open something on the same filesystem as the
753 * file. Open the parent.
754 */
03d96c64 755 error = -xfd_open(&fsxfd, getparent(fname), O_RDONLY);
f31b5e12 756 if (error) {
248af7cb
DW
757 fsrprintf(_("unable to open sys handle for XFS file %s: %s\n"),
758 fname, strerror(error));
f31b5e12
DW
759 goto out;
760 }
761
e6542132 762 error = -xfrog_bulkstat_single(&fsxfd, ino, 0, &bulkstat);
f31b5e12 763 if (error) {
c988ea91 764 fsrprintf(_("unable to get bstat on %s: %s\n"),
f31b5e12 765 fname, strerror(error));
4f10a2fb 766 goto out;
c988ea91 767 }
e6542132 768 error = -xfrog_bulkstat_v5_to_v1(&fsxfd, &statbuf, &bulkstat);
4cca629d
DW
769 if (error) {
770 fsrprintf(_("bstat conversion error on %s: %s\n"),
771 fname, strerror(error));
772 goto out;
773 }
c988ea91 774
108e985b 775 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
776 if (fd < 0) {
777 fsrprintf(_("unable to open handle %s: %s\n"),
778 fname, strerror(errno));
4f10a2fb 779 goto out;
c988ea91
CH
780 }
781
f31b5e12
DW
782 /* Stash the fs geometry for general use. */
783 memcpy(&fsgeom, &fsxfd.fsgeom, sizeof(fsgeom));
c988ea91 784
c988ea91
CH
785 tname = gettmpname(fname);
786
787 if (tname)
788 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
789
4f10a2fb 790out:
f31b5e12 791 xfd_close(&fsxfd);
4f10a2fb
ES
792 if (fd >= 0)
793 close(fd);
794 free(fshandlep);
c988ea91
CH
795
796 return error;
797}
798
799
800/*
801 * This is the common defrag code for either a full fs
802 * defragmentation or a single file. Check as much as
803 * possible with the file, fork a process to setuid to the
804 * target file owner's uid and defragment the file.
805 * This is done so the new extents created in a tmp file are
806 * reflected in the owners' quota without having to do any
807 * special code in the kernel. When the existing extents
808 * are removed, the quotas will be correct. It's ugly but
809 * it saves us from doing some quota re-construction in
810 * the extent swap. The price is that the defragmentation
811 * will fail if the owner of the target file is already at
812 * their quota limit.
813 */
814static int
815fsrfile_common(
816 char *fname,
817 char *tname,
818 char *fsname,
819 int fd,
b46789e2 820 struct xfs_bstat *statp)
c988ea91
CH
821{
822 int error;
09d38d96 823 struct statvfs vfss;
c988ea91
CH
824 struct fsxattr fsx;
825 unsigned long bsize;
826
827 if (vflag)
828 fsrprintf("%s\n", fname);
829
830 if (fsync(fd) < 0) {
831 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
832 return -1;
833 }
834
835 if (statp->bs_size == 0) {
836 if (vflag)
837 fsrprintf(_("%s: zero size, ignoring\n"), fname);
838 return(0);
839 }
840
841 /* Check if a mandatory lock is set on the file to try and
842 * avoid blocking indefinitely on the reads later. Note that
843 * someone could still set a mandatory lock after this check
844 * but before all reads have completed to block fsr reads.
845 * This change just closes the window a bit.
846 */
847 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
848 struct flock fl;
849
850 fl.l_type = F_RDLCK;
851 fl.l_whence = SEEK_SET;
852 fl.l_start = (off_t)0;
853 fl.l_len = 0;
854 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
855 if (vflag)
856 fsrprintf(_("locking check failed: %s\n"),
857 fname);
858 return(-1);
859 }
860 if (fl.l_type != F_UNLCK) {
861 /* Mandatory lock is set */
862 if (vflag)
863 fsrprintf(_("mandatory lock: %s: ignoring\n"),
864 fname);
865 return(-1);
866 }
867 }
868
869 /*
870 * Check if there is room to copy the file.
871 *
872 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
873 * not the optimal I/O size as struct stat.
874 */
09d38d96 875 if (statvfs(fsname ? fsname : fname, &vfss) < 0) {
c988ea91
CH
876 fsrprintf(_("unable to get fs stat on %s: %s\n"),
877 fname, strerror(errno));
878 return -1;
879 }
880 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
881 if (statp->bs_blksize * statp->bs_blocks >
882 vfss.f_bfree * bsize - minimumfree) {
883 fsrprintf(_("insufficient freespace for: %s: "
884 "size=%lld: ignoring\n"), fname,
885 statp->bs_blksize * statp->bs_blocks);
886 return 1;
887 }
888
83f4b5ac 889 if ((ioctl(fd, FS_IOC_FSGETXATTR, &fsx)) < 0) {
c988ea91
CH
890 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
891 return(-1);
892 }
83f4b5ac 893 if (fsx.fsx_xflags & (FS_XFLAG_IMMUTABLE|FS_XFLAG_APPEND)) {
c988ea91
CH
894 if (vflag)
895 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
896 return(0);
897 }
83f4b5ac 898 if (fsx.fsx_xflags & FS_XFLAG_NODEFRAG) {
c988ea91
CH
899 if (vflag)
900 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
901 fname);
902 return(0);
903 }
83f4b5ac 904 if (fsx.fsx_xflags & FS_XFLAG_REALTIME) {
c988ea91
CH
905 if (xfs_getrt(fd, &vfss) < 0) {
906 fsrprintf(_("cannot get realtime geometry for: %s\n"),
907 fname);
908 return(-1);
909 }
910 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
911 fsrprintf(_("low on realtime free space: %s: "
912 "ignoring file\n"), fname);
913 return(-1);
914 }
915 }
916
917 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
918 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
919 return -1;
920 }
921
922 /*
923 * Previously the code forked here, & the child changed it's uid to
924 * that of the file's owner and then called packfile(), to keep
925 * quota counts correct. (defragged files could use fewer blocks).
926 *
927 * Instead, just fchown() the temp file to the uid,gid of the
928 * file we're defragging, in packfile().
929 */
930
931 if ((error = packfile(fname, tname, fd, statp, &fsx)))
932 return error;
933 return -1; /* no error */
934}
935
bdb041f5
DC
/*
 * Attempt to set the attr fork up correctly. This is simple for attr1
 * filesystems as they have a fixed inode fork offset. In that case
 * just create an attribute and that's all we need to do.
 *
 * For attr2 filesystems, see if we have the actual fork offset in
 * the bstat structure. If so, just create additional attributes on
 * the temporary inode until the offset matches.
 *
 * If it doesn't exist, we can only do best effort. Add an attribute at a time
 * to move the inode fork around, but take into account that the attribute
 * might be too small to move the fork every time we add one.  This should
 * hopefully put the fork offset in the right place. It's not a big deal if we
 * don't get it right - the kernel will reject it when we try to swap extents.
 *
 * fd      descriptor of the target file (not used by this function)
 * tfd     descriptor of the temporary file that receives the attributes
 * bstatp  bulkstat record of the target file
 *
 * Returns 0 when done (including best-effort give-ups, and always when
 * built without HAVE_FSETXATTR), -1 when an fstat, bulkstat or
 * fsetxattr call fails or the fork offset difference is implausible.
 */
static int
fsr_setup_attr_fork(
	int		fd,
	int		tfd,
	struct xfs_bstat *bstatp)
{
#ifdef HAVE_FSETXATTR
	struct xfs_fd	txfd = XFS_FD_INIT(tfd);
	struct stat	tstatbuf;
	int		i;
	int		diff = 0;
	int		last_forkoff = 0;
	int		no_change_cnt = 0;
	int		ret;

	if (!(bstatp->bs_xflags & FS_XFLAG_HASATTR))
		return 0;

	/*
	 * use the old method if we have attr1 or the kernel does not yet
	 * support passing the fork offset in the bulkstat data.
	 */
	if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
	    bstatp->bs_forkoff == 0) {
		/* attr1 */
		ret = fsetxattr(txfd.fd, "user.X", "X", 1, XATTR_CREATE);
		if (ret) {
			fsrprintf(_("could not set ATTR\n"));
			return -1;
		}
		goto out;
	}

	/* attr2 w/ fork offsets */

	if (fstat(txfd.fd, &tstatbuf) < 0) {
		fsrprintf(_("unable to stat temp file: %s\n"),
					strerror(errno));
		return -1;
	}

	i = 0;
	do {
		struct xfs_bulkstat	tbstat;
		char			name[64];

		/*
		 * bulkstat the temp inode to see what the forkoff is.  Use
		 * this to compare against the target and determine what we
		 * need to do.
		 */
		ret = -xfrog_bulkstat_single(&txfd, tstatbuf.st_ino, 0,
				&tbstat);
		if (ret) {
			fsrprintf(_("unable to get bstat on temp file: %s\n"),
						strerror(ret));
			return -1;
		}
		if (dflag)
			fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
				bstatp->bs_forkoff, tbstat.bs_forkoff);
		diff = tbstat.bs_forkoff - bstatp->bs_forkoff;

		/* if they are equal, we are done */
		if (!diff)
			goto out;

		snprintf(name, sizeof(name), "user.%d", i);

		/*
		 * If there is no attribute, then we need to create one to get
		 * an attribute fork at the default location.
		 */
		if (!tbstat.bs_forkoff) {
			ASSERT(i == 0);
			ret = fsetxattr(txfd.fd, name, "XX", 2, XATTR_CREATE);
			if (ret) {
				fsrprintf(_("could not set ATTR\n"));
				return -1;
			}
			continue;
		} else if (i == 0) {
			/*
			 * First pass, and temp file already has an inline
			 * xattr, probably due to selinux.
			 *
			 * It's *possible* that the temp file attr area
			 * is larger than the target file's:
			 *
			 *  Target		 Temp
			 * +-------+ 0		+-------+ 0
			 * |	   |		|	|
			 * |	   |		| Data	|
			 * | Data  |		|	|
			 * |	   |		v-------v forkoff
			 * |	   |		|	|
			 * v-------v forkoff	| Attr	| local
			 * | Attr  |		|	|
			 * +-------+		+-------+
			 */

			/*
			 * If target attr area is less than the temp's
			 * (diff < 0) write a big attr to the temp file to knock
			 * the attr out of local format.
			 * (This should actually *increase* the temp file's
			 * forkoffset when the attr moves out of the inode)
			 */
			if (diff < 0) {
				char val[2048];
				memset(val, 'X', 2048);
				if (fsetxattr(txfd.fd, name, val, 2048, 0)) {
					fsrprintf(_("big ATTR set failed\n"));
					return -1;
				}
				/* Go back & see where we're at now */
				continue;
			}
		}

		/*
		 * make a progress check so we don't get stuck trying to extend
		 * a large btree form attribute fork.
		 */
		if (last_forkoff == tbstat.bs_forkoff) {
			if (no_change_cnt++ > 10)
				break;
		} else /* progress! */
			no_change_cnt = 0;
		last_forkoff = tbstat.bs_forkoff;

		/* work out which way to grow the fork */
		if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
			fsrprintf(_("forkoff diff %d too large!\n"), diff);
			return -1;
		}

		/*
		 * if the temp inode fork offset is still smaller then we have
		 * to grow the data fork
		 */
		if (diff < 0) {
			/*
			 * create some temporary extents in the inode to move
			 * the fork in the direction we need. This can be done
			 * by preallocating some single block extents at
			 * non-contiguous offsets.
			 */
			/* XXX: unimplemented! */
			/* logged via fsrprintf like every other message here */
			if (dflag)
				fsrprintf(_("data fork growth unimplemented\n"));
			goto out;
		}

		/* we need to grow the attr fork, so create another attr */
		ret = fsetxattr(txfd.fd, name, "XX", 2, XATTR_CREATE);
		if (ret) {
			fsrprintf(_("could not set ATTR\n"));
			return -1;
		}

	} while (++i < 100); /* don't go forever */

out:
	if (dflag)
		fsrprintf(_("set temp attr\n"));
	/* We failed to resolve the fork difference */
	if (dflag && diff)
		fsrprintf(_("failed to match fork offset\n"));

#endif /* HAVE_FSETXATTR */
	return 0;
}
c988ea91
CH
1125
1126/*
1127 * Do the defragmentation of a single file.
1128 * We already are pretty sure we can and want to
1129 * defragment the file. Create the tmp file, copy
1130 * the data (maintaining holes) and call the kernel
671632c6
ES
1131 * extent swap routine.
1132 *
1133 * Return values:
1134 * -1: Some error was encountered
1135 * 0: Successfully defragmented the file
1136 * 1: No change / No Error
c988ea91
CH
1137 */
1138static int
1139packfile(char *fname, char *tname, int fd,
b46789e2 1140 struct xfs_bstat *statp, struct fsxattr *fsxp)
c988ea91 1141{
671632c6 1142 int tfd = -1;
c988ea91 1143 int srval;
671632c6 1144 int retval = -1; /* Failure is the default */
c988ea91
CH
1145 int nextents, extent, cur_nextents, new_nextents;
1146 unsigned blksz_dio;
1147 unsigned dio_min;
1148 struct dioattr dio;
1149 static xfs_swapext_t sx;
1150 struct xfs_flock64 space;
1151 off64_t cnt, pos;
671632c6 1152 void *fbuf = NULL;
c988ea91
CH
1153 int ct, wc, wc_b4;
1154 char ffname[SMBUFSZ];
1155 int ffd = -1;
1156
1157 /*
1158 * Work out the extent map - nextents will be set to the
1159 * minimum number of extents needed for the file (taking
1160 * into account holes), cur_nextents is the current number
1161 * of extents.
1162 */
1163 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1164
1165 if (cur_nextents == 1 || cur_nextents <= nextents) {
1166 if (vflag)
1167 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1168 retval = 1; /* indicates no change/no error */
1169 goto out;
c988ea91
CH
1170 }
1171
1172 if (dflag)
1173 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1174 fname, cur_nextents, (cur_nextents - nextents),
1175 tname);
1176
1177 if ((tfd = open(tname, openopts, 0666)) < 0) {
1178 if (vflag)
1179 fsrprintf(_("could not open tmp file: %s: %s\n"),
1180 tname, strerror(errno));
671632c6 1181 goto out;
c988ea91
CH
1182 }
1183 unlink(tname);
1184
1185 /* Setup extended attributes */
bdb041f5
DC
1186 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1187 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1188 goto out;
c988ea91
CH
1189 }
1190
1191 /* Setup extended inode flags, project identifier, etc */
1192 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
83f4b5ac 1193 if (ioctl(tfd, FS_IOC_FSSETXATTR, fsxp) < 0) {
c988ea91
CH
1194 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1195 tname);
671632c6 1196 goto out;
c988ea91
CH
1197 }
1198 }
1199
1200 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1201 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1202 goto out;
c988ea91
CH
1203 }
1204
1205 dio_min = dio.d_miniosz;
1206 if (statp->bs_size <= dio_min) {
1207 blksz_dio = dio_min;
1208 } else {
1209 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1210 if (argv_blksz_dio != 0)
1211 blksz_dio = min(argv_blksz_dio, blksz_dio);
1212 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1213 }
1214
1215 if (dflag) {
1216 fsrprintf(_("DEBUG: "
1217 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1218 statp->bs_size, blksz_dio, dio.d_miniosz,
1219 dio.d_maxiosz, pagesize);
1220 }
1221
1222 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1223 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1224 goto out;
c988ea91
CH
1225 }
1226
1227 if (nfrags) {
1228 /* Create new tmp file in same AG as first */
1229 sprintf(ffname, "%s.frag", tname);
1230
1231 /* Open the new file for sync writes */
1232 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1233 fsrprintf(_("could not open fragfile: %s : %s\n"),
1234 ffname, strerror(errno));
671632c6 1235 goto out;
c988ea91
CH
1236 }
1237 unlink(ffname);
1238 }
1239
1240 /* Loop through block map allocating new extents */
1241 for (extent = 0; extent < nextents; extent++) {
1242 pos = outmap[extent].bmv_offset;
1243 if (outmap[extent].bmv_block == -1) {
1244 space.l_whence = SEEK_SET;
1245 space.l_start = pos;
1246 space.l_len = outmap[extent].bmv_length;
1247 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1248 fsrprintf(_("could not trunc tmp %s\n"),
1249 tname);
1250 }
dc8878f4 1251 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1252 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1253 tname, strerror(errno));
1254 goto out;
1255 }
c988ea91
CH
1256 continue;
1257 } else if (outmap[extent].bmv_length == 0) {
1258 /* to catch holes at the beginning of the file */
1259 continue;
1260 }
1261 if (! nfrags) {
1262 space.l_whence = SEEK_CUR;
1263 space.l_start = 0;
1264 space.l_len = outmap[extent].bmv_length;
1265
1266 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1267 fsrprintf(_("could not pre-allocate tmp space:"
1268 " %s\n"), tname);
671632c6 1269 goto out;
c988ea91 1270 }
dc8878f4 1271 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1272 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1273 tname, strerror(errno));
1274 goto out;
1275 }
c988ea91
CH
1276 }
1277 } /* end of space allocation loop */
1278
dc8878f4 1279 if (lseek(tfd, 0, SEEK_SET)) {
c988ea91 1280 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1281 goto out;
c988ea91
CH
1282 }
1283
1284 /* Check if the temporary file has fewer extents */
1285 new_nextents = getnextents(tfd);
1286 if (dflag)
1287 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1288 if (cur_nextents <= new_nextents) {
1289 if (vflag)
1290 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1291 retval = 1; /* no change/no error */
1292 goto out;
c988ea91
CH
1293 }
1294
1295 /* Loop through block map copying the file. */
1296 for (extent = 0; extent < nextents; extent++) {
1297 pos = outmap[extent].bmv_offset;
1298 if (outmap[extent].bmv_block == -1) {
dc8878f4 1299 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1300 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1301 tname, strerror(errno));
1302 goto out;
1303 }
dc8878f4 1304 if (lseek(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1305 fsrprintf(_("could not lseek in file: %s : %s\n"),
1306 fname, strerror(errno));
1307 goto out;
1308 }
c988ea91
CH
1309 continue;
1310 } else if (outmap[extent].bmv_length == 0) {
1311 /* to catch holes at the beginning of the file */
1312 continue;
1313 }
1314 for (cnt = outmap[extent].bmv_length; cnt > 0;
1315 cnt -= ct, pos += ct) {
1316 if (nfrags && --nfrags) {
1317 ct = min(cnt, dio_min);
1318 } else if (cnt % dio_min == 0) {
1319 ct = min(cnt, blksz_dio);
1320 } else {
1321 ct = min(cnt + dio_min - (cnt % dio_min),
1322 blksz_dio);
1323 }
1324 ct = read(fd, fbuf, ct);
1325 if (ct == 0) {
1326 /* EOF, stop trying to read */
1327 extent = nextents;
1328 break;
1329 }
1330 /* Ensure we do direct I/O to correct block
1331 * boundaries.
1332 */
1333 if (ct % dio_min != 0) {
1334 wc = ct + dio_min - (ct % dio_min);
1335 } else {
1336 wc = ct;
1337 }
1338 wc_b4 = wc;
1339 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1340 if (ct < 0)
1341 fsrprintf(_("bad read of %d bytes "
1342 "from %s: %s\n"), wc_b4,
1343 fname, strerror(errno));
1344 else if (wc < 0)
1345 fsrprintf(_("bad write of %d bytes "
1346 "to %s: %s\n"), wc_b4,
1347 tname, strerror(errno));
1348 else {
1349 /*
1350 * Might be out of space
1351 *
1352 * Try to finish write
1353 */
1354 int resid = ct-wc;
1355
1356 if ((wc = write(tfd, ((char *)fbuf)+wc,
1357 resid)) == resid) {
1358 /* worked on second attempt? */
1359 continue;
1360 }
1361 else if (wc < 0) {
1362 fsrprintf(_("bad write2 of %d "
1363 "bytes to %s: %s\n"),
1364 resid, tname,
1365 strerror(errno));
1366 } else {
1367 fsrprintf(_("bad copy to %s\n"),
1368 tname);
1369 }
1370 }
671632c6 1371 goto out;
c988ea91
CH
1372 }
1373 if (nfrags) {
1374 /* Do a matching write to the tmp file */
431ec4e6 1375 wc_b4 = wc;
c988ea91
CH
1376 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1377 fsrprintf(_("bad write of %d bytes "
1378 "to %s: %s\n"),
1379 wc_b4, ffname, strerror(errno));
1380 }
1381 }
1382 }
1383 }
dde67673 1384 if (ftruncate(tfd, statp->bs_size) < 0) {
3d303baa
ES
1385 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1386 fname, strerror(errno));
1387 goto out;
1388 }
1389 if (fsync(tfd) < 0) {
1390 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1391 fname, strerror(errno));
1392 goto out;
1393 }
c988ea91 1394
c988ea91
CH
1395 sx.sx_stat = *statp; /* struct copy */
1396 sx.sx_version = XFS_SX_VERSION;
1397 sx.sx_fdtarget = fd;
1398 sx.sx_fdtmp = tfd;
1399 sx.sx_offset = 0;
1400 sx.sx_length = statp->bs_size;
1401
1402 /* switch to the owner's id, to keep quota in line */
1403 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1404 if (vflag)
1405 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1406 tname, strerror(errno));
671632c6 1407 goto out;
c988ea91
CH
1408 }
1409
1410 /* Swap the extents */
1411 srval = xfs_swapext(fd, &sx);
1412 if (srval < 0) {
1413 if (errno == ENOTSUP) {
1414 if (vflag || dflag)
1415 fsrprintf(_("%s: file type not supported\n"), fname);
1416 } else if (errno == EFAULT) {
1417 /* The file has changed since we started the copy */
1418 if (vflag || dflag)
1419 fsrprintf(_("%s: file modified defrag aborted\n"),
1420 fname);
1421 } else if (errno == EBUSY) {
1422 /* Timestamp has changed or mmap'ed file */
1423 if (vflag || dflag)
1424 fsrprintf(_("%s: file busy\n"), fname);
1425 } else {
1426 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1427 fname, strerror(errno));
1428 }
671632c6 1429 goto out;
c988ea91
CH
1430 }
1431
1432 /* Report progress */
1433 if (vflag)
1434 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1435 cur_nextents, new_nextents,
1436 (new_nextents <= nextents ? "DONE" : " " ),
1437 fname);
671632c6
ES
1438 retval = 0;
1439
1440out:
1441 free(fbuf);
1442 if (tfd != -1)
1443 close(tfd);
1444 if (ffd != -1)
1445 close(ffd);
1446 return retval;
c988ea91
CH
1447}
1448
1449char *
1450gettmpname(char *fname)
1451{
1452 static char buf[PATH_MAX+1];
1453 char sbuf[SMBUFSZ];
1454 char *ptr;
1455
1456 sprintf(sbuf, "/.fsr%d", getpid());
1457
6063feca
ES
1458 strncpy(buf, fname, PATH_MAX);
1459 buf[PATH_MAX] = '\0';
c988ea91
CH
1460 ptr = strrchr(buf, '/');
1461 if (ptr) {
1462 *ptr = '\0';
1463 } else {
1464 strcpy(buf, ".");
1465 }
1466
1467 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1468 fsrprintf(_("tmp file name too long: %s\n"), fname);
1469 return(NULL);
1470 }
1471
1472 strcat(buf, sbuf);
1473
1474 return(buf);
1475}
1476
/*
 * Return the directory part of fname: everything before the last '/'.
 * A path whose only '/' is the leading one yields "/", and a path with
 * no '/' at all yields ".".  Input longer than PATH_MAX is truncated.
 *
 * The result lives in a static buffer that the next call overwrites.
 */
char *
getparent(char *fname)
{
	static char	parent[PATH_MAX + 1];
	char		*slash;

	snprintf(parent, sizeof(parent), "%s", fname);

	slash = strrchr(parent, '/');
	if (slash == NULL) {
		strcpy(parent, ".");
		return parent;
	}

	/* Keep the slash itself when it is the root directory. */
	if (slash == parent)
		slash++;
	*slash = '\0';
	return parent;
}
1496
1497/*
1498 * Read in block map of the input file, coalesce contiguous
1499 * extents into a single range, keep all holes. Convert from 512 byte
1500 * blocks to bytes.
1501 *
1502 * This code was borrowed from mv.c with some minor mods.
1503 */
1504#define MAPSIZE 128
1505#define OUTMAP_SIZE_INCREMENT MAPSIZE
1506
b46789e2 1507int read_fd_bmap(int fd, struct xfs_bstat *sin, int *cur_nextents)
c988ea91
CH
1508{
1509 int i, cnt;
1510 struct getbmap map[MAPSIZE];
1511
1512#define BUMP_CNT \
1513 if (++cnt >= outmap_size) { \
1514 outmap_size += OUTMAP_SIZE_INCREMENT; \
1515 outmap = (struct getbmap *)realloc(outmap, \
1516 outmap_size*sizeof(*outmap)); \
1517 if (outmap == NULL) { \
1518 fsrprintf(_("realloc failed: %s\n"), \
1519 strerror(errno)); \
1520 exit(1); \
1521 } \
1522 }
1523
1524 /* Initialize the outmap array. It always grows - never shrinks.
1525 * Left-over memory allocation is saved for the next files.
1526 */
1527 if (outmap_size == 0) {
1528 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1529 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1530 if (!outmap) {
1531 fsrprintf(_("malloc failed: %s\n"),
1532 strerror(errno));
1533 exit(1);
1534 }
1535 }
1536
1537 outmap[0].bmv_block = 0;
1538 outmap[0].bmv_offset = 0;
1539 outmap[0].bmv_length = sin->bs_size;
1540
1541 /*
1542 * If a non regular file is involved then forget holes
1543 */
1544
1545 if (!S_ISREG(sin->bs_mode))
1546 return(1);
1547
1548 outmap[0].bmv_length = 0;
1549
1550 map[0].bmv_offset = 0;
1551 map[0].bmv_block = 0;
1552 map[0].bmv_entries = 0;
1553 map[0].bmv_count = MAPSIZE;
1554 map[0].bmv_length = -1;
1555
1556 cnt = 0;
1557 *cur_nextents = 0;
1558
1559 do {
1560 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1561 fsrprintf(_("failed reading extents: inode %llu"),
1562 (unsigned long long)sin->bs_ino);
1563 exit(1);
1564 }
1565
1566 /* Concatenate extents together and replicate holes into
1567 * the output map.
1568 */
1569 *cur_nextents += map[0].bmv_entries;
1570 for (i = 0; i < map[0].bmv_entries; i++) {
1571 if (map[i + 1].bmv_block == -1) {
1572 BUMP_CNT;
1573 outmap[cnt] = map[i+1];
1574 } else if (outmap[cnt].bmv_block == -1) {
1575 BUMP_CNT;
1576 outmap[cnt] = map[i+1];
1577 } else {
1578 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1579 }
1580 }
1581 } while (map[0].bmv_entries == (MAPSIZE-1));
1582 for (i = 0; i <= cnt; i++) {
1583 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1584 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1585 }
1586
1587 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1588
1589 return(cnt+1);
1590}
1591
1592/*
1593 * Read the block map and return the number of extents.
1594 */
00ff2b10 1595static int
c988ea91
CH
1596getnextents(int fd)
1597{
1598 int nextents;
1599 struct getbmap map[MAPSIZE];
1600
1601 map[0].bmv_offset = 0;
1602 map[0].bmv_block = 0;
1603 map[0].bmv_entries = 0;
1604 map[0].bmv_count = MAPSIZE;
1605 map[0].bmv_length = -1;
1606
1607 nextents = 0;
1608
1609 do {
1610 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1611 fsrprintf(_("failed reading extents"));
1612 exit(1);
1613 }
1614
1615 nextents += map[0].bmv_entries;
1616 } while (map[0].bmv_entries == (MAPSIZE-1));
1617
1618 return(nextents);
1619}
1620
c988ea91
CH
1621/*
1622 * Get xfs realtime space information
1623 */
1624int
09d38d96 1625xfs_getrt(int fd, struct statvfs *sfbp)
c988ea91
CH
1626{
1627 unsigned long bsize;
1628 unsigned long factor;
1629 xfs_fsop_counts_t cnt;
1630
1631 if (!fsgeom.rtblocks)
1632 return -1;
1633
1634 if (xfs_fscounts(fd, &cnt) < 0) {
1635 close(fd);
1636 return -1;
1637 }
1638 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
1639 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1640 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1641 return 0;
1642}
1643
1644int
1645fsrprintf(const char *fmt, ...)
1646{
1647 va_list ap;
1648
1649 va_start(ap, fmt);
1650 if (gflag) {
1651 static int didopenlog;
1652 if (!didopenlog) {
1653 openlog("fsr", LOG_PID, LOG_USER);
1654 didopenlog = 1;
1655 }
1656 vsyslog(LOG_INFO, fmt, ap);
1657 } else
1658 vprintf(fmt, ap);
1659 va_end(ap);
1660 return 0;
1661}
1662
c988ea91
CH
1663/*
1664 * Initialize a directory for tmp file use. This is used
1665 * by the full filesystem defragmentation when we're walking
1666 * the inodes and do not know the path for the individual
1667 * files. Multiple directories are used to spread out the
1668 * tmp data around to different ag's (since file data is
1669 * usually allocated to the same ag as the directory and
1670 * directories allocated round robin from the same
1671 * parent directory).
1672 */
1673static void
1674tmp_init(char *mnt)
1675{
1676 int i;
1677 static char buf[SMBUFSZ];
1678 mode_t mask;
1679
1680 tmp_agi = 0;
1681 sprintf(buf, "%s/.fsr", mnt);
1682
1683 mask = umask(0);
1684 if (mkdir(buf, 0700) < 0) {
1685 if (errno == EEXIST) {
1686 if (dflag)
1687 fsrprintf(_("tmpdir already exists: %s\n"),
1688 buf);
1689 } else {
1690 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1691 buf, strerror(errno));
1692 exit(-1);
1693 }
1694 }
1695 for (i=0; i < fsgeom.agcount; i++) {
1696 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1697 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1698 if (errno == EEXIST) {
1699 if (dflag)
1700 fsrprintf(
1701 _("tmpdir already exists: %s\n"), buf);
1702 } else {
1703 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1704 buf, strerror(errno));
1705 exit(-1);
1706 }
1707 }
1708 }
1709 (void)umask(mask);
1710 return;
1711}
1712
1713static char *
1714tmp_next(char *mnt)
1715{
1716 static char buf[SMBUFSZ];
1717
1718 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1719 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1720 tmp_agi,
1721 getpid());
1722
1723 if (++tmp_agi == fsgeom.agcount)
1724 tmp_agi = 0;
1725
1726 return(buf);
1727}
1728
1729static void
1730tmp_close(char *mnt)
1731{
1732 static char buf[SMBUFSZ];
1733 int i;
1734
1735 /* No data is ever actually written so we can just do rmdir's */
1736 for (i=0; i < fsgeom.agcount; i++) {
1737 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1738 if (rmdir(buf) < 0) {
1739 if (errno != ENOENT) {
1740 fsrprintf(
1741 _("could not remove tmpdir: %s: %s\n"),
1742 buf, strerror(errno));
1743 }
1744 }
1745 }
1746 sprintf(buf, "%s/.fsr", mnt);
1747 if (rmdir(buf) < 0) {
1748 if (errno != ENOENT) {
1749 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1750 buf, strerror(errno));
1751 }
1752 }
1753}