]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
xfs_fsr: refactor mountpoint finding to use libfrog paths functions
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
c988ea91
CH
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "xfs.h"
21#include "xfs_types.h"
22#include "jdm.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_attr_sf.h"
938f7b70 25#include "path.h"
c988ea91
CH
26
27#include <fcntl.h>
28#include <errno.h>
c988ea91
CH
29#include <syslog.h>
30#include <signal.h>
31#include <sys/ioctl.h>
32#include <sys/wait.h>
c988ea91
CH
33#include <sys/statvfs.h>
34#include <sys/xattr.h>
0e2fb84a 35#include <paths.h>
c988ea91 36
89e4b5bd
CH
37#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
38#define _PATH_PROC_MOUNTS "/proc/mounts"
39
c988ea91
CH
40
41char *progname;
42
43int vflag;
44int gflag;
45static int Mflag;
46/* static int nflag; */
47int dflag = 0;
48/* static int sflag; */
49int argv_blksz_dio;
50extern int max_ext_size;
51static int npasses = 10;
52static int startpass = 0;
53
54struct getbmap *outmap = NULL;
55int outmap_size = 0;
56int RealUid;
57int tmp_agi;
14f8b681 58static int64_t minimumfree = 2048;
c988ea91
CH
59
60#define MNTTYPE_XFS "xfs"
61
62#define SMBUFSZ 1024
63#define ROOT 0
64#define NULLFD -1
65#define GRABSZ 64
66#define TARGETRANGE 10
67#define V_NONE 0
68#define V_OVERVIEW 1
69#define V_ALL 2
70#define BUFFER_SIZE (1<<16)
71#define BUFFER_MAX (1<<24)
c988ea91
CH
72
73static time_t howlong = 7200; /* default seconds of reorganizing */
74static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
75static time_t endtime;
76static time_t starttime;
77static xfs_ino_t leftoffino = 0;
78static int pagesize;
79
80void usage(int ret);
81static int fsrfile(char *fname, xfs_ino_t ino);
82static int fsrfile_common( char *fname, char *tname, char *mnt,
83 int fd, xfs_bstat_t *statp);
84static int packfile(char *fname, char *tname, int fd,
85 xfs_bstat_t *statp, struct fsxattr *fsxp);
86static void fsrdir(char *dirname);
87static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
88static void initallfs(char *mtab);
89e4b5bd 89static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
90static void fsrall_cleanup(int timeout);
91static int getnextents(int);
92int xfsrtextsize(int fd);
09d38d96 93int xfs_getrt(int fd, struct statvfs *sfbp);
c988ea91
CH
94char * gettmpname(char *fname);
95char * getparent(char *fname);
96int fsrprintf(const char *fmt, ...);
97int read_fd_bmap(int, xfs_bstat_t *, int *);
98int cmp(const void *, const void *);
99static void tmp_init(char *mnt);
100static char * tmp_next(char *mnt);
101static void tmp_close(char *mnt);
102int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
c988ea91
CH
103
104xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */
105
106#define NMOUNT 64
107static int numfs;
108
/*
 * One mounted filesystem as recorded by initallfs(): the device name and
 * mount directory copied from the mount table entry, plus a per-filesystem
 * pass counter that fsrallfs() seeds and advances.
 */
109typedef struct fsdesc {
110 char *dev; /* strdup'd copy of the mount entry's mnt_fsname */
111 char *mnt; /* strdup'd copy of the mount entry's mnt_dir */
112 int npass; /* pass count for this filesystem (see fsrallfs) */
113} fsdesc_t;
114
115fsdesc_t *fs, *fsbase, *fsend;
116int fsbufsize = 10; /* A starting value */
117int nfrags = 0; /* Debug option: Coerse into specific number
118 * of extents */
119int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
120
121int
122xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
123{
124 return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
125}
126
127int
128xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
129{
130 xfs_fsop_bulkreq_t bulkreq;
131
cad114df 132 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
133 bulkreq.icount = 1;
134 bulkreq.ubuffer = ubuffer;
135 bulkreq.ocount = NULL;
136 return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
137}
138
139int
140xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
141 xfs_bstat_t *ubuffer, __s32 *ocount)
142{
143 xfs_fsop_bulkreq_t bulkreq;
144
cad114df 145 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
146 bulkreq.icount = icount;
147 bulkreq.ubuffer = ubuffer;
148 bulkreq.ocount = ocount;
149 return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
150}
151
152int
153xfs_swapext(int fd, xfs_swapext_t *sx)
154{
155 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
156}
157
158int
159xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
160{
161 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
162}
163
/*
 * Signal handler installed by fsrallfs(): run the timeout-style cleanup
 * (which records where we left off) and terminate with status 1.
 */
void
aborter(int unused)
{
	const int	timed_out = 1;

	fsrall_cleanup(timed_out);
	exit(1);
}
170
171int
172main(int argc, char **argv)
173{
f594a0d1 174 struct stat sb;
c988ea91 175 char *argname;
c988ea91 176 int c;
938f7b70 177 struct fs_path *fsp;
89e4b5bd 178 char *mtab = NULL;
c988ea91
CH
179
180 setlinebuf(stdout);
181 progname = basename(argv[0]);
182
183 setlocale(LC_ALL, "");
184 bindtextdomain(PACKAGE, LOCALEDIR);
185 textdomain(PACKAGE);
186
187 gflag = ! isatty(0);
188
89e4b5bd 189 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
190 switch (c) {
191 case 'M':
192 Mflag = 1;
193 break;
194 case 'g':
195 gflag = 1;
196 break;
197 case 'n':
198 /* nflag = 1; */
199 break;
200 case 'v':
201 ++vflag;
202 break;
203 case 'd':
204 dflag = 1;
205 break;
206 case 's': /* frag stats only */
207 /* sflag = 1; */
208 fprintf(stderr,
209 _("%s: Stats not yet supported for XFS\n"),
210 progname);
211 usage(1);
212 break;
213 case 't':
214 howlong = atoi(optarg);
215 break;
216 case 'f':
217 leftofffile = optarg;
218 break;
219 case 'm':
220 mtab = optarg;
221 break;
222 case 'b':
223 argv_blksz_dio = atoi(optarg);
224 break;
225 case 'p':
226 npasses = atoi(optarg);
227 break;
228 case 'C':
229 /* Testing opt: coerses frag count in result */
230 if (getenv("FSRXFSTEST") != NULL) {
231 nfrags = atoi(optarg);
232 openopts |= O_SYNC;
233 }
234 break;
235 case 'V':
236 printf(_("%s version %s\n"), progname, VERSION);
237 exit(0);
238 default:
239 usage(1);
240 }
89e4b5bd
CH
241 }
242
243 /*
244 * If the user did not specify an explicit mount table, try to use
245 * /proc/mounts if it is available, else /etc/mtab. We prefer
246 * /proc/mounts because it is kernel controlled, while /etc/mtab
247 * may contain garbage that userspace tools like pam_mounts wrote
248 * into it.
249 */
250 if (!mtab) {
251 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
252 mtab = _PATH_PROC_MOUNTS;
253 else
254 mtab = _PATH_MOUNTED;
255 }
256
c988ea91
CH
257 if (vflag)
258 setbuf(stdout, NULL);
259
260 starttime = time(0);
261
262 /* Save the caller's real uid */
263 RealUid = getuid();
264
265 pagesize = getpagesize();
938f7b70 266 fs_table_initialise(0, NULL, 0, NULL);
c988ea91
CH
267 if (optind < argc) {
268 for (; optind < argc; optind++) {
269 argname = argv[optind];
3e50d888 270
f594a0d1 271 if (lstat(argname, &sb) < 0) {
c988ea91
CH
272 fprintf(stderr,
273 _("%s: could not stat: %s: %s\n"),
274 progname, argname, strerror(errno));
275 continue;
276 }
3e50d888
CH
277
278 if (S_ISLNK(sb.st_mode)) {
f594a0d1 279 struct stat sb2;
3e50d888 280
f594a0d1 281 if (stat(argname, &sb2) == 0 &&
3e50d888
CH
282 (S_ISBLK(sb2.st_mode) ||
283 S_ISCHR(sb2.st_mode)))
c988ea91 284 sb = sb2;
c988ea91 285 }
3e50d888 286
938f7b70
DW
287 fsp = fs_table_lookup_mount(argname);
288 if (!fsp)
289 fsp = fs_table_lookup_blkdev(argname);
290 if (fsp != NULL) {
291 fsrfs(fsp->fs_dir, 0, 100);
c988ea91
CH
292 } else if (S_ISCHR(sb.st_mode)) {
293 fprintf(stderr, _(
294 "%s: char special not supported: %s\n"),
295 progname, argname);
296 exit(1);
297 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
298 if (!platform_test_xfs_path(argname)) {
299 fprintf(stderr, _(
300 "%s: cannot defragment: %s: Not XFS\n"),
301 progname, argname);
302 continue;
303 }
304 if (S_ISDIR(sb.st_mode))
305 fsrdir(argname);
306 else
307 fsrfile(argname, sb.st_ino);
308 } else {
309 printf(
310 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
311 argname);
312 }
313 }
314 } else {
315 initallfs(mtab);
89e4b5bd 316 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
317 }
318 return 0;
319}
320
321void
322usage(int ret)
323{
324 fprintf(stderr, _(
30626ef6
ES
325"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
326" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
327" %s -V\n\n"
c988ea91 328"Options:\n"
c988ea91
CH
329" -g Print to syslog (default if stdout not a tty).\n"
330" -t time How long to run in seconds.\n"
30626ef6 331" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
332" -f leftoff Use this instead of %s.\n"
333" -m mtab Use something other than /etc/mtab.\n"
334" -d Debug, print even more.\n"
30626ef6
ES
335" -v Verbose, more -v's more verbose.\n"
336" -V Print version number and exit.\n"
337 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
338 exit(ret);
339}
340
341/*
342 * initallfs -- read the mount table and set up an internal form
343 */
344static void
345initallfs(char *mtab)
346{
7141fc5b 347 struct mntent_cursor cursor;
7849d55d 348 struct mntent *mnt= NULL;
c988ea91
CH
349 int mi;
350 char *cp;
f594a0d1 351 struct stat sb;
c988ea91
CH
352
353 /* malloc a number of descriptors, increased later if needed */
354 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
355 fsrprintf(_("out of memory: %s\n"), strerror(errno));
356 exit(1);
357 }
358 fsend = (fsbase + fsbufsize - 1);
359
360 /* find all rw xfs file systems */
361 mi = 0;
362 fs = fsbase;
7141fc5b
JT
363
364 if (platform_mntent_open(&cursor, mtab) != 0){
365 fprintf(stderr, "Error: can't get mntent entries.\n");
366 exit(1);
367 }
368
7849d55d 369 while ((mnt = platform_mntent_next(&cursor)) != NULL) {
c988ea91
CH
370 int rw = 0;
371
7849d55d 372 if (strcmp(mnt->mnt_type, MNTTYPE_XFS ) != 0 ||
f594a0d1 373 stat(mnt->mnt_fsname, &sb) == -1 ||
c988ea91
CH
374 !S_ISBLK(sb.st_mode))
375 continue;
376
7849d55d 377 cp = strtok(mnt->mnt_opts,",");
c988ea91
CH
378 do {
379 if (strcmp("rw", cp) == 0)
380 rw++;
381 } while ((cp = strtok(NULL, ",")) != NULL);
382 if (rw == 0) {
383 if (dflag)
384 fsrprintf(_("Skipping %s: not mounted rw\n"),
7849d55d 385 mnt->mnt_fsname);
c988ea91
CH
386 continue;
387 }
388
389 if (mi == fsbufsize) {
390 fsbufsize += NMOUNT;
391 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
392 fsbufsize * sizeof(fsdesc_t))) == NULL) {
393 fsrprintf(_("out of memory: %s\n"),
394 strerror(errno));
395 exit(1);
396 }
397 if (!fsbase) {
398 fsrprintf(_("out of memory on realloc: %s\n"),
399 strerror(errno));
400 exit(1);
401 }
402 fs = (fsbase + mi); /* Needed ? */
403 }
404
7849d55d
ES
405 fs->dev = strdup(mnt->mnt_fsname);
406 fs->mnt = strdup(mnt->mnt_dir);
c988ea91 407
758bcc92 408 if (fs->dev == NULL) {
7849d55d 409 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_fsname);
c988ea91
CH
410 exit(1);
411 }
758bcc92 412 if (fs->mnt == NULL) {
7849d55d 413 fsrprintf(_("strdup(%s) failed\n"), mnt->mnt_dir);
758bcc92
ES
414 exit(1);
415 }
c988ea91
CH
416 mi++;
417 fs++;
418 }
7141fc5b
JT
419 platform_mntent_close(&cursor);
420
c988ea91
CH
421 numfs = mi;
422 fsend = (fsbase + numfs);
c988ea91
CH
423 if (numfs == 0) {
424 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
425 exit(0);
426 }
427 if (vflag || dflag) {
428 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
429 numfs);
430 if (dflag)
431 for (fs = fsbase; fs < fsend; fs++)
432 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
433 }
434}
435
436static void
89e4b5bd 437fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
438{
439 int fd;
440 int error;
441 int found = 0;
442 char *fsname;
443 char buf[SMBUFSZ];
444 int mdonly = Mflag;
445 char *ptr;
446 xfs_ino_t startino = 0;
447 fsdesc_t *fsp;
f594a0d1 448 struct stat sb, sb2;
c988ea91
CH
449
450 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
451
452 endtime = starttime + howlong;
453 fs = fsbase;
454
455 /* where'd we leave off last time? */
f594a0d1 456 if (lstat(leftofffile, &sb) == 0) {
c988ea91
CH
457 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
458 fsrprintf(_("%s: open failed\n"), leftofffile);
459 }
f594a0d1 460 else if ( fstat(fd, &sb2) == 0) {
c988ea91
CH
461 /*
462 * Verify that lstat & fstat point to the
463 * same regular file (no links/no quick spoofs)
464 */
465 if ( (sb.st_dev != sb2.st_dev) ||
466 (sb.st_ino != sb2.st_ino) ||
467 ((sb.st_mode & S_IFMT) != S_IFREG) ||
468 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
469 (sb2.st_uid != ROOT) ||
470 (sb2.st_nlink != 1)
471 )
472 {
473 fsrprintf(_("Can't use %s: mode=0%o own=%d"
474 " nlink=%d\n"),
475 leftofffile, sb.st_mode,
476 sb.st_uid, sb.st_nlink);
477 close(fd);
478 fd = NULLFD;
479 }
480 }
481 else {
482 close(fd);
483 fd = NULLFD;
484 }
485 }
486 else {
487 fd = NULLFD;
488 }
489
490 if (fd != NULLFD) {
491 if (read(fd, buf, SMBUFSZ) == -1) {
492 fs = fsbase;
493 fsrprintf(_("could not read %s, starting with %s\n"),
494 leftofffile, *fs->dev);
495 } else {
eef20df0
ES
496 /* Ensure the buffer we read is null terminated */
497 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
498 for (fs = fsbase; fs < fsend; fs++) {
499 fsname = fs->dev;
500 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
501 && buf[strlen(fsname)] == ' ') {
502 found = 1;
503 break;
504 }
505 }
506 if (! found)
507 fs = fsbase;
508
509 ptr = strchr(buf, ' ');
510 if (ptr) {
511 startpass = atoi(++ptr);
512 ptr = strchr(ptr, ' ');
513 if (ptr) {
514 startino = strtoull(++ptr, NULL, 10);
515 }
516 }
517 if (startpass < 0)
518 startpass = 0;
519
520 /* Init pass counts */
521 for (fsp = fsbase; fsp < fs; fsp++) {
522 fsp->npass = startpass + 1;
523 }
524 for (fsp = fs; fsp <= fsend; fsp++) {
525 fsp->npass = startpass;
526 }
527 }
528 close(fd);
529 }
530
531 if (vflag) {
532 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
533 fs->npass, (unsigned long long)startino,
534 fs->dev, fs->mnt);
535 }
536
537 signal(SIGABRT, aborter);
538 signal(SIGHUP, aborter);
539 signal(SIGINT, aborter);
540 signal(SIGQUIT, aborter);
541 signal(SIGTERM, aborter);
542
543 /* reorg for 'howlong' -- checked in 'fsrfs' */
544 while (endtime > time(0)) {
545 pid_t pid;
8d2666e3 546
c988ea91
CH
547 if (npasses > 1 && !fs->npass)
548 Mflag = 1;
549 else
550 Mflag = mdonly;
551 pid = fork();
552 switch(pid) {
553 case -1:
554 fsrprintf(_("couldn't fork sub process:"));
555 exit(1);
556 break;
557 case 0:
558 error = fsrfs(fs->mnt, startino, TARGETRANGE);
559 exit (error);
560 break;
561 default:
562 wait(&error);
c988ea91
CH
563 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
564 /* child timed out & did fsrall_cleanup */
565 exit(0);
566 }
567 break;
568 }
569 startino = 0; /* reset after the first time through */
570 fs->npass++;
571 fs++;
8d2666e3
JM
572 if (fs == fsend)
573 fs = fsbase;
574 if (fs->npass == npasses) {
575 fsrprintf(_("Completed all %d passes\n"), npasses);
576 break;
577 }
c988ea91
CH
578 }
579 fsrall_cleanup(endtime <= time(0));
580}
581
582/*
583 * fsrall_cleanup -- close files, print next starting location, etc.
584 */
585static void
586fsrall_cleanup(int timeout)
587{
588 int fd;
589 int ret;
590 char buf[SMBUFSZ];
591
c988ea91 592 unlink(leftofffile);
d0e82db1
ES
593
594 if (timeout) {
595 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
596 progname, startpass, fs->npass,
597 time(0) - endtime + howlong);
598
599 /* record where we left off */
600 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
601 if (fd == -1) {
602 fsrprintf(_("open(%s) failed: %s\n"),
603 leftofffile, strerror(errno));
604 } else {
c988ea91
CH
605 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
606 fs->npass, (unsigned long long)leftoffino);
607 if (write(fd, buf, ret) < strlen(buf))
608 fsrprintf(_("write(%s) failed: %s\n"),
609 leftofffile, strerror(errno));
610 close(fd);
611 }
612 }
c988ea91
CH
613}
614
615/*
616 * fsrfs -- reorganize a file system
617 */
618static int
619fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
620{
621
622 int fsfd, fd;
623 int count = 0;
624 int ret;
625 __s32 buflenout;
626 xfs_bstat_t buf[GRABSZ];
627 char fname[64];
628 char *tname;
629 jdm_fshandle_t *fshandlep;
630 xfs_ino_t lastino = startino;
631
632 fsrprintf(_("%s start inode=%llu\n"), mntdir,
633 (unsigned long long)startino);
634
635 fshandlep = jdm_getfshandle( mntdir );
636 if ( ! fshandlep ) {
637 fsrprintf(_("unable to get handle: %s: %s\n"),
638 mntdir, strerror( errno ));
639 return -1;
640 }
641
642 if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
643 fsrprintf(_("unable to open: %s: %s\n"),
644 mntdir, strerror( errno ));
e3e2793d 645 free(fshandlep);
c988ea91
CH
646 return -1;
647 }
648
649 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
650 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
651 mntdir);
11e06961 652 close(fsfd);
e3e2793d 653 free(fshandlep);
c988ea91
CH
654 return -1;
655 }
656
657 tmp_init(mntdir);
658
659 while ((ret = xfs_bulkstat(fsfd,
98166c91 660 &lastino, GRABSZ, &buf[0], &buflenout)) == 0) {
c988ea91
CH
661 xfs_bstat_t *p;
662 xfs_bstat_t *endp;
663
664 if (buflenout == 0)
665 goto out0;
666
667 /* Each loop through, defrag targetrange percent of the files */
668 count = (buflenout * targetrange) / 100;
669
670 qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
671
672 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
673 /* Do some obvious checks now */
674 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
675 (p->bs_extents < 2))
676 continue;
677
108e985b
DC
678 fd = jdm_open(fshandlep, p, O_RDWR|O_DIRECT);
679 if (fd < 0) {
c988ea91
CH
680 /* This probably means the file was
681 * removed while in progress of handling
682 * it. Just quietly ignore this file.
683 */
684 if (dflag)
685 fsrprintf(_("could not open: "
686 "inode %llu\n"), p->bs_ino);
687 continue;
688 }
689
690 /* Don't know the pathname, so make up something */
691 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
692
693 /* Get a tmp file name */
694 tname = tmp_next(mntdir);
695
696 ret = fsrfile_common(fname, tname, mntdir, fd, p);
697
698 leftoffino = p->bs_ino;
699
700 close(fd);
701
702 if (ret == 0) {
703 if (--count <= 0)
704 break;
705 }
706 }
707 if (endtime && endtime < time(0)) {
708 tmp_close(mntdir);
709 close(fsfd);
710 fsrall_cleanup(1);
711 exit(1);
712 }
713 }
714 if (ret < 0)
715 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
716out0:
717 tmp_close(mntdir);
718 close(fsfd);
e3e2793d 719 free(fshandlep);
c988ea91
CH
720 return 0;
721}
722
723/*
724 * To compare bstat structs for qsort.
725 */
726int
727cmp(const void *s1, const void *s2)
728{
729 return( ((xfs_bstat_t *)s2)->bs_extents -
730 ((xfs_bstat_t *)s1)->bs_extents);
731
732}
733
/*
 * Reorganizing by directory hierarchy is not implemented; this only
 * reports that fact for the directory that was named.
 */
static void
fsrdir(char *dirname)
{
	const char	*msg =
		_("%s: Directory defragmentation not supported\n");

	fsrprintf(msg, dirname);
}
744
745/*
746 * Sets up the defragmentation of a file based on the
747 * filepath. It collects the bstat information, does
748 * an open on the file and passes this all to fsrfile_common.
749 */
750static int
751fsrfile(char *fname, xfs_ino_t ino)
752{
753 xfs_bstat_t statbuf;
754 jdm_fshandle_t *fshandlep;
4f10a2fb
ES
755 int fd = -1, fsfd = -1;
756 int error = -1;
c988ea91
CH
757 char *tname;
758
759 fshandlep = jdm_getfshandle(getparent (fname) );
4f10a2fb 760 if (!fshandlep) {
c988ea91
CH
761 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
762 fname, strerror(errno));
4f10a2fb 763 goto out;
c988ea91
CH
764 }
765
766 /*
767 * Need to open something on the same filesystem as the
768 * file. Open the parent.
769 */
770 fsfd = open(getparent(fname), O_RDONLY);
771 if (fsfd < 0) {
772 fsrprintf(_("unable to open sys handle for %s: %s\n"),
773 fname, strerror(errno));
4f10a2fb 774 goto out;
c988ea91
CH
775 }
776
777 if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
778 fsrprintf(_("unable to get bstat on %s: %s\n"),
779 fname, strerror(errno));
4f10a2fb 780 goto out;
c988ea91
CH
781 }
782
108e985b 783 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
784 if (fd < 0) {
785 fsrprintf(_("unable to open handle %s: %s\n"),
786 fname, strerror(errno));
4f10a2fb 787 goto out;
c988ea91
CH
788 }
789
790 /* Get the fs geometry */
791 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
792 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
4f10a2fb 793 goto out;
c988ea91
CH
794 }
795
c988ea91
CH
796 tname = gettmpname(fname);
797
798 if (tname)
799 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
800
4f10a2fb
ES
801out:
802 if (fsfd >= 0)
803 close(fsfd);
804 if (fd >= 0)
805 close(fd);
806 free(fshandlep);
c988ea91
CH
807
808 return error;
809}
810
811
812/*
813 * This is the common defrag code for either a full fs
814 * defragmentation or a single file. Check as much as
815 * possible with the file, fork a process to setuid to the
816 * target file owner's uid and defragment the file.
817 * This is done so the new extents created in a tmp file are
818 * reflected in the owners' quota without having to do any
819 * special code in the kernel. When the existing extents
820 * are removed, the quotas will be correct. It's ugly but
821 * it saves us from doing some quota re-construction in
822 * the extent swap. The price is that the defragmentation
823 * will fail if the owner of the target file is already at
824 * their quota limit.
825 */
826static int
827fsrfile_common(
828 char *fname,
829 char *tname,
830 char *fsname,
831 int fd,
832 xfs_bstat_t *statp)
833{
834 int error;
09d38d96 835 struct statvfs vfss;
c988ea91
CH
836 struct fsxattr fsx;
837 unsigned long bsize;
838
839 if (vflag)
840 fsrprintf("%s\n", fname);
841
842 if (fsync(fd) < 0) {
843 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
844 return -1;
845 }
846
847 if (statp->bs_size == 0) {
848 if (vflag)
849 fsrprintf(_("%s: zero size, ignoring\n"), fname);
850 return(0);
851 }
852
853 /* Check if a mandatory lock is set on the file to try and
854 * avoid blocking indefinitely on the reads later. Note that
855 * someone could still set a mandatory lock after this check
856 * but before all reads have completed to block fsr reads.
857 * This change just closes the window a bit.
858 */
859 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
860 struct flock fl;
861
862 fl.l_type = F_RDLCK;
863 fl.l_whence = SEEK_SET;
864 fl.l_start = (off_t)0;
865 fl.l_len = 0;
866 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
867 if (vflag)
868 fsrprintf(_("locking check failed: %s\n"),
869 fname);
870 return(-1);
871 }
872 if (fl.l_type != F_UNLCK) {
873 /* Mandatory lock is set */
874 if (vflag)
875 fsrprintf(_("mandatory lock: %s: ignoring\n"),
876 fname);
877 return(-1);
878 }
879 }
880
881 /*
882 * Check if there is room to copy the file.
883 *
884 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
885 * not the optimal I/O size as struct stat.
886 */
09d38d96 887 if (statvfs(fsname ? fsname : fname, &vfss) < 0) {
c988ea91
CH
888 fsrprintf(_("unable to get fs stat on %s: %s\n"),
889 fname, strerror(errno));
890 return -1;
891 }
892 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
893 if (statp->bs_blksize * statp->bs_blocks >
894 vfss.f_bfree * bsize - minimumfree) {
895 fsrprintf(_("insufficient freespace for: %s: "
896 "size=%lld: ignoring\n"), fname,
897 statp->bs_blksize * statp->bs_blocks);
898 return 1;
899 }
900
83f4b5ac 901 if ((ioctl(fd, FS_IOC_FSGETXATTR, &fsx)) < 0) {
c988ea91
CH
902 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
903 return(-1);
904 }
83f4b5ac 905 if (fsx.fsx_xflags & (FS_XFLAG_IMMUTABLE|FS_XFLAG_APPEND)) {
c988ea91
CH
906 if (vflag)
907 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
908 return(0);
909 }
83f4b5ac 910 if (fsx.fsx_xflags & FS_XFLAG_NODEFRAG) {
c988ea91
CH
911 if (vflag)
912 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
913 fname);
914 return(0);
915 }
83f4b5ac 916 if (fsx.fsx_xflags & FS_XFLAG_REALTIME) {
c988ea91
CH
917 if (xfs_getrt(fd, &vfss) < 0) {
918 fsrprintf(_("cannot get realtime geometry for: %s\n"),
919 fname);
920 return(-1);
921 }
922 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
923 fsrprintf(_("low on realtime free space: %s: "
924 "ignoring file\n"), fname);
925 return(-1);
926 }
927 }
928
929 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
930 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
931 return -1;
932 }
933
934 /*
935 * Previously the code forked here, & the child changed it's uid to
936 * that of the file's owner and then called packfile(), to keep
937 * quota counts correct. (defragged files could use fewer blocks).
938 *
939 * Instead, just fchown() the temp file to the uid,gid of the
940 * file we're defragging, in packfile().
941 */
942
943 if ((error = packfile(fname, tname, fd, statp, &fsx)))
944 return error;
945 return -1; /* no error */
946}
947
bdb041f5
DC
948/*
949 * Attempt to set the attr fork up correctly. This is simple for attr1
950 * filesystems as they have a fixed inode fork offset. In that case
951 * just create an attribute and that's all we need to do.
952 *
953 * For attr2 filesystems, see if we have the actual fork offset in
954 * the bstat structure. If so, just create additional attributes on
955 * the temporary inode until the offset matches.
956 *
957 * If it doesn't exist, we can only do best effort. Add an attribute at a time
958 * to move the inode fork around, but take into account that the attribute
959 * might be too small to move the fork every time we add one. This should
960 * hopefully put the fork offset in the right place. It's not a big deal if we
961 * don't get it right - the kernel will reject it when we try to swap extents.
962 */
963static int
964fsr_setup_attr_fork(
965 int fd,
966 int tfd,
967 xfs_bstat_t *bstatp)
968{
c14c7b79 969#ifdef HAVE_FSETXATTR
f594a0d1 970 struct stat tstatbuf;
bdb041f5 971 int i;
27507775 972 int diff = 0;
bdb041f5
DC
973 int last_forkoff = 0;
974 int no_change_cnt = 0;
975 int ret;
976
83f4b5ac 977 if (!(bstatp->bs_xflags & FS_XFLAG_HASATTR))
bdb041f5
DC
978 return 0;
979
980 /*
981 * use the old method if we have attr1 or the kernel does not yet
982 * support passing the fork offset in the bulkstat data.
983 */
984 if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
985 bstatp->bs_forkoff == 0) {
986 /* attr1 */
987 ret = fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE);
988 if (ret) {
989 fsrprintf(_("could not set ATTR\n"));
990 return -1;
991 }
992 goto out;
993 }
994
995 /* attr2 w/ fork offsets */
996
f594a0d1 997 if (fstat(tfd, &tstatbuf) < 0) {
bdb041f5
DC
998 fsrprintf(_("unable to stat temp file: %s\n"),
999 strerror(errno));
1000 return -1;
1001 }
1002
1003 i = 0;
1004 do {
1005 xfs_bstat_t tbstat;
1006 xfs_ino_t ino;
1007 char name[64];
bdb041f5
DC
1008
1009 /*
1adfe5c6 1010 * bulkstat the temp inode to see what the forkoff is. Use
bdb041f5
DC
1011 * this to compare against the target and determine what we
1012 * need to do.
1013 */
1014 ino = tstatbuf.st_ino;
1015 if ((xfs_bulkstat_single(tfd, &ino, &tbstat)) < 0) {
1016 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1017 strerror(errno));
1018 return -1;
1019 }
1020 if (dflag)
1021 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1022 bstatp->bs_forkoff, tbstat.bs_forkoff);
1adfe5c6
ES
1023 diff = tbstat.bs_forkoff - bstatp->bs_forkoff;
1024
1025 /* if they are equal, we are done */
1026 if (!diff)
1027 goto out;
bdb041f5
DC
1028
1029 snprintf(name, sizeof(name), "user.%d", i);
1030
1031 /*
1032 * If there is no attribute, then we need to create one to get
1033 * an attribute fork at the default location.
1034 */
1035 if (!tbstat.bs_forkoff) {
1adfe5c6 1036 ASSERT(i == 0);
bdb041f5
DC
1037 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1038 if (ret) {
1039 fsrprintf(_("could not set ATTR\n"));
1040 return -1;
1041 }
1042 continue;
1adfe5c6 1043 } else if (i == 0) {
1adfe5c6
ES
1044 /*
1045 * First pass, and temp file already has an inline
1046 * xattr, probably due to selinux.
1047 *
1048 * It's *possible* that the temp file attr area
e7e3152c 1049 * is larger than the target file's:
1adfe5c6
ES
1050 *
1051 * Target Temp
1052 * +-------+ 0 +-------+ 0
1053 * | | | |
1054 * | | | Data |
1055 * | Data | | |
1056 * | | v-------v forkoff
1057 * | | | |
1058 * v-------v forkoff | Attr | local
e7e3152c 1059 * | Attr | | |
1adfe5c6 1060 * +-------+ +-------+
1adfe5c6
ES
1061 */
1062
1adfe5c6 1063 /*
e7e3152c
ES
1064 * If target attr area is less than the temp's
1065 * (diff < 0) write a big attr to the temp file to knock
1066 * the attr out of local format.
1067 * (This should actually *increase* the temp file's
1068 * forkoffset when the attr moves out of the inode)
1adfe5c6 1069 */
e7e3152c 1070 if (diff < 0) {
1adfe5c6
ES
1071 char val[2048];
1072 memset(val, 'X', 2048);
1073 if (fsetxattr(tfd, name, val, 2048, 0)) {
1074 fsrprintf(_("big ATTR set failed\n"));
1075 return -1;
1076 }
1077 /* Go back & see where we're at now */
1078 continue;
1079 }
bdb041f5
DC
1080 }
1081
1082 /*
1083 * make a progress check so we don't get stuck trying to extend
1084 * a large btree form attribute fork.
1085 */
1086 if (last_forkoff == tbstat.bs_forkoff) {
1087 if (no_change_cnt++ > 10)
1088 break;
ff85ea3f
ES
1089 } else /* progress! */
1090 no_change_cnt = 0;
bdb041f5
DC
1091 last_forkoff = tbstat.bs_forkoff;
1092
1093 /* work out which way to grow the fork */
bdb041f5
DC
1094 if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
1095 fsrprintf(_("forkoff diff %d too large!\n"), diff);
1096 return -1;
1097 }
1098
bdb041f5 1099 /*
1adfe5c6
ES
1100 * if the temp inode fork offset is still smaller then we have
1101 * to grow the data fork
bdb041f5
DC
1102 */
1103 if (diff < 0) {
1104 /*
1105 * create some temporary extents in the inode to move
1106 * the fork in the direction we need. This can be done
1107 * by preallocating some single block extents at
1108 * non-contiguous offsets.
1109 */
1110 /* XXX: unimplemented! */
27507775
ES
1111 if (dflag)
1112 printf(_("data fork growth unimplemented\n"));
bdb041f5
DC
1113 goto out;
1114 }
1115
1116 /* we need to grow the attr fork, so create another attr */
1117 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1118 if (ret) {
1119 fsrprintf(_("could not set ATTR\n"));
1120 return -1;
1121 }
1122
1123 } while (++i < 100); /* don't go forever */
1124
1125out:
1126 if (dflag)
1127 fsrprintf(_("set temp attr\n"));
27507775
ES
1128 /* We failed to resolve the fork difference */
1129 if (dflag && diff)
1130 fsrprintf(_("failed to match fork offset\n"));;
1131
c14c7b79 1132#endif /* HAVE_FSETXATTR */
bdb041f5
DC
1133 return 0;
1134}
c988ea91
CH
1135
1136/*
1137 * Do the defragmentation of a single file.
1138 * We already are pretty sure we can and want to
1139 * defragment the file. Create the tmp file, copy
1140 * the data (maintaining holes) and call the kernel
671632c6
ES
1141 * extent swap routine.
1142 *
1143 * Return values:
1144 * -1: Some error was encountered
1145 * 0: Successfully defragmented the file
1146 * 1: No change / No Error
c988ea91
CH
1147 */
1148static int
1149packfile(char *fname, char *tname, int fd,
1150 xfs_bstat_t *statp, struct fsxattr *fsxp)
1151{
671632c6 1152 int tfd = -1;
c988ea91 1153 int srval;
671632c6 1154 int retval = -1; /* Failure is the default */
c988ea91
CH
1155 int nextents, extent, cur_nextents, new_nextents;
1156 unsigned blksz_dio;
1157 unsigned dio_min;
1158 struct dioattr dio;
1159 static xfs_swapext_t sx;
1160 struct xfs_flock64 space;
1161 off64_t cnt, pos;
671632c6 1162 void *fbuf = NULL;
c988ea91
CH
1163 int ct, wc, wc_b4;
1164 char ffname[SMBUFSZ];
1165 int ffd = -1;
1166
1167 /*
1168 * Work out the extent map - nextents will be set to the
1169 * minimum number of extents needed for the file (taking
1170 * into account holes), cur_nextents is the current number
1171 * of extents.
1172 */
1173 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1174
1175 if (cur_nextents == 1 || cur_nextents <= nextents) {
1176 if (vflag)
1177 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1178 retval = 1; /* indicates no change/no error */
1179 goto out;
c988ea91
CH
1180 }
1181
1182 if (dflag)
1183 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1184 fname, cur_nextents, (cur_nextents - nextents),
1185 tname);
1186
1187 if ((tfd = open(tname, openopts, 0666)) < 0) {
1188 if (vflag)
1189 fsrprintf(_("could not open tmp file: %s: %s\n"),
1190 tname, strerror(errno));
671632c6 1191 goto out;
c988ea91
CH
1192 }
1193 unlink(tname);
1194
1195 /* Setup extended attributes */
bdb041f5
DC
1196 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1197 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1198 goto out;
c988ea91
CH
1199 }
1200
1201 /* Setup extended inode flags, project identifier, etc */
1202 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
83f4b5ac 1203 if (ioctl(tfd, FS_IOC_FSSETXATTR, fsxp) < 0) {
c988ea91
CH
1204 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1205 tname);
671632c6 1206 goto out;
c988ea91
CH
1207 }
1208 }
1209
1210 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1211 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1212 goto out;
c988ea91
CH
1213 }
1214
1215 dio_min = dio.d_miniosz;
1216 if (statp->bs_size <= dio_min) {
1217 blksz_dio = dio_min;
1218 } else {
1219 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1220 if (argv_blksz_dio != 0)
1221 blksz_dio = min(argv_blksz_dio, blksz_dio);
1222 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1223 }
1224
1225 if (dflag) {
1226 fsrprintf(_("DEBUG: "
1227 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1228 statp->bs_size, blksz_dio, dio.d_miniosz,
1229 dio.d_maxiosz, pagesize);
1230 }
1231
1232 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1233 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1234 goto out;
c988ea91
CH
1235 }
1236
1237 if (nfrags) {
1238 /* Create new tmp file in same AG as first */
1239 sprintf(ffname, "%s.frag", tname);
1240
1241 /* Open the new file for sync writes */
1242 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1243 fsrprintf(_("could not open fragfile: %s : %s\n"),
1244 ffname, strerror(errno));
671632c6 1245 goto out;
c988ea91
CH
1246 }
1247 unlink(ffname);
1248 }
1249
1250 /* Loop through block map allocating new extents */
1251 for (extent = 0; extent < nextents; extent++) {
1252 pos = outmap[extent].bmv_offset;
1253 if (outmap[extent].bmv_block == -1) {
1254 space.l_whence = SEEK_SET;
1255 space.l_start = pos;
1256 space.l_len = outmap[extent].bmv_length;
1257 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1258 fsrprintf(_("could not trunc tmp %s\n"),
1259 tname);
1260 }
dc8878f4 1261 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1262 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1263 tname, strerror(errno));
1264 goto out;
1265 }
c988ea91
CH
1266 continue;
1267 } else if (outmap[extent].bmv_length == 0) {
1268 /* to catch holes at the beginning of the file */
1269 continue;
1270 }
1271 if (! nfrags) {
1272 space.l_whence = SEEK_CUR;
1273 space.l_start = 0;
1274 space.l_len = outmap[extent].bmv_length;
1275
1276 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1277 fsrprintf(_("could not pre-allocate tmp space:"
1278 " %s\n"), tname);
671632c6 1279 goto out;
c988ea91 1280 }
dc8878f4 1281 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1282 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1283 tname, strerror(errno));
1284 goto out;
1285 }
c988ea91
CH
1286 }
1287 } /* end of space allocation loop */
1288
dc8878f4 1289 if (lseek(tfd, 0, SEEK_SET)) {
c988ea91 1290 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1291 goto out;
c988ea91
CH
1292 }
1293
1294 /* Check if the temporary file has fewer extents */
1295 new_nextents = getnextents(tfd);
1296 if (dflag)
1297 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1298 if (cur_nextents <= new_nextents) {
1299 if (vflag)
1300 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1301 retval = 1; /* no change/no error */
1302 goto out;
c988ea91
CH
1303 }
1304
1305 /* Loop through block map copying the file. */
1306 for (extent = 0; extent < nextents; extent++) {
1307 pos = outmap[extent].bmv_offset;
1308 if (outmap[extent].bmv_block == -1) {
dc8878f4 1309 if (lseek(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1310 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1311 tname, strerror(errno));
1312 goto out;
1313 }
dc8878f4 1314 if (lseek(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
3d303baa
ES
1315 fsrprintf(_("could not lseek in file: %s : %s\n"),
1316 fname, strerror(errno));
1317 goto out;
1318 }
c988ea91
CH
1319 continue;
1320 } else if (outmap[extent].bmv_length == 0) {
1321 /* to catch holes at the beginning of the file */
1322 continue;
1323 }
1324 for (cnt = outmap[extent].bmv_length; cnt > 0;
1325 cnt -= ct, pos += ct) {
1326 if (nfrags && --nfrags) {
1327 ct = min(cnt, dio_min);
1328 } else if (cnt % dio_min == 0) {
1329 ct = min(cnt, blksz_dio);
1330 } else {
1331 ct = min(cnt + dio_min - (cnt % dio_min),
1332 blksz_dio);
1333 }
1334 ct = read(fd, fbuf, ct);
1335 if (ct == 0) {
1336 /* EOF, stop trying to read */
1337 extent = nextents;
1338 break;
1339 }
1340 /* Ensure we do direct I/O to correct block
1341 * boundaries.
1342 */
1343 if (ct % dio_min != 0) {
1344 wc = ct + dio_min - (ct % dio_min);
1345 } else {
1346 wc = ct;
1347 }
1348 wc_b4 = wc;
1349 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1350 if (ct < 0)
1351 fsrprintf(_("bad read of %d bytes "
1352 "from %s: %s\n"), wc_b4,
1353 fname, strerror(errno));
1354 else if (wc < 0)
1355 fsrprintf(_("bad write of %d bytes "
1356 "to %s: %s\n"), wc_b4,
1357 tname, strerror(errno));
1358 else {
1359 /*
1360 * Might be out of space
1361 *
1362 * Try to finish write
1363 */
1364 int resid = ct-wc;
1365
1366 if ((wc = write(tfd, ((char *)fbuf)+wc,
1367 resid)) == resid) {
1368 /* worked on second attempt? */
1369 continue;
1370 }
1371 else if (wc < 0) {
1372 fsrprintf(_("bad write2 of %d "
1373 "bytes to %s: %s\n"),
1374 resid, tname,
1375 strerror(errno));
1376 } else {
1377 fsrprintf(_("bad copy to %s\n"),
1378 tname);
1379 }
1380 }
671632c6 1381 goto out;
c988ea91
CH
1382 }
1383 if (nfrags) {
1384 /* Do a matching write to the tmp file */
431ec4e6 1385 wc_b4 = wc;
c988ea91
CH
1386 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1387 fsrprintf(_("bad write of %d bytes "
1388 "to %s: %s\n"),
1389 wc_b4, ffname, strerror(errno));
1390 }
1391 }
1392 }
1393 }
dde67673 1394 if (ftruncate(tfd, statp->bs_size) < 0) {
3d303baa
ES
1395 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1396 fname, strerror(errno));
1397 goto out;
1398 }
1399 if (fsync(tfd) < 0) {
1400 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1401 fname, strerror(errno));
1402 goto out;
1403 }
c988ea91 1404
c988ea91
CH
1405 sx.sx_stat = *statp; /* struct copy */
1406 sx.sx_version = XFS_SX_VERSION;
1407 sx.sx_fdtarget = fd;
1408 sx.sx_fdtmp = tfd;
1409 sx.sx_offset = 0;
1410 sx.sx_length = statp->bs_size;
1411
1412 /* switch to the owner's id, to keep quota in line */
1413 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1414 if (vflag)
1415 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1416 tname, strerror(errno));
671632c6 1417 goto out;
c988ea91
CH
1418 }
1419
1420 /* Swap the extents */
1421 srval = xfs_swapext(fd, &sx);
1422 if (srval < 0) {
1423 if (errno == ENOTSUP) {
1424 if (vflag || dflag)
1425 fsrprintf(_("%s: file type not supported\n"), fname);
1426 } else if (errno == EFAULT) {
1427 /* The file has changed since we started the copy */
1428 if (vflag || dflag)
1429 fsrprintf(_("%s: file modified defrag aborted\n"),
1430 fname);
1431 } else if (errno == EBUSY) {
1432 /* Timestamp has changed or mmap'ed file */
1433 if (vflag || dflag)
1434 fsrprintf(_("%s: file busy\n"), fname);
1435 } else {
1436 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1437 fname, strerror(errno));
1438 }
671632c6 1439 goto out;
c988ea91
CH
1440 }
1441
1442 /* Report progress */
1443 if (vflag)
1444 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1445 cur_nextents, new_nextents,
1446 (new_nextents <= nextents ? "DONE" : " " ),
1447 fname);
671632c6
ES
1448 retval = 0;
1449
1450out:
1451 free(fbuf);
1452 if (tfd != -1)
1453 close(tfd);
1454 if (ffd != -1)
1455 close(ffd);
1456 return retval;
c988ea91
CH
1457}
1458
1459char *
1460gettmpname(char *fname)
1461{
1462 static char buf[PATH_MAX+1];
1463 char sbuf[SMBUFSZ];
1464 char *ptr;
1465
1466 sprintf(sbuf, "/.fsr%d", getpid());
1467
6063feca
ES
1468 strncpy(buf, fname, PATH_MAX);
1469 buf[PATH_MAX] = '\0';
c988ea91
CH
1470 ptr = strrchr(buf, '/');
1471 if (ptr) {
1472 *ptr = '\0';
1473 } else {
1474 strcpy(buf, ".");
1475 }
1476
1477 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1478 fsrprintf(_("tmp file name too long: %s\n"), fname);
1479 return(NULL);
1480 }
1481
1482 strcat(buf, sbuf);
1483
1484 return(buf);
1485}
1486
/*
 * Return the directory part of fname: everything before the last
 * '/'.  A name whose only slash is the leading one yields "/", and
 * a name without any slash yields ".".
 *
 * The result is kept in a static buffer that is overwritten by the
 * next call; names longer than PATH_MAX are truncated first.
 */
char *
getparent(char *fname)
{
	static char	buf[PATH_MAX+1];
	char		*slash;

	strncpy(buf, fname, PATH_MAX);
	buf[PATH_MAX] = '\0';

	slash = strrchr(buf, '/');
	if (slash == NULL) {
		/* No directory component at all. */
		strcpy(buf, ".");
		return(buf);
	}

	/* Keep the leading slash when the parent is the root. */
	if (slash == buf)
		slash++;
	*slash = '\0';

	return(buf);
}
1506
1507/*
1508 * Read in block map of the input file, coalesce contiguous
1509 * extents into a single range, keep all holes. Convert from 512 byte
1510 * blocks to bytes.
1511 *
1512 * This code was borrowed from mv.c with some minor mods.
1513 */
1514#define MAPSIZE 128
1515#define OUTMAP_SIZE_INCREMENT MAPSIZE
1516
1517int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
1518{
1519 int i, cnt;
1520 struct getbmap map[MAPSIZE];
1521
1522#define BUMP_CNT \
1523 if (++cnt >= outmap_size) { \
1524 outmap_size += OUTMAP_SIZE_INCREMENT; \
1525 outmap = (struct getbmap *)realloc(outmap, \
1526 outmap_size*sizeof(*outmap)); \
1527 if (outmap == NULL) { \
1528 fsrprintf(_("realloc failed: %s\n"), \
1529 strerror(errno)); \
1530 exit(1); \
1531 } \
1532 }
1533
1534 /* Initialize the outmap array. It always grows - never shrinks.
1535 * Left-over memory allocation is saved for the next files.
1536 */
1537 if (outmap_size == 0) {
1538 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1539 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1540 if (!outmap) {
1541 fsrprintf(_("malloc failed: %s\n"),
1542 strerror(errno));
1543 exit(1);
1544 }
1545 }
1546
1547 outmap[0].bmv_block = 0;
1548 outmap[0].bmv_offset = 0;
1549 outmap[0].bmv_length = sin->bs_size;
1550
1551 /*
1552 * If a non regular file is involved then forget holes
1553 */
1554
1555 if (!S_ISREG(sin->bs_mode))
1556 return(1);
1557
1558 outmap[0].bmv_length = 0;
1559
1560 map[0].bmv_offset = 0;
1561 map[0].bmv_block = 0;
1562 map[0].bmv_entries = 0;
1563 map[0].bmv_count = MAPSIZE;
1564 map[0].bmv_length = -1;
1565
1566 cnt = 0;
1567 *cur_nextents = 0;
1568
1569 do {
1570 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1571 fsrprintf(_("failed reading extents: inode %llu"),
1572 (unsigned long long)sin->bs_ino);
1573 exit(1);
1574 }
1575
1576 /* Concatenate extents together and replicate holes into
1577 * the output map.
1578 */
1579 *cur_nextents += map[0].bmv_entries;
1580 for (i = 0; i < map[0].bmv_entries; i++) {
1581 if (map[i + 1].bmv_block == -1) {
1582 BUMP_CNT;
1583 outmap[cnt] = map[i+1];
1584 } else if (outmap[cnt].bmv_block == -1) {
1585 BUMP_CNT;
1586 outmap[cnt] = map[i+1];
1587 } else {
1588 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1589 }
1590 }
1591 } while (map[0].bmv_entries == (MAPSIZE-1));
1592 for (i = 0; i <= cnt; i++) {
1593 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1594 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1595 }
1596
1597 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1598
1599 return(cnt+1);
1600}
1601
1602/*
1603 * Read the block map and return the number of extents.
1604 */
1605int
1606getnextents(int fd)
1607{
1608 int nextents;
1609 struct getbmap map[MAPSIZE];
1610
1611 map[0].bmv_offset = 0;
1612 map[0].bmv_block = 0;
1613 map[0].bmv_entries = 0;
1614 map[0].bmv_count = MAPSIZE;
1615 map[0].bmv_length = -1;
1616
1617 nextents = 0;
1618
1619 do {
1620 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1621 fsrprintf(_("failed reading extents"));
1622 exit(1);
1623 }
1624
1625 nextents += map[0].bmv_entries;
1626 } while (map[0].bmv_entries == (MAPSIZE-1));
1627
1628 return(nextents);
1629}
1630
1631/*
1632 * Get the fs geometry
1633 */
1634int
1635xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
1636{
1637 if (xfs_fsgeometry(fd, fsgeom) < 0) {
1638 return -1;
1639 }
1640 return 0;
1641}
1642
1643/*
1644 * Get xfs realtime space information
1645 */
1646int
09d38d96 1647xfs_getrt(int fd, struct statvfs *sfbp)
c988ea91
CH
1648{
1649 unsigned long bsize;
1650 unsigned long factor;
1651 xfs_fsop_counts_t cnt;
1652
1653 if (!fsgeom.rtblocks)
1654 return -1;
1655
1656 if (xfs_fscounts(fd, &cnt) < 0) {
1657 close(fd);
1658 return -1;
1659 }
1660 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
1661 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1662 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1663 return 0;
1664}
1665
1666int
1667fsrprintf(const char *fmt, ...)
1668{
1669 va_list ap;
1670
1671 va_start(ap, fmt);
1672 if (gflag) {
1673 static int didopenlog;
1674 if (!didopenlog) {
1675 openlog("fsr", LOG_PID, LOG_USER);
1676 didopenlog = 1;
1677 }
1678 vsyslog(LOG_INFO, fmt, ap);
1679 } else
1680 vprintf(fmt, ap);
1681 va_end(ap);
1682 return 0;
1683}
1684
c988ea91
CH
1685/*
1686 * Initialize a directory for tmp file use. This is used
1687 * by the full filesystem defragmentation when we're walking
1688 * the inodes and do not know the path for the individual
1689 * files. Multiple directories are used to spread out the
1690 * tmp data around to different ag's (since file data is
1691 * usually allocated to the same ag as the directory and
1692 * directories allocated round robin from the same
1693 * parent directory).
1694 */
1695static void
1696tmp_init(char *mnt)
1697{
1698 int i;
1699 static char buf[SMBUFSZ];
1700 mode_t mask;
1701
1702 tmp_agi = 0;
1703 sprintf(buf, "%s/.fsr", mnt);
1704
1705 mask = umask(0);
1706 if (mkdir(buf, 0700) < 0) {
1707 if (errno == EEXIST) {
1708 if (dflag)
1709 fsrprintf(_("tmpdir already exists: %s\n"),
1710 buf);
1711 } else {
1712 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1713 buf, strerror(errno));
1714 exit(-1);
1715 }
1716 }
1717 for (i=0; i < fsgeom.agcount; i++) {
1718 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1719 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1720 if (errno == EEXIST) {
1721 if (dflag)
1722 fsrprintf(
1723 _("tmpdir already exists: %s\n"), buf);
1724 } else {
1725 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1726 buf, strerror(errno));
1727 exit(-1);
1728 }
1729 }
1730 }
1731 (void)umask(mask);
1732 return;
1733}
1734
1735static char *
1736tmp_next(char *mnt)
1737{
1738 static char buf[SMBUFSZ];
1739
1740 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1741 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1742 tmp_agi,
1743 getpid());
1744
1745 if (++tmp_agi == fsgeom.agcount)
1746 tmp_agi = 0;
1747
1748 return(buf);
1749}
1750
1751static void
1752tmp_close(char *mnt)
1753{
1754 static char buf[SMBUFSZ];
1755 int i;
1756
1757 /* No data is ever actually written so we can just do rmdir's */
1758 for (i=0; i < fsgeom.agcount; i++) {
1759 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1760 if (rmdir(buf) < 0) {
1761 if (errno != ENOENT) {
1762 fsrprintf(
1763 _("could not remove tmpdir: %s: %s\n"),
1764 buf, strerror(errno));
1765 }
1766 }
1767 }
1768 sprintf(buf, "%s/.fsr", mnt);
1769 if (rmdir(buf) < 0) {
1770 if (errno != ENOENT) {
1771 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1772 buf, strerror(errno));
1773 }
1774 }
1775}