]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - fsr/xfs_fsr.c
xfs_fsr: test for more potential failures in packfile()
[thirdparty/xfsprogs-dev.git] / fsr / xfs_fsr.c
CommitLineData
c988ea91
CH
1/*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
bdb041f5 19#include <libxfs.h>
a2ceac1f 20#include <xfs/xfs.h>
bdb041f5 21#include <xfs/xfs_types.h>
c988ea91 22#include <xfs/jdm.h>
bdb041f5
DC
23#include <xfs/xfs_bmap_btree.h>
24#include <xfs/xfs_dinode.h>
25#include <xfs/xfs_attr_sf.h>
c988ea91
CH
26
27#include <fcntl.h>
28#include <errno.h>
29#include <malloc.h>
30#include <mntent.h>
31#include <syslog.h>
32#include <signal.h>
33#include <sys/ioctl.h>
34#include <sys/wait.h>
35#include <sys/vfs.h>
36#include <sys/statvfs.h>
37#include <sys/xattr.h>
38
39
40#ifndef XFS_XFLAG_NODEFRAG
41#define XFS_XFLAG_NODEFRAG 0x00002000 /* src dependancy, remove later */
42#endif
43
89e4b5bd
CH
44#define _PATH_FSRLAST "/var/tmp/.fsrlast_xfs"
45#define _PATH_PROC_MOUNTS "/proc/mounts"
46
c988ea91
CH
47
48char *progname;
49
50int vflag;
51int gflag;
52static int Mflag;
53/* static int nflag; */
54int dflag = 0;
55/* static int sflag; */
56int argv_blksz_dio;
57extern int max_ext_size;
58static int npasses = 10;
59static int startpass = 0;
60
61struct getbmap *outmap = NULL;
62int outmap_size = 0;
63int RealUid;
64int tmp_agi;
65static __int64_t minimumfree = 2048;
66
67#define MNTTYPE_XFS "xfs"
68
69#define SMBUFSZ 1024
70#define ROOT 0
71#define NULLFD -1
72#define GRABSZ 64
73#define TARGETRANGE 10
74#define V_NONE 0
75#define V_OVERVIEW 1
76#define V_ALL 2
77#define BUFFER_SIZE (1<<16)
78#define BUFFER_MAX (1<<24)
c988ea91
CH
79
80static time_t howlong = 7200; /* default seconds of reorganizing */
81static char *leftofffile = _PATH_FSRLAST; /* where we left off last */
c988ea91
CH
82static time_t endtime;
83static time_t starttime;
84static xfs_ino_t leftoffino = 0;
85static int pagesize;
86
87void usage(int ret);
88static int fsrfile(char *fname, xfs_ino_t ino);
89static int fsrfile_common( char *fname, char *tname, char *mnt,
90 int fd, xfs_bstat_t *statp);
91static int packfile(char *fname, char *tname, int fd,
92 xfs_bstat_t *statp, struct fsxattr *fsxp);
93static void fsrdir(char *dirname);
94static int fsrfs(char *mntdir, xfs_ino_t ino, int targetrange);
95static void initallfs(char *mtab);
89e4b5bd 96static void fsrallfs(char *mtab, int howlong, char *leftofffile);
c988ea91
CH
97static void fsrall_cleanup(int timeout);
98static int getnextents(int);
99int xfsrtextsize(int fd);
100int xfs_getrt(int fd, struct statvfs64 *sfbp);
101char * gettmpname(char *fname);
102char * getparent(char *fname);
103int fsrprintf(const char *fmt, ...);
104int read_fd_bmap(int, xfs_bstat_t *, int *);
105int cmp(const void *, const void *);
106static void tmp_init(char *mnt);
107static char * tmp_next(char *mnt);
108static void tmp_close(char *mnt);
109int xfs_getgeom(int , xfs_fsop_geom_v1_t * );
c988ea91
CH
110
111xfs_fsop_geom_v1_t fsgeom; /* geometry of active mounted system */
112
113#define NMOUNT 64
114static int numfs;
115
116typedef struct fsdesc {
117 char *dev;
118 char *mnt;
119 int npass;
120} fsdesc_t;
121
122fsdesc_t *fs, *fsbase, *fsend;
123int fsbufsize = 10; /* A starting value */
124int nfrags = 0; /* Debug option: Coerse into specific number
125 * of extents */
126int openopts = O_CREAT|O_EXCL|O_RDWR|O_DIRECT;
127
128int
129xfs_fsgeometry(int fd, xfs_fsop_geom_v1_t *geom)
130{
131 return ioctl(fd, XFS_IOC_FSGEOMETRY_V1, geom);
132}
133
134int
135xfs_bulkstat_single(int fd, xfs_ino_t *lastip, xfs_bstat_t *ubuffer)
136{
137 xfs_fsop_bulkreq_t bulkreq;
138
cad114df 139 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
140 bulkreq.icount = 1;
141 bulkreq.ubuffer = ubuffer;
142 bulkreq.ocount = NULL;
143 return ioctl(fd, XFS_IOC_FSBULKSTAT_SINGLE, &bulkreq);
144}
145
146int
147xfs_bulkstat(int fd, xfs_ino_t *lastip, int icount,
148 xfs_bstat_t *ubuffer, __s32 *ocount)
149{
150 xfs_fsop_bulkreq_t bulkreq;
151
cad114df 152 bulkreq.lastip = (__u64 *)lastip;
c988ea91
CH
153 bulkreq.icount = icount;
154 bulkreq.ubuffer = ubuffer;
155 bulkreq.ocount = ocount;
156 return ioctl(fd, XFS_IOC_FSBULKSTAT, &bulkreq);
157}
158
159int
160xfs_swapext(int fd, xfs_swapext_t *sx)
161{
162 return ioctl(fd, XFS_IOC_SWAPEXT, sx);
163}
164
165int
166xfs_fscounts(int fd, xfs_fsop_counts_t *counts)
167{
168 return ioctl(fd, XFS_IOC_FSCOUNTS, counts);
169}
170
/*
 * Signal handler installed by fsrallfs(): run the "timed out" cleanup
 * (which records where we left off) and terminate with status 1.
 */
void
aborter(int unused)
{
	(void)unused;		/* signal number is not needed */

	fsrall_cleanup(1);
	exit(1);
}
177
3e50d888
CH
178/*
179 * Check if the argument is either the device name or mountpoint of an XFS
180 * filesystem. Note that we do not care about bind mounted regular files
181 * here - the code that handles defragmentation of invidual files takes care
182 * of that.
183 */
184static char *
185find_mountpoint(char *mtab, char *argname, struct stat64 *sb)
186{
187 struct mntent *t;
188 struct stat64 ms;
189 FILE *mtabp;
190 char *mntp = NULL;
191
192 mtabp = setmntent(mtab, "r");
193 if (!mtabp) {
194 fprintf(stderr, _("%s: cannot read %s\n"),
195 progname, mtab);
196 exit(1);
197 }
198
199 while ((t = getmntent(mtabp))) {
200 if (S_ISDIR(sb->st_mode)) { /* mount point */
201 if (stat64(t->mnt_dir, &ms) < 0)
202 continue;
203 if (sb->st_ino != ms.st_ino)
204 continue;
205 if (sb->st_dev != ms.st_dev)
206 continue;
207 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
208 continue;
209 } else { /* device */
210 struct stat64 sb2;
211
212 if (stat64(t->mnt_fsname, &ms) < 0)
213 continue;
214 if (sb->st_rdev != ms.st_rdev)
215 continue;
216 if (strcmp(t->mnt_type, MNTTYPE_XFS) != 0)
217 continue;
218
219 /*
220 * Make sure the mountpoint given by mtab is accessible
221 * before using it.
222 */
223 if (stat64(t->mnt_dir, &sb2) < 0)
224 continue;
225 }
226
227 mntp = t->mnt_dir;
228 break;
229 }
230
231 endmntent(mtabp);
232 return mntp;
233}
234
c988ea91
CH
235int
236main(int argc, char **argv)
237{
3e50d888 238 struct stat64 sb;
c988ea91 239 char *argname;
c988ea91 240 int c;
3e50d888 241 char *mntp;
89e4b5bd 242 char *mtab = NULL;
c988ea91
CH
243
244 setlinebuf(stdout);
245 progname = basename(argv[0]);
246
247 setlocale(LC_ALL, "");
248 bindtextdomain(PACKAGE, LOCALEDIR);
249 textdomain(PACKAGE);
250
251 gflag = ! isatty(0);
252
89e4b5bd 253 while ((c = getopt(argc, argv, "C:p:e:MgsdnvTt:f:m:b:N:FV")) != -1) {
c988ea91
CH
254 switch (c) {
255 case 'M':
256 Mflag = 1;
257 break;
258 case 'g':
259 gflag = 1;
260 break;
261 case 'n':
262 /* nflag = 1; */
263 break;
264 case 'v':
265 ++vflag;
266 break;
267 case 'd':
268 dflag = 1;
269 break;
270 case 's': /* frag stats only */
271 /* sflag = 1; */
272 fprintf(stderr,
273 _("%s: Stats not yet supported for XFS\n"),
274 progname);
275 usage(1);
276 break;
277 case 't':
278 howlong = atoi(optarg);
279 break;
280 case 'f':
281 leftofffile = optarg;
282 break;
283 case 'm':
284 mtab = optarg;
285 break;
286 case 'b':
287 argv_blksz_dio = atoi(optarg);
288 break;
289 case 'p':
290 npasses = atoi(optarg);
291 break;
292 case 'C':
293 /* Testing opt: coerses frag count in result */
294 if (getenv("FSRXFSTEST") != NULL) {
295 nfrags = atoi(optarg);
296 openopts |= O_SYNC;
297 }
298 break;
299 case 'V':
300 printf(_("%s version %s\n"), progname, VERSION);
301 exit(0);
302 default:
303 usage(1);
304 }
89e4b5bd
CH
305 }
306
307 /*
308 * If the user did not specify an explicit mount table, try to use
309 * /proc/mounts if it is available, else /etc/mtab. We prefer
310 * /proc/mounts because it is kernel controlled, while /etc/mtab
311 * may contain garbage that userspace tools like pam_mounts wrote
312 * into it.
313 */
314 if (!mtab) {
315 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
316 mtab = _PATH_PROC_MOUNTS;
317 else
318 mtab = _PATH_MOUNTED;
319 }
320
c988ea91
CH
321 if (vflag)
322 setbuf(stdout, NULL);
323
324 starttime = time(0);
325
326 /* Save the caller's real uid */
327 RealUid = getuid();
328
329 pagesize = getpagesize();
330
331 if (optind < argc) {
332 for (; optind < argc; optind++) {
333 argname = argv[optind];
3e50d888 334
c988ea91
CH
335 if (lstat64(argname, &sb) < 0) {
336 fprintf(stderr,
337 _("%s: could not stat: %s: %s\n"),
338 progname, argname, strerror(errno));
339 continue;
340 }
3e50d888
CH
341
342 if (S_ISLNK(sb.st_mode)) {
343 struct stat64 sb2;
344
345 if (stat64(argname, &sb2) == 0 &&
346 (S_ISBLK(sb2.st_mode) ||
347 S_ISCHR(sb2.st_mode)))
c988ea91 348 sb = sb2;
c988ea91 349 }
3e50d888
CH
350
351 mntp = find_mountpoint(mtab, argname, &sb);
c988ea91 352 if (mntp != NULL) {
3e50d888 353 fsrfs(mntp, 0, 100);
c988ea91
CH
354 } else if (S_ISCHR(sb.st_mode)) {
355 fprintf(stderr, _(
356 "%s: char special not supported: %s\n"),
357 progname, argname);
358 exit(1);
359 } else if (S_ISDIR(sb.st_mode) || S_ISREG(sb.st_mode)) {
360 if (!platform_test_xfs_path(argname)) {
361 fprintf(stderr, _(
362 "%s: cannot defragment: %s: Not XFS\n"),
363 progname, argname);
364 continue;
365 }
366 if (S_ISDIR(sb.st_mode))
367 fsrdir(argname);
368 else
369 fsrfile(argname, sb.st_ino);
370 } else {
371 printf(
372 _("%s: not fsys dev, dir, or reg file, ignoring\n"),
373 argname);
374 }
375 }
376 } else {
377 initallfs(mtab);
89e4b5bd 378 fsrallfs(mtab, howlong, leftofffile);
c988ea91
CH
379 }
380 return 0;
381}
382
383void
384usage(int ret)
385{
386 fprintf(stderr, _(
30626ef6
ES
387"Usage: %s [-d] [-v] [-g] [-t time] [-p passes] [-f leftf] [-m mtab]\n"
388" %s [-d] [-v] [-g] xfsdev | dir | file ...\n"
389" %s -V\n\n"
c988ea91 390"Options:\n"
c988ea91
CH
391" -g Print to syslog (default if stdout not a tty).\n"
392" -t time How long to run in seconds.\n"
30626ef6 393" -p passes Number of passes before terminating global re-org.\n"
c988ea91
CH
394" -f leftoff Use this instead of %s.\n"
395" -m mtab Use something other than /etc/mtab.\n"
396" -d Debug, print even more.\n"
30626ef6
ES
397" -v Verbose, more -v's more verbose.\n"
398" -V Print version number and exit.\n"
399 ), progname, progname, progname, _PATH_FSRLAST);
c988ea91
CH
400 exit(ret);
401}
402
403/*
404 * initallfs -- read the mount table and set up an internal form
405 */
406static void
407initallfs(char *mtab)
408{
409 FILE *fp;
410 struct mntent *mp;
411 int mi;
412 char *cp;
413 struct stat64 sb;
414
415 fp = setmntent(mtab, "r");
416 if (fp == NULL) {
417 fsrprintf(_("could not open mtab file: %s\n"), mtab);
418 exit(1);
419 }
420
421 /* malloc a number of descriptors, increased later if needed */
422 if (!(fsbase = (fsdesc_t *)malloc(fsbufsize * sizeof(fsdesc_t)))) {
423 fsrprintf(_("out of memory: %s\n"), strerror(errno));
424 exit(1);
425 }
426 fsend = (fsbase + fsbufsize - 1);
427
428 /* find all rw xfs file systems */
429 mi = 0;
430 fs = fsbase;
431 while ((mp = getmntent(fp))) {
432 int rw = 0;
433
434 if (strcmp(mp->mnt_type, MNTTYPE_XFS ) != 0 ||
435 stat64(mp->mnt_fsname, &sb) == -1 ||
436 !S_ISBLK(sb.st_mode))
437 continue;
438
439 cp = strtok(mp->mnt_opts,",");
440 do {
441 if (strcmp("rw", cp) == 0)
442 rw++;
443 } while ((cp = strtok(NULL, ",")) != NULL);
444 if (rw == 0) {
445 if (dflag)
446 fsrprintf(_("Skipping %s: not mounted rw\n"),
447 mp->mnt_fsname);
448 continue;
449 }
450
451 if (mi == fsbufsize) {
452 fsbufsize += NMOUNT;
453 if ((fsbase = (fsdesc_t *)realloc((char *)fsbase,
454 fsbufsize * sizeof(fsdesc_t))) == NULL) {
455 fsrprintf(_("out of memory: %s\n"),
456 strerror(errno));
457 exit(1);
458 }
459 if (!fsbase) {
460 fsrprintf(_("out of memory on realloc: %s\n"),
461 strerror(errno));
462 exit(1);
463 }
464 fs = (fsbase + mi); /* Needed ? */
465 }
466
467 fs->dev = strdup(mp->mnt_fsname);
468 fs->mnt = strdup(mp->mnt_dir);
469
758bcc92 470 if (fs->dev == NULL) {
c988ea91
CH
471 fsrprintf(_("strdup(%s) failed\n"), mp->mnt_fsname);
472 exit(1);
473 }
758bcc92
ES
474 if (fs->mnt == NULL) {
475 fsrprintf(_("strdup(%s) failed\n"), mp->mnt_dir);
476 exit(1);
477 }
c988ea91
CH
478 mi++;
479 fs++;
480 }
481 numfs = mi;
482 fsend = (fsbase + numfs);
483 endmntent(fp);
484 if (numfs == 0) {
485 fsrprintf(_("no rw xfs file systems in mtab: %s\n"), mtab);
486 exit(0);
487 }
488 if (vflag || dflag) {
489 fsrprintf(_("Found %d mounted, writable, XFS filesystems\n"),
490 numfs);
491 if (dflag)
492 for (fs = fsbase; fs < fsend; fs++)
493 fsrprintf("\t%-30.30s%-30.30s\n", fs->dev, fs->mnt);
494 }
495}
496
497static void
89e4b5bd 498fsrallfs(char *mtab, int howlong, char *leftofffile)
c988ea91
CH
499{
500 int fd;
501 int error;
502 int found = 0;
503 char *fsname;
504 char buf[SMBUFSZ];
505 int mdonly = Mflag;
506 char *ptr;
507 xfs_ino_t startino = 0;
508 fsdesc_t *fsp;
509 struct stat64 sb, sb2;
510
511 fsrprintf("xfs_fsr -m %s -t %d -f %s ...\n", mtab, howlong, leftofffile);
512
513 endtime = starttime + howlong;
514 fs = fsbase;
515
516 /* where'd we leave off last time? */
517 if (lstat64(leftofffile, &sb) == 0) {
518 if ( (fd = open(leftofffile, O_RDONLY)) == -1 ) {
519 fsrprintf(_("%s: open failed\n"), leftofffile);
520 }
521 else if ( fstat64(fd, &sb2) == 0) {
522 /*
523 * Verify that lstat & fstat point to the
524 * same regular file (no links/no quick spoofs)
525 */
526 if ( (sb.st_dev != sb2.st_dev) ||
527 (sb.st_ino != sb2.st_ino) ||
528 ((sb.st_mode & S_IFMT) != S_IFREG) ||
529 ((sb2.st_mode & S_IFMT) != S_IFREG) ||
530 (sb2.st_uid != ROOT) ||
531 (sb2.st_nlink != 1)
532 )
533 {
534 fsrprintf(_("Can't use %s: mode=0%o own=%d"
535 " nlink=%d\n"),
536 leftofffile, sb.st_mode,
537 sb.st_uid, sb.st_nlink);
538 close(fd);
539 fd = NULLFD;
540 }
541 }
542 else {
543 close(fd);
544 fd = NULLFD;
545 }
546 }
547 else {
548 fd = NULLFD;
549 }
550
551 if (fd != NULLFD) {
552 if (read(fd, buf, SMBUFSZ) == -1) {
553 fs = fsbase;
554 fsrprintf(_("could not read %s, starting with %s\n"),
555 leftofffile, *fs->dev);
556 } else {
eef20df0
ES
557 /* Ensure the buffer we read is null terminated */
558 buf[SMBUFSZ-1] = '\0';
c988ea91
CH
559 for (fs = fsbase; fs < fsend; fs++) {
560 fsname = fs->dev;
561 if ((strncmp(buf,fsname,strlen(fsname)) == 0)
562 && buf[strlen(fsname)] == ' ') {
563 found = 1;
564 break;
565 }
566 }
567 if (! found)
568 fs = fsbase;
569
570 ptr = strchr(buf, ' ');
571 if (ptr) {
572 startpass = atoi(++ptr);
573 ptr = strchr(ptr, ' ');
574 if (ptr) {
575 startino = strtoull(++ptr, NULL, 10);
576 }
577 }
578 if (startpass < 0)
579 startpass = 0;
580
581 /* Init pass counts */
582 for (fsp = fsbase; fsp < fs; fsp++) {
583 fsp->npass = startpass + 1;
584 }
585 for (fsp = fs; fsp <= fsend; fsp++) {
586 fsp->npass = startpass;
587 }
588 }
589 close(fd);
590 }
591
592 if (vflag) {
593 fsrprintf(_("START: pass=%d ino=%llu %s %s\n"),
594 fs->npass, (unsigned long long)startino,
595 fs->dev, fs->mnt);
596 }
597
598 signal(SIGABRT, aborter);
599 signal(SIGHUP, aborter);
600 signal(SIGINT, aborter);
601 signal(SIGQUIT, aborter);
602 signal(SIGTERM, aborter);
603
604 /* reorg for 'howlong' -- checked in 'fsrfs' */
605 while (endtime > time(0)) {
606 pid_t pid;
607 if (fs == fsend)
608 fs = fsbase;
609 if (fs->npass == npasses) {
610 fsrprintf(_("Completed all %d passes\n"), npasses);
611 break;
612 }
613 if (npasses > 1 && !fs->npass)
614 Mflag = 1;
615 else
616 Mflag = mdonly;
617 pid = fork();
618 switch(pid) {
619 case -1:
620 fsrprintf(_("couldn't fork sub process:"));
621 exit(1);
622 break;
623 case 0:
624 error = fsrfs(fs->mnt, startino, TARGETRANGE);
625 exit (error);
626 break;
627 default:
628 wait(&error);
c988ea91
CH
629 if (WIFEXITED(error) && WEXITSTATUS(error) == 1) {
630 /* child timed out & did fsrall_cleanup */
631 exit(0);
632 }
633 break;
634 }
635 startino = 0; /* reset after the first time through */
636 fs->npass++;
637 fs++;
638 }
639 fsrall_cleanup(endtime <= time(0));
640}
641
642/*
643 * fsrall_cleanup -- close files, print next starting location, etc.
644 */
645static void
646fsrall_cleanup(int timeout)
647{
648 int fd;
649 int ret;
650 char buf[SMBUFSZ];
651
c988ea91 652 unlink(leftofffile);
d0e82db1
ES
653
654 if (timeout) {
655 fsrprintf(_("%s startpass %d, endpass %d, time %d seconds\n"),
656 progname, startpass, fs->npass,
657 time(0) - endtime + howlong);
658
659 /* record where we left off */
660 fd = open(leftofffile, O_WRONLY|O_CREAT|O_EXCL, 0644);
661 if (fd == -1) {
662 fsrprintf(_("open(%s) failed: %s\n"),
663 leftofffile, strerror(errno));
664 } else {
c988ea91
CH
665 ret = sprintf(buf, "%s %d %llu\n", fs->dev,
666 fs->npass, (unsigned long long)leftoffino);
667 if (write(fd, buf, ret) < strlen(buf))
668 fsrprintf(_("write(%s) failed: %s\n"),
669 leftofffile, strerror(errno));
670 close(fd);
671 }
672 }
c988ea91
CH
673}
674
675/*
676 * fsrfs -- reorganize a file system
677 */
678static int
679fsrfs(char *mntdir, xfs_ino_t startino, int targetrange)
680{
681
682 int fsfd, fd;
683 int count = 0;
684 int ret;
685 __s32 buflenout;
686 xfs_bstat_t buf[GRABSZ];
687 char fname[64];
688 char *tname;
689 jdm_fshandle_t *fshandlep;
690 xfs_ino_t lastino = startino;
691
692 fsrprintf(_("%s start inode=%llu\n"), mntdir,
693 (unsigned long long)startino);
694
695 fshandlep = jdm_getfshandle( mntdir );
696 if ( ! fshandlep ) {
697 fsrprintf(_("unable to get handle: %s: %s\n"),
698 mntdir, strerror( errno ));
699 return -1;
700 }
701
702 if ((fsfd = open(mntdir, O_RDONLY)) < 0) {
703 fsrprintf(_("unable to open: %s: %s\n"),
704 mntdir, strerror( errno ));
705 return -1;
706 }
707
708 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
709 fsrprintf(_("Skipping %s: could not get XFS geometry\n"),
710 mntdir);
11e06961 711 close(fsfd);
c988ea91
CH
712 return -1;
713 }
714
715 tmp_init(mntdir);
716
717 while ((ret = xfs_bulkstat(fsfd,
718 &lastino, GRABSZ, &buf[0], &buflenout) == 0)) {
719 xfs_bstat_t *p;
720 xfs_bstat_t *endp;
721
722 if (buflenout == 0)
723 goto out0;
724
725 /* Each loop through, defrag targetrange percent of the files */
726 count = (buflenout * targetrange) / 100;
727
728 qsort((char *)buf, buflenout, sizeof(struct xfs_bstat), cmp);
729
730 for (p = buf, endp = (buf + buflenout); p < endp ; p++) {
731 /* Do some obvious checks now */
732 if (((p->bs_mode & S_IFMT) != S_IFREG) ||
733 (p->bs_extents < 2))
734 continue;
735
108e985b
DC
736 fd = jdm_open(fshandlep, p, O_RDWR|O_DIRECT);
737 if (fd < 0) {
c988ea91
CH
738 /* This probably means the file was
739 * removed while in progress of handling
740 * it. Just quietly ignore this file.
741 */
742 if (dflag)
743 fsrprintf(_("could not open: "
744 "inode %llu\n"), p->bs_ino);
745 continue;
746 }
747
748 /* Don't know the pathname, so make up something */
749 sprintf(fname, "ino=%lld", (long long)p->bs_ino);
750
751 /* Get a tmp file name */
752 tname = tmp_next(mntdir);
753
754 ret = fsrfile_common(fname, tname, mntdir, fd, p);
755
756 leftoffino = p->bs_ino;
757
758 close(fd);
759
760 if (ret == 0) {
761 if (--count <= 0)
762 break;
763 }
764 }
765 if (endtime && endtime < time(0)) {
766 tmp_close(mntdir);
767 close(fsfd);
768 fsrall_cleanup(1);
769 exit(1);
770 }
771 }
772 if (ret < 0)
773 fsrprintf(_("%s: xfs_bulkstat: %s\n"), progname, strerror(errno));
774out0:
775 tmp_close(mntdir);
776 close(fsfd);
777 return 0;
778}
779
780/*
781 * To compare bstat structs for qsort.
782 */
783int
784cmp(const void *s1, const void *s2)
785{
786 return( ((xfs_bstat_t *)s2)->bs_extents -
787 ((xfs_bstat_t *)s1)->bs_extents);
788
789}
790
/*
 * Placeholder for reorganizing by directory hierarchy.
 * Nothing is walked: the only action is to report that directory
 * defragmentation is not supported for the directory that was named.
 */
static void
fsrdir(char *dirname)
{
	fsrprintf(_("%s: Directory defragmentation not supported\n"),
		  dirname);
}
801
802/*
803 * Sets up the defragmentation of a file based on the
804 * filepath. It collects the bstat information, does
805 * an open on the file and passes this all to fsrfile_common.
806 */
807static int
808fsrfile(char *fname, xfs_ino_t ino)
809{
810 xfs_bstat_t statbuf;
811 jdm_fshandle_t *fshandlep;
812 int fd, fsfd;
813 int error = 0;
814 char *tname;
815
816 fshandlep = jdm_getfshandle(getparent (fname) );
817 if (! fshandlep) {
818 fsrprintf(_("unable to construct sys handle for %s: %s\n"),
819 fname, strerror(errno));
820 return -1;
821 }
822
823 /*
824 * Need to open something on the same filesystem as the
825 * file. Open the parent.
826 */
827 fsfd = open(getparent(fname), O_RDONLY);
828 if (fsfd < 0) {
829 fsrprintf(_("unable to open sys handle for %s: %s\n"),
830 fname, strerror(errno));
831 return -1;
832 }
833
834 if ((xfs_bulkstat_single(fsfd, &ino, &statbuf)) < 0) {
835 fsrprintf(_("unable to get bstat on %s: %s\n"),
836 fname, strerror(errno));
837 close(fsfd);
838 return -1;
839 }
840
108e985b 841 fd = jdm_open(fshandlep, &statbuf, O_RDWR|O_DIRECT);
c988ea91
CH
842 if (fd < 0) {
843 fsrprintf(_("unable to open handle %s: %s\n"),
844 fname, strerror(errno));
845 close(fsfd);
846 return -1;
847 }
848
849 /* Get the fs geometry */
850 if (xfs_getgeom(fsfd, &fsgeom) < 0 ) {
851 fsrprintf(_("Unable to get geom on fs for: %s\n"), fname);
852 close(fsfd);
853 return -1;
854 }
855
856 close(fsfd);
857
858 tname = gettmpname(fname);
859
860 if (tname)
861 error = fsrfile_common(fname, tname, NULL, fd, &statbuf);
862
863 close(fd);
864
865 return error;
866}
867
868
869/*
870 * This is the common defrag code for either a full fs
871 * defragmentation or a single file. Check as much as
872 * possible with the file, fork a process to setuid to the
873 * target file owner's uid and defragment the file.
874 * This is done so the new extents created in a tmp file are
875 * reflected in the owners' quota without having to do any
876 * special code in the kernel. When the existing extents
877 * are removed, the quotas will be correct. It's ugly but
878 * it saves us from doing some quota re-construction in
879 * the extent swap. The price is that the defragmentation
880 * will fail if the owner of the target file is already at
881 * their quota limit.
882 */
883static int
884fsrfile_common(
885 char *fname,
886 char *tname,
887 char *fsname,
888 int fd,
889 xfs_bstat_t *statp)
890{
891 int error;
892 struct statvfs64 vfss;
893 struct fsxattr fsx;
894 unsigned long bsize;
895
896 if (vflag)
897 fsrprintf("%s\n", fname);
898
899 if (fsync(fd) < 0) {
900 fsrprintf(_("sync failed: %s: %s\n"), fname, strerror(errno));
901 return -1;
902 }
903
904 if (statp->bs_size == 0) {
905 if (vflag)
906 fsrprintf(_("%s: zero size, ignoring\n"), fname);
907 return(0);
908 }
909
910 /* Check if a mandatory lock is set on the file to try and
911 * avoid blocking indefinitely on the reads later. Note that
912 * someone could still set a mandatory lock after this check
913 * but before all reads have completed to block fsr reads.
914 * This change just closes the window a bit.
915 */
916 if ( (statp->bs_mode & S_ISGID) && ( ! (statp->bs_mode&S_IXGRP) ) ) {
917 struct flock fl;
918
919 fl.l_type = F_RDLCK;
920 fl.l_whence = SEEK_SET;
921 fl.l_start = (off_t)0;
922 fl.l_len = 0;
923 if ((fcntl(fd, F_GETLK, &fl)) < 0 ) {
924 if (vflag)
925 fsrprintf(_("locking check failed: %s\n"),
926 fname);
927 return(-1);
928 }
929 if (fl.l_type != F_UNLCK) {
930 /* Mandatory lock is set */
931 if (vflag)
932 fsrprintf(_("mandatory lock: %s: ignoring\n"),
933 fname);
934 return(-1);
935 }
936 }
937
938 /*
939 * Check if there is room to copy the file.
940 *
941 * Note that xfs_bstat.bs_blksize returns the filesystem blocksize,
942 * not the optimal I/O size as struct stat.
943 */
944 if (statvfs64(fsname ? fsname : fname, &vfss) < 0) {
945 fsrprintf(_("unable to get fs stat on %s: %s\n"),
946 fname, strerror(errno));
947 return -1;
948 }
949 bsize = vfss.f_frsize ? vfss.f_frsize : vfss.f_bsize;
950 if (statp->bs_blksize * statp->bs_blocks >
951 vfss.f_bfree * bsize - minimumfree) {
952 fsrprintf(_("insufficient freespace for: %s: "
953 "size=%lld: ignoring\n"), fname,
954 statp->bs_blksize * statp->bs_blocks);
955 return 1;
956 }
957
958 if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
959 fsrprintf(_("failed to get inode attrs: %s\n"), fname);
960 return(-1);
961 }
962 if (fsx.fsx_xflags & (XFS_XFLAG_IMMUTABLE|XFS_XFLAG_APPEND)) {
963 if (vflag)
964 fsrprintf(_("%s: immutable/append, ignoring\n"), fname);
965 return(0);
966 }
967 if (fsx.fsx_xflags & XFS_XFLAG_NODEFRAG) {
968 if (vflag)
969 fsrprintf(_("%s: marked as don't defrag, ignoring\n"),
970 fname);
971 return(0);
972 }
973 if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
974 if (xfs_getrt(fd, &vfss) < 0) {
975 fsrprintf(_("cannot get realtime geometry for: %s\n"),
976 fname);
977 return(-1);
978 }
979 if (statp->bs_size > ((vfss.f_bfree * bsize) - minimumfree)) {
980 fsrprintf(_("low on realtime free space: %s: "
981 "ignoring file\n"), fname);
982 return(-1);
983 }
984 }
985
986 if ((RealUid != ROOT) && (RealUid != statp->bs_uid)) {
987 fsrprintf(_("cannot open: %s: Permission denied\n"), fname);
988 return -1;
989 }
990
991 /*
992 * Previously the code forked here, & the child changed it's uid to
993 * that of the file's owner and then called packfile(), to keep
994 * quota counts correct. (defragged files could use fewer blocks).
995 *
996 * Instead, just fchown() the temp file to the uid,gid of the
997 * file we're defragging, in packfile().
998 */
999
1000 if ((error = packfile(fname, tname, fd, statp, &fsx)))
1001 return error;
1002 return -1; /* no error */
1003}
1004
bdb041f5
DC
1005/*
1006 * Attempt to set the attr fork up correctly. This is simple for attr1
1007 * filesystems as they have a fixed inode fork offset. In that case
1008 * just create an attribute and that's all we need to do.
1009 *
1010 * For attr2 filesystems, see if we have the actual fork offset in
1011 * the bstat structure. If so, just create additional attributes on
1012 * the temporary inode until the offset matches.
1013 *
1014 * If it doesn't exist, we can only do best effort. Add an attribute at a time
1015 * to move the inode fork around, but take into account that the attribute
1016 * might be too small to move the fork every time we add one. This should
1017 * hopefully put the fork offset in the right place. It's not a big deal if we
1018 * don't get it right - the kernel will reject it when we try to swap extents.
1019 */
1020static int
1021fsr_setup_attr_fork(
1022 int fd,
1023 int tfd,
1024 xfs_bstat_t *bstatp)
1025{
1026 struct stat64 tstatbuf;
1027 int i;
27507775 1028 int diff = 0;
bdb041f5
DC
1029 int last_forkoff = 0;
1030 int no_change_cnt = 0;
1031 int ret;
1032
1033 if (!(bstatp->bs_xflags & XFS_XFLAG_HASATTR))
1034 return 0;
1035
1036 /*
1037 * use the old method if we have attr1 or the kernel does not yet
1038 * support passing the fork offset in the bulkstat data.
1039 */
1040 if (!(fsgeom.flags & XFS_FSOP_GEOM_FLAGS_ATTR2) ||
1041 bstatp->bs_forkoff == 0) {
1042 /* attr1 */
1043 ret = fsetxattr(tfd, "user.X", "X", 1, XATTR_CREATE);
1044 if (ret) {
1045 fsrprintf(_("could not set ATTR\n"));
1046 return -1;
1047 }
1048 goto out;
1049 }
1050
1051 /* attr2 w/ fork offsets */
1052
1053 if (fstat64(tfd, &tstatbuf) < 0) {
1054 fsrprintf(_("unable to stat temp file: %s\n"),
1055 strerror(errno));
1056 return -1;
1057 }
1058
1059 i = 0;
1060 do {
1061 xfs_bstat_t tbstat;
1062 xfs_ino_t ino;
1063 char name[64];
bdb041f5
DC
1064
1065 /*
1adfe5c6 1066 * bulkstat the temp inode to see what the forkoff is. Use
bdb041f5
DC
1067 * this to compare against the target and determine what we
1068 * need to do.
1069 */
1070 ino = tstatbuf.st_ino;
1071 if ((xfs_bulkstat_single(tfd, &ino, &tbstat)) < 0) {
1072 fsrprintf(_("unable to get bstat on temp file: %s\n"),
1073 strerror(errno));
1074 return -1;
1075 }
1076 if (dflag)
1077 fsrprintf(_("orig forkoff %d, temp forkoff %d\n"),
1078 bstatp->bs_forkoff, tbstat.bs_forkoff);
1adfe5c6
ES
1079 diff = tbstat.bs_forkoff - bstatp->bs_forkoff;
1080
1081 /* if they are equal, we are done */
1082 if (!diff)
1083 goto out;
bdb041f5
DC
1084
1085 snprintf(name, sizeof(name), "user.%d", i);
1086
1087 /*
1088 * If there is no attribute, then we need to create one to get
1089 * an attribute fork at the default location.
1090 */
1091 if (!tbstat.bs_forkoff) {
1adfe5c6 1092 ASSERT(i == 0);
bdb041f5
DC
1093 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1094 if (ret) {
1095 fsrprintf(_("could not set ATTR\n"));
1096 return -1;
1097 }
1098 continue;
1adfe5c6
ES
1099 } else if (i == 0) {
1100 struct fsxattr fsx;
1101 /*
1102 * First pass, and temp file already has an inline
1103 * xattr, probably due to selinux.
1104 *
1105 * It's *possible* that the temp file attr area
1106 * is larger than the target file's, if the
1107 * target file's attrs are not inline:
1108 *
1109 * Target Temp
1110 * +-------+ 0 +-------+ 0
1111 * | | | |
1112 * | | | Data |
1113 * | Data | | |
1114 * | | v-------v forkoff
1115 * | | | |
1116 * v-------v forkoff | Attr | local
1117 * | Attr | ext/btree | |
1118 * +-------+ +-------+
1119 *
1120 * FSGETXATTRA will tell us nr of attr extents in
1121 * target, if any. If none, it's local:
1122 */
1123
1124 memset(&fsx, 0, sizeof(fsx));
1125 if (ioctl(fd, XFS_IOC_FSGETXATTRA, &fsx)) {
1126 fsrprintf(_("FSGETXATTRA failed on target\n"));
1127 return -1;
1128 }
1129
1130 /*
1131 * If target attr area is less than the temp's (diff < 0)
1132 * and the target is not local, write a big attr to
1133 * the temp file to knock the attr out of local format,
1134 * to match the target. (This should actually *increase*
1135 * the temp file's forkoffset when the attr moves out
1136 * of the inode)
1137 */
1138 if (diff < 0 && fsx.fsx_nextents > 0) {
1139 char val[2048];
1140 memset(val, 'X', 2048);
1141 if (fsetxattr(tfd, name, val, 2048, 0)) {
1142 fsrprintf(_("big ATTR set failed\n"));
1143 return -1;
1144 }
1145 /* Go back & see where we're at now */
1146 continue;
1147 }
bdb041f5
DC
1148 }
1149
1150 /*
1151 * make a progress check so we don't get stuck trying to extend
1152 * a large btree form attribute fork.
1153 */
1154 if (last_forkoff == tbstat.bs_forkoff) {
1155 if (no_change_cnt++ > 10)
1156 break;
ff85ea3f
ES
1157 } else /* progress! */
1158 no_change_cnt = 0;
bdb041f5
DC
1159 last_forkoff = tbstat.bs_forkoff;
1160
1161 /* work out which way to grow the fork */
bdb041f5
DC
1162 if (abs(diff) > fsgeom.inodesize - sizeof(struct xfs_dinode)) {
1163 fsrprintf(_("forkoff diff %d too large!\n"), diff);
1164 return -1;
1165 }
1166
bdb041f5 1167 /*
1adfe5c6
ES
1168 * if the temp inode fork offset is still smaller then we have
1169 * to grow the data fork
bdb041f5
DC
1170 */
1171 if (diff < 0) {
1172 /*
1173 * create some temporary extents in the inode to move
1174 * the fork in the direction we need. This can be done
1175 * by preallocating some single block extents at
1176 * non-contiguous offsets.
1177 */
1178 /* XXX: unimplemented! */
27507775
ES
1179 if (dflag)
1180 printf(_("data fork growth unimplemented\n"));
bdb041f5
DC
1181 goto out;
1182 }
1183
1184 /* we need to grow the attr fork, so create another attr */
1185 ret = fsetxattr(tfd, name, "XX", 2, XATTR_CREATE);
1186 if (ret) {
1187 fsrprintf(_("could not set ATTR\n"));
1188 return -1;
1189 }
1190
1191 } while (++i < 100); /* don't go forever */
1192
1193out:
1194 if (dflag)
1195 fsrprintf(_("set temp attr\n"));
27507775
ES
1196 /* We failed to resolve the fork difference */
1197 if (dflag && diff)
1198 fsrprintf(_("failed to match fork offset\n"));;
1199
bdb041f5
DC
1200 return 0;
1201}
c988ea91
CH
1202
1203/*
1204 * Do the defragmentation of a single file.
1205 * We already are pretty sure we can and want to
1206 * defragment the file. Create the tmp file, copy
1207 * the data (maintaining holes) and call the kernel
671632c6
ES
1208 * extent swap routine.
1209 *
1210 * Return values:
1211 * -1: Some error was encountered
1212 * 0: Successfully defragmented the file
1213 * 1: No change / No Error
c988ea91
CH
1214 */
1215static int
1216packfile(char *fname, char *tname, int fd,
1217 xfs_bstat_t *statp, struct fsxattr *fsxp)
1218{
671632c6 1219 int tfd = -1;
c988ea91 1220 int srval;
671632c6 1221 int retval = -1; /* Failure is the default */
c988ea91
CH
1222 int nextents, extent, cur_nextents, new_nextents;
1223 unsigned blksz_dio;
1224 unsigned dio_min;
1225 struct dioattr dio;
1226 static xfs_swapext_t sx;
1227 struct xfs_flock64 space;
1228 off64_t cnt, pos;
671632c6 1229 void *fbuf = NULL;
c988ea91
CH
1230 int ct, wc, wc_b4;
1231 char ffname[SMBUFSZ];
1232 int ffd = -1;
1233
1234 /*
1235 * Work out the extent map - nextents will be set to the
1236 * minimum number of extents needed for the file (taking
1237 * into account holes), cur_nextents is the current number
1238 * of extents.
1239 */
1240 nextents = read_fd_bmap(fd, statp, &cur_nextents);
1241
1242 if (cur_nextents == 1 || cur_nextents <= nextents) {
1243 if (vflag)
1244 fsrprintf(_("%s already fully defragmented.\n"), fname);
671632c6
ES
1245 retval = 1; /* indicates no change/no error */
1246 goto out;
c988ea91
CH
1247 }
1248
1249 if (dflag)
1250 fsrprintf(_("%s extents=%d can_save=%d tmp=%s\n"),
1251 fname, cur_nextents, (cur_nextents - nextents),
1252 tname);
1253
1254 if ((tfd = open(tname, openopts, 0666)) < 0) {
1255 if (vflag)
1256 fsrprintf(_("could not open tmp file: %s: %s\n"),
1257 tname, strerror(errno));
671632c6 1258 goto out;
c988ea91
CH
1259 }
1260 unlink(tname);
1261
1262 /* Setup extended attributes */
bdb041f5
DC
1263 if (fsr_setup_attr_fork(fd, tfd, statp) != 0) {
1264 fsrprintf(_("failed to set ATTR fork on tmp: %s:\n"), tname);
671632c6 1265 goto out;
c988ea91
CH
1266 }
1267
1268 /* Setup extended inode flags, project identifier, etc */
1269 if (fsxp->fsx_xflags || fsxp->fsx_projid) {
1270 if (ioctl(tfd, XFS_IOC_FSSETXATTR, fsxp) < 0) {
1271 fsrprintf(_("could not set inode attrs on tmp: %s\n"),
1272 tname);
671632c6 1273 goto out;
c988ea91
CH
1274 }
1275 }
1276
1277 if ((ioctl(tfd, XFS_IOC_DIOINFO, &dio)) < 0 ) {
1278 fsrprintf(_("could not get DirectIO info on tmp: %s\n"), tname);
671632c6 1279 goto out;
c988ea91
CH
1280 }
1281
1282 dio_min = dio.d_miniosz;
1283 if (statp->bs_size <= dio_min) {
1284 blksz_dio = dio_min;
1285 } else {
1286 blksz_dio = min(dio.d_maxiosz, BUFFER_MAX - pagesize);
1287 if (argv_blksz_dio != 0)
1288 blksz_dio = min(argv_blksz_dio, blksz_dio);
1289 blksz_dio = (min(statp->bs_size, blksz_dio) / dio_min) * dio_min;
1290 }
1291
1292 if (dflag) {
1293 fsrprintf(_("DEBUG: "
1294 "fsize=%lld blsz_dio=%d d_min=%d d_max=%d pgsz=%d\n"),
1295 statp->bs_size, blksz_dio, dio.d_miniosz,
1296 dio.d_maxiosz, pagesize);
1297 }
1298
1299 if (!(fbuf = (char *)memalign(dio.d_mem, blksz_dio))) {
1300 fsrprintf(_("could not allocate buf: %s\n"), tname);
671632c6 1301 goto out;
c988ea91
CH
1302 }
1303
1304 if (nfrags) {
1305 /* Create new tmp file in same AG as first */
1306 sprintf(ffname, "%s.frag", tname);
1307
1308 /* Open the new file for sync writes */
1309 if ((ffd = open(ffname, openopts, 0666)) < 0) {
1310 fsrprintf(_("could not open fragfile: %s : %s\n"),
1311 ffname, strerror(errno));
671632c6 1312 goto out;
c988ea91
CH
1313 }
1314 unlink(ffname);
1315 }
1316
1317 /* Loop through block map allocating new extents */
1318 for (extent = 0; extent < nextents; extent++) {
1319 pos = outmap[extent].bmv_offset;
1320 if (outmap[extent].bmv_block == -1) {
1321 space.l_whence = SEEK_SET;
1322 space.l_start = pos;
1323 space.l_len = outmap[extent].bmv_length;
1324 if (ioctl(tfd, XFS_IOC_UNRESVSP64, &space) < 0) {
1325 fsrprintf(_("could not trunc tmp %s\n"),
1326 tname);
1327 }
3d303baa
ES
1328 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1329 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1330 tname, strerror(errno));
1331 goto out;
1332 }
c988ea91
CH
1333 continue;
1334 } else if (outmap[extent].bmv_length == 0) {
1335 /* to catch holes at the beginning of the file */
1336 continue;
1337 }
1338 if (! nfrags) {
1339 space.l_whence = SEEK_CUR;
1340 space.l_start = 0;
1341 space.l_len = outmap[extent].bmv_length;
1342
1343 if (ioctl(tfd, XFS_IOC_RESVSP64, &space) < 0) {
1344 fsrprintf(_("could not pre-allocate tmp space:"
1345 " %s\n"), tname);
671632c6 1346 goto out;
c988ea91 1347 }
3d303baa
ES
1348 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1349 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1350 tname, strerror(errno));
1351 goto out;
1352 }
c988ea91
CH
1353 }
1354 } /* end of space allocation loop */
1355
1356 if (lseek64(tfd, 0, SEEK_SET)) {
1357 fsrprintf(_("Couldn't rewind on temporary file\n"));
671632c6 1358 goto out;
c988ea91
CH
1359 }
1360
1361 /* Check if the temporary file has fewer extents */
1362 new_nextents = getnextents(tfd);
1363 if (dflag)
1364 fsrprintf(_("Temporary file has %d extents (%d in original)\n"), new_nextents, cur_nextents);
1365 if (cur_nextents <= new_nextents) {
1366 if (vflag)
1367 fsrprintf(_("No improvement will be made (skipping): %s\n"), fname);
671632c6
ES
1368 retval = 1; /* no change/no error */
1369 goto out;
c988ea91
CH
1370 }
1371
1372 /* Loop through block map copying the file. */
1373 for (extent = 0; extent < nextents; extent++) {
1374 pos = outmap[extent].bmv_offset;
1375 if (outmap[extent].bmv_block == -1) {
3d303baa
ES
1376 if (lseek64(tfd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1377 fsrprintf(_("could not lseek in tmpfile: %s : %s\n"),
1378 tname, strerror(errno));
1379 goto out;
1380 }
1381 if (lseek64(fd, outmap[extent].bmv_length, SEEK_CUR) < 0) {
1382 fsrprintf(_("could not lseek in file: %s : %s\n"),
1383 fname, strerror(errno));
1384 goto out;
1385 }
c988ea91
CH
1386 continue;
1387 } else if (outmap[extent].bmv_length == 0) {
1388 /* to catch holes at the beginning of the file */
1389 continue;
1390 }
1391 for (cnt = outmap[extent].bmv_length; cnt > 0;
1392 cnt -= ct, pos += ct) {
1393 if (nfrags && --nfrags) {
1394 ct = min(cnt, dio_min);
1395 } else if (cnt % dio_min == 0) {
1396 ct = min(cnt, blksz_dio);
1397 } else {
1398 ct = min(cnt + dio_min - (cnt % dio_min),
1399 blksz_dio);
1400 }
1401 ct = read(fd, fbuf, ct);
1402 if (ct == 0) {
1403 /* EOF, stop trying to read */
1404 extent = nextents;
1405 break;
1406 }
1407 /* Ensure we do direct I/O to correct block
1408 * boundaries.
1409 */
1410 if (ct % dio_min != 0) {
1411 wc = ct + dio_min - (ct % dio_min);
1412 } else {
1413 wc = ct;
1414 }
1415 wc_b4 = wc;
1416 if (ct < 0 || ((wc = write(tfd, fbuf, wc)) != wc_b4)) {
1417 if (ct < 0)
1418 fsrprintf(_("bad read of %d bytes "
1419 "from %s: %s\n"), wc_b4,
1420 fname, strerror(errno));
1421 else if (wc < 0)
1422 fsrprintf(_("bad write of %d bytes "
1423 "to %s: %s\n"), wc_b4,
1424 tname, strerror(errno));
1425 else {
1426 /*
1427 * Might be out of space
1428 *
1429 * Try to finish write
1430 */
1431 int resid = ct-wc;
1432
1433 if ((wc = write(tfd, ((char *)fbuf)+wc,
1434 resid)) == resid) {
1435 /* worked on second attempt? */
1436 continue;
1437 }
1438 else if (wc < 0) {
1439 fsrprintf(_("bad write2 of %d "
1440 "bytes to %s: %s\n"),
1441 resid, tname,
1442 strerror(errno));
1443 } else {
1444 fsrprintf(_("bad copy to %s\n"),
1445 tname);
1446 }
1447 }
671632c6 1448 goto out;
c988ea91
CH
1449 }
1450 if (nfrags) {
1451 /* Do a matching write to the tmp file */
431ec4e6 1452 wc_b4 = wc;
c988ea91
CH
1453 if (((wc = write(ffd, fbuf, wc)) != wc_b4)) {
1454 fsrprintf(_("bad write of %d bytes "
1455 "to %s: %s\n"),
1456 wc_b4, ffname, strerror(errno));
1457 }
1458 }
1459 }
1460 }
3d303baa
ES
1461 if (ftruncate64(tfd, statp->bs_size) < 0) {
1462 fsrprintf(_("could not truncate tmpfile: %s : %s\n"),
1463 fname, strerror(errno));
1464 goto out;
1465 }
1466 if (fsync(tfd) < 0) {
1467 fsrprintf(_("could not fsync tmpfile: %s : %s\n"),
1468 fname, strerror(errno));
1469 goto out;
1470 }
c988ea91 1471
c988ea91
CH
1472 sx.sx_stat = *statp; /* struct copy */
1473 sx.sx_version = XFS_SX_VERSION;
1474 sx.sx_fdtarget = fd;
1475 sx.sx_fdtmp = tfd;
1476 sx.sx_offset = 0;
1477 sx.sx_length = statp->bs_size;
1478
1479 /* switch to the owner's id, to keep quota in line */
1480 if (fchown(tfd, statp->bs_uid, statp->bs_gid) < 0) {
1481 if (vflag)
1482 fsrprintf(_("failed to fchown tmpfile %s: %s\n"),
1483 tname, strerror(errno));
671632c6 1484 goto out;
c988ea91
CH
1485 }
1486
1487 /* Swap the extents */
1488 srval = xfs_swapext(fd, &sx);
1489 if (srval < 0) {
1490 if (errno == ENOTSUP) {
1491 if (vflag || dflag)
1492 fsrprintf(_("%s: file type not supported\n"), fname);
1493 } else if (errno == EFAULT) {
1494 /* The file has changed since we started the copy */
1495 if (vflag || dflag)
1496 fsrprintf(_("%s: file modified defrag aborted\n"),
1497 fname);
1498 } else if (errno == EBUSY) {
1499 /* Timestamp has changed or mmap'ed file */
1500 if (vflag || dflag)
1501 fsrprintf(_("%s: file busy\n"), fname);
1502 } else {
1503 fsrprintf(_("XFS_IOC_SWAPEXT failed: %s: %s\n"),
1504 fname, strerror(errno));
1505 }
671632c6 1506 goto out;
c988ea91
CH
1507 }
1508
1509 /* Report progress */
1510 if (vflag)
1511 fsrprintf(_("extents before:%d after:%d %s %s\n"),
1512 cur_nextents, new_nextents,
1513 (new_nextents <= nextents ? "DONE" : " " ),
1514 fname);
671632c6
ES
1515 retval = 0;
1516
1517out:
1518 free(fbuf);
1519 if (tfd != -1)
1520 close(tfd);
1521 if (ffd != -1)
1522 close(ffd);
1523 return retval;
c988ea91
CH
1524}
1525
1526char *
1527gettmpname(char *fname)
1528{
1529 static char buf[PATH_MAX+1];
1530 char sbuf[SMBUFSZ];
1531 char *ptr;
1532
1533 sprintf(sbuf, "/.fsr%d", getpid());
1534
6063feca
ES
1535 strncpy(buf, fname, PATH_MAX);
1536 buf[PATH_MAX] = '\0';
c988ea91
CH
1537 ptr = strrchr(buf, '/');
1538 if (ptr) {
1539 *ptr = '\0';
1540 } else {
1541 strcpy(buf, ".");
1542 }
1543
1544 if ((strlen(buf) + strlen (sbuf)) > PATH_MAX) {
1545 fsrprintf(_("tmp file name too long: %s\n"), fname);
1546 return(NULL);
1547 }
1548
1549 strcat(buf, sbuf);
1550
1551 return(buf);
1552}
1553
/*
 * Return the directory part of fname: everything before the last '/'.
 * A file directly under the root yields "/", and a name that contains
 * no '/' at all yields ".".
 *
 * The result is a static buffer that the next call overwrites.
 */
char *
getparent(char *fname)
{
	static char	buf[PATH_MAX+1];
	char		*slash;

	/* bounded copy; terminate by hand since strncpy may not */
	strncpy(buf, fname, PATH_MAX);
	buf[PATH_MAX] = '\0';

	slash = strrchr(buf, '/');
	if (slash == NULL) {
		strcpy(buf, ".");
		return(buf);
	}

	/* keep the '/' itself when it is the first character */
	if (slash == buf)
		slash++;
	*slash = '\0';

	return(buf);
}
1573
1574/*
1575 * Read in block map of the input file, coalesce contiguous
1576 * extents into a single range, keep all holes. Convert from 512 byte
1577 * blocks to bytes.
1578 *
1579 * This code was borrowed from mv.c with some minor mods.
1580 */
1581#define MAPSIZE 128
1582#define OUTMAP_SIZE_INCREMENT MAPSIZE
1583
1584int read_fd_bmap(int fd, xfs_bstat_t *sin, int *cur_nextents)
1585{
1586 int i, cnt;
1587 struct getbmap map[MAPSIZE];
1588
1589#define BUMP_CNT \
1590 if (++cnt >= outmap_size) { \
1591 outmap_size += OUTMAP_SIZE_INCREMENT; \
1592 outmap = (struct getbmap *)realloc(outmap, \
1593 outmap_size*sizeof(*outmap)); \
1594 if (outmap == NULL) { \
1595 fsrprintf(_("realloc failed: %s\n"), \
1596 strerror(errno)); \
1597 exit(1); \
1598 } \
1599 }
1600
1601 /* Initialize the outmap array. It always grows - never shrinks.
1602 * Left-over memory allocation is saved for the next files.
1603 */
1604 if (outmap_size == 0) {
1605 outmap_size = OUTMAP_SIZE_INCREMENT; /* Initial size */
1606 outmap = (struct getbmap *)malloc(outmap_size*sizeof(*outmap));
1607 if (!outmap) {
1608 fsrprintf(_("malloc failed: %s\n"),
1609 strerror(errno));
1610 exit(1);
1611 }
1612 }
1613
1614 outmap[0].bmv_block = 0;
1615 outmap[0].bmv_offset = 0;
1616 outmap[0].bmv_length = sin->bs_size;
1617
1618 /*
1619 * If a non regular file is involved then forget holes
1620 */
1621
1622 if (!S_ISREG(sin->bs_mode))
1623 return(1);
1624
1625 outmap[0].bmv_length = 0;
1626
1627 map[0].bmv_offset = 0;
1628 map[0].bmv_block = 0;
1629 map[0].bmv_entries = 0;
1630 map[0].bmv_count = MAPSIZE;
1631 map[0].bmv_length = -1;
1632
1633 cnt = 0;
1634 *cur_nextents = 0;
1635
1636 do {
1637 if (ioctl(fd, XFS_IOC_GETBMAP, map) < 0) {
1638 fsrprintf(_("failed reading extents: inode %llu"),
1639 (unsigned long long)sin->bs_ino);
1640 exit(1);
1641 }
1642
1643 /* Concatenate extents together and replicate holes into
1644 * the output map.
1645 */
1646 *cur_nextents += map[0].bmv_entries;
1647 for (i = 0; i < map[0].bmv_entries; i++) {
1648 if (map[i + 1].bmv_block == -1) {
1649 BUMP_CNT;
1650 outmap[cnt] = map[i+1];
1651 } else if (outmap[cnt].bmv_block == -1) {
1652 BUMP_CNT;
1653 outmap[cnt] = map[i+1];
1654 } else {
1655 outmap[cnt].bmv_length += map[i + 1].bmv_length;
1656 }
1657 }
1658 } while (map[0].bmv_entries == (MAPSIZE-1));
1659 for (i = 0; i <= cnt; i++) {
1660 outmap[i].bmv_offset = BBTOB(outmap[i].bmv_offset);
1661 outmap[i].bmv_length = BBTOB(outmap[i].bmv_length);
1662 }
1663
1664 outmap[cnt].bmv_length = sin->bs_size - outmap[cnt].bmv_offset;
1665
1666 return(cnt+1);
1667}
1668
1669/*
1670 * Read the block map and return the number of extents.
1671 */
1672int
1673getnextents(int fd)
1674{
1675 int nextents;
1676 struct getbmap map[MAPSIZE];
1677
1678 map[0].bmv_offset = 0;
1679 map[0].bmv_block = 0;
1680 map[0].bmv_entries = 0;
1681 map[0].bmv_count = MAPSIZE;
1682 map[0].bmv_length = -1;
1683
1684 nextents = 0;
1685
1686 do {
1687 if (ioctl(fd,XFS_IOC_GETBMAP, map) < 0) {
1688 fsrprintf(_("failed reading extents"));
1689 exit(1);
1690 }
1691
1692 nextents += map[0].bmv_entries;
1693 } while (map[0].bmv_entries == (MAPSIZE-1));
1694
1695 return(nextents);
1696}
1697
1698/*
1699 * Get the fs geometry
1700 */
1701int
1702xfs_getgeom(int fd, xfs_fsop_geom_v1_t * fsgeom)
1703{
1704 if (xfs_fsgeometry(fd, fsgeom) < 0) {
1705 return -1;
1706 }
1707 return 0;
1708}
1709
1710/*
1711 * Get xfs realtime space information
1712 */
1713int
1714xfs_getrt(int fd, struct statvfs64 *sfbp)
1715{
1716 unsigned long bsize;
1717 unsigned long factor;
1718 xfs_fsop_counts_t cnt;
1719
1720 if (!fsgeom.rtblocks)
1721 return -1;
1722
1723 if (xfs_fscounts(fd, &cnt) < 0) {
1724 close(fd);
1725 return -1;
1726 }
1727 bsize = (sfbp->f_frsize ? sfbp->f_frsize : sfbp->f_bsize);
1728 factor = fsgeom.blocksize / bsize; /* currently this is == 1 */
1729 sfbp->f_bfree = (cnt.freertx * fsgeom.rtextsize) * factor;
1730 return 0;
1731}
1732
1733int
1734fsrprintf(const char *fmt, ...)
1735{
1736 va_list ap;
1737
1738 va_start(ap, fmt);
1739 if (gflag) {
1740 static int didopenlog;
1741 if (!didopenlog) {
1742 openlog("fsr", LOG_PID, LOG_USER);
1743 didopenlog = 1;
1744 }
1745 vsyslog(LOG_INFO, fmt, ap);
1746 } else
1747 vprintf(fmt, ap);
1748 va_end(ap);
1749 return 0;
1750}
1751
c988ea91
CH
1752/*
1753 * Initialize a directory for tmp file use. This is used
1754 * by the full filesystem defragmentation when we're walking
1755 * the inodes and do not know the path for the individual
1756 * files. Multiple directories are used to spread out the
1757 * tmp data around to different ag's (since file data is
1758 * usually allocated to the same ag as the directory and
1759 * directories allocated round robin from the same
1760 * parent directory).
1761 */
1762static void
1763tmp_init(char *mnt)
1764{
1765 int i;
1766 static char buf[SMBUFSZ];
1767 mode_t mask;
1768
1769 tmp_agi = 0;
1770 sprintf(buf, "%s/.fsr", mnt);
1771
1772 mask = umask(0);
1773 if (mkdir(buf, 0700) < 0) {
1774 if (errno == EEXIST) {
1775 if (dflag)
1776 fsrprintf(_("tmpdir already exists: %s\n"),
1777 buf);
1778 } else {
1779 fsrprintf(_("could not create tmpdir: %s: %s\n"),
1780 buf, strerror(errno));
1781 exit(-1);
1782 }
1783 }
1784 for (i=0; i < fsgeom.agcount; i++) {
1785 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
7d59f3fd 1786 if (mkdir(buf, 0700) < 0) {
c988ea91
CH
1787 if (errno == EEXIST) {
1788 if (dflag)
1789 fsrprintf(
1790 _("tmpdir already exists: %s\n"), buf);
1791 } else {
1792 fsrprintf(_("cannot create tmpdir: %s: %s\n"),
1793 buf, strerror(errno));
1794 exit(-1);
1795 }
1796 }
1797 }
1798 (void)umask(mask);
1799 return;
1800}
1801
1802static char *
1803tmp_next(char *mnt)
1804{
1805 static char buf[SMBUFSZ];
1806
1807 sprintf(buf, "%s/.fsr/ag%d/tmp%d",
1808 ( (strcmp(mnt, "/") == 0) ? "" : mnt),
1809 tmp_agi,
1810 getpid());
1811
1812 if (++tmp_agi == fsgeom.agcount)
1813 tmp_agi = 0;
1814
1815 return(buf);
1816}
1817
1818static void
1819tmp_close(char *mnt)
1820{
1821 static char buf[SMBUFSZ];
1822 int i;
1823
1824 /* No data is ever actually written so we can just do rmdir's */
1825 for (i=0; i < fsgeom.agcount; i++) {
1826 sprintf(buf, "%s/.fsr/ag%d", mnt, i);
1827 if (rmdir(buf) < 0) {
1828 if (errno != ENOENT) {
1829 fsrprintf(
1830 _("could not remove tmpdir: %s: %s\n"),
1831 buf, strerror(errno));
1832 }
1833 }
1834 }
1835 sprintf(buf, "%s/.fsr", mnt);
1836 if (rmdir(buf) < 0) {
1837 if (errno != ENOENT) {
1838 fsrprintf(_("could not remove tmpdir: %s: %s\n"),
1839 buf, strerror(errno));
1840 }
1841 }
1842}