2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "xfs/libxfs.h"
20 #include <xfs/libxlog.h>
21 #include <sys/resource.h>
29 #include "err_protos.h"
35 #define rounddown(x, y) (((x)/(y))*(y))
37 #define XR_MAX_SECT_SIZE (64 * 1024)
40 * option tables for getsubopt calls
44 * -o: user-supplied override options
46 static char *o_opts
[] = {
59 #define PHASE2_THREADS 6
65 * -c: conversion options
67 static char *c_opts
[] = {
68 #define CONVERT_LAZY_COUNT 0
74 static int bhash_option_used
;
75 static long max_mem_specified
; /* in megabytes */
76 static int phase2_threads
= 32;
82 "Usage: %s [options] device\n"
85 " -f The device is a file\n"
86 " -L Force log zeroing. Do this as a last resort.\n"
87 " -l logdev Specifies the device where the external log resides.\n"
88 " -m maxmem Maximum amount of memory to be used in megabytes.\n"
89 " -n No modify mode, just checks the filesystem for damage.\n"
90 " -P Disables prefetching.\n"
91 " -r rtdev Specifies the device where the realtime section resides.\n"
92 " -v Verbose output.\n"
93 " -c subopts Change filesystem parameters - use xfs_admin.\n"
94 " -o subopts Override default behaviour, refer to man page.\n"
95 " -t interval Reporting interval in seconds.\n"
96 " -d Repair dangerously.\n"
97 " -V Reports version and exits.\n"), progname
);
102 err_string(int err_code
)
104 static char *err_message
[XR_BAD_ERR_CODE
];
108 err_message
[XR_OK
] = _("no error");
109 err_message
[XR_BAD_MAGIC
] = _("bad magic number");
110 err_message
[XR_BAD_BLOCKSIZE
] = _("bad blocksize field");
111 err_message
[XR_BAD_BLOCKLOG
] = _("bad blocksize log field");
112 err_message
[XR_BAD_VERSION
] = _("bad or unsupported version");
113 err_message
[XR_BAD_INPROGRESS
] =
114 _("filesystem mkfs-in-progress bit set");
115 err_message
[XR_BAD_FS_SIZE_DATA
] =
116 _("inconsistent filesystem geometry information");
117 err_message
[XR_BAD_INO_SIZE_DATA
] =
118 _("bad inode size or inconsistent with number of inodes/block"),
119 err_message
[XR_BAD_SECT_SIZE_DATA
] = _("bad sector size");
120 err_message
[XR_AGF_GEO_MISMATCH
] =
121 _("AGF geometry info conflicts with filesystem geometry");
122 err_message
[XR_AGI_GEO_MISMATCH
] =
123 _("AGI geometry info conflicts with filesystem geometry");
124 err_message
[XR_SB_GEO_MISMATCH
] =
125 _("AG superblock geometry info conflicts with filesystem geometry");
126 err_message
[XR_EOF
] = _("attempted to perform I/O beyond EOF");
127 err_message
[XR_BAD_RT_GEO_DATA
] =
128 _("inconsistent filesystem geometry in realtime filesystem component");
129 err_message
[XR_BAD_INO_MAX_PCT
] =
130 _("maximum indicated percentage of inodes > 100%");
131 err_message
[XR_BAD_INO_ALIGN
] =
132 _("inconsistent inode alignment value");
133 err_message
[XR_INSUFF_SEC_SB
] =
134 _("not enough secondary superblocks with matching geometry");
135 err_message
[XR_BAD_SB_UNIT
] =
136 _("bad stripe unit in superblock");
137 err_message
[XR_BAD_SB_WIDTH
] =
138 _("bad stripe width in superblock");
139 err_message
[XR_BAD_SVN
] =
140 _("bad shared version number in superblock");
141 err_message
[XR_BAD_CRC
] =
142 _("bad CRC in superblock");
146 if (err_code
< XR_OK
|| err_code
>= XR_BAD_ERR_CODE
)
147 do_abort(_("bad error code - %d\n"), err_code
);
149 return(err_message
[err_code
]);
153 noval(char opt
, char *tbl
[], int idx
)
155 do_warn(_("-%c %s option cannot have a value\n"), opt
, tbl
[idx
]);
160 respec(char opt
, char *tbl
[], int idx
)
162 do_warn("-%c ", opt
);
164 do_warn("%s ", tbl
[idx
]);
165 do_warn(_("option respecified\n"));
170 unknown(char opt
, char *s
)
172 do_warn(_("unknown option -%c %s\n"), opt
, s
);
177 * sets only the global argument flags and variables
180 process_args(int argc
, char **argv
)
193 full_ino_ex_data
= 0;
201 fs_attributes_allowed
= 1;
202 fs_attributes2_allowed
= 1;
203 fs_quotas_allowed
= 1;
204 fs_aligned_inodes_allowed
= 1;
205 fs_sb_feature_bits_allowed
= 1;
206 fs_has_extflgbit_allowed
= 1;
208 fs_shared_allowed
= 1;
211 report_interval
= PROG_RPT_DEFAULT
;
214 * XXX have to add suboption processing here
215 * attributes, quotas, nlinks, aligned_inos, sb_fbits
217 while ((c
= getopt(argc
, argv
, "c:o:fl:m:r:LnDvVdPt:")) != EOF
) {
227 switch (getsubopt(&p
, (constpp
)o_opts
, &val
)) {
230 noval('o', o_opts
, ASSUME_XFS
);
232 respec('o', o_opts
, ASSUME_XFS
);
237 noval('o', o_opts
, PRE_65_BETA
);
245 _("-o ihash option has been removed and will be ignored\n"));
248 if (max_mem_specified
)
250 _("-o bhash option cannot be used with -m option\n"));
251 libxfs_bhash_size
= (int)strtol(val
, NULL
, 0);
252 bhash_option_used
= 1;
255 ag_stride
= (int)strtol(val
, NULL
, 0);
259 noval('o', o_opts
, FORCE_GEO
);
261 respec('o', o_opts
, FORCE_GEO
);
265 phase2_threads
= (int)strtol(val
, NULL
, 0);
278 switch (getsubopt(&p
, (constpp
)c_opts
, &val
)) {
279 case CONVERT_LAZY_COUNT
:
280 lazy_count
= (int)strtol(val
, NULL
, 0);
281 convert_lazy_count
= 1;
301 if (bhash_option_used
)
302 do_abort(_("-m option cannot be used with "
303 "-o bhash option\n"));
304 max_mem_specified
= strtol(optarg
, NULL
, 0);
319 printf(_("%s version %s\n"), progname
, VERSION
);
325 report_interval
= (int)strtol(optarg
, NULL
, 0);
332 if (argc
- optind
!= 1)
335 if ((fs_name
= argv
[optind
]) == NULL
)
339 void __attribute__((noreturn
))
340 do_error(char const *msg
, ...)
344 fprintf(stderr
, _("\nfatal error -- "));
347 vfprintf(stderr
, msg
, args
);
354 * like do_error, only the error is internal, no system
355 * error so no oserror processing
357 void __attribute__((noreturn
))
358 do_abort(char const *msg
, ...)
363 vfprintf(stderr
, msg
, args
);
370 do_warn(char const *msg
, ...)
377 vfprintf(stderr
, msg
, args
);
384 do_log(char const *msg
, ...)
389 vfprintf(stderr
, msg
, args
);
394 calc_mkfs(xfs_mount_t
*mp
)
396 xfs_agblock_t fino_bno
;
399 do_inoalign
= mp
->m_sinoalign
;
402 * Pre-calculate the geometry of ag 0. We know what it looks like
403 * because we know what mkfs does: 2 allocation btree roots (by block
404 * and by size), the inode allocation btree root, the free inode
405 * allocation btree root (if enabled) and some number of blocks to
408 bnobt_root
= howmany(4 * mp
->m_sb
.sb_sectsize
, mp
->m_sb
.sb_blocksize
);
409 bcntbt_root
= bnobt_root
+ 1;
410 inobt_root
= bnobt_root
+ 2;
411 fino_bno
= inobt_root
+ XFS_MIN_FREELIST_RAW(1, 1, mp
) + 1;
412 if (xfs_sb_version_hasfinobt(&mp
->m_sb
))
416 * If the log is allocated in the first allocation group we need to
417 * add the number of blocks used by the log to the above calculation.
419 * This can happen with filesystems that only have a single
420 * allocation group, or very odd geometries created by old mkfs
421 * versions on very small filesystems.
423 if (mp
->m_sb
.sb_logstart
&&
424 XFS_FSB_TO_AGNO(mp
, mp
->m_sb
.sb_logstart
) == 0) {
427 * XXX(hch): verify that sb_logstart makes sense?
429 fino_bno
+= mp
->m_sb
.sb_logblocks
;
433 * ditto the location of the first inode chunks in the fs ('/')
435 if (xfs_sb_version_hasdalign(&mp
->m_sb
) && do_inoalign
) {
436 first_prealloc_ino
= XFS_OFFBNO_TO_AGINO(mp
, roundup(fino_bno
,
437 mp
->m_sb
.sb_unit
), 0);
438 } else if (xfs_sb_version_hasalign(&mp
->m_sb
) &&
439 mp
->m_sb
.sb_inoalignmt
> 1) {
440 first_prealloc_ino
= XFS_OFFBNO_TO_AGINO(mp
,
442 mp
->m_sb
.sb_inoalignmt
),
445 first_prealloc_ino
= XFS_OFFBNO_TO_AGINO(mp
, fino_bno
, 0);
448 ASSERT(mp
->m_ialloc_blks
> 0);
450 if (mp
->m_ialloc_blks
> 1)
451 last_prealloc_ino
= first_prealloc_ino
+ XFS_INODES_PER_CHUNK
;
453 last_prealloc_ino
= XFS_OFFBNO_TO_AGINO(mp
, fino_bno
+ 1, 0);
456 * now the first 3 inodes in the system
458 if (mp
->m_sb
.sb_rootino
!= first_prealloc_ino
) {
460 _("sb root inode value %" PRIu64
" %sinconsistent with calculated value %u\n"),
462 (mp
->m_sb
.sb_rootino
== NULLFSINO
? "(NULLFSINO) ":""),
467 _("resetting superblock root inode pointer to %u\n"),
471 _("would reset superblock root inode pointer to %u\n"),
475 * just set the value -- safe since the superblock
476 * doesn't get flushed out if no_modify is set
478 mp
->m_sb
.sb_rootino
= first_prealloc_ino
;
481 if (mp
->m_sb
.sb_rbmino
!= first_prealloc_ino
+ 1) {
483 _("sb realtime bitmap inode %" PRIu64
" %sinconsistent with calculated value %u\n"),
485 (mp
->m_sb
.sb_rbmino
== NULLFSINO
? "(NULLFSINO) ":""),
486 first_prealloc_ino
+ 1);
490 _("resetting superblock realtime bitmap ino pointer to %u\n"),
491 first_prealloc_ino
+ 1);
494 _("would reset superblock realtime bitmap ino pointer to %u\n"),
495 first_prealloc_ino
+ 1);
498 * just set the value -- safe since the superblock
499 * doesn't get flushed out if no_modify is set
501 mp
->m_sb
.sb_rbmino
= first_prealloc_ino
+ 1;
504 if (mp
->m_sb
.sb_rsumino
!= first_prealloc_ino
+ 2) {
506 _("sb realtime summary inode %" PRIu64
" %sinconsistent with calculated value %u\n"),
508 (mp
->m_sb
.sb_rsumino
== NULLFSINO
? "(NULLFSINO) ":""),
509 first_prealloc_ino
+ 2);
513 _("resetting superblock realtime summary ino pointer to %u\n"),
514 first_prealloc_ino
+ 2);
517 _("would reset superblock realtime summary ino pointer to %u\n"),
518 first_prealloc_ino
+ 2);
521 * just set the value -- safe since the superblock
522 * doesn't get flushed out if no_modify is set
524 mp
->m_sb
.sb_rsumino
= first_prealloc_ino
+ 2;
530 main(int argc
, char **argv
)
532 xfs_mount_t
*temp_mp
;
541 progname
= basename(argv
[0]);
542 setlocale(LC_ALL
, "");
543 bindtextdomain(PACKAGE
, LOCALEDIR
);
545 dinode_bmbt_translation_init();
548 setbuf(stdout
, NULL
);
550 process_args(argc
, argv
);
553 msgbuf
= malloc(DURATION_BUF_SIZE
);
555 timestamp(PHASE_START
, 0, NULL
);
556 timestamp(PHASE_END
, 0, NULL
);
558 /* do phase1 to make sure we have a superblock */
560 timestamp(PHASE_END
, 1, NULL
);
562 if (no_modify
&& primary_sb_modified
) {
563 do_warn(_("Primary superblock would have been modified.\n"
564 "Cannot proceed further in no_modify mode.\n"
569 rval
= get_sb(&psb
, 0, XFS_MAX_SECTORSIZE
, 0);
571 do_warn(_("Primary superblock bad after phase 1!\n"
576 /* -f forces this, but let's be nice and autodetect it, as well. */
578 int fd
= libxfs_device_to_fd(x
.ddev
);
579 struct stat64 statbuf
;
581 if (fstat64(fd
, &statbuf
) < 0)
582 do_warn(_("%s: couldn't stat \"%s\"\n"),
584 else if (S_ISREG(statbuf
.st_mode
))
589 * if the sector size of the filesystem we are trying to repair is
590 * smaller than that of the underlying filesystem (i.e. we are repairing
591 * an image), then we have to turn off direct IO because we cannot do IO
592 * smaller than the host filesystem's sector size.
595 int fd
= libxfs_device_to_fd(x
.ddev
);
596 struct xfs_fsop_geom_v1 geom
= { 0 };
598 if (ioctl(fd
, XFS_IOC_FSGEOMETRY_V1
, &geom
) < 0) {
599 do_warn(_("Cannot get host filesystem geometry.\n"
600 "Repair may fail if there is a sector size mismatch between\n"
601 "the image and the host filesystem.\n"));
602 geom
.sectsize
= BBSIZE
;
605 if (psb
.sb_sectsize
< geom
.sectsize
) {
608 old_flags
= fcntl(fd
, F_GETFL
, 0);
609 if (fcntl(fd
, F_SETFL
, old_flags
& ~O_DIRECT
) < 0) {
611 "Sector size on host filesystem larger than image sector size.\n"
612 "Cannot turn off direct IO, so exiting.\n"));
618 /* prepare the mount structure */
619 memset(&xfs_m
, 0, sizeof(xfs_mount_t
));
620 mp
= libxfs_mount(&xfs_m
, &psb
, x
.ddev
, x
.logdev
, x
.rtdev
, 0);
624 _("%s: cannot repair this filesystem. Sorry.\n"),
630 * set XFS-independent status vars from the mount/sb structure
632 glob_agcount
= mp
->m_sb
.sb_agcount
;
634 chunks_pblock
= mp
->m_sb
.sb_inopblock
/ XFS_INODES_PER_CHUNK
;
635 max_symlink_blocks
= libxfs_symlink_blocks(mp
, MAXPATHLEN
);
636 inodes_per_cluster
= MAX(mp
->m_sb
.sb_inopblock
,
637 mp
->m_inode_cluster_size
>> mp
->m_sb
.sb_inodelog
);
640 * Automatic striding for high agcount filesystems.
642 * More AGs indicates that the filesystem is either large or can handle
643 * more IO parallelism. Either way, we should try to process multiple
644 * AGs at a time in such a configuration to try to saturate the
645 * underlying storage and speed the repair process. Only do this if
646 * prefetching is enabled.
648 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
649 * to target these for an increase in thread count. Hence a stride value
650 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
651 * on such filesystems.
653 * Limit the maximum thread count based on the CPU power that
654 * is available. If we use too many threads, we might run out of memory
655 * and CPU power before we run out of IO concurrency. We limit to 8
656 * threads/CPU as this is enough threads to saturate a CPU on fast
657 * devices, yet few enough that it will saturate but won't overload slow
660 if (!ag_stride
&& glob_agcount
>= 16 && do_prefetch
)
664 int max_threads
= platform_nproc() * 8;
666 thread_count
= (glob_agcount
+ ag_stride
- 1) / ag_stride
;
667 while (thread_count
> max_threads
) {
669 thread_count
= (glob_agcount
+ ag_stride
- 1) /
672 if (thread_count
> 0)
680 if (ag_stride
&& report_interval
) {
683 do_log(_(" - reporting progress in intervals of %s\n"),
684 duration(report_interval
, msgbuf
));
689 * Adjust libxfs cache sizes based on system memory,
690 * filesystem size and inode count.
692 * We'll set the cache size based on 3/4s the memory minus
693 * space used by the inode AVL tree and block usage map.
695 * Inode AVL tree space is approximately 4 bytes per inode,
696 * block usage map is currently 1 byte for 2 blocks.
698 * We assume most blocks will be inode clusters.
700 * Calculations are done in kilobyte units.
703 if (!bhash_option_used
|| max_mem_specified
) {
704 unsigned long mem_used
;
705 unsigned long max_mem
;
708 libxfs_bcache_purge();
709 cache_destroy(libxfs_bcache
);
711 mem_used
= (mp
->m_sb
.sb_icount
>> (10 - 2)) +
712 (mp
->m_sb
.sb_dblocks
>> (10 + 1)) +
713 50000; /* rough estimate of 50MB overhead */
714 max_mem
= max_mem_specified
? max_mem_specified
* 1024 :
715 libxfs_physmem() * 3 / 4;
717 if (getrlimit(RLIMIT_AS
, &rlim
) != -1 &&
718 rlim
.rlim_cur
!= RLIM_INFINITY
) {
719 rlim
.rlim_cur
= rlim
.rlim_max
;
720 setrlimit(RLIMIT_AS
, &rlim
);
721 /* use approximately 80% of rlimit to avoid overrun */
722 max_mem
= MIN(max_mem
, rlim
.rlim_cur
/ 1280);
724 max_mem
= MIN(max_mem
, (LONG_MAX
>> 10) + 1);
728 _(" - max_mem = %lu, icount = %" PRIu64
", imem = %" PRIu64
", dblock = %" PRIu64
", dmem = %" PRIu64
"\n"),
729 max_mem
, mp
->m_sb
.sb_icount
,
730 mp
->m_sb
.sb_icount
>> (10 - 2),
732 mp
->m_sb
.sb_dblocks
>> (10 + 1));
734 if (max_mem
<= mem_used
) {
735 if (max_mem_specified
) {
737 _("Required memory for repair is greater that the maximum specified\n"
738 "with the -m option. Please increase it to at least %lu.\n"),
742 _("Memory available for repair (%luMB) may not be sufficient.\n"
743 "At least %luMB is needed to repair this filesystem efficiently\n"
744 "If repair fails due to lack of memory, please\n"),
745 max_mem
/ 1024, mem_used
/ 1024);
748 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
751 _("increase system RAM and/or swap space to at least %luMB.\n"),
752 mem_used
* 2 / 1024);
758 if (max_mem
>= (1 << 30))
760 libxfs_bhash_size
= max_mem
/ (HASH_CACHE_RATIO
*
761 (mp
->m_inode_cluster_size
>> 10));
762 if (libxfs_bhash_size
< 512)
763 libxfs_bhash_size
= 512;
766 do_log(_(" - block cache size set to %d entries\n"),
767 libxfs_bhash_size
* HASH_CACHE_RATIO
);
769 libxfs_bcache
= cache_init(0, libxfs_bhash_size
,
770 &libxfs_bcache_operations
);
774 * calculate what mkfs would do to this filesystem
779 * initialize block alloc map
785 /* initialize random globals now that we know the fs geometry */
786 inodes_per_block
= mp
->m_sb
.sb_inopblock
;
788 if (parse_sb_version(&mp
->m_sb
)) {
790 _("Found unsupported filesystem features. Exiting now.\n"));
794 /* make sure the per-ag freespace maps are ok so we can mount the fs */
795 phase2(mp
, phase2_threads
);
796 timestamp(PHASE_END
, 2, NULL
);
802 timestamp(PHASE_END
, 3, NULL
);
805 timestamp(PHASE_END
, 4, NULL
);
808 printf(_("No modify flag set, skipping phase 5\n"));
812 timestamp(PHASE_END
, 5, NULL
);
815 * Done with the block usage maps, toss them...
819 if (!bad_ino_btree
) {
821 timestamp(PHASE_END
, 6, NULL
);
824 timestamp(PHASE_END
, 7, NULL
);
827 _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
830 if (lost_quotas
&& !have_uquotino
&& !have_gquotino
&& !have_pquotino
) {
833 _("Warning: no quota inodes were found. Quotas disabled.\n"));
836 _("Warning: no quota inodes were found. Quotas would be disabled.\n"));
838 } else if (lost_quotas
) {
841 _("Warning: quota inodes were cleared. Quotas disabled.\n"));
844 _("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
850 _("Warning: user quota information was cleared.\n"
851 "User quotas can not be enforced until limit information is recreated.\n"));
854 _("Warning: user quota information would be cleared.\n"
855 "User quotas could not be enforced until limit information was recreated.\n"));
862 _("Warning: group quota information was cleared.\n"
863 "Group quotas can not be enforced until limit information is recreated.\n"));
866 _("Warning: group quota information would be cleared.\n"
867 "Group quotas could not be enforced until limit information was recreated.\n"));
874 _("Warning: project quota information was cleared.\n"
875 "Project quotas can not be enforced until limit information is recreated.\n"));
878 _("Warning: project quota information would be cleared.\n"
879 "Project quotas could not be enforced until limit information was recreated.\n"));
884 if (ag_stride
&& report_interval
)
889 _("No modify flag set, skipping filesystem flush and exiting.\n"));
899 * Clear the quota flags if they're on.
901 sbp
= libxfs_getsb(mp
, 0);
903 do_error(_("couldn't get superblock\n"));
905 dsb
= XFS_BUF_TO_SBP(sbp
);
907 if (be16_to_cpu(dsb
->sb_qflags
) & XFS_ALL_QUOTA_CHKD
) {
908 do_warn(_("Note - quota info will be regenerated on next "
910 dsb
->sb_qflags
&= cpu_to_be16(~XFS_ALL_QUOTA_CHKD
);
915 _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
916 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
917 be32_to_cpu(dsb
->sb_unit
), be32_to_cpu(dsb
->sb_width
));
920 libxfs_writebuf(sbp
, 0);
923 * Done, flush all cached buffers and inodes.
925 libxfs_bcache_flush();
929 libxfs_device_close(x
.rtdev
);
930 if (x
.logdev
&& x
.logdev
!= x
.ddev
)
931 libxfs_device_close(x
.logdev
);
932 libxfs_device_close(x
.ddev
);
938 if (dangerously
&& !no_modify
)
940 _("Repair of readonly mount complete. Immediate reboot encouraged.\n"));