/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like. Any license provided herein, whether implied or
- * otherwise, applies only to this software file. Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA 94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <xfs/libxlog.h>
+#include "libxfs.h"
+#include "libxlog.h"
+#include <sys/resource.h>
+#include "xfs_multidisk.h"
#include "avl.h"
#include "avl64.h"
#include "globals.h"
#include "protos.h"
#include "incore.h"
#include "err_protos.h"
+#include "prefetch.h"
+#include "threads.h"
+#include "progress.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
#define rounddown(x, y) (((x)/(y))*(y))
-extern void phase1(xfs_mount_t *);
-extern void phase2(xfs_mount_t *);
-extern void phase3(xfs_mount_t *);
-extern void phase4(xfs_mount_t *);
-extern void phase5(xfs_mount_t *);
-extern void phase6(xfs_mount_t *);
-extern void phase7(xfs_mount_t *);
-extern void incore_init(xfs_mount_t *);
-
#define XR_MAX_SECT_SIZE (64 * 1024)
/*
*/
/*
- * -o (user-supplied override options)
+ * -o: user-supplied override options
*/
-
-char *o_opts[] = {
+static char *o_opts[] = {
#define ASSUME_XFS 0
"assume_xfs",
#define PRE_65_BETA 1
"fs_is_pre_65_beta",
+#define IHASH_SIZE 2
+ "ihash",
+#define BHASH_SIZE 3
+ "bhash",
+#define AG_STRIDE 4
+ "ag_stride",
+#define FORCE_GEO 5
+ "force_geometry",
+#define PHASE2_THREADS 6
+ "phase2_threads",
NULL
};
+/*
+ * -c: conversion options
+ */
+static char *c_opts[] = {
+#define CONVERT_LAZY_COUNT 0
+ "lazycount",
+ NULL
+};
+
+
+static int bhash_option_used;
+static long max_mem_specified; /* in megabytes */
+static int phase2_threads = 32;
+static bool report_corrected;
+
static void
usage(void)
{
- do_warn(
-_("Usage: %s [-nLvV] [-o subopt[=value]] [-l logdev] [-r rtdev] devname\n"),
- progname);
+ do_warn(_(
+"Usage: %s [options] device\n"
+"\n"
+"Options:\n"
+" -f The device is a file\n"
+" -L Force log zeroing. Do this as a last resort.\n"
+" -l logdev Specifies the device where the external log resides.\n"
+" -m maxmem Maximum amount of memory to be used in megabytes.\n"
+" -n No modify mode, just checks the filesystem for damage.\n"
+" (Cannot be used together with -e.)\n"
+" -P Disables prefetching.\n"
+" -r rtdev Specifies the device where the realtime section resides.\n"
+" -v Verbose output.\n"
+" -c subopts Change filesystem parameters - use xfs_admin.\n"
+" -o subopts Override default behaviour, refer to man page.\n"
+" -t interval Reporting interval in seconds.\n"
+" -d Repair dangerously.\n"
+" -e Exit with a non-zero code if any errors were repaired.\n"
+" (Cannot be used together with -n.)\n"
+" -V Reports version and exits.\n"), progname);
exit(1);
}
err_message[XR_BAD_MAGIC] = _("bad magic number");
err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
- err_message[XR_BAD_VERSION] = _("bad version number");
+ err_message[XR_BAD_VERSION] = _("bad or unsupported version");
err_message[XR_BAD_INPROGRESS] =
_("filesystem mkfs-in-progress bit set");
err_message[XR_BAD_FS_SIZE_DATA] =
_("bad stripe width in superblock");
err_message[XR_BAD_SVN] =
_("bad shared version number in superblock");
+ err_message[XR_BAD_CRC] =
+ _("bad CRC in superblock");
+ err_message[XR_BAD_DIR_SIZE_DATA] =
+ _("inconsistent directory geometry information");
done = 1;
}
/*
* sets only the global argument flags and variables
*/
-void
+static void
process_args(int argc, char **argv)
{
char *p;
isa_file = 0;
zap_log = 0;
dumpcore = 0;
- full_backptrs = 0;
+ full_ino_ex_data = 0;
delete_attr_ok = 1;
force_geo = 0;
assume_xfs = 0;
- clear_sunit = 0;
+ copied_sunit = 0;
sb_inoalignmt = 0;
sb_unit = 0;
sb_width = 0;
- fs_attributes_allowed = 1;
- fs_inode_nlink_allowed = 1;
- fs_quotas_allowed = 1;
- fs_aligned_inodes_allowed = 1;
- fs_sb_feature_bits_allowed = 1;
- fs_has_extflgbit_allowed = 1;
pre_65_beta = 0;
fs_shared_allowed = 1;
+ ag_stride = 0;
+ thread_count = 1;
+ report_interval = PROG_RPT_DEFAULT;
+ report_corrected = false;
/*
* XXX have to add suboption processing here
* attributes, quotas, nlinks, aligned_inos, sb_fbits
*/
- while ((c = getopt(argc, argv, "o:fl:r:LnDvVd")) != EOF) {
+ while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
switch (c) {
case 'D':
dumpcore = 1;
while (*p != '\0') {
char *val;
- switch (getsubopt(&p, (constpp)o_opts, &val)) {
+ switch (getsubopt(&p, o_opts, &val)) {
case ASSUME_XFS:
if (val)
noval('o', o_opts, ASSUME_XFS);
PRE_65_BETA);
pre_65_beta = 1;
break;
+ case IHASH_SIZE:
+ do_warn(
+ _("-o ihash option has been removed and will be ignored\n"));
+ break;
+ case BHASH_SIZE:
+ if (max_mem_specified)
+ do_abort(
+ _("-o bhash option cannot be used with -m option\n"));
+ libxfs_bhash_size = (int)strtol(val, NULL, 0);
+ bhash_option_used = 1;
+ break;
+ case AG_STRIDE:
+ ag_stride = (int)strtol(val, NULL, 0);
+ break;
+ case FORCE_GEO:
+ if (val)
+ noval('o', o_opts, FORCE_GEO);
+ if (force_geo)
+ respec('o', o_opts, FORCE_GEO);
+ force_geo = 1;
+ break;
+ case PHASE2_THREADS:
+ phase2_threads = (int)strtol(val, NULL, 0);
+ break;
default:
unknown('o', val);
break;
}
}
break;
+ case 'c':
+ p = optarg;
+ while (*p) {
+ char *val;
+
+ switch (getsubopt(&p, c_opts, &val)) {
+ case CONVERT_LAZY_COUNT:
+ lazy_count = (int)strtol(val, NULL, 0);
+ convert_lazy_count = 1;
+ break;
+ default:
+ unknown('c', val);
+ break;
+ }
+ }
+ break;
case 'l':
log_name = optarg;
log_spec = 1;
case 'f':
isa_file = 1;
break;
+ case 'm':
+ if (bhash_option_used)
+ do_abort(_("-m option cannot be used with "
+ "-o bhash option\n"));
+ max_mem_specified = strtol(optarg, NULL, 0);
+ break;
case 'L':
zap_log = 1;
break;
dangerously = 1;
break;
case 'v':
- verbose = 1;
+ verbose++;
break;
case 'V':
printf(_("%s version %s\n"), progname, VERSION);
exit(0);
+ case 'P':
+ do_prefetch = 0;
+ break;
+ case 't':
+ report_interval = (int)strtol(optarg, NULL, 0);
+ break;
+ case 'e':
+ report_corrected = true;
+ break;
case '?':
usage();
}
if ((fs_name = argv[optind]) == NULL)
usage();
-}
-void
-do_msg(int do_abort, char const *msg, va_list args)
-{
- vfprintf(stderr, msg, args);
-
- if (do_abort) {
- if (dumpcore)
- abort();
- exit(1);
- }
+ if (report_corrected && no_modify)
+ usage();
}
-void
+void __attribute__((noreturn))
do_error(char const *msg, ...)
{
va_list args;
fprintf(stderr, _("\nfatal error -- "));
va_start(args, msg);
- do_msg(1, msg, args);
+ vfprintf(stderr, msg, args);
+ if (dumpcore)
+ abort();
+ exit(1);
}
/*
* like do_error, only the error is internal, no system
* error so no oserror processing
*/
-void
+void __attribute__((noreturn))
do_abort(char const *msg, ...)
{
va_list args;
va_start(args, msg);
- do_msg(1, msg, args);
+ vfprintf(stderr, msg, args);
+ if (dumpcore)
+ abort();
+ exit(1);
}
void
fs_is_dirty = 1;
va_start(args, msg);
- do_msg(0, msg, args);
+ vfprintf(stderr, msg, args);
va_end(args);
}
va_list args;
va_start(args, msg);
- do_msg(0, msg, args);
+ vfprintf(stderr, msg, args);
va_end(args);
}
-void
+static void
calc_mkfs(xfs_mount_t *mp)
{
xfs_agblock_t fino_bno;
do_inoalign = mp->m_sinoalign;
/*
- * pre-calculate geometry of ag 0. We know what it looks
- * like because we know what mkfs does -- 3 btree roots,
- * and some number of blocks to prefill the agfl.
+ * Pre-calculate the geometry of ag 0. We know what it looks like
+ * because we know what mkfs does: 2 allocation btree roots (by block
+ * and by size), the inode allocation btree root, the free inode
+ * allocation btree root (if enabled) and some number of blocks to
+ * prefill the agfl.
+ *
+ * Because the current shape of the btrees may differ from the shape
+ * mkfs created, we open code the mkfs freelist block count here. mkfs
+ * creates single level trees, so the calculation is pretty
+ * straightforward for the trees that use the AGFL.
*/
bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
bcntbt_root = bnobt_root + 1;
inobt_root = bnobt_root + 2;
- fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1;
+ fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ fino_bno++;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
+ fino_bno++;
+ }
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ fino_bno++;
+
+ /*
+ * If the log is allocated in the first allocation group we need to
+ * add the number of blocks used by the log to the above calculation.
+ *
+ * This can happen with filesystems that only have a single
+ * allocation group, or very odd geometries created by old mkfs
+ * versions on very small filesystems.
+ */
+ if (mp->m_sb.sb_logstart &&
+ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
+
+ /*
+ * XXX(hch): verify that sb_logstart makes sense?
+ */
+ fino_bno += mp->m_sb.sb_logblocks;
+ }
/*
* ditto the location of the first inode chunks in the fs ('/')
*/
- if (XFS_SB_VERSION_HASDALIGN(&mp->m_sb) && do_inoalign) {
+ if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
mp->m_sb.sb_unit), 0);
- } else if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+ } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
mp->m_sb.sb_inoalignmt > 1) {
first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
roundup(fino_bno,
first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
}
- ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+ ASSERT(mp->m_ialloc_blks > 0);
- if (XFS_IALLOC_BLOCKS(mp) > 1)
+ if (mp->m_ialloc_blks > 1)
last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
else
last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
*/
if (mp->m_sb.sb_rootino != first_prealloc_ino) {
do_warn(
-_("sb root inode value %llu %sinconsistent with calculated value %lu\n"),
+_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
mp->m_sb.sb_rootino,
(mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
first_prealloc_ino);
if (!no_modify)
do_warn(
- _("resetting superblock root inode pointer to %lu\n"),
+ _("resetting superblock root inode pointer to %u\n"),
first_prealloc_ino);
else
do_warn(
- _("would reset superblock root inode pointer to %lu\n"),
+ _("would reset superblock root inode pointer to %u\n"),
first_prealloc_ino);
/*
if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
do_warn(
-_("sb realtime bitmap inode %llu %sinconsistent with calculated value %lu\n"),
+_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
mp->m_sb.sb_rbmino,
(mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
first_prealloc_ino + 1);
if (!no_modify)
do_warn(
- _("resetting superblock realtime bitmap ino pointer to %lu\n"),
+ _("resetting superblock realtime bitmap ino pointer to %u\n"),
first_prealloc_ino + 1);
else
do_warn(
- _("would reset superblock realtime bitmap ino pointer to %lu\n"),
+ _("would reset superblock realtime bitmap ino pointer to %u\n"),
first_prealloc_ino + 1);
/*
if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
do_warn(
-_("sb realtime summary inode %llu %sinconsistent with calculated value %lu\n"),
- mp->m_sb.sb_rsumino,
- (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
- first_prealloc_ino + 2);
+_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
+ mp->m_sb.sb_rsumino,
+ (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
+ first_prealloc_ino + 2);
if (!no_modify)
do_warn(
- _("resetting superblock realtime summary ino pointer to %lu\n"),
+ _("resetting superblock realtime summary ino pointer to %u\n"),
first_prealloc_ino + 2);
else
do_warn(
- _("would reset superblock realtime summary ino pointer to %lu\n"),
+ _("would reset superblock realtime summary ino pointer to %u\n"),
first_prealloc_ino + 2);
/*
}
+/*
+ * Metadata on v5 superblock filesystems records the LSN of its last
+ * modification, so the current LSN of the log must keep moving forward.
+ * Clearing the log resets the current LSN, which can leave the log behind
+ * parts of the on-disk filesystem and disrupt log recovery.
+ *
+ * The read verifiers have tracked the largest metadata LSN seen so far
+ * (libxfs_max_lsn). If the log is not strictly ahead of that LSN, warn and,
+ * unless running in no-modify mode, reformat the log at a higher cycle.
+ *
+ * Does nothing on filesystems without the CRC feature.
+ */
+static void
+format_log_max_lsn(
+	struct xfs_mount	*mp)
+{
+	struct xlog		*log = mp->m_log;
+	int			meta_cycle;
+	int			meta_block;
+	int			next_cycle;
+	bool			log_is_ahead;
+	xfs_daddr_t		log_daddr;
+	xfs_daddr_t		log_bbcount;
+	int			log_vers;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return;
+
+	meta_cycle = CYCLE_LSN(libxfs_max_lsn);
+	meta_block = BLOCK_LSN(libxfs_max_lsn);
+
+	/*
+	 * Nothing to do while the log is ahead of the highest metadata LSN
+	 * we have seen: compare cycles first, then blocks within a cycle.
+	 */
+	if (meta_cycle != log->l_curr_cycle)
+		log_is_ahead = meta_cycle < log->l_curr_cycle;
+	else
+		log_is_ahead = meta_block < log->l_curr_block;
+	if (log_is_ahead)
+		return;
+
+	/*
+	 * The next cycle should be enough, but skip a few more to help cover
+	 * any metadata LSNs that may have been missed.
+	 */
+	next_cycle = meta_cycle + 3;
+
+	do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
+		meta_cycle, meta_block, log->l_curr_cycle, log->l_curr_block);
+
+	if (no_modify) {
+		do_warn(_("Would format log to cycle %d.\n"), next_cycle);
+		return;
+	}
+
+	/* Log geometry is only needed once we know we will rewrite it. */
+	log_daddr = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
+	log_bbcount = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+	log_vers = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
+
+	do_warn(_("Format log to cycle %d.\n"), next_cycle);
+	libxfs_log_clear(log->l_dev, NULL, log_daddr, log_bbcount,
+			 &mp->m_sb.sb_uuid, log_vers, mp->m_sb.sb_logsunit,
+			 XLOG_FMT, next_cycle, true);
+}
+
+/*
+ * mkfs increases the AG count for "multidisk" configurations, and those are
+ * the filesystems we want to target for an increase in thread count. Check
+ * the superblock geometry information to determine whether mkfs considered
+ * this a multidisk configuration.
+ *
+ * Returns true when the AG count is at least XFS_MULTIDISK_AGCOUNT or when
+ * the superblock records a stripe unit; false otherwise.
+ */
+static bool
+is_multidisk_filesystem(
+	struct xfs_mount	*mp)
+{
+	struct xfs_sb		*sb = &mp->m_sb;
+	bool			high_agcount;
+
+	high_agcount = sb->sb_agcount >= XFS_MULTIDISK_AGCOUNT;
+
+	/*
+	 * Without a high AG count and without a sunit/swidth, mkfs didn't
+	 * consider it a multi-disk array, so we don't either.
+	 */
+	if (!high_agcount && !sb->sb_unit)
+		return false;
+
+	/* High agcount filesystems are always considered "multidisk". */
+	if (!high_agcount) {
+		/* Only reached with a stripe unit set; expect a width too. */
+		ASSERT(sb->sb_width);
+	}
+	return true;
+}
+
+/*
+ * If the sector size of the filesystem we are trying to repair is
+ * smaller than that of the underlying filesystem (i.e. we are repairing
+ * an image), then we have to turn off direct IO because we cannot do IO
+ * smaller than the host filesystem's sector size.
+ *
+ * sb: superblock of the filesystem being repaired; only sb_sectsize is read.
+ *
+ * If the host geometry cannot be queried, a message is logged and a host
+ * sector size of BBSIZE is assumed. If O_DIRECT has to be cleared and the
+ * descriptor's status flags cannot be read or updated, a warning is issued
+ * and the program exits with status 1.
+ */
+static void
+check_fs_vs_host_sectsize(
+	struct xfs_sb	*sb)
+{
+	int	fd;
+	int	old_flags;
+	struct xfs_fsop_geom_v1 geom = { 0 };
+
+	fd = libxfs_device_to_fd(x.ddev);
+
+	if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
+		do_log(_("Cannot get host filesystem geometry.\n"
+	"Repair may fail if there is a sector size mismatch between\n"
+	"the image and the host filesystem.\n"));
+		geom.sectsize = BBSIZE;
+	}
+
+	/* Image sectors are at least as large as the host's: nothing to do. */
+	if (sb->sb_sectsize >= geom.sectsize)
+		return;
+
+	/*
+	 * F_GETFL returns -1 on failure. Never feed that value back into
+	 * F_SETFL, as it would request every status flag at once; treat a
+	 * failed read of the flags the same as a failed update.
+	 */
+	old_flags = fcntl(fd, F_GETFL, 0);
+	if (old_flags < 0 ||
+	    fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
+		do_warn(_(
+	"Sector size on host filesystem larger than image sector size.\n"
+	"Cannot turn off direct IO, so exiting.\n"));
+		exit(1);
+	}
+}
+
int
main(int argc, char **argv)
{
xfs_mount_t *temp_mp;
xfs_mount_t *mp;
- xfs_sb_t *sb;
+ xfs_dsb_t *dsb;
xfs_buf_t *sbp;
xfs_mount_t xfs_m;
+ struct xlog log = {0};
+ char *msgbuf;
+ struct xfs_sb psb;
+ int rval;
progname = basename(argv[0]);
setlocale(LC_ALL, "");
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
+ dinode_bmbt_translation_init();
temp_mp = &xfs_m;
setbuf(stdout, NULL);
process_args(argc, argv);
xfs_init(&x);
+ msgbuf = malloc(DURATION_BUF_SIZE);
+
+ timestamp(PHASE_START, 0, NULL);
+ timestamp(PHASE_END, 0, NULL);
+
+ /* -f forces this, but let's be nice and autodetect it, as well. */
+ if (!isa_file) {
+ int fd = libxfs_device_to_fd(x.ddev);
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0)
+ do_warn(_("%s: couldn't stat \"%s\"\n"),
+ progname, fs_name);
+ else if (S_ISREG(statbuf.st_mode))
+ isa_file = 1;
+ }
+
+ if (isa_file) {
+ /* Best effort attempt to validate fs vs host sector size */
+ rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
+ if (rval == XR_OK)
+ check_fs_vs_host_sectsize(&psb);
+ }
+
/* do phase1 to make sure we have a superblock */
phase1(temp_mp);
+ timestamp(PHASE_END, 1, NULL);
if (no_modify && primary_sb_modified) {
do_warn(_("Primary superblock would have been modified.\n"
exit(1);
}
- /* prepare the mount structure */
- sbp = libxfs_readbuf(x.ddev, XFS_SB_DADDR, 1, 0);
- memset(&xfs_m, 0, sizeof(xfs_mount_t));
- sb = &xfs_m.m_sb;
- libxfs_xlate_sb(XFS_BUF_PTR(sbp), sb, 1, XFS_SB_ALL_BITS);
+ rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
+ if (rval != XR_OK) {
+ do_warn(_("Primary superblock bad after phase 1!\n"
+ "Exiting now.\n"));
+ exit(1);
+ }
- mp = libxfs_mount(&xfs_m, sb, x.ddev, x.logdev, x.rtdev, 0);
+ /*
+ * Now that we have completely validated the superblock, geometry may
+ * have changed; re-check geometry vs the host filesystem geometry
+ */
+ if (isa_file)
+ check_fs_vs_host_sectsize(&psb);
+
+ /*
+ * Prepare the mount structure. Point the log reference to our local
+ * copy so it's available to the various phases. The log bits are
+ * initialized in phase 2.
+ */
+ memset(&xfs_m, 0, sizeof(xfs_mount_t));
+ mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
if (!mp) {
fprintf(stderr,
progname);
exit(1);
}
- libxfs_putbuf(sbp);
+ mp->m_log = &log;
+
+ /* Spit out function & line on these corruption macros */
+ if (verbose > 2)
+ mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
/*
* set XFS-independent status vars from the mount/sb structure
glob_agcount = mp->m_sb.sb_agcount;
chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
- max_symlink_blocks = howmany(MAXPATHLEN - 1, mp->m_sb.sb_blocksize);
- inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
+ max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
+ inodes_per_cluster = MAX(mp->m_sb.sb_inopblock,
+ mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
+
+ /*
+ * Automatic striding for high agcount filesystems.
+ *
+ * More AGs indicates that the filesystem is either large or can handle
+ * more IO parallelism. Either way, we should try to process multiple
+ * AGs at a time in such a configuration to try to saturate the
+ * underlying storage and speed the repair process. Only do this if
+ * prefetching is enabled.
+ *
+ * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
+ * to target these for an increase in thread count. Hence a stride value
+ * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
+ * on such filesystems.
+ *
+ * Limit the maximum thread count based on the CPU power that is
+ * available. If we use too many threads, we might run out of memory
+ * and CPU power before we run out of IO concurrency. We limit to 8
+ * threads/CPU as this is enough threads to saturate a CPU on fast
+ * devices, yet few enough that it will saturate but won't overload slow
+ * devices.
+ *
+ * Multidisk filesystems can handle more IO parallelism so we should try
+ * to process multiple AGs at a time in such a configuration to try to
+ * saturate the underlying storage and speed the repair process. Only do
+ * this if prefetching is enabled.
+ */
+ if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
+ /*
+ * For small agcount multidisk systems, just double the
+ * parallelism. For larger AG count filesystems (32 and above)
+ * use more parallelism, and linearly increase the parallelism
+ * with the number of AGs.
+ */
+ ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
+ }
+
+ if (ag_stride) {
+ int max_threads = platform_nproc() * 8;
+
+ thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
+ while (thread_count > max_threads) {
+ ag_stride *= 2;
+ thread_count = (glob_agcount + ag_stride - 1) /
+ ag_stride;
+ }
+ if (thread_count > 0)
+ thread_init();
+ else {
+ thread_count = 1;
+ ag_stride = 0;
+ }
+ }
+
+ if (ag_stride && report_interval) {
+ init_progress_rpt();
+ if (msgbuf) {
+ do_log(_(" - reporting progress in intervals of %s\n"),
+ duration(report_interval, msgbuf));
+ }
+ }
+
+ /*
+ * Adjust libxfs cache sizes based on system memory,
+ * filesystem size and inode count.
+ *
+ * We'll set the cache size based on 3/4s the memory minus
+ * space used by the inode AVL tree and block usage map.
+ *
+ * Inode AVL tree space is approximately 4 bytes per inode,
+ * block usage map is currently 1 byte for 2 blocks.
+ *
+ * We assume most blocks will be inode clusters.
+ *
+ * Calculations are done in kilobyte units.
+ */
+
+ if (!bhash_option_used || max_mem_specified) {
+ unsigned long mem_used;
+ unsigned long max_mem;
+ struct rlimit rlim;
+
+ libxfs_bcache_purge();
+ cache_destroy(libxfs_bcache);
+
+ mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
+ (mp->m_sb.sb_dblocks >> (10 + 1)) +
+ 50000; /* rough estimate of 50MB overhead */
+ max_mem = max_mem_specified ? max_mem_specified * 1024 :
+ libxfs_physmem() * 3 / 4;
+
+ if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
+ rlim.rlim_cur != RLIM_INFINITY) {
+ rlim.rlim_cur = rlim.rlim_max;
+ setrlimit(RLIMIT_AS, &rlim);
+ /* use approximately 80% of rlimit to avoid overrun */
+ max_mem = MIN(max_mem, rlim.rlim_cur / 1280);
+ } else
+ max_mem = MIN(max_mem, (LONG_MAX >> 10) + 1);
+
+ if (verbose > 1)
+ do_log(
+ _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
+ max_mem, mp->m_sb.sb_icount,
+ mp->m_sb.sb_icount >> (10 - 2),
+ mp->m_sb.sb_dblocks,
+ mp->m_sb.sb_dblocks >> (10 + 1));
+
+ if (max_mem <= mem_used) {
+ if (max_mem_specified) {
+ do_abort(
+ _("Required memory for repair is greater that the maximum specified\n"
+ "with the -m option. Please increase it to at least %lu.\n"),
+ mem_used / 1024);
+ }
+ do_log(
+ _("Memory available for repair (%luMB) may not be sufficient.\n"
+ "At least %luMB is needed to repair this filesystem efficiently\n"
+ "If repair fails due to lack of memory, please\n"),
+ max_mem / 1024, mem_used / 1024);
+ if (do_prefetch)
+ do_log(
+ _("turn prefetching off (-P) to reduce the memory footprint.\n"));
+ else
+ do_log(
+ _("increase system RAM and/or swap space to at least %luMB.\n"),
+ mem_used * 2 / 1024);
+
+ max_mem = mem_used;
+ }
+
+ max_mem -= mem_used;
+ if (max_mem >= (1 << 30))
+ max_mem = 1 << 30;
+ libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
+ (mp->m_inode_cluster_size >> 10));
+ if (libxfs_bhash_size < 512)
+ libxfs_bhash_size = 512;
+
+ if (verbose)
+ do_log(_(" - block cache size set to %d entries\n"),
+ libxfs_bhash_size * HASH_CACHE_RATIO);
+
+ libxfs_bcache = cache_init(0, libxfs_bhash_size,
+ &libxfs_bcache_operations);
+ }
/*
* calculate what mkfs would do to this filesystem
calc_mkfs(mp);
/*
- * check sb filesystem stats and initialize in-core data structures
+ * initialize block alloc map
*/
- incore_init(mp);
+ init_bmaps(mp);
+ incore_ino_init(mp);
+ incore_ext_init(mp);
+ rmaps_init(mp);
+
+ /* initialize random globals now that we know the fs geometry */
+ inodes_per_block = mp->m_sb.sb_inopblock;
if (parse_sb_version(&mp->m_sb)) {
do_warn(
}
/* make sure the per-ag freespace maps are ok so we can mount the fs */
+ phase2(mp, phase2_threads);
+ timestamp(PHASE_END, 2, NULL);
- phase2(mp);
+ if (do_prefetch)
+ init_prefetch(mp);
- phase3(mp);
+ phase3(mp, phase2_threads);
+ timestamp(PHASE_END, 3, NULL);
phase4(mp);
+ timestamp(PHASE_END, 4, NULL);
if (no_modify)
printf(_("No modify flag set, skipping phase 5\n"));
- else
+ else {
phase5(mp);
+ }
+ timestamp(PHASE_END, 5, NULL);
+
+ /*
+ * Done with the block usage maps, toss them...
+ */
+ rmaps_free(mp);
+ free_bmaps(mp);
if (!bad_ino_btree) {
phase6(mp);
+ timestamp(PHASE_END, 6, NULL);
- phase7(mp);
+ phase7(mp, phase2_threads);
+ timestamp(PHASE_END, 7, NULL);
} else {
do_warn(
_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
}
- if (lost_quotas && !have_uquotino && !have_gquotino) {
+ if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
if (!no_modify) {
do_warn(
_("Warning: no quota inodes were found. Quotas disabled.\n"));
}
}
+ if (ag_stride && report_interval)
+ stop_progress_rpt();
+
if (no_modify) {
+ /*
+ * Warn if the current LSN is problematic and the log requires a
+ * reformat.
+ */
+ format_log_max_lsn(mp);
+
do_log(
_("No modify flag set, skipping filesystem flush and exiting.\n"));
+ if (verbose)
+ summary_report();
if (fs_is_dirty)
return(1);
if (!sbp)
do_error(_("couldn't get superblock\n"));
- sb = XFS_BUF_TO_SBP(sbp);
+ dsb = XFS_BUF_TO_SBP(sbp);
- if (sb->sb_qflags & (XFS_UQUOTA_CHKD|XFS_OQUOTA_CHKD)) {
- do_warn(
- _("Note - quota info will be regenerated on next quota mount.\n"));
- sb->sb_qflags &= ~(XFS_UQUOTA_CHKD|XFS_OQUOTA_CHKD);
+ if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
+ do_warn(_("Note - quota info will be regenerated on next "
+ "quota mount.\n"));
+ dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
}
- if (clear_sunit) {
+ if (copied_sunit) {
do_warn(
-_("Note - stripe unit (%d) and width (%d) fields have been reset.\n"
- "Please set with mount -o sunit=<value>,swidth=<value>\n"),
- sb->sb_unit, sb->sb_width);
- sb->sb_unit = 0;
- sb->sb_width = 0;
+_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
+ "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
+ be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
}
libxfs_writebuf(sbp, 0);
+ /*
+ * Done. Flush all cached buffers and inodes first to ensure all
+ * verifiers are run (where we discover the max metadata LSN), reformat
+ * the log if necessary and unmount.
+ */
+ libxfs_bcache_flush();
+ format_log_max_lsn(mp);
libxfs_umount(mp);
+
if (x.rtdev)
libxfs_device_close(x.rtdev);
if (x.logdev && x.logdev != x.ddev)
libxfs_device_close(x.logdev);
libxfs_device_close(x.ddev);
+ libxfs_destroy();
+ if (verbose)
+ summary_report();
do_log(_("done\n"));
- return(0);
+ if (dangerously && !no_modify)
+ do_warn(
+_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
+
+ pftrace_done();
+
+ free(msgbuf);
+
+ if (fs_is_dirty && report_corrected)
+ return (4);
+ return (0);
}