]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
da23017d
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
2bd0ea18
NS
5 */
6
6b803e5a
CH
#include "libxfs.h"
#include "libxlog.h"
#include <errno.h>
#include <limits.h>
#include <sys/resource.h>
#include "xfs_multidisk.h"
#include "avl.h"
#include "libfrog/avl64.h"
#include "globals.h"
#include "versions.h"
#include "agheader.h"
#include "protos.h"
#include "incore.h"
#include "err_protos.h"
#include "prefetch.h"
#include "threads.h"
#include "progress.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
#include "libfrog/fsgeom.h"
#include "libfrog/platform.h"
2bd0ea18 27
2bd0ea18
NS
28/*
29 * option tables for getsubopt calls
30 */
31
32/*
4af916f8 33 * -o: user-supplied override options
2bd0ea18 34 */
98884b66
DW
/* Indices of the -o suboptions; each one names a slot in o_opts[]. */
enum o_opt_nums {
	ASSUME_XFS = 0,
	IHASH_SIZE,
	BHASH_SIZE,
	AG_STRIDE,
	FORCE_GEO,
	PHASE2_THREADS,
	O_MAX_OPTS,
};

/* Token table passed to getsubopt() for -o; the O_MAX_OPTS slot is the NULL terminator. */
static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	[O_MAX_OPTS]		= NULL,
};
54
4af916f8
BN
55/*
56 * -c: conversion options
57 */
98884b66
DW
/* Indices of the -c suboptions; each one names a slot in c_opts[]. */
enum c_opt_nums {
	CONVERT_LAZY_COUNT = 0,
	C_MAX_OPTS,
};

/* Token table passed to getsubopt() for -c; the C_MAX_OPTS slot is the NULL terminator. */
static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	[C_MAX_OPTS]		= NULL,
};
67
68
/* Non-zero once "-o bhash=" has been parsed; -m and -o bhash exclude each other. */
static int	bhash_option_used;

/* Argument of -m, in megabytes; zero means no limit was requested. */
static long	max_mem_specified;

/* Thread count handed to phases 2, 3 and 7; changed by "-o phase2_threads=". */
static int	phase2_threads = 32;

/* Set by -e: a dirty filesystem then makes main() return 4 rather than 0. */
static bool	report_corrected;
2556c98b 73
2bd0ea18
NS
74static void
75usage(void)
76{
4af916f8
BN
77 do_warn(_(
78"Usage: %s [options] device\n"
79"\n"
80"Options:\n"
81" -f The device is a file\n"
82" -L Force log zeroing. Do this as a last resort.\n"
83" -l logdev Specifies the device where the external log resides.\n"
84" -m maxmem Maximum amount of memory to be used in megabytes.\n"
85" -n No modify mode, just checks the filesystem for damage.\n"
7c3e94a3 86" (Cannot be used together with -e.)\n"
4af916f8
BN
87" -P Disables prefetching.\n"
88" -r rtdev Specifies the device where the realtime section resides.\n"
89" -v Verbose output.\n"
90" -c subopts Change filesystem parameters - use xfs_admin.\n"
91" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 92" -t interval Reporting interval in seconds.\n"
4af916f8 93" -d Repair dangerously.\n"
7c3e94a3
JT
94" -e Exit with a non-zero code if any errors were repaired.\n"
95" (Cannot be used together with -n.)\n"
4af916f8 96" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
97 exit(1);
98}
99
2bd0ea18
NS
100char *
101err_string(int err_code)
102{
507f4e33
NS
103 static char *err_message[XR_BAD_ERR_CODE];
104 static int done;
105
106 if (!done) {
107 err_message[XR_OK] = _("no error");
108 err_message[XR_BAD_MAGIC] = _("bad magic number");
109 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
110 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 111 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
112 err_message[XR_BAD_INPROGRESS] =
113 _("filesystem mkfs-in-progress bit set");
114 err_message[XR_BAD_FS_SIZE_DATA] =
115 _("inconsistent filesystem geometry information");
116 err_message[XR_BAD_INO_SIZE_DATA] =
117 _("bad inode size or inconsistent with number of inodes/block"),
118 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
119 err_message[XR_AGF_GEO_MISMATCH] =
120 _("AGF geometry info conflicts with filesystem geometry");
121 err_message[XR_AGI_GEO_MISMATCH] =
122 _("AGI geometry info conflicts with filesystem geometry");
123 err_message[XR_SB_GEO_MISMATCH] =
124 _("AG superblock geometry info conflicts with filesystem geometry");
125 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
126 err_message[XR_BAD_RT_GEO_DATA] =
127 _("inconsistent filesystem geometry in realtime filesystem component");
128 err_message[XR_BAD_INO_MAX_PCT] =
129 _("maximum indicated percentage of inodes > 100%");
130 err_message[XR_BAD_INO_ALIGN] =
131 _("inconsistent inode alignment value");
132 err_message[XR_INSUFF_SEC_SB] =
133 _("not enough secondary superblocks with matching geometry");
134 err_message[XR_BAD_SB_UNIT] =
135 _("bad stripe unit in superblock");
136 err_message[XR_BAD_SB_WIDTH] =
137 _("bad stripe width in superblock");
138 err_message[XR_BAD_SVN] =
139 _("bad shared version number in superblock");
88f364a9
DC
140 err_message[XR_BAD_CRC] =
141 _("bad CRC in superblock");
02b56f87
DW
142 err_message[XR_BAD_DIR_SIZE_DATA] =
143 _("inconsistent directory geometry information");
eb9cee60
DW
144 err_message[XR_BAD_LOG_GEOMETRY] =
145 _("inconsistent log geometry information");
507f4e33
NS
146 done = 1;
147 }
148
2bd0ea18 149 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 150 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
151
152 return(err_message[err_code]);
153}
154
/*
 * Complain that suboption tbl[idx] of option -opt was given a value
 * although it does not take one, then bail out through usage().
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt);
	usage();
}
161
/*
 * Complain that option -opt (and, when a table is supplied, its suboption
 * tbl[idx]) was given more than once, then bail out through usage().
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}
	do_warn(_("option respecified\n"));
	usage();
}
171
/*
 * Report an unrecognised suboption string @s of option -opt and bail out
 * through usage().
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"),
		opt, s);
	usage();
}
178
179/*
180 * sets only the global argument flags and variables
181 */
8b8a6b02 182static void
2bd0ea18
NS
183process_args(int argc, char **argv)
184{
185 char *p;
186 int c;
187
188 log_spec = 0;
189 fs_is_dirty = 0;
190 verbose = 0;
191 no_modify = 0;
c781939c 192 dangerously = 0;
2bd0ea18 193 isa_file = 0;
d321ceac 194 zap_log = 0;
2bd0ea18 195 dumpcore = 0;
0f012a4c 196 full_ino_ex_data = 0;
2bd0ea18
NS
197 force_geo = 0;
198 assume_xfs = 0;
6bf4721d 199 copied_sunit = 0;
2bd0ea18
NS
200 sb_inoalignmt = 0;
201 sb_unit = 0;
202 sb_width = 0;
add3cb90 203 ag_stride = 0;
2556c98b 204 thread_count = 1;
06fbdda9 205 report_interval = PROG_RPT_DEFAULT;
7c3e94a3 206 report_corrected = false;
2bd0ea18
NS
207
208 /*
209 * XXX have to add suboption processing here
210 * attributes, quotas, nlinks, aligned_inos, sb_fbits
211 */
7c3e94a3 212 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
2bd0ea18
NS
213 switch (c) {
214 case 'D':
215 dumpcore = 1;
216 break;
217 case 'o':
218 p = optarg;
219 while (*p != '\0') {
220 char *val;
221
ab870d0e 222 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
223 case ASSUME_XFS:
224 if (val)
225 noval('o', o_opts, ASSUME_XFS);
226 if (assume_xfs)
227 respec('o', o_opts, ASSUME_XFS);
228 assume_xfs = 1;
229 break;
9f38f08d 230 case IHASH_SIZE:
3a19fb7d
CH
231 do_warn(
232 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
233 break;
234 case BHASH_SIZE:
12be365e
BN
235 if (max_mem_specified)
236 do_abort(
3a19fb7d 237 _("-o bhash option cannot be used with -m option\n"));
1f8480b6
DW
238 if (!val)
239 do_abort(
240 _("-o bhash requires a parameter\n"));
5e656dbb 241 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 242 bhash_option_used = 1;
cb5b3ef4 243 break;
add3cb90 244 case AG_STRIDE:
1f8480b6
DW
245 if (!val)
246 do_abort(
247 _("-o ag_stride requires a parameter\n"));
5e656dbb 248 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 249 break;
d4dd6ab5
CH
250 case FORCE_GEO:
251 if (val)
252 noval('o', o_opts, FORCE_GEO);
253 if (force_geo)
254 respec('o', o_opts, FORCE_GEO);
255 force_geo = 1;
256 break;
364a126c 257 case PHASE2_THREADS:
1f8480b6
DW
258 if (!val)
259 do_abort(
260 _("-o phase2_threads requires a parameter\n"));
364a126c
DC
261 phase2_threads = (int)strtol(val, NULL, 0);
262 break;
2bd0ea18
NS
263 default:
264 unknown('o', val);
265 break;
266 }
267 }
268 break;
4af916f8
BN
269 case 'c':
270 p = optarg;
271 while (*p) {
272 char *val;
273
ab870d0e 274 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 275 case CONVERT_LAZY_COUNT:
1f8480b6
DW
276 if (!val)
277 do_abort(
278 _("-c lazycount requires a parameter\n"));
5e656dbb 279 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
280 convert_lazy_count = 1;
281 break;
282 default:
283 unknown('c', val);
284 break;
285 }
286 }
287 break;
2bd0ea18
NS
288 case 'l':
289 log_name = optarg;
290 log_spec = 1;
291 break;
42a564ab
ES
292 case 'r':
293 rt_name = optarg;
294 rt_spec = 1;
295 break;
2bd0ea18
NS
296 case 'f':
297 isa_file = 1;
298 break;
12be365e
BN
299 case 'm':
300 if (bhash_option_used)
301 do_abort(_("-m option cannot be used with "
302 "-o bhash option\n"));
5e656dbb 303 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 304 break;
d321ceac
NS
305 case 'L':
306 zap_log = 1;
307 break;
2bd0ea18
NS
308 case 'n':
309 no_modify = 1;
310 break;
6089b6f0
NS
311 case 'd':
312 dangerously = 1;
313 break;
2bd0ea18 314 case 'v':
3b6ac903 315 verbose++;
2bd0ea18
NS
316 break;
317 case 'V':
507f4e33 318 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 319 exit(0);
cb5b3ef4 320 case 'P':
2556c98b 321 do_prefetch = 0;
3b6ac903 322 break;
06fbdda9 323 case 't':
5e656dbb 324 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 325 break;
7c3e94a3
JT
326 case 'e':
327 report_corrected = true;
328 break;
2bd0ea18
NS
329 case '?':
330 usage();
331 }
332 }
333
334 if (argc - optind != 1)
335 usage();
336
337 if ((fs_name = argv[optind]) == NULL)
338 usage();
7c3e94a3
JT
339
340 if (report_corrected && no_modify)
341 usage();
2bd0ea18
NS
342}
343
b1559967 344void __attribute__((noreturn))
2bd0ea18
NS
345do_error(char const *msg, ...)
346{
347 va_list args;
348
507f4e33 349 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
350
351 va_start(args, msg);
079afa09
CH
352 vfprintf(stderr, msg, args);
353 if (dumpcore)
354 abort();
355 exit(1);
2bd0ea18
NS
356}
357
358/*
359 * like do_error, only the error is internal, no system
360 * error so no oserror processing
361 */
b1559967 362void __attribute__((noreturn))
2bd0ea18
NS
363do_abort(char const *msg, ...)
364{
365 va_list args;
366
367 va_start(args, msg);
079afa09
CH
368 vfprintf(stderr, msg, args);
369 if (dumpcore)
370 abort();
371 exit(1);
2bd0ea18
NS
372}
373
374void
375do_warn(char const *msg, ...)
376{
377 va_list args;
378
379 fs_is_dirty = 1;
380
381 va_start(args, msg);
079afa09 382 vfprintf(stderr, msg, args);
2bd0ea18
NS
383 va_end(args);
384}
385
/*
 * Print a printf-style informational message to stderr.  Unlike do_warn()
 * this leaves fs_is_dirty untouched.
 */
void
do_log(char const *msg, ...)
{
	va_list		ap;

	va_start(ap, msg);
	(void)vfprintf(stderr, msg, ap);
	va_end(ap);
}
397
a3e126aa
DW
398/* Make sure a fixed-location inode is where it should be. */
399static void
400validate_sb_ino(
401 xfs_ino_t *ino,
402 xfs_ino_t expected_ino,
403 const char *tag)
404{
405 if (*ino == expected_ino)
406 return;
407
408 do_warn(
409_("sb %s inode value %" PRIu64 " %sinconsistent with calculated value %"PRIu64"\n"),
410 tag, *ino, *ino == NULLFSINO ? "(NULLFSINO) " : "",
411 expected_ino);
412
413 if (!no_modify)
414 do_warn(
415_("resetting superblock %s inode pointer to %"PRIu64"\n"),
416 tag, expected_ino);
417 else
418 do_warn(
419_("would reset superblock %s inode pointer to %"PRIu64"\n"),
420 tag, expected_ino);
421
422 /*
423 * Just set the value -- safe since the superblock doesn't get flushed
424 * out if no_modify is set.
425 */
426 *ino = expected_ino;
427}
428
ded6b558
DW
429/* Does the root directory inode look like a plausible root directory? */
430static bool
431has_plausible_rootdir(
432 struct xfs_mount *mp)
433{
434 struct xfs_inode *ip;
435 xfs_ino_t ino;
436 int error;
437 bool ret = false;
438
439 error = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &ip,
440 &xfs_default_ifork_ops);
441 if (error)
442 goto out;
443 if (!S_ISDIR(VFS_I(ip)->i_mode))
444 goto out_rele;
445
446 error = -libxfs_dir_lookup(NULL, ip, &xfs_name_dotdot, &ino, NULL);
447 if (error)
448 goto out_rele;
449
450 /* The root directory '..' entry points to the directory. */
451 if (ino == mp->m_sb.sb_rootino)
452 ret = true;
453
454out_rele:
455 libxfs_irele(ip);
456out:
457 return ret;
458}
459
306b450b
DW
460/*
461 * If any of the secondary SBs contain a *correct* value for sunit, write that
462 * back to the primary superblock.
463 */
464static void
465guess_correct_sunit(
466 struct xfs_mount *mp)
467{
468 struct xfs_sb sb;
469 struct xfs_buf *bp;
470 xfs_ino_t calc_rootino = NULLFSINO;
471 xfs_agnumber_t agno;
472 unsigned int new_sunit;
473 unsigned int sunit_guess;
474 int error;
475
476 /* Try reading secondary supers to see if we find a good sb_unit. */
477 for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
478 error = -libxfs_sb_read_secondary(mp, NULL, agno, &bp);
479 if (error)
480 continue;
481 libxfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
482 libxfs_putbuf(bp);
483
484 calc_rootino = libxfs_ialloc_calc_rootino(mp, sb.sb_unit);
485 if (calc_rootino == mp->m_sb.sb_rootino)
486 break;
487 }
488
489 /* If we found a reasonable value, log where we found it. */
490 if (calc_rootino == mp->m_sb.sb_rootino) {
491 do_warn(_("AG %u superblock contains plausible sb_unit value\n"),
492 agno);
493 new_sunit = sb.sb_unit;
494 goto fix;
495 }
496
497 /* Try successive powers of two. */
498 for (sunit_guess = 1;
499 sunit_guess <= XFS_AG_MAX_BLOCKS(mp->m_sb.sb_blocklog);
500 sunit_guess *= 2) {
501 calc_rootino = libxfs_ialloc_calc_rootino(mp, sunit_guess);
502 if (calc_rootino == mp->m_sb.sb_rootino)
503 break;
504 }
505
506 /* If we found a reasonable value, log where we found it. */
507 if (calc_rootino == mp->m_sb.sb_rootino) {
508 do_warn(_("Found an sb_unit value that looks plausible\n"));
509 new_sunit = sunit_guess;
510 goto fix;
511 }
512
513 do_warn(_("Could not estimate a plausible sb_unit value\n"));
514 return;
515
516fix:
517 if (!no_modify)
518 do_warn(_("Resetting sb_unit to %u\n"), new_sunit);
519 else
520 do_warn(_("Would reset sb_unit to %u\n"), new_sunit);
521
522 /*
523 * Just set the value -- safe since the superblock doesn't get flushed
524 * out if no_modify is set.
525 */
526 mp->m_sb.sb_unit = new_sunit;
527
528 /* Make sure that swidth is still a multiple of sunit. */
529 if (mp->m_sb.sb_width % mp->m_sb.sb_unit == 0)
530 return;
531
532 if (!no_modify)
533 do_warn(_("Resetting sb_width to %u\n"), new_sunit);
534 else
535 do_warn(_("Would reset sb_width to %u\n"), new_sunit);
536}
537
90b2397e
DW
538/*
539 * Make sure that the first 3 inodes in the filesystem are the root directory,
540 * the realtime bitmap, and the realtime summary, in that order.
541 */
8b8a6b02 542static void
90b2397e
DW
543calc_mkfs(
544 struct xfs_mount *mp)
2bd0ea18 545{
90b2397e 546 xfs_ino_t rootino;
649bfa9a 547
90b2397e 548 rootino = libxfs_ialloc_calc_rootino(mp, mp->m_sb.sb_unit);
d4dd6ab5 549
ded6b558
DW
550 /*
551 * If the root inode isn't where we think it is, check its plausibility
552 * as a root directory. It's possible that somebody changed sunit
553 * since the filesystem was created, which can change the value of the
554 * above computation. Don't blow up the root directory if this is the
555 * case.
556 */
557 if (mp->m_sb.sb_rootino != rootino && has_plausible_rootdir(mp)) {
558 do_warn(
559_("sb root inode value %" PRIu64 " valid but in unaligned location (expected %"PRIu64") possibly due to sunit change\n"),
560 mp->m_sb.sb_rootino, rootino);
306b450b 561 guess_correct_sunit(mp);
ded6b558
DW
562 rootino = mp->m_sb.sb_rootino;
563 }
564
90b2397e 565 validate_sb_ino(&mp->m_sb.sb_rootino, rootino,
a3e126aa 566 _("root"));
90b2397e 567 validate_sb_ino(&mp->m_sb.sb_rbmino, rootino + 1,
a3e126aa 568 _("realtime bitmap"));
90b2397e 569 validate_sb_ino(&mp->m_sb.sb_rsumino, rootino + 2,
a3e126aa 570 _("realtime summary"));
2bd0ea18
NS
571}
572
1926558d
BF
573/*
574 * v5 superblock metadata track the LSN of last modification and thus require
575 * that the current LSN is always moving forward. The current LSN is reset if
576 * the log has been cleared, which puts the log behind parts of the filesystem
577 * on-disk and can disrupt log recovery.
578 *
579 * We have tracked the maximum LSN of every piece of metadata that has been read
580 * in via the read verifiers. Compare the max LSN with the log and if the log is
581 * behind, bump the cycle number and reformat the log.
582 */
583static void
584format_log_max_lsn(
585 struct xfs_mount *mp)
586{
587 struct xlog *log = mp->m_log;
588 int max_cycle;
589 int max_block;
590 int new_cycle;
591 xfs_daddr_t logstart;
592 xfs_daddr_t logblocks;
593 int logversion;
594
595 if (!xfs_sb_version_hascrc(&mp->m_sb))
596 return;
597
598 /*
599 * If the log is ahead of the highest metadata LSN we've seen, we're
600 * safe and there's nothing to do.
601 */
602 max_cycle = CYCLE_LSN(libxfs_max_lsn);
603 max_block = BLOCK_LSN(libxfs_max_lsn);
604 if (max_cycle < log->l_curr_cycle ||
605 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
606 return;
607
608 /*
609 * Going to the next cycle should be sufficient but we bump by a few
610 * counts to help cover any metadata LSNs we could have missed.
611 */
612 new_cycle = max_cycle + 3;
613 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
614 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
615 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
616
617 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
618 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
619
620 if (no_modify) {
621 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
622 return;
623 }
624
625 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
626 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
627 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 628 XLOG_FMT, new_cycle, true);
1926558d
BF
629}
630
4a32b9e9
DC
631/*
632 * mkfs increases the AG count for "multidisk" configurations, we want
633 * to target these for an increase in thread count. Hence check the superlock
634 * geometry information to determine if mkfs considered this a multidisk
635 * configuration.
636 */
637static bool
638is_multidisk_filesystem(
639 struct xfs_mount *mp)
640{
641 struct xfs_sb *sbp = &mp->m_sb;
642
643 /* High agcount filesystems are always considered "multidisk" */
644 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
645 return true;
646
647 /*
648 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
649 * multi-disk array, so we don't either.
650 */
651 if (!sbp->sb_unit)
652 return false;
653
654 ASSERT(sbp->sb_width);
655 return true;
656}
657
28a0a30f
ZL
658/*
659 * if the sector size of the filesystem we are trying to repair is
660 * smaller than that of the underlying filesystem (i.e. we are repairing
661 * an image), the we have to turn off direct IO because we cannot do IO
662 * smaller than the host filesystem's sector size.
663 */
664static void
665check_fs_vs_host_sectsize(
666 struct xfs_sb *sb)
667{
9612817d 668 int fd, ret;
28a0a30f 669 long old_flags;
9612817d 670 struct xfs_fsop_geom geom = { 0 };
28a0a30f
ZL
671
672 fd = libxfs_device_to_fd(x.ddev);
673
03d96c64 674 ret = -xfrog_geometry(fd, &geom);
9612817d 675 if (ret) {
28a0a30f
ZL
676 do_log(_("Cannot get host filesystem geometry.\n"
677 "Repair may fail if there is a sector size mismatch between\n"
678 "the image and the host filesystem.\n"));
679 geom.sectsize = BBSIZE;
680 }
681
682 if (sb->sb_sectsize < geom.sectsize) {
683 old_flags = fcntl(fd, F_GETFL, 0);
684 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
685 do_warn(_(
686 "Sector size on host filesystem larger than image sector size.\n"
687 "Cannot turn off direct IO, so exiting.\n"));
688 exit(1);
689 }
690 }
691}
692
2bd0ea18
NS
693int
694main(int argc, char **argv)
695{
2bd0ea18
NS
696 xfs_mount_t *temp_mp;
697 xfs_mount_t *mp;
5e656dbb 698 xfs_dsb_t *dsb;
2bd0ea18
NS
699 xfs_buf_t *sbp;
700 xfs_mount_t xfs_m;
1d6cb115 701 struct xlog log = {0};
06fbdda9 702 char *msgbuf;
88f364a9
DC
703 struct xfs_sb psb;
704 int rval;
e7fd2b6f 705 struct xfs_ino_geometry *igeo;
2bd0ea18
NS
706
707 progname = basename(argv[0]);
507f4e33
NS
708 setlocale(LC_ALL, "");
709 bindtextdomain(PACKAGE, LOCALEDIR);
710 textdomain(PACKAGE);
beed0dc8 711 dinode_bmbt_translation_init();
2bd0ea18
NS
712
713 temp_mp = &xfs_m;
714 setbuf(stdout, NULL);
715
716 process_args(argc, argv);
d321ceac 717 xfs_init(&x);
2bd0ea18 718
2556c98b
BN
719 msgbuf = malloc(DURATION_BUF_SIZE);
720
06fbdda9
MV
721 timestamp(PHASE_START, 0, NULL);
722 timestamp(PHASE_END, 0, NULL);
723
28a0a30f
ZL
724 /* -f forces this, but let's be nice and autodetect it, as well. */
725 if (!isa_file) {
726 int fd = libxfs_device_to_fd(x.ddev);
727 struct stat statbuf;
728
729 if (fstat(fd, &statbuf) < 0)
730 do_warn(_("%s: couldn't stat \"%s\"\n"),
731 progname, fs_name);
732 else if (S_ISREG(statbuf.st_mode))
733 isa_file = 1;
734 }
735
736 if (isa_file) {
737 /* Best effort attempt to validate fs vs host sector size */
738 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
739 if (rval == XR_OK)
740 check_fs_vs_host_sectsize(&psb);
741 }
742
2bd0ea18
NS
743 /* do phase1 to make sure we have a superblock */
744 phase1(temp_mp);
06fbdda9 745 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
746
747 if (no_modify && primary_sb_modified) {
507f4e33
NS
748 do_warn(_("Primary superblock would have been modified.\n"
749 "Cannot proceed further in no_modify mode.\n"
750 "Exiting now.\n"));
2bd0ea18
NS
751 exit(1);
752 }
753
88f364a9
DC
754 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
755 if (rval != XR_OK) {
756 do_warn(_("Primary superblock bad after phase 1!\n"
757 "Exiting now.\n"));
758 exit(1);
759 }
2bd0ea18 760
f63fd268 761 /*
28a0a30f
ZL
762 * Now that we have completely validated the superblock, geometry may
763 * have changed; re-check geometry vs the host filesystem geometry
f63fd268 764 */
28a0a30f
ZL
765 if (isa_file)
766 check_fs_vs_host_sectsize(&psb);
88f364a9 767
1d6cb115
BF
768 /*
769 * Prepare the mount structure. Point the log reference to our local
770 * copy so it's available to the various phases. The log bits are
771 * initialized in phase 2.
772 */
88f364a9
DC
773 memset(&xfs_m, 0, sizeof(xfs_mount_t));
774 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
775
776 if (!mp) {
507f4e33
NS
777 fprintf(stderr,
778 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
779 progname);
780 exit(1);
781 }
1d6cb115 782 mp->m_log = &log;
e7fd2b6f 783 igeo = M_IGEO(mp);
2bd0ea18 784
23639f77
ES
785 /* Spit out function & line on these corruption macros */
786 if (verbose > 2)
787 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
788
2bd0ea18
NS
789 /*
790 * set XFS-independent status vars from the mount/sb structure
791 */
792 glob_agcount = mp->m_sb.sb_agcount;
793
794 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
5a707ca1 795 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
2bd0ea18 796
0cce4aa1
DC
797 /*
798 * Automatic striding for high agcount filesystems.
799 *
800 * More AGs indicates that the filesystem is either large or can handle
801 * more IO parallelism. Either way, we should try to process multiple
802 * AGs at a time in such a configuration to try to saturate the
803 * underlying storage and speed the repair process. Only do this if
804 * prefetching is enabled.
805 *
806 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
807 * to target these for an increase in thread count. Hence a stride value
808 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
809 * on such filesystems.
12b55baf
DC
810 *
811 * Limit the maximum thread count based on the available CPU power that
812 * is available. If we use too many threads, we might run out of memory
813 * and CPU power before we run out of IO concurrency. We limit to 8
814 * threads/CPU as this is enough threads to saturate a CPU on fast
815 * devices, yet few enough that it will saturate but won't overload slow
816 * devices.
4a32b9e9
DC
817 *
818 * Multidisk filesystems can handle more IO parallelism so we should try
819 * to process multiple AGs at a time in such a configuration to try to
820 * saturate the underlying storage and speed the repair process. Only do
821 * this if prefetching is enabled.
0cce4aa1 822 */
4a32b9e9
DC
823 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
824 /*
825 * For small agcount multidisk systems, just double the
826 * parallelism. For larger AG count filesystems (32 and above)
827 * use more parallelism, and linearly increase the parallelism
828 * with the number of AGs.
829 */
830 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
831 }
0cce4aa1 832
add3cb90 833 if (ag_stride) {
12b55baf
DC
834 int max_threads = platform_nproc() * 8;
835
2556c98b 836 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
837 while (thread_count > max_threads) {
838 ag_stride *= 2;
839 thread_count = (glob_agcount + ag_stride - 1) /
840 ag_stride;
841 }
842 if (thread_count > 0)
843 thread_init();
844 else {
845 thread_count = 1;
846 ag_stride = 0;
847 }
add3cb90
BN
848 }
849
2556c98b 850 if (ag_stride && report_interval) {
06fbdda9 851 init_progress_rpt();
06fbdda9
MV
852 if (msgbuf) {
853 do_log(_(" - reporting progress in intervals of %s\n"),
854 duration(report_interval, msgbuf));
06fbdda9
MV
855 }
856 }
857
2556c98b
BN
858 /*
859 * Adjust libxfs cache sizes based on system memory,
860 * filesystem size and inode count.
861 *
862 * We'll set the cache size based on 3/4s the memory minus
863 * space used by the inode AVL tree and block usage map.
864 *
865 * Inode AVL tree space is approximately 4 bytes per inode,
866 * block usage map is currently 1 byte for 2 blocks.
867 *
868 * We assume most blocks will be inode clusters.
869 *
870 * Calculations are done in kilobyte units.
871 */
872
12be365e 873 if (!bhash_option_used || max_mem_specified) {
2556c98b 874 unsigned long mem_used;
12be365e
BN
875 unsigned long max_mem;
876 struct rlimit rlim;
2556c98b 877
2556c98b 878 libxfs_bcache_purge();
2556c98b
BN
879 cache_destroy(libxfs_bcache);
880
881 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
882 (mp->m_sb.sb_dblocks >> (10 + 1)) +
883 50000; /* rough estimate of 50MB overhead */
884 max_mem = max_mem_specified ? max_mem_specified * 1024 :
4e5fe123 885 platform_physmem() * 3 / 4;
12be365e
BN
886
887 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
888 rlim.rlim_cur != RLIM_INFINITY) {
889 rlim.rlim_cur = rlim.rlim_max;
890 setrlimit(RLIMIT_AS, &rlim);
891 /* use approximately 80% of rlimit to avoid overrun */
68d16907 892 max_mem = min(max_mem, rlim.rlim_cur / 1280);
12be365e 893 } else
68d16907 894 max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
895
896 if (verbose > 1)
5d1b7f0f
CH
897 do_log(
898 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
899 max_mem, mp->m_sb.sb_icount,
900 mp->m_sb.sb_icount >> (10 - 2),
901 mp->m_sb.sb_dblocks,
902 mp->m_sb.sb_dblocks >> (10 + 1));
903
904 if (max_mem <= mem_used) {
0335a835
DC
905 if (max_mem_specified) {
906 do_abort(
907 _("Required memory for repair is greater that the maximum specified\n"
908 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 909 mem_used / 1024);
0335a835 910 }
70a4820f 911 do_log(
61510437
DC
912 _("Memory available for repair (%luMB) may not be sufficient.\n"
913 "At least %luMB is needed to repair this filesystem efficiently\n"
914 "If repair fails due to lack of memory, please\n"),
915 max_mem / 1024, mem_used / 1024);
916 if (do_prefetch)
70a4820f 917 do_log(
61510437
DC
918 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
919 else
70a4820f 920 do_log(
61510437
DC
921 _("increase system RAM and/or swap space to at least %luMB.\n"),
922 mem_used * 2 / 1024);
923
924 max_mem = mem_used;
2556c98b
BN
925 }
926
61510437
DC
927 max_mem -= mem_used;
928 if (max_mem >= (1 << 30))
929 max_mem = 1 << 30;
930 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
e7fd2b6f 931 (igeo->inode_cluster_size >> 10));
61510437
DC
932 if (libxfs_bhash_size < 512)
933 libxfs_bhash_size = 512;
934
2556c98b
BN
935 if (verbose)
936 do_log(_(" - block cache size set to %d entries\n"),
937 libxfs_bhash_size * HASH_CACHE_RATIO);
938
ba9ecd40 939 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
940 &libxfs_bcache_operations);
941 }
942
2bd0ea18
NS
943 /*
944 * calculate what mkfs would do to this filesystem
945 */
946 calc_mkfs(mp);
947
948 /*
c1f7a46c 949 * initialize block alloc map
2bd0ea18 950 */
c1f7a46c
BN
951 init_bmaps(mp);
952 incore_ino_init(mp);
953 incore_ext_init(mp);
2d273771 954 rmaps_init(mp);
c1f7a46c
BN
955
956 /* initialize random globals now that we know the fs geometry */
957 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
958
959 if (parse_sb_version(&mp->m_sb)) {
960 do_warn(
507f4e33 961 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
962 return(1);
963 }
964
965 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 966 phase2(mp, phase2_threads);
06fbdda9 967 timestamp(PHASE_END, 2, NULL);
2bd0ea18 968
2556c98b
BN
969 if (do_prefetch)
970 init_prefetch(mp);
971
8100dd79 972 phase3(mp, phase2_threads);
06fbdda9 973 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
974
975 phase4(mp);
06fbdda9 976 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
977
978 if (no_modify)
507f4e33 979 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 980 else {
2bd0ea18 981 phase5(mp);
3b6ac903 982 }
06fbdda9 983 timestamp(PHASE_END, 5, NULL);
2bd0ea18 984
c1f7a46c
BN
985 /*
986 * Done with the block usage maps, toss them...
987 */
2d273771 988 rmaps_free(mp);
c1f7a46c
BN
989 free_bmaps(mp);
990
2bd0ea18
NS
991 if (!bad_ino_btree) {
992 phase6(mp);
06fbdda9 993 timestamp(PHASE_END, 6, NULL);
2bd0ea18 994
e161d4a8 995 phase7(mp, phase2_threads);
06fbdda9 996 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
997 } else {
998 do_warn(
507f4e33 999_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
1000 }
1001
0340d706 1002 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
1003 if (!no_modify) {
1004 do_warn(
507f4e33 1005_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
1006 } else {
1007 do_warn(
507f4e33 1008_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
1009 }
1010 } else if (lost_quotas) {
1011 if (!no_modify) {
1012 do_warn(
507f4e33 1013_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
1014 } else {
1015 do_warn(
507f4e33 1016_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
1017 }
1018 } else {
1019 if (lost_uquotino) {
1020 if (!no_modify) {
1021 do_warn(
507f4e33
NS
1022_("Warning: user quota information was cleared.\n"
1023 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1024 } else {
1025 do_warn(
507f4e33
NS
1026_("Warning: user quota information would be cleared.\n"
1027 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1028 }
1029 }
1030
b36eef04 1031 if (lost_gquotino) {
2bd0ea18
NS
1032 if (!no_modify) {
1033 do_warn(
507f4e33
NS
1034_("Warning: group quota information was cleared.\n"
1035 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1036 } else {
1037 do_warn(
507f4e33
NS
1038_("Warning: group quota information would be cleared.\n"
1039 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
1040 }
1041 }
1042
1043 if (lost_pquotino) {
1044 if (!no_modify) {
1045 do_warn(
1046_("Warning: project quota information was cleared.\n"
1047 "Project quotas can not be enforced until limit information is recreated.\n"));
1048 } else {
1049 do_warn(
1050_("Warning: project quota information would be cleared.\n"
1051 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1052 }
1053 }
1054 }
1055
2556c98b 1056 if (ag_stride && report_interval)
06fbdda9 1057 stop_progress_rpt();
9f38f08d 1058
2bd0ea18 1059 if (no_modify) {
1926558d
BF
1060 /*
1061 * Warn if the current LSN is problematic and the log requires a
1062 * reformat.
1063 */
1064 format_log_max_lsn(mp);
1065
2bd0ea18 1066 do_log(
507f4e33 1067 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1068 if (verbose)
06fbdda9 1069 summary_report();
2bd0ea18
NS
1070 if (fs_is_dirty)
1071 return(1);
1072
1073 return(0);
1074 }
1075
1076 /*
1077 * Clear the quota flags if they're on.
1078 */
67c4a324 1079 sbp = libxfs_getsb(mp);
2bd0ea18 1080 if (!sbp)
507f4e33 1081 do_error(_("couldn't get superblock\n"));
2bd0ea18 1082
5e656dbb 1083 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1084
342aef1e 1085 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1086 do_warn(_("Note - quota info will be regenerated on next "
1087 "quota mount.\n"));
342aef1e 1088 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1089 }
1090
6bf4721d 1091 if (copied_sunit) {
2bd0ea18 1092 do_warn(
6bf4721d
ES
1093_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1094 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1095 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1096 }
2bd0ea18
NS
1097
1098 libxfs_writebuf(sbp, 0);
1099
2556c98b 1100 /*
1926558d
BF
1101 * Done. Flush all cached buffers and inodes first to ensure all
1102 * verifiers are run (where we discover the max metadata LSN), reformat
1103 * the log if necessary and unmount.
2556c98b
BN
1104 */
1105 libxfs_bcache_flush();
1926558d 1106 format_log_max_lsn(mp);
2bd0ea18 1107 libxfs_umount(mp);
1926558d 1108
d321ceac
NS
1109 if (x.rtdev)
1110 libxfs_device_close(x.rtdev);
1111 if (x.logdev && x.logdev != x.ddev)
1112 libxfs_device_close(x.logdev);
1113 libxfs_device_close(x.ddev);
2ce8bff5 1114 libxfs_destroy();
2bd0ea18 1115
06fbdda9
MV
1116 if (verbose)
1117 summary_report();
507f4e33 1118 do_log(_("done\n"));
3ae81520
ES
1119
1120 if (dangerously && !no_modify)
1121 do_warn(
1122_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1123
4c0a98ae
BN
1124 pftrace_done();
1125
0a223eb8
ES
1126 free(msgbuf);
1127
7c3e94a3
JT
1128 if (fs_is_dirty && report_corrected)
1129 return (4);
3b6ac903
MV
1130 return (0);
1131}