]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
libxfs: remove libxfs_physmem
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
da23017d
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
2bd0ea18
NS
5 */
6
6b803e5a
CH
7#include "libxfs.h"
8#include "libxlog.h"
12be365e 9#include <sys/resource.h>
4a32b9e9 10#include "xfs_multidisk.h"
2bd0ea18 11#include "avl.h"
b4a09f89 12#include "libfrog/avl64.h"
2bd0ea18
NS
13#include "globals.h"
14#include "versions.h"
15#include "agheader.h"
16#include "protos.h"
17#include "incore.h"
18#include "err_protos.h"
cb5b3ef4 19#include "prefetch.h"
3b6ac903 20#include "threads.h"
06fbdda9 21#include "progress.h"
beed0dc8 22#include "dinode.h"
9e0f480e
DW
23#include "slab.h"
24#include "rmap.h"
fee68490 25#include "libfrog/fsgeom.h"
2bd0ea18 26
2bd0ea18
NS
27/*
28 * option tables for getsubopt calls
29 */
30
/*
 * -o: user-supplied override options
 *
 * The enumerators double as indices into o_opts[], the token table that
 * is passed to getsubopt().  O_MAX_OPTS is the slot holding the NULL
 * terminator of that table.
 */
enum o_opt_nums {
	ASSUME_XFS = 0,
	IHASH_SIZE,
	BHASH_SIZE,
	AG_STRIDE,
	FORCE_GEO,
	PHASE2_THREADS,
	O_MAX_OPTS,
};

static char *o_opts[O_MAX_OPTS + 1] = {
	[ASSUME_XFS]		= "assume_xfs",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	[O_MAX_OPTS]		= NULL,		/* getsubopt() terminator */
};
53
4af916f8
BN
/*
 * -c: conversion options
 *
 * Same layout as the -o table: the enumerators index c_opts[], and
 * C_MAX_OPTS is the NULL terminator slot expected by getsubopt().
 */
enum c_opt_nums {
	CONVERT_LAZY_COUNT = 0,
	C_MAX_OPTS,
};

static char *c_opts[C_MAX_OPTS + 1] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	[C_MAX_OPTS]		= NULL,		/* getsubopt() terminator */
};
66
67
/* Set once "-o bhash=<n>" has been parsed; -m and -o bhash exclude each other. */
static int	bhash_option_used;

/* Value given to -m, in megabytes; stays zero when -m was not used. */
static long	max_mem_specified;

/*
 * Thread count passed to phase2(), phase3() and phase7(); can be
 * overridden with "-o phase2_threads=<n>".
 */
static int	phase2_threads = 32;

/* -e: make main() return a non-zero status when fs_is_dirty is set at exit. */
static bool	report_corrected;
2556c98b 72
2bd0ea18
NS
73static void
74usage(void)
75{
4af916f8
BN
76 do_warn(_(
77"Usage: %s [options] device\n"
78"\n"
79"Options:\n"
80" -f The device is a file\n"
81" -L Force log zeroing. Do this as a last resort.\n"
82" -l logdev Specifies the device where the external log resides.\n"
83" -m maxmem Maximum amount of memory to be used in megabytes.\n"
84" -n No modify mode, just checks the filesystem for damage.\n"
7c3e94a3 85" (Cannot be used together with -e.)\n"
4af916f8
BN
86" -P Disables prefetching.\n"
87" -r rtdev Specifies the device where the realtime section resides.\n"
88" -v Verbose output.\n"
89" -c subopts Change filesystem parameters - use xfs_admin.\n"
90" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 91" -t interval Reporting interval in seconds.\n"
4af916f8 92" -d Repair dangerously.\n"
7c3e94a3
JT
93" -e Exit with a non-zero code if any errors were repaired.\n"
94" (Cannot be used together with -n.)\n"
4af916f8 95" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
96 exit(1);
97}
98
2bd0ea18
NS
99char *
100err_string(int err_code)
101{
507f4e33
NS
102 static char *err_message[XR_BAD_ERR_CODE];
103 static int done;
104
105 if (!done) {
106 err_message[XR_OK] = _("no error");
107 err_message[XR_BAD_MAGIC] = _("bad magic number");
108 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
109 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 110 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
111 err_message[XR_BAD_INPROGRESS] =
112 _("filesystem mkfs-in-progress bit set");
113 err_message[XR_BAD_FS_SIZE_DATA] =
114 _("inconsistent filesystem geometry information");
115 err_message[XR_BAD_INO_SIZE_DATA] =
116 _("bad inode size or inconsistent with number of inodes/block"),
117 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
118 err_message[XR_AGF_GEO_MISMATCH] =
119 _("AGF geometry info conflicts with filesystem geometry");
120 err_message[XR_AGI_GEO_MISMATCH] =
121 _("AGI geometry info conflicts with filesystem geometry");
122 err_message[XR_SB_GEO_MISMATCH] =
123 _("AG superblock geometry info conflicts with filesystem geometry");
124 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
125 err_message[XR_BAD_RT_GEO_DATA] =
126 _("inconsistent filesystem geometry in realtime filesystem component");
127 err_message[XR_BAD_INO_MAX_PCT] =
128 _("maximum indicated percentage of inodes > 100%");
129 err_message[XR_BAD_INO_ALIGN] =
130 _("inconsistent inode alignment value");
131 err_message[XR_INSUFF_SEC_SB] =
132 _("not enough secondary superblocks with matching geometry");
133 err_message[XR_BAD_SB_UNIT] =
134 _("bad stripe unit in superblock");
135 err_message[XR_BAD_SB_WIDTH] =
136 _("bad stripe width in superblock");
137 err_message[XR_BAD_SVN] =
138 _("bad shared version number in superblock");
88f364a9
DC
139 err_message[XR_BAD_CRC] =
140 _("bad CRC in superblock");
02b56f87
DW
141 err_message[XR_BAD_DIR_SIZE_DATA] =
142 _("inconsistent directory geometry information");
eb9cee60
DW
143 err_message[XR_BAD_LOG_GEOMETRY] =
144 _("inconsistent log geometry information");
507f4e33
NS
145 done = 1;
146 }
147
2bd0ea18 148 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 149 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
150
151 return(err_message[err_code]);
152}
153
/*
 * Complain that suboption tbl[idx] of option -<opt> was given a value
 * although it does not take one, then bail out through usage().
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt);
	usage();
}
160
/*
 * Complain that option -<opt> (and, when a suboption table is supplied,
 * suboption tbl[idx]) was specified more than once, then bail out through
 * usage().  tbl may be NULL for options without suboptions.
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl != NULL)
		do_warn("%s ", tbl[idx]);
	do_warn(_("option respecified\n"));

	usage();
}
170
/*
 * Report an unrecognized suboption string s for option -<opt> and bail
 * out through usage().
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s);

	usage();
}
177
178/*
179 * sets only the global argument flags and variables
180 */
8b8a6b02 181static void
2bd0ea18
NS
182process_args(int argc, char **argv)
183{
184 char *p;
185 int c;
186
187 log_spec = 0;
188 fs_is_dirty = 0;
189 verbose = 0;
190 no_modify = 0;
c781939c 191 dangerously = 0;
2bd0ea18 192 isa_file = 0;
d321ceac 193 zap_log = 0;
2bd0ea18 194 dumpcore = 0;
0f012a4c 195 full_ino_ex_data = 0;
2bd0ea18
NS
196 force_geo = 0;
197 assume_xfs = 0;
6bf4721d 198 copied_sunit = 0;
2bd0ea18
NS
199 sb_inoalignmt = 0;
200 sb_unit = 0;
201 sb_width = 0;
add3cb90 202 ag_stride = 0;
2556c98b 203 thread_count = 1;
06fbdda9 204 report_interval = PROG_RPT_DEFAULT;
7c3e94a3 205 report_corrected = false;
2bd0ea18
NS
206
207 /*
208 * XXX have to add suboption processing here
209 * attributes, quotas, nlinks, aligned_inos, sb_fbits
210 */
7c3e94a3 211 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
2bd0ea18
NS
212 switch (c) {
213 case 'D':
214 dumpcore = 1;
215 break;
216 case 'o':
217 p = optarg;
218 while (*p != '\0') {
219 char *val;
220
ab870d0e 221 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
222 case ASSUME_XFS:
223 if (val)
224 noval('o', o_opts, ASSUME_XFS);
225 if (assume_xfs)
226 respec('o', o_opts, ASSUME_XFS);
227 assume_xfs = 1;
228 break;
9f38f08d 229 case IHASH_SIZE:
3a19fb7d
CH
230 do_warn(
231 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
232 break;
233 case BHASH_SIZE:
12be365e
BN
234 if (max_mem_specified)
235 do_abort(
3a19fb7d 236 _("-o bhash option cannot be used with -m option\n"));
1f8480b6
DW
237 if (!val)
238 do_abort(
239 _("-o bhash requires a parameter\n"));
5e656dbb 240 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 241 bhash_option_used = 1;
cb5b3ef4 242 break;
add3cb90 243 case AG_STRIDE:
1f8480b6
DW
244 if (!val)
245 do_abort(
246 _("-o ag_stride requires a parameter\n"));
5e656dbb 247 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 248 break;
d4dd6ab5
CH
249 case FORCE_GEO:
250 if (val)
251 noval('o', o_opts, FORCE_GEO);
252 if (force_geo)
253 respec('o', o_opts, FORCE_GEO);
254 force_geo = 1;
255 break;
364a126c 256 case PHASE2_THREADS:
1f8480b6
DW
257 if (!val)
258 do_abort(
259 _("-o phase2_threads requires a parameter\n"));
364a126c
DC
260 phase2_threads = (int)strtol(val, NULL, 0);
261 break;
2bd0ea18
NS
262 default:
263 unknown('o', val);
264 break;
265 }
266 }
267 break;
4af916f8
BN
268 case 'c':
269 p = optarg;
270 while (*p) {
271 char *val;
272
ab870d0e 273 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 274 case CONVERT_LAZY_COUNT:
1f8480b6
DW
275 if (!val)
276 do_abort(
277 _("-c lazycount requires a parameter\n"));
5e656dbb 278 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
279 convert_lazy_count = 1;
280 break;
281 default:
282 unknown('c', val);
283 break;
284 }
285 }
286 break;
2bd0ea18
NS
287 case 'l':
288 log_name = optarg;
289 log_spec = 1;
290 break;
42a564ab
ES
291 case 'r':
292 rt_name = optarg;
293 rt_spec = 1;
294 break;
2bd0ea18
NS
295 case 'f':
296 isa_file = 1;
297 break;
12be365e
BN
298 case 'm':
299 if (bhash_option_used)
300 do_abort(_("-m option cannot be used with "
301 "-o bhash option\n"));
5e656dbb 302 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 303 break;
d321ceac
NS
304 case 'L':
305 zap_log = 1;
306 break;
2bd0ea18
NS
307 case 'n':
308 no_modify = 1;
309 break;
6089b6f0
NS
310 case 'd':
311 dangerously = 1;
312 break;
2bd0ea18 313 case 'v':
3b6ac903 314 verbose++;
2bd0ea18
NS
315 break;
316 case 'V':
507f4e33 317 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 318 exit(0);
cb5b3ef4 319 case 'P':
2556c98b 320 do_prefetch = 0;
3b6ac903 321 break;
06fbdda9 322 case 't':
5e656dbb 323 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 324 break;
7c3e94a3
JT
325 case 'e':
326 report_corrected = true;
327 break;
2bd0ea18
NS
328 case '?':
329 usage();
330 }
331 }
332
333 if (argc - optind != 1)
334 usage();
335
336 if ((fs_name = argv[optind]) == NULL)
337 usage();
7c3e94a3
JT
338
339 if (report_corrected && no_modify)
340 usage();
2bd0ea18
NS
341}
342
b1559967 343void __attribute__((noreturn))
2bd0ea18
NS
344do_error(char const *msg, ...)
345{
346 va_list args;
347
507f4e33 348 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
349
350 va_start(args, msg);
079afa09
CH
351 vfprintf(stderr, msg, args);
352 if (dumpcore)
353 abort();
354 exit(1);
2bd0ea18
NS
355}
356
357/*
358 * like do_error, only the error is internal, no system
359 * error so no oserror processing
360 */
b1559967 361void __attribute__((noreturn))
2bd0ea18
NS
362do_abort(char const *msg, ...)
363{
364 va_list args;
365
366 va_start(args, msg);
079afa09
CH
367 vfprintf(stderr, msg, args);
368 if (dumpcore)
369 abort();
370 exit(1);
2bd0ea18
NS
371}
372
373void
374do_warn(char const *msg, ...)
375{
376 va_list args;
377
378 fs_is_dirty = 1;
379
380 va_start(args, msg);
079afa09 381 vfprintf(stderr, msg, args);
2bd0ea18
NS
382 va_end(args);
383}
384
/*
 * Print a printf-style informational message on stderr.  No prefix is
 * added to the message, and unlike do_warn() this does not touch
 * fs_is_dirty.
 */
void
do_log(char const *msg, ...)
{
	va_list		ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
396
8b8a6b02 397static void
2bd0ea18
NS
398calc_mkfs(xfs_mount_t *mp)
399{
400 xfs_agblock_t fino_bno;
401 int do_inoalign;
402
e7fd2b6f 403 do_inoalign = M_IGEO(mp)->ialloc_align;
2bd0ea18
NS
404
405 /*
7b370905
BF
406 * Pre-calculate the geometry of ag 0. We know what it looks like
407 * because we know what mkfs does: 2 allocation btree roots (by block
408 * and by size), the inode allocation btree root, the free inode
409 * allocation btree root (if enabled) and some number of blocks to
410 * prefill the agfl.
de046644
DC
411 *
412 * Because the current shape of the btrees may differ from the current
413 * shape, we open code the mkfs freelist block count here. mkfs creates
414 * single level trees, so the calculation is pertty straight forward for
7ddb50f8 415 * the trees that use the AGFL.
2bd0ea18
NS
416 */
417 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
418 bcntbt_root = bnobt_root + 1;
419 inobt_root = bnobt_root + 2;
de046644 420 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
7b370905
BF
421 if (xfs_sb_version_hasfinobt(&mp->m_sb))
422 fino_bno++;
7ddb50f8
DW
423 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
424 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
0f94fa4b 425 fino_bno++;
7ddb50f8 426 }
18c44aa9
DW
427 if (xfs_sb_version_hasreflink(&mp->m_sb))
428 fino_bno++;
2bd0ea18 429
d4dd6ab5 430 /*
649bfa9a
CH
431 * If the log is allocated in the first allocation group we need to
432 * add the number of blocks used by the log to the above calculation.
433 *
434 * This can happens with filesystems that only have a single
435 * allocation group, or very odd geometries created by old mkfs
436 * versions on very small filesystems.
d4dd6ab5 437 */
649bfa9a
CH
438 if (mp->m_sb.sb_logstart &&
439 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
440
d4dd6ab5
CH
441 /*
442 * XXX(hch): verify that sb_logstart makes sense?
443 */
444 fino_bno += mp->m_sb.sb_logblocks;
445 }
446
2bd0ea18
NS
447 /*
448 * ditto the location of the first inode chunks in the fs ('/')
449 */
5e656dbb 450 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
7516da71
DW
451 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, roundup(fino_bno,
452 mp->m_sb.sb_unit));
5e656dbb 453 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
2bd0ea18 454 mp->m_sb.sb_inoalignmt > 1) {
7516da71 455 first_prealloc_ino = XFS_AGB_TO_AGINO(mp,
2bd0ea18 456 roundup(fino_bno,
7516da71 457 mp->m_sb.sb_inoalignmt));
2bd0ea18 458 } else {
7516da71 459 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno);
2bd0ea18
NS
460 }
461
e7fd2b6f 462 ASSERT(M_IGEO(mp)->ialloc_blks > 0);
2bd0ea18 463
e7fd2b6f 464 if (M_IGEO(mp)->ialloc_blks > 1)
2bd0ea18
NS
465 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
466 else
7516da71 467 last_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno + 1);
2bd0ea18
NS
468
469 /*
470 * now the first 3 inodes in the system
471 */
472 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
473 do_warn(
5d1b7f0f 474_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
475 mp->m_sb.sb_rootino,
476 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
477 first_prealloc_ino);
2bd0ea18
NS
478
479 if (!no_modify)
480 do_warn(
5d1b7f0f 481 _("resetting superblock root inode pointer to %u\n"),
2bd0ea18
NS
482 first_prealloc_ino);
483 else
484 do_warn(
5d1b7f0f 485 _("would reset superblock root inode pointer to %u\n"),
2bd0ea18
NS
486 first_prealloc_ino);
487
488 /*
489 * just set the value -- safe since the superblock
490 * doesn't get flushed out if no_modify is set
491 */
492 mp->m_sb.sb_rootino = first_prealloc_ino;
493 }
494
495 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
496 do_warn(
5d1b7f0f 497_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
498 mp->m_sb.sb_rbmino,
499 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
500 first_prealloc_ino + 1);
2bd0ea18
NS
501
502 if (!no_modify)
503 do_warn(
5d1b7f0f 504 _("resetting superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
505 first_prealloc_ino + 1);
506 else
507 do_warn(
5d1b7f0f 508 _("would reset superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
509 first_prealloc_ino + 1);
510
511 /*
512 * just set the value -- safe since the superblock
513 * doesn't get flushed out if no_modify is set
514 */
515 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
516 }
517
518 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
519 do_warn(
5d1b7f0f
CH
520_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
521 mp->m_sb.sb_rsumino,
522 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
523 first_prealloc_ino + 2);
2bd0ea18
NS
524
525 if (!no_modify)
526 do_warn(
5d1b7f0f 527 _("resetting superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
528 first_prealloc_ino + 2);
529 else
530 do_warn(
5d1b7f0f 531 _("would reset superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
532 first_prealloc_ino + 2);
533
534 /*
535 * just set the value -- safe since the superblock
536 * doesn't get flushed out if no_modify is set
537 */
538 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
539 }
540
541}
542
1926558d
BF
543/*
544 * v5 superblock metadata track the LSN of last modification and thus require
545 * that the current LSN is always moving forward. The current LSN is reset if
546 * the log has been cleared, which puts the log behind parts of the filesystem
547 * on-disk and can disrupt log recovery.
548 *
549 * We have tracked the maximum LSN of every piece of metadata that has been read
550 * in via the read verifiers. Compare the max LSN with the log and if the log is
551 * behind, bump the cycle number and reformat the log.
552 */
553static void
554format_log_max_lsn(
555 struct xfs_mount *mp)
556{
557 struct xlog *log = mp->m_log;
558 int max_cycle;
559 int max_block;
560 int new_cycle;
561 xfs_daddr_t logstart;
562 xfs_daddr_t logblocks;
563 int logversion;
564
565 if (!xfs_sb_version_hascrc(&mp->m_sb))
566 return;
567
568 /*
569 * If the log is ahead of the highest metadata LSN we've seen, we're
570 * safe and there's nothing to do.
571 */
572 max_cycle = CYCLE_LSN(libxfs_max_lsn);
573 max_block = BLOCK_LSN(libxfs_max_lsn);
574 if (max_cycle < log->l_curr_cycle ||
575 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
576 return;
577
578 /*
579 * Going to the next cycle should be sufficient but we bump by a few
580 * counts to help cover any metadata LSNs we could have missed.
581 */
582 new_cycle = max_cycle + 3;
583 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
584 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
585 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
586
587 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
588 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
589
590 if (no_modify) {
591 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
592 return;
593 }
594
595 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
596 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
597 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 598 XLOG_FMT, new_cycle, true);
1926558d
BF
599}
600
4a32b9e9
DC
601/*
602 * mkfs increases the AG count for "multidisk" configurations, we want
603 * to target these for an increase in thread count. Hence check the superlock
604 * geometry information to determine if mkfs considered this a multidisk
605 * configuration.
606 */
607static bool
608is_multidisk_filesystem(
609 struct xfs_mount *mp)
610{
611 struct xfs_sb *sbp = &mp->m_sb;
612
613 /* High agcount filesystems are always considered "multidisk" */
614 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
615 return true;
616
617 /*
618 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
619 * multi-disk array, so we don't either.
620 */
621 if (!sbp->sb_unit)
622 return false;
623
624 ASSERT(sbp->sb_width);
625 return true;
626}
627
28a0a30f
ZL
628/*
629 * if the sector size of the filesystem we are trying to repair is
630 * smaller than that of the underlying filesystem (i.e. we are repairing
631 * an image), the we have to turn off direct IO because we cannot do IO
632 * smaller than the host filesystem's sector size.
633 */
634static void
635check_fs_vs_host_sectsize(
636 struct xfs_sb *sb)
637{
9612817d 638 int fd, ret;
28a0a30f 639 long old_flags;
9612817d 640 struct xfs_fsop_geom geom = { 0 };
28a0a30f
ZL
641
642 fd = libxfs_device_to_fd(x.ddev);
643
9612817d
DW
644 ret = xfrog_geometry(fd, &geom);
645 if (ret) {
28a0a30f
ZL
646 do_log(_("Cannot get host filesystem geometry.\n"
647 "Repair may fail if there is a sector size mismatch between\n"
648 "the image and the host filesystem.\n"));
649 geom.sectsize = BBSIZE;
650 }
651
652 if (sb->sb_sectsize < geom.sectsize) {
653 old_flags = fcntl(fd, F_GETFL, 0);
654 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
655 do_warn(_(
656 "Sector size on host filesystem larger than image sector size.\n"
657 "Cannot turn off direct IO, so exiting.\n"));
658 exit(1);
659 }
660 }
661}
662
2bd0ea18
NS
663int
664main(int argc, char **argv)
665{
2bd0ea18
NS
666 xfs_mount_t *temp_mp;
667 xfs_mount_t *mp;
5e656dbb 668 xfs_dsb_t *dsb;
2bd0ea18
NS
669 xfs_buf_t *sbp;
670 xfs_mount_t xfs_m;
1d6cb115 671 struct xlog log = {0};
06fbdda9 672 char *msgbuf;
88f364a9
DC
673 struct xfs_sb psb;
674 int rval;
e7fd2b6f 675 struct xfs_ino_geometry *igeo;
2bd0ea18
NS
676
677 progname = basename(argv[0]);
507f4e33
NS
678 setlocale(LC_ALL, "");
679 bindtextdomain(PACKAGE, LOCALEDIR);
680 textdomain(PACKAGE);
beed0dc8 681 dinode_bmbt_translation_init();
2bd0ea18
NS
682
683 temp_mp = &xfs_m;
684 setbuf(stdout, NULL);
685
686 process_args(argc, argv);
d321ceac 687 xfs_init(&x);
2bd0ea18 688
2556c98b
BN
689 msgbuf = malloc(DURATION_BUF_SIZE);
690
06fbdda9
MV
691 timestamp(PHASE_START, 0, NULL);
692 timestamp(PHASE_END, 0, NULL);
693
28a0a30f
ZL
694 /* -f forces this, but let's be nice and autodetect it, as well. */
695 if (!isa_file) {
696 int fd = libxfs_device_to_fd(x.ddev);
697 struct stat statbuf;
698
699 if (fstat(fd, &statbuf) < 0)
700 do_warn(_("%s: couldn't stat \"%s\"\n"),
701 progname, fs_name);
702 else if (S_ISREG(statbuf.st_mode))
703 isa_file = 1;
704 }
705
706 if (isa_file) {
707 /* Best effort attempt to validate fs vs host sector size */
708 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
709 if (rval == XR_OK)
710 check_fs_vs_host_sectsize(&psb);
711 }
712
2bd0ea18
NS
713 /* do phase1 to make sure we have a superblock */
714 phase1(temp_mp);
06fbdda9 715 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
716
717 if (no_modify && primary_sb_modified) {
507f4e33
NS
718 do_warn(_("Primary superblock would have been modified.\n"
719 "Cannot proceed further in no_modify mode.\n"
720 "Exiting now.\n"));
2bd0ea18
NS
721 exit(1);
722 }
723
88f364a9
DC
724 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
725 if (rval != XR_OK) {
726 do_warn(_("Primary superblock bad after phase 1!\n"
727 "Exiting now.\n"));
728 exit(1);
729 }
2bd0ea18 730
f63fd268 731 /*
28a0a30f
ZL
732 * Now that we have completely validated the superblock, geometry may
733 * have changed; re-check geometry vs the host filesystem geometry
f63fd268 734 */
28a0a30f
ZL
735 if (isa_file)
736 check_fs_vs_host_sectsize(&psb);
88f364a9 737
1d6cb115
BF
738 /*
739 * Prepare the mount structure. Point the log reference to our local
740 * copy so it's available to the various phases. The log bits are
741 * initialized in phase 2.
742 */
88f364a9
DC
743 memset(&xfs_m, 0, sizeof(xfs_mount_t));
744 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
745
746 if (!mp) {
507f4e33
NS
747 fprintf(stderr,
748 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
749 progname);
750 exit(1);
751 }
1d6cb115 752 mp->m_log = &log;
e7fd2b6f 753 igeo = M_IGEO(mp);
2bd0ea18 754
23639f77
ES
755 /* Spit out function & line on these corruption macros */
756 if (verbose > 2)
757 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
758
2bd0ea18
NS
759 /*
760 * set XFS-independent status vars from the mount/sb structure
761 */
762 glob_agcount = mp->m_sb.sb_agcount;
763
764 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
5a707ca1 765 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
2bd0ea18 766
0cce4aa1
DC
767 /*
768 * Automatic striding for high agcount filesystems.
769 *
770 * More AGs indicates that the filesystem is either large or can handle
771 * more IO parallelism. Either way, we should try to process multiple
772 * AGs at a time in such a configuration to try to saturate the
773 * underlying storage and speed the repair process. Only do this if
774 * prefetching is enabled.
775 *
776 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
777 * to target these for an increase in thread count. Hence a stride value
778 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
779 * on such filesystems.
12b55baf
DC
780 *
781 * Limit the maximum thread count based on the available CPU power that
782 * is available. If we use too many threads, we might run out of memory
783 * and CPU power before we run out of IO concurrency. We limit to 8
784 * threads/CPU as this is enough threads to saturate a CPU on fast
785 * devices, yet few enough that it will saturate but won't overload slow
786 * devices.
4a32b9e9
DC
787 *
788 * Multidisk filesystems can handle more IO parallelism so we should try
789 * to process multiple AGs at a time in such a configuration to try to
790 * saturate the underlying storage and speed the repair process. Only do
791 * this if prefetching is enabled.
0cce4aa1 792 */
4a32b9e9
DC
793 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
794 /*
795 * For small agcount multidisk systems, just double the
796 * parallelism. For larger AG count filesystems (32 and above)
797 * use more parallelism, and linearly increase the parallelism
798 * with the number of AGs.
799 */
800 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
801 }
0cce4aa1 802
add3cb90 803 if (ag_stride) {
12b55baf
DC
804 int max_threads = platform_nproc() * 8;
805
2556c98b 806 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
807 while (thread_count > max_threads) {
808 ag_stride *= 2;
809 thread_count = (glob_agcount + ag_stride - 1) /
810 ag_stride;
811 }
812 if (thread_count > 0)
813 thread_init();
814 else {
815 thread_count = 1;
816 ag_stride = 0;
817 }
add3cb90
BN
818 }
819
2556c98b 820 if (ag_stride && report_interval) {
06fbdda9 821 init_progress_rpt();
06fbdda9
MV
822 if (msgbuf) {
823 do_log(_(" - reporting progress in intervals of %s\n"),
824 duration(report_interval, msgbuf));
06fbdda9
MV
825 }
826 }
827
2556c98b
BN
828 /*
829 * Adjust libxfs cache sizes based on system memory,
830 * filesystem size and inode count.
831 *
832 * We'll set the cache size based on 3/4s the memory minus
833 * space used by the inode AVL tree and block usage map.
834 *
835 * Inode AVL tree space is approximately 4 bytes per inode,
836 * block usage map is currently 1 byte for 2 blocks.
837 *
838 * We assume most blocks will be inode clusters.
839 *
840 * Calculations are done in kilobyte units.
841 */
842
12be365e 843 if (!bhash_option_used || max_mem_specified) {
2556c98b 844 unsigned long mem_used;
12be365e
BN
845 unsigned long max_mem;
846 struct rlimit rlim;
2556c98b 847
2556c98b 848 libxfs_bcache_purge();
2556c98b
BN
849 cache_destroy(libxfs_bcache);
850
851 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
852 (mp->m_sb.sb_dblocks >> (10 + 1)) +
853 50000; /* rough estimate of 50MB overhead */
854 max_mem = max_mem_specified ? max_mem_specified * 1024 :
4e5fe123 855 platform_physmem() * 3 / 4;
12be365e
BN
856
857 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
858 rlim.rlim_cur != RLIM_INFINITY) {
859 rlim.rlim_cur = rlim.rlim_max;
860 setrlimit(RLIMIT_AS, &rlim);
861 /* use approximately 80% of rlimit to avoid overrun */
68d16907 862 max_mem = min(max_mem, rlim.rlim_cur / 1280);
12be365e 863 } else
68d16907 864 max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
865
866 if (verbose > 1)
5d1b7f0f
CH
867 do_log(
868 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
869 max_mem, mp->m_sb.sb_icount,
870 mp->m_sb.sb_icount >> (10 - 2),
871 mp->m_sb.sb_dblocks,
872 mp->m_sb.sb_dblocks >> (10 + 1));
873
874 if (max_mem <= mem_used) {
0335a835
DC
875 if (max_mem_specified) {
876 do_abort(
877 _("Required memory for repair is greater that the maximum specified\n"
878 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 879 mem_used / 1024);
0335a835 880 }
70a4820f 881 do_log(
61510437
DC
882 _("Memory available for repair (%luMB) may not be sufficient.\n"
883 "At least %luMB is needed to repair this filesystem efficiently\n"
884 "If repair fails due to lack of memory, please\n"),
885 max_mem / 1024, mem_used / 1024);
886 if (do_prefetch)
70a4820f 887 do_log(
61510437
DC
888 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
889 else
70a4820f 890 do_log(
61510437
DC
891 _("increase system RAM and/or swap space to at least %luMB.\n"),
892 mem_used * 2 / 1024);
893
894 max_mem = mem_used;
2556c98b
BN
895 }
896
61510437
DC
897 max_mem -= mem_used;
898 if (max_mem >= (1 << 30))
899 max_mem = 1 << 30;
900 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
e7fd2b6f 901 (igeo->inode_cluster_size >> 10));
61510437
DC
902 if (libxfs_bhash_size < 512)
903 libxfs_bhash_size = 512;
904
2556c98b
BN
905 if (verbose)
906 do_log(_(" - block cache size set to %d entries\n"),
907 libxfs_bhash_size * HASH_CACHE_RATIO);
908
ba9ecd40 909 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
910 &libxfs_bcache_operations);
911 }
912
2bd0ea18
NS
913 /*
914 * calculate what mkfs would do to this filesystem
915 */
916 calc_mkfs(mp);
917
918 /*
c1f7a46c 919 * initialize block alloc map
2bd0ea18 920 */
c1f7a46c
BN
921 init_bmaps(mp);
922 incore_ino_init(mp);
923 incore_ext_init(mp);
2d273771 924 rmaps_init(mp);
c1f7a46c
BN
925
926 /* initialize random globals now that we know the fs geometry */
927 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
928
929 if (parse_sb_version(&mp->m_sb)) {
930 do_warn(
507f4e33 931 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
932 return(1);
933 }
934
935 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 936 phase2(mp, phase2_threads);
06fbdda9 937 timestamp(PHASE_END, 2, NULL);
2bd0ea18 938
2556c98b
BN
939 if (do_prefetch)
940 init_prefetch(mp);
941
8100dd79 942 phase3(mp, phase2_threads);
06fbdda9 943 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
944
945 phase4(mp);
06fbdda9 946 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
947
948 if (no_modify)
507f4e33 949 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 950 else {
2bd0ea18 951 phase5(mp);
3b6ac903 952 }
06fbdda9 953 timestamp(PHASE_END, 5, NULL);
2bd0ea18 954
c1f7a46c
BN
955 /*
956 * Done with the block usage maps, toss them...
957 */
2d273771 958 rmaps_free(mp);
c1f7a46c
BN
959 free_bmaps(mp);
960
2bd0ea18
NS
961 if (!bad_ino_btree) {
962 phase6(mp);
06fbdda9 963 timestamp(PHASE_END, 6, NULL);
2bd0ea18 964
e161d4a8 965 phase7(mp, phase2_threads);
06fbdda9 966 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
967 } else {
968 do_warn(
507f4e33 969_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
970 }
971
0340d706 972 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
973 if (!no_modify) {
974 do_warn(
507f4e33 975_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
976 } else {
977 do_warn(
507f4e33 978_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
979 }
980 } else if (lost_quotas) {
981 if (!no_modify) {
982 do_warn(
507f4e33 983_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
984 } else {
985 do_warn(
507f4e33 986_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
987 }
988 } else {
989 if (lost_uquotino) {
990 if (!no_modify) {
991 do_warn(
507f4e33
NS
992_("Warning: user quota information was cleared.\n"
993 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
994 } else {
995 do_warn(
507f4e33
NS
996_("Warning: user quota information would be cleared.\n"
997 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
998 }
999 }
1000
b36eef04 1001 if (lost_gquotino) {
2bd0ea18
NS
1002 if (!no_modify) {
1003 do_warn(
507f4e33
NS
1004_("Warning: group quota information was cleared.\n"
1005 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1006 } else {
1007 do_warn(
507f4e33
NS
1008_("Warning: group quota information would be cleared.\n"
1009 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
1010 }
1011 }
1012
1013 if (lost_pquotino) {
1014 if (!no_modify) {
1015 do_warn(
1016_("Warning: project quota information was cleared.\n"
1017 "Project quotas can not be enforced until limit information is recreated.\n"));
1018 } else {
1019 do_warn(
1020_("Warning: project quota information would be cleared.\n"
1021 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1022 }
1023 }
1024 }
1025
2556c98b 1026 if (ag_stride && report_interval)
06fbdda9 1027 stop_progress_rpt();
9f38f08d 1028
2bd0ea18 1029 if (no_modify) {
1926558d
BF
1030 /*
1031 * Warn if the current LSN is problematic and the log requires a
1032 * reformat.
1033 */
1034 format_log_max_lsn(mp);
1035
2bd0ea18 1036 do_log(
507f4e33 1037 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1038 if (verbose)
06fbdda9 1039 summary_report();
2bd0ea18
NS
1040 if (fs_is_dirty)
1041 return(1);
1042
1043 return(0);
1044 }
1045
1046 /*
1047 * Clear the quota flags if they're on.
1048 */
67c4a324 1049 sbp = libxfs_getsb(mp);
2bd0ea18 1050 if (!sbp)
507f4e33 1051 do_error(_("couldn't get superblock\n"));
2bd0ea18 1052
5e656dbb 1053 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1054
342aef1e 1055 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1056 do_warn(_("Note - quota info will be regenerated on next "
1057 "quota mount.\n"));
342aef1e 1058 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1059 }
1060
6bf4721d 1061 if (copied_sunit) {
2bd0ea18 1062 do_warn(
6bf4721d
ES
1063_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1064 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1065 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1066 }
2bd0ea18
NS
1067
1068 libxfs_writebuf(sbp, 0);
1069
2556c98b 1070 /*
1926558d
BF
1071 * Done. Flush all cached buffers and inodes first to ensure all
1072 * verifiers are run (where we discover the max metadata LSN), reformat
1073 * the log if necessary and unmount.
2556c98b
BN
1074 */
1075 libxfs_bcache_flush();
1926558d 1076 format_log_max_lsn(mp);
2bd0ea18 1077 libxfs_umount(mp);
1926558d 1078
d321ceac
NS
1079 if (x.rtdev)
1080 libxfs_device_close(x.rtdev);
1081 if (x.logdev && x.logdev != x.ddev)
1082 libxfs_device_close(x.logdev);
1083 libxfs_device_close(x.ddev);
2ce8bff5 1084 libxfs_destroy();
2bd0ea18 1085
06fbdda9
MV
1086 if (verbose)
1087 summary_report();
507f4e33 1088 do_log(_("done\n"));
3ae81520
ES
1089
1090 if (dangerously && !no_modify)
1091 do_warn(
1092_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1093
4c0a98ae
BN
1094 pftrace_done();
1095
0a223eb8
ES
1096 free(msgbuf);
1097
7c3e94a3
JT
1098 if (fs_is_dirty && report_corrected)
1099 return (4);
3b6ac903
MV
1100 return (0);
1101}