]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
libxfs: refactor manage_zones()
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
da23017d
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
2bd0ea18
NS
5 */
6
6b803e5a
CH
7#include "libxfs.h"
8#include "libxlog.h"
12be365e 9#include <sys/resource.h>
4a32b9e9 10#include "xfs_multidisk.h"
2bd0ea18
NS
11#include "avl.h"
12#include "avl64.h"
13#include "globals.h"
14#include "versions.h"
15#include "agheader.h"
16#include "protos.h"
17#include "incore.h"
18#include "err_protos.h"
cb5b3ef4 19#include "prefetch.h"
3b6ac903 20#include "threads.h"
06fbdda9 21#include "progress.h"
beed0dc8 22#include "dinode.h"
9e0f480e
DW
23#include "slab.h"
24#include "rmap.h"
2bd0ea18 25
2bd0ea18
NS
26/*
27 * option tables for getsubopt calls
28 */
29
30/*
4af916f8 31 * -o: user-supplied override options
2bd0ea18 32 */
98884b66
DW
/*
 * Indices of the suboptions understood by -o.  O_MAX_OPTS is not an
 * option itself; it is the slot that holds the table terminator.
 */
enum o_opt_nums {
	ASSUME_XFS = 0,
	IHASH_SIZE,
	BHASH_SIZE,
	AG_STRIDE,
	FORCE_GEO,
	PHASE2_THREADS,
	O_MAX_OPTS,
};

/*
 * Suboption names for -o, indexed by enum o_opt_nums and terminated by a
 * NULL entry as getsubopt() requires.
 */
static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	[O_MAX_OPTS]		= NULL,
};
52
4af916f8
BN
53/*
54 * -c: conversion options
55 */
98884b66
DW
/*
 * Indices of the suboptions understood by -c.  C_MAX_OPTS is the slot
 * that holds the table terminator.
 */
enum c_opt_nums {
	CONVERT_LAZY_COUNT = 0,
	C_MAX_OPTS,
};

/*
 * Suboption names for -c, indexed by enum c_opt_nums and terminated by a
 * NULL entry as getsubopt() requires.
 */
static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	[C_MAX_OPTS]		= NULL,
};
65
66
/* Non-zero once "-o bhash=..." has been parsed. */
static int	bhash_option_used;
/* Value given to -m, in megabytes; zero when -m was not given. */
static long	max_mem_specified;
/* Thread count handed to phase2(), phase3() and phase7(). */
static int	phase2_threads = 32;
/* Set by -e: exit non-zero if the run ended with fs_is_dirty set. */
static bool	report_corrected;
2556c98b 71
2bd0ea18
NS
72static void
73usage(void)
74{
4af916f8
BN
75 do_warn(_(
76"Usage: %s [options] device\n"
77"\n"
78"Options:\n"
79" -f The device is a file\n"
80" -L Force log zeroing. Do this as a last resort.\n"
81" -l logdev Specifies the device where the external log resides.\n"
82" -m maxmem Maximum amount of memory to be used in megabytes.\n"
83" -n No modify mode, just checks the filesystem for damage.\n"
7c3e94a3 84" (Cannot be used together with -e.)\n"
4af916f8
BN
85" -P Disables prefetching.\n"
86" -r rtdev Specifies the device where the realtime section resides.\n"
87" -v Verbose output.\n"
88" -c subopts Change filesystem parameters - use xfs_admin.\n"
89" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 90" -t interval Reporting interval in seconds.\n"
4af916f8 91" -d Repair dangerously.\n"
7c3e94a3
JT
92" -e Exit with a non-zero code if any errors were repaired.\n"
93" (Cannot be used together with -n.)\n"
4af916f8 94" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
95 exit(1);
96}
97
2bd0ea18
NS
98char *
99err_string(int err_code)
100{
507f4e33
NS
101 static char *err_message[XR_BAD_ERR_CODE];
102 static int done;
103
104 if (!done) {
105 err_message[XR_OK] = _("no error");
106 err_message[XR_BAD_MAGIC] = _("bad magic number");
107 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
108 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 109 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
110 err_message[XR_BAD_INPROGRESS] =
111 _("filesystem mkfs-in-progress bit set");
112 err_message[XR_BAD_FS_SIZE_DATA] =
113 _("inconsistent filesystem geometry information");
114 err_message[XR_BAD_INO_SIZE_DATA] =
115 _("bad inode size or inconsistent with number of inodes/block"),
116 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
117 err_message[XR_AGF_GEO_MISMATCH] =
118 _("AGF geometry info conflicts with filesystem geometry");
119 err_message[XR_AGI_GEO_MISMATCH] =
120 _("AGI geometry info conflicts with filesystem geometry");
121 err_message[XR_SB_GEO_MISMATCH] =
122 _("AG superblock geometry info conflicts with filesystem geometry");
123 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
124 err_message[XR_BAD_RT_GEO_DATA] =
125 _("inconsistent filesystem geometry in realtime filesystem component");
126 err_message[XR_BAD_INO_MAX_PCT] =
127 _("maximum indicated percentage of inodes > 100%");
128 err_message[XR_BAD_INO_ALIGN] =
129 _("inconsistent inode alignment value");
130 err_message[XR_INSUFF_SEC_SB] =
131 _("not enough secondary superblocks with matching geometry");
132 err_message[XR_BAD_SB_UNIT] =
133 _("bad stripe unit in superblock");
134 err_message[XR_BAD_SB_WIDTH] =
135 _("bad stripe width in superblock");
136 err_message[XR_BAD_SVN] =
137 _("bad shared version number in superblock");
88f364a9
DC
138 err_message[XR_BAD_CRC] =
139 _("bad CRC in superblock");
02b56f87
DW
140 err_message[XR_BAD_DIR_SIZE_DATA] =
141 _("inconsistent directory geometry information");
eb9cee60
DW
142 err_message[XR_BAD_LOG_GEOMETRY] =
143 _("inconsistent log geometry information");
507f4e33
NS
144 done = 1;
145 }
146
2bd0ea18 147 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 148 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
149
150 return(err_message[err_code]);
151}
152
/*
 * Complain that suboption tbl[idx] of option -<opt> was given a value
 * although it does not accept one, then hand over to usage().
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt);
	usage();
}
159
/*
 * Complain that option -<opt> was given more than once, then hand over
 * to usage().  The suboption name tbl[idx] is included in the message
 * only when a table is supplied.
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}
	do_warn(_("option respecified\n"));
	usage();
}
169
/*
 * Complain about an unrecognised (sub)option string s given to option
 * -<opt>, then hand over to usage().
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s);
	usage();
}
176
177/*
178 * sets only the global argument flags and variables
179 */
8b8a6b02 180static void
2bd0ea18
NS
181process_args(int argc, char **argv)
182{
183 char *p;
184 int c;
185
186 log_spec = 0;
187 fs_is_dirty = 0;
188 verbose = 0;
189 no_modify = 0;
c781939c 190 dangerously = 0;
2bd0ea18 191 isa_file = 0;
d321ceac 192 zap_log = 0;
2bd0ea18 193 dumpcore = 0;
0f012a4c 194 full_ino_ex_data = 0;
2bd0ea18
NS
195 force_geo = 0;
196 assume_xfs = 0;
6bf4721d 197 copied_sunit = 0;
2bd0ea18
NS
198 sb_inoalignmt = 0;
199 sb_unit = 0;
200 sb_width = 0;
add3cb90 201 ag_stride = 0;
2556c98b 202 thread_count = 1;
06fbdda9 203 report_interval = PROG_RPT_DEFAULT;
7c3e94a3 204 report_corrected = false;
2bd0ea18
NS
205
206 /*
207 * XXX have to add suboption processing here
208 * attributes, quotas, nlinks, aligned_inos, sb_fbits
209 */
7c3e94a3 210 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
2bd0ea18
NS
211 switch (c) {
212 case 'D':
213 dumpcore = 1;
214 break;
215 case 'o':
216 p = optarg;
217 while (*p != '\0') {
218 char *val;
219
ab870d0e 220 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
221 case ASSUME_XFS:
222 if (val)
223 noval('o', o_opts, ASSUME_XFS);
224 if (assume_xfs)
225 respec('o', o_opts, ASSUME_XFS);
226 assume_xfs = 1;
227 break;
9f38f08d 228 case IHASH_SIZE:
3a19fb7d
CH
229 do_warn(
230 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
231 break;
232 case BHASH_SIZE:
12be365e
BN
233 if (max_mem_specified)
234 do_abort(
3a19fb7d 235 _("-o bhash option cannot be used with -m option\n"));
1f8480b6
DW
236 if (!val)
237 do_abort(
238 _("-o bhash requires a parameter\n"));
5e656dbb 239 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 240 bhash_option_used = 1;
cb5b3ef4 241 break;
add3cb90 242 case AG_STRIDE:
1f8480b6
DW
243 if (!val)
244 do_abort(
245 _("-o ag_stride requires a parameter\n"));
5e656dbb 246 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 247 break;
d4dd6ab5
CH
248 case FORCE_GEO:
249 if (val)
250 noval('o', o_opts, FORCE_GEO);
251 if (force_geo)
252 respec('o', o_opts, FORCE_GEO);
253 force_geo = 1;
254 break;
364a126c 255 case PHASE2_THREADS:
1f8480b6
DW
256 if (!val)
257 do_abort(
258 _("-o phase2_threads requires a parameter\n"));
364a126c
DC
259 phase2_threads = (int)strtol(val, NULL, 0);
260 break;
2bd0ea18
NS
261 default:
262 unknown('o', val);
263 break;
264 }
265 }
266 break;
4af916f8
BN
267 case 'c':
268 p = optarg;
269 while (*p) {
270 char *val;
271
ab870d0e 272 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 273 case CONVERT_LAZY_COUNT:
1f8480b6
DW
274 if (!val)
275 do_abort(
276 _("-c lazycount requires a parameter\n"));
5e656dbb 277 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
278 convert_lazy_count = 1;
279 break;
280 default:
281 unknown('c', val);
282 break;
283 }
284 }
285 break;
2bd0ea18
NS
286 case 'l':
287 log_name = optarg;
288 log_spec = 1;
289 break;
42a564ab
ES
290 case 'r':
291 rt_name = optarg;
292 rt_spec = 1;
293 break;
2bd0ea18
NS
294 case 'f':
295 isa_file = 1;
296 break;
12be365e
BN
297 case 'm':
298 if (bhash_option_used)
299 do_abort(_("-m option cannot be used with "
300 "-o bhash option\n"));
5e656dbb 301 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 302 break;
d321ceac
NS
303 case 'L':
304 zap_log = 1;
305 break;
2bd0ea18
NS
306 case 'n':
307 no_modify = 1;
308 break;
6089b6f0
NS
309 case 'd':
310 dangerously = 1;
311 break;
2bd0ea18 312 case 'v':
3b6ac903 313 verbose++;
2bd0ea18
NS
314 break;
315 case 'V':
507f4e33 316 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 317 exit(0);
cb5b3ef4 318 case 'P':
2556c98b 319 do_prefetch = 0;
3b6ac903 320 break;
06fbdda9 321 case 't':
5e656dbb 322 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 323 break;
7c3e94a3
JT
324 case 'e':
325 report_corrected = true;
326 break;
2bd0ea18
NS
327 case '?':
328 usage();
329 }
330 }
331
332 if (argc - optind != 1)
333 usage();
334
335 if ((fs_name = argv[optind]) == NULL)
336 usage();
7c3e94a3
JT
337
338 if (report_corrected && no_modify)
339 usage();
2bd0ea18
NS
340}
341
b1559967 342void __attribute__((noreturn))
2bd0ea18
NS
343do_error(char const *msg, ...)
344{
345 va_list args;
346
507f4e33 347 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
348
349 va_start(args, msg);
079afa09
CH
350 vfprintf(stderr, msg, args);
351 if (dumpcore)
352 abort();
353 exit(1);
2bd0ea18
NS
354}
355
356/*
357 * like do_error, only the error is internal, no system
358 * error so no oserror processing
359 */
b1559967 360void __attribute__((noreturn))
2bd0ea18
NS
361do_abort(char const *msg, ...)
362{
363 va_list args;
364
365 va_start(args, msg);
079afa09
CH
366 vfprintf(stderr, msg, args);
367 if (dumpcore)
368 abort();
369 exit(1);
2bd0ea18
NS
370}
371
372void
373do_warn(char const *msg, ...)
374{
375 va_list args;
376
377 fs_is_dirty = 1;
378
379 va_start(args, msg);
079afa09 380 vfprintf(stderr, msg, args);
2bd0ea18
NS
381 va_end(args);
382}
383
/*
 * Print a printf-style informational message to stderr.  Unlike
 * do_warn() this does not touch fs_is_dirty.
 */
void
do_log(char const *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
395
8b8a6b02 396static void
2bd0ea18
NS
397calc_mkfs(xfs_mount_t *mp)
398{
399 xfs_agblock_t fino_bno;
400 int do_inoalign;
401
402 do_inoalign = mp->m_sinoalign;
403
404 /*
7b370905
BF
405 * Pre-calculate the geometry of ag 0. We know what it looks like
406 * because we know what mkfs does: 2 allocation btree roots (by block
407 * and by size), the inode allocation btree root, the free inode
408 * allocation btree root (if enabled) and some number of blocks to
409 * prefill the agfl.
de046644
DC
410 *
411 * Because the current shape of the btrees may differ from the current
412 * shape, we open code the mkfs freelist block count here. mkfs creates
413 * single level trees, so the calculation is pertty straight forward for
7ddb50f8 414 * the trees that use the AGFL.
2bd0ea18
NS
415 */
416 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
417 bcntbt_root = bnobt_root + 1;
418 inobt_root = bnobt_root + 2;
de046644 419 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
7b370905
BF
420 if (xfs_sb_version_hasfinobt(&mp->m_sb))
421 fino_bno++;
7ddb50f8
DW
422 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
423 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
0f94fa4b 424 fino_bno++;
7ddb50f8 425 }
18c44aa9
DW
426 if (xfs_sb_version_hasreflink(&mp->m_sb))
427 fino_bno++;
2bd0ea18 428
d4dd6ab5 429 /*
649bfa9a
CH
430 * If the log is allocated in the first allocation group we need to
431 * add the number of blocks used by the log to the above calculation.
432 *
433 * This can happens with filesystems that only have a single
434 * allocation group, or very odd geometries created by old mkfs
435 * versions on very small filesystems.
d4dd6ab5 436 */
649bfa9a
CH
437 if (mp->m_sb.sb_logstart &&
438 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
439
d4dd6ab5
CH
440 /*
441 * XXX(hch): verify that sb_logstart makes sense?
442 */
443 fino_bno += mp->m_sb.sb_logblocks;
444 }
445
2bd0ea18
NS
446 /*
447 * ditto the location of the first inode chunks in the fs ('/')
448 */
5e656dbb 449 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
7516da71
DW
450 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, roundup(fino_bno,
451 mp->m_sb.sb_unit));
5e656dbb 452 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
2bd0ea18 453 mp->m_sb.sb_inoalignmt > 1) {
7516da71 454 first_prealloc_ino = XFS_AGB_TO_AGINO(mp,
2bd0ea18 455 roundup(fino_bno,
7516da71 456 mp->m_sb.sb_inoalignmt));
2bd0ea18 457 } else {
7516da71 458 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno);
2bd0ea18
NS
459 }
460
ff105f75 461 ASSERT(mp->m_ialloc_blks > 0);
2bd0ea18 462
ff105f75 463 if (mp->m_ialloc_blks > 1)
2bd0ea18
NS
464 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
465 else
7516da71 466 last_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno + 1);
2bd0ea18
NS
467
468 /*
469 * now the first 3 inodes in the system
470 */
471 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
472 do_warn(
5d1b7f0f 473_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
474 mp->m_sb.sb_rootino,
475 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
476 first_prealloc_ino);
2bd0ea18
NS
477
478 if (!no_modify)
479 do_warn(
5d1b7f0f 480 _("resetting superblock root inode pointer to %u\n"),
2bd0ea18
NS
481 first_prealloc_ino);
482 else
483 do_warn(
5d1b7f0f 484 _("would reset superblock root inode pointer to %u\n"),
2bd0ea18
NS
485 first_prealloc_ino);
486
487 /*
488 * just set the value -- safe since the superblock
489 * doesn't get flushed out if no_modify is set
490 */
491 mp->m_sb.sb_rootino = first_prealloc_ino;
492 }
493
494 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
495 do_warn(
5d1b7f0f 496_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
497 mp->m_sb.sb_rbmino,
498 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
499 first_prealloc_ino + 1);
2bd0ea18
NS
500
501 if (!no_modify)
502 do_warn(
5d1b7f0f 503 _("resetting superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
504 first_prealloc_ino + 1);
505 else
506 do_warn(
5d1b7f0f 507 _("would reset superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
508 first_prealloc_ino + 1);
509
510 /*
511 * just set the value -- safe since the superblock
512 * doesn't get flushed out if no_modify is set
513 */
514 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
515 }
516
517 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
518 do_warn(
5d1b7f0f
CH
519_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
520 mp->m_sb.sb_rsumino,
521 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
522 first_prealloc_ino + 2);
2bd0ea18
NS
523
524 if (!no_modify)
525 do_warn(
5d1b7f0f 526 _("resetting superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
527 first_prealloc_ino + 2);
528 else
529 do_warn(
5d1b7f0f 530 _("would reset superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
531 first_prealloc_ino + 2);
532
533 /*
534 * just set the value -- safe since the superblock
535 * doesn't get flushed out if no_modify is set
536 */
537 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
538 }
539
540}
541
1926558d
BF
542/*
543 * v5 superblock metadata track the LSN of last modification and thus require
544 * that the current LSN is always moving forward. The current LSN is reset if
545 * the log has been cleared, which puts the log behind parts of the filesystem
546 * on-disk and can disrupt log recovery.
547 *
548 * We have tracked the maximum LSN of every piece of metadata that has been read
549 * in via the read verifiers. Compare the max LSN with the log and if the log is
550 * behind, bump the cycle number and reformat the log.
551 */
552static void
553format_log_max_lsn(
554 struct xfs_mount *mp)
555{
556 struct xlog *log = mp->m_log;
557 int max_cycle;
558 int max_block;
559 int new_cycle;
560 xfs_daddr_t logstart;
561 xfs_daddr_t logblocks;
562 int logversion;
563
564 if (!xfs_sb_version_hascrc(&mp->m_sb))
565 return;
566
567 /*
568 * If the log is ahead of the highest metadata LSN we've seen, we're
569 * safe and there's nothing to do.
570 */
571 max_cycle = CYCLE_LSN(libxfs_max_lsn);
572 max_block = BLOCK_LSN(libxfs_max_lsn);
573 if (max_cycle < log->l_curr_cycle ||
574 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
575 return;
576
577 /*
578 * Going to the next cycle should be sufficient but we bump by a few
579 * counts to help cover any metadata LSNs we could have missed.
580 */
581 new_cycle = max_cycle + 3;
582 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
583 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
584 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
585
586 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
587 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
588
589 if (no_modify) {
590 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
591 return;
592 }
593
594 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
595 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
596 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 597 XLOG_FMT, new_cycle, true);
1926558d
BF
598}
599
4a32b9e9
DC
600/*
601 * mkfs increases the AG count for "multidisk" configurations, we want
602 * to target these for an increase in thread count. Hence check the superlock
603 * geometry information to determine if mkfs considered this a multidisk
604 * configuration.
605 */
606static bool
607is_multidisk_filesystem(
608 struct xfs_mount *mp)
609{
610 struct xfs_sb *sbp = &mp->m_sb;
611
612 /* High agcount filesystems are always considered "multidisk" */
613 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
614 return true;
615
616 /*
617 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
618 * multi-disk array, so we don't either.
619 */
620 if (!sbp->sb_unit)
621 return false;
622
623 ASSERT(sbp->sb_width);
624 return true;
625}
626
28a0a30f
ZL
627/*
628 * if the sector size of the filesystem we are trying to repair is
629 * smaller than that of the underlying filesystem (i.e. we are repairing
630 * an image), the we have to turn off direct IO because we cannot do IO
631 * smaller than the host filesystem's sector size.
632 */
633static void
634check_fs_vs_host_sectsize(
635 struct xfs_sb *sb)
636{
637 int fd;
638 long old_flags;
639 struct xfs_fsop_geom_v1 geom = { 0 };
640
641 fd = libxfs_device_to_fd(x.ddev);
642
643 if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
644 do_log(_("Cannot get host filesystem geometry.\n"
645 "Repair may fail if there is a sector size mismatch between\n"
646 "the image and the host filesystem.\n"));
647 geom.sectsize = BBSIZE;
648 }
649
650 if (sb->sb_sectsize < geom.sectsize) {
651 old_flags = fcntl(fd, F_GETFL, 0);
652 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
653 do_warn(_(
654 "Sector size on host filesystem larger than image sector size.\n"
655 "Cannot turn off direct IO, so exiting.\n"));
656 exit(1);
657 }
658 }
659}
660
2bd0ea18
NS
661int
662main(int argc, char **argv)
663{
2bd0ea18
NS
664 xfs_mount_t *temp_mp;
665 xfs_mount_t *mp;
5e656dbb 666 xfs_dsb_t *dsb;
2bd0ea18
NS
667 xfs_buf_t *sbp;
668 xfs_mount_t xfs_m;
1d6cb115 669 struct xlog log = {0};
06fbdda9 670 char *msgbuf;
88f364a9
DC
671 struct xfs_sb psb;
672 int rval;
2bd0ea18
NS
673
674 progname = basename(argv[0]);
507f4e33
NS
675 setlocale(LC_ALL, "");
676 bindtextdomain(PACKAGE, LOCALEDIR);
677 textdomain(PACKAGE);
beed0dc8 678 dinode_bmbt_translation_init();
2bd0ea18
NS
679
680 temp_mp = &xfs_m;
681 setbuf(stdout, NULL);
682
683 process_args(argc, argv);
d321ceac 684 xfs_init(&x);
2bd0ea18 685
2556c98b
BN
686 msgbuf = malloc(DURATION_BUF_SIZE);
687
06fbdda9
MV
688 timestamp(PHASE_START, 0, NULL);
689 timestamp(PHASE_END, 0, NULL);
690
28a0a30f
ZL
691 /* -f forces this, but let's be nice and autodetect it, as well. */
692 if (!isa_file) {
693 int fd = libxfs_device_to_fd(x.ddev);
694 struct stat statbuf;
695
696 if (fstat(fd, &statbuf) < 0)
697 do_warn(_("%s: couldn't stat \"%s\"\n"),
698 progname, fs_name);
699 else if (S_ISREG(statbuf.st_mode))
700 isa_file = 1;
701 }
702
703 if (isa_file) {
704 /* Best effort attempt to validate fs vs host sector size */
705 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
706 if (rval == XR_OK)
707 check_fs_vs_host_sectsize(&psb);
708 }
709
2bd0ea18
NS
710 /* do phase1 to make sure we have a superblock */
711 phase1(temp_mp);
06fbdda9 712 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
713
714 if (no_modify && primary_sb_modified) {
507f4e33
NS
715 do_warn(_("Primary superblock would have been modified.\n"
716 "Cannot proceed further in no_modify mode.\n"
717 "Exiting now.\n"));
2bd0ea18
NS
718 exit(1);
719 }
720
88f364a9
DC
721 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
722 if (rval != XR_OK) {
723 do_warn(_("Primary superblock bad after phase 1!\n"
724 "Exiting now.\n"));
725 exit(1);
726 }
2bd0ea18 727
f63fd268 728 /*
28a0a30f
ZL
729 * Now that we have completely validated the superblock, geometry may
730 * have changed; re-check geometry vs the host filesystem geometry
f63fd268 731 */
28a0a30f
ZL
732 if (isa_file)
733 check_fs_vs_host_sectsize(&psb);
88f364a9 734
1d6cb115
BF
735 /*
736 * Prepare the mount structure. Point the log reference to our local
737 * copy so it's available to the various phases. The log bits are
738 * initialized in phase 2.
739 */
88f364a9
DC
740 memset(&xfs_m, 0, sizeof(xfs_mount_t));
741 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
742
743 if (!mp) {
507f4e33
NS
744 fprintf(stderr,
745 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
746 progname);
747 exit(1);
748 }
1d6cb115 749 mp->m_log = &log;
2bd0ea18 750
23639f77
ES
751 /* Spit out function & line on these corruption macros */
752 if (verbose > 2)
753 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
754
2bd0ea18
NS
755 /*
756 * set XFS-independent status vars from the mount/sb structure
757 */
758 glob_agcount = mp->m_sb.sb_agcount;
759
760 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
5a707ca1 761 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
68d16907 762 inodes_per_cluster = max(mp->m_sb.sb_inopblock,
ff105f75 763 mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
2bd0ea18 764
0cce4aa1
DC
765 /*
766 * Automatic striding for high agcount filesystems.
767 *
768 * More AGs indicates that the filesystem is either large or can handle
769 * more IO parallelism. Either way, we should try to process multiple
770 * AGs at a time in such a configuration to try to saturate the
771 * underlying storage and speed the repair process. Only do this if
772 * prefetching is enabled.
773 *
774 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
775 * to target these for an increase in thread count. Hence a stride value
776 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
777 * on such filesystems.
12b55baf
DC
778 *
779 * Limit the maximum thread count based on the available CPU power that
780 * is available. If we use too many threads, we might run out of memory
781 * and CPU power before we run out of IO concurrency. We limit to 8
782 * threads/CPU as this is enough threads to saturate a CPU on fast
783 * devices, yet few enough that it will saturate but won't overload slow
784 * devices.
4a32b9e9
DC
785 *
786 * Multidisk filesystems can handle more IO parallelism so we should try
787 * to process multiple AGs at a time in such a configuration to try to
788 * saturate the underlying storage and speed the repair process. Only do
789 * this if prefetching is enabled.
0cce4aa1 790 */
4a32b9e9
DC
791 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
792 /*
793 * For small agcount multidisk systems, just double the
794 * parallelism. For larger AG count filesystems (32 and above)
795 * use more parallelism, and linearly increase the parallelism
796 * with the number of AGs.
797 */
798 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
799 }
0cce4aa1 800
add3cb90 801 if (ag_stride) {
12b55baf
DC
802 int max_threads = platform_nproc() * 8;
803
2556c98b 804 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
805 while (thread_count > max_threads) {
806 ag_stride *= 2;
807 thread_count = (glob_agcount + ag_stride - 1) /
808 ag_stride;
809 }
810 if (thread_count > 0)
811 thread_init();
812 else {
813 thread_count = 1;
814 ag_stride = 0;
815 }
add3cb90
BN
816 }
817
2556c98b 818 if (ag_stride && report_interval) {
06fbdda9 819 init_progress_rpt();
06fbdda9
MV
820 if (msgbuf) {
821 do_log(_(" - reporting progress in intervals of %s\n"),
822 duration(report_interval, msgbuf));
06fbdda9
MV
823 }
824 }
825
2556c98b
BN
826 /*
827 * Adjust libxfs cache sizes based on system memory,
828 * filesystem size and inode count.
829 *
830 * We'll set the cache size based on 3/4s the memory minus
831 * space used by the inode AVL tree and block usage map.
832 *
833 * Inode AVL tree space is approximately 4 bytes per inode,
834 * block usage map is currently 1 byte for 2 blocks.
835 *
836 * We assume most blocks will be inode clusters.
837 *
838 * Calculations are done in kilobyte units.
839 */
840
12be365e 841 if (!bhash_option_used || max_mem_specified) {
2556c98b 842 unsigned long mem_used;
12be365e
BN
843 unsigned long max_mem;
844 struct rlimit rlim;
2556c98b 845
2556c98b 846 libxfs_bcache_purge();
2556c98b
BN
847 cache_destroy(libxfs_bcache);
848
849 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
850 (mp->m_sb.sb_dblocks >> (10 + 1)) +
851 50000; /* rough estimate of 50MB overhead */
852 max_mem = max_mem_specified ? max_mem_specified * 1024 :
853 libxfs_physmem() * 3 / 4;
854
855 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
856 rlim.rlim_cur != RLIM_INFINITY) {
857 rlim.rlim_cur = rlim.rlim_max;
858 setrlimit(RLIMIT_AS, &rlim);
859 /* use approximately 80% of rlimit to avoid overrun */
68d16907 860 max_mem = min(max_mem, rlim.rlim_cur / 1280);
12be365e 861 } else
68d16907 862 max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
863
864 if (verbose > 1)
5d1b7f0f
CH
865 do_log(
866 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
867 max_mem, mp->m_sb.sb_icount,
868 mp->m_sb.sb_icount >> (10 - 2),
869 mp->m_sb.sb_dblocks,
870 mp->m_sb.sb_dblocks >> (10 + 1));
871
872 if (max_mem <= mem_used) {
0335a835
DC
873 if (max_mem_specified) {
874 do_abort(
875 _("Required memory for repair is greater that the maximum specified\n"
876 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 877 mem_used / 1024);
0335a835 878 }
70a4820f 879 do_log(
61510437
DC
880 _("Memory available for repair (%luMB) may not be sufficient.\n"
881 "At least %luMB is needed to repair this filesystem efficiently\n"
882 "If repair fails due to lack of memory, please\n"),
883 max_mem / 1024, mem_used / 1024);
884 if (do_prefetch)
70a4820f 885 do_log(
61510437
DC
886 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
887 else
70a4820f 888 do_log(
61510437
DC
889 _("increase system RAM and/or swap space to at least %luMB.\n"),
890 mem_used * 2 / 1024);
891
892 max_mem = mem_used;
2556c98b
BN
893 }
894
61510437
DC
895 max_mem -= mem_used;
896 if (max_mem >= (1 << 30))
897 max_mem = 1 << 30;
898 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
899 (mp->m_inode_cluster_size >> 10));
900 if (libxfs_bhash_size < 512)
901 libxfs_bhash_size = 512;
902
2556c98b
BN
903 if (verbose)
904 do_log(_(" - block cache size set to %d entries\n"),
905 libxfs_bhash_size * HASH_CACHE_RATIO);
906
ba9ecd40 907 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
908 &libxfs_bcache_operations);
909 }
910
2bd0ea18
NS
911 /*
912 * calculate what mkfs would do to this filesystem
913 */
914 calc_mkfs(mp);
915
916 /*
c1f7a46c 917 * initialize block alloc map
2bd0ea18 918 */
c1f7a46c
BN
919 init_bmaps(mp);
920 incore_ino_init(mp);
921 incore_ext_init(mp);
2d273771 922 rmaps_init(mp);
c1f7a46c
BN
923
924 /* initialize random globals now that we know the fs geometry */
925 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
926
927 if (parse_sb_version(&mp->m_sb)) {
928 do_warn(
507f4e33 929 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
930 return(1);
931 }
932
933 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 934 phase2(mp, phase2_threads);
06fbdda9 935 timestamp(PHASE_END, 2, NULL);
2bd0ea18 936
2556c98b
BN
937 if (do_prefetch)
938 init_prefetch(mp);
939
8100dd79 940 phase3(mp, phase2_threads);
06fbdda9 941 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
942
943 phase4(mp);
06fbdda9 944 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
945
946 if (no_modify)
507f4e33 947 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 948 else {
2bd0ea18 949 phase5(mp);
3b6ac903 950 }
06fbdda9 951 timestamp(PHASE_END, 5, NULL);
2bd0ea18 952
c1f7a46c
BN
953 /*
954 * Done with the block usage maps, toss them...
955 */
2d273771 956 rmaps_free(mp);
c1f7a46c
BN
957 free_bmaps(mp);
958
2bd0ea18
NS
959 if (!bad_ino_btree) {
960 phase6(mp);
06fbdda9 961 timestamp(PHASE_END, 6, NULL);
2bd0ea18 962
e161d4a8 963 phase7(mp, phase2_threads);
06fbdda9 964 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
965 } else {
966 do_warn(
507f4e33 967_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
968 }
969
0340d706 970 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
971 if (!no_modify) {
972 do_warn(
507f4e33 973_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
974 } else {
975 do_warn(
507f4e33 976_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
977 }
978 } else if (lost_quotas) {
979 if (!no_modify) {
980 do_warn(
507f4e33 981_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
982 } else {
983 do_warn(
507f4e33 984_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
985 }
986 } else {
987 if (lost_uquotino) {
988 if (!no_modify) {
989 do_warn(
507f4e33
NS
990_("Warning: user quota information was cleared.\n"
991 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
992 } else {
993 do_warn(
507f4e33
NS
994_("Warning: user quota information would be cleared.\n"
995 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
996 }
997 }
998
b36eef04 999 if (lost_gquotino) {
2bd0ea18
NS
1000 if (!no_modify) {
1001 do_warn(
507f4e33
NS
1002_("Warning: group quota information was cleared.\n"
1003 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1004 } else {
1005 do_warn(
507f4e33
NS
1006_("Warning: group quota information would be cleared.\n"
1007 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
1008 }
1009 }
1010
1011 if (lost_pquotino) {
1012 if (!no_modify) {
1013 do_warn(
1014_("Warning: project quota information was cleared.\n"
1015 "Project quotas can not be enforced until limit information is recreated.\n"));
1016 } else {
1017 do_warn(
1018_("Warning: project quota information would be cleared.\n"
1019 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1020 }
1021 }
1022 }
1023
2556c98b 1024 if (ag_stride && report_interval)
06fbdda9 1025 stop_progress_rpt();
9f38f08d 1026
2bd0ea18 1027 if (no_modify) {
1926558d
BF
1028 /*
1029 * Warn if the current LSN is problematic and the log requires a
1030 * reformat.
1031 */
1032 format_log_max_lsn(mp);
1033
2bd0ea18 1034 do_log(
507f4e33 1035 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1036 if (verbose)
06fbdda9 1037 summary_report();
2bd0ea18
NS
1038 if (fs_is_dirty)
1039 return(1);
1040
1041 return(0);
1042 }
1043
1044 /*
1045 * Clear the quota flags if they're on.
1046 */
1047 sbp = libxfs_getsb(mp, 0);
1048 if (!sbp)
507f4e33 1049 do_error(_("couldn't get superblock\n"));
2bd0ea18 1050
5e656dbb 1051 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1052
342aef1e 1053 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1054 do_warn(_("Note - quota info will be regenerated on next "
1055 "quota mount.\n"));
342aef1e 1056 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1057 }
1058
6bf4721d 1059 if (copied_sunit) {
2bd0ea18 1060 do_warn(
6bf4721d
ES
1061_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1062 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1063 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1064 }
2bd0ea18
NS
1065
1066 libxfs_writebuf(sbp, 0);
1067
2556c98b 1068 /*
1926558d
BF
1069 * Done. Flush all cached buffers and inodes first to ensure all
1070 * verifiers are run (where we discover the max metadata LSN), reformat
1071 * the log if necessary and unmount.
2556c98b
BN
1072 */
1073 libxfs_bcache_flush();
1926558d 1074 format_log_max_lsn(mp);
2bd0ea18 1075 libxfs_umount(mp);
1926558d 1076
d321ceac
NS
1077 if (x.rtdev)
1078 libxfs_device_close(x.rtdev);
1079 if (x.logdev && x.logdev != x.ddev)
1080 libxfs_device_close(x.logdev);
1081 libxfs_device_close(x.ddev);
2ce8bff5 1082 libxfs_destroy();
2bd0ea18 1083
06fbdda9
MV
1084 if (verbose)
1085 summary_report();
507f4e33 1086 do_log(_("done\n"));
3ae81520
ES
1087
1088 if (dangerously && !no_modify)
1089 do_warn(
1090_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1091
4c0a98ae
BN
1092 pftrace_done();
1093
0a223eb8
ES
1094 free(msgbuf);
1095
7c3e94a3
JT
1096 if (fs_is_dirty && report_corrected)
1097 return (4);
3b6ac903
MV
1098 return (0);
1099}