]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
xfs_repair: remove unused fs_shared_allowed variable
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "libxlog.h"
12be365e 21#include <sys/resource.h>
4a32b9e9 22#include "xfs_multidisk.h"
2bd0ea18
NS
23#include "avl.h"
24#include "avl64.h"
25#include "globals.h"
26#include "versions.h"
27#include "agheader.h"
28#include "protos.h"
29#include "incore.h"
30#include "err_protos.h"
cb5b3ef4 31#include "prefetch.h"
3b6ac903 32#include "threads.h"
06fbdda9 33#include "progress.h"
beed0dc8 34#include "dinode.h"
9e0f480e
DW
35#include "slab.h"
36#include "rmap.h"
2bd0ea18
NS
37
/* Round x down to the nearest multiple of y using integer division. */
#define rounddown(x, y)		(((x)/(y))*(y))

/* 64 KiB; presumably the largest sector size repair accepts - TODO confirm. */
#define XR_MAX_SECT_SIZE	(64 * 1024)
41
/*
 * option tables for getsubopt calls
 */

/*
 * -o: user-supplied override options
 *
 * Each #define is the index of the string that follows it, so the value
 * returned by getsubopt() can be used directly as a case label in
 * process_args().  The table is NULL terminated.
 */
static char *o_opts[] = {
#define ASSUME_XFS	0
	"assume_xfs",
#define PRE_65_BETA	1
	"fs_is_pre_65_beta",
#define IHASH_SIZE	2
	"ihash",
#define BHASH_SIZE	3
	"bhash",
#define AG_STRIDE	4
	"ag_stride",
#define FORCE_GEO	5
	"force_geometry",
#define PHASE2_THREADS	6
	"phase2_threads",
	NULL
};
66
4af916f8
BN
/*
 * -c: conversion options
 *
 * Same layout as the -o table: the #define is the getsubopt() index of the
 * string that follows it, and the table is NULL terminated.
 */
static char *c_opts[] = {
#define CONVERT_LAZY_COUNT	0
	"lazycount",
	NULL
};
75
76
static int	bhash_option_used;	/* set when -o bhash is parsed */
static long	max_mem_specified;	/* in megabytes */
static int	phase2_threads = 32;	/* passed to phase2/3/7; -o phase2_threads */
static bool	report_corrected;	/* -e: exit 4 if fs_is_dirty at the end */
2556c98b 81
2bd0ea18
NS
82static void
83usage(void)
84{
4af916f8
BN
85 do_warn(_(
86"Usage: %s [options] device\n"
87"\n"
88"Options:\n"
89" -f The device is a file\n"
90" -L Force log zeroing. Do this as a last resort.\n"
91" -l logdev Specifies the device where the external log resides.\n"
92" -m maxmem Maximum amount of memory to be used in megabytes.\n"
93" -n No modify mode, just checks the filesystem for damage.\n"
7c3e94a3 94" (Cannot be used together with -e.)\n"
4af916f8
BN
95" -P Disables prefetching.\n"
96" -r rtdev Specifies the device where the realtime section resides.\n"
97" -v Verbose output.\n"
98" -c subopts Change filesystem parameters - use xfs_admin.\n"
99" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 100" -t interval Reporting interval in seconds.\n"
4af916f8 101" -d Repair dangerously.\n"
7c3e94a3
JT
102" -e Exit with a non-zero code if any errors were repaired.\n"
103" (Cannot be used together with -n.)\n"
4af916f8 104" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
105 exit(1);
106}
107
2bd0ea18
NS
108char *
109err_string(int err_code)
110{
507f4e33
NS
111 static char *err_message[XR_BAD_ERR_CODE];
112 static int done;
113
114 if (!done) {
115 err_message[XR_OK] = _("no error");
116 err_message[XR_BAD_MAGIC] = _("bad magic number");
117 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
118 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 119 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
120 err_message[XR_BAD_INPROGRESS] =
121 _("filesystem mkfs-in-progress bit set");
122 err_message[XR_BAD_FS_SIZE_DATA] =
123 _("inconsistent filesystem geometry information");
124 err_message[XR_BAD_INO_SIZE_DATA] =
125 _("bad inode size or inconsistent with number of inodes/block"),
126 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
127 err_message[XR_AGF_GEO_MISMATCH] =
128 _("AGF geometry info conflicts with filesystem geometry");
129 err_message[XR_AGI_GEO_MISMATCH] =
130 _("AGI geometry info conflicts with filesystem geometry");
131 err_message[XR_SB_GEO_MISMATCH] =
132 _("AG superblock geometry info conflicts with filesystem geometry");
133 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
134 err_message[XR_BAD_RT_GEO_DATA] =
135 _("inconsistent filesystem geometry in realtime filesystem component");
136 err_message[XR_BAD_INO_MAX_PCT] =
137 _("maximum indicated percentage of inodes > 100%");
138 err_message[XR_BAD_INO_ALIGN] =
139 _("inconsistent inode alignment value");
140 err_message[XR_INSUFF_SEC_SB] =
141 _("not enough secondary superblocks with matching geometry");
142 err_message[XR_BAD_SB_UNIT] =
143 _("bad stripe unit in superblock");
144 err_message[XR_BAD_SB_WIDTH] =
145 _("bad stripe width in superblock");
146 err_message[XR_BAD_SVN] =
147 _("bad shared version number in superblock");
88f364a9
DC
148 err_message[XR_BAD_CRC] =
149 _("bad CRC in superblock");
02b56f87
DW
150 err_message[XR_BAD_DIR_SIZE_DATA] =
151 _("inconsistent directory geometry information");
507f4e33
NS
152 done = 1;
153 }
154
2bd0ea18 155 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 156 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
157
158 return(err_message[err_code]);
159}
160
/*
 * Complain that suboption tbl[idx] of option -opt was given a value it
 * does not take, then print the usage text and exit (via usage()).
 */
static void
noval(char opt, char *tbl[], int idx)
{
	do_warn(_("-%c %s option cannot have a value\n"), opt, tbl[idx]);
	usage();
}
167
/*
 * Complain that option -opt (or its suboption tbl[idx] when a table is
 * supplied) was given more than once, then print the usage text and exit
 * (via usage()).
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl)
		do_warn("%s ", tbl[idx]);
	do_warn(_("option respecified\n"));
	usage();
}
177
/*
 * Complain about an unrecognised suboption string s of option -opt, then
 * print the usage text and exit (via usage()).
 *
 * s may be NULL: POSIX does not require getsubopt() to hand back the
 * offending token when nothing matches, so never pass NULL to "%s".
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s ? s : "");
	usage();
}
184
185/*
186 * sets only the global argument flags and variables
187 */
8b8a6b02 188static void
2bd0ea18
NS
189process_args(int argc, char **argv)
190{
191 char *p;
192 int c;
193
194 log_spec = 0;
195 fs_is_dirty = 0;
196 verbose = 0;
197 no_modify = 0;
c781939c 198 dangerously = 0;
2bd0ea18 199 isa_file = 0;
d321ceac 200 zap_log = 0;
2bd0ea18 201 dumpcore = 0;
0f012a4c 202 full_ino_ex_data = 0;
2bd0ea18
NS
203 delete_attr_ok = 1;
204 force_geo = 0;
205 assume_xfs = 0;
6bf4721d 206 copied_sunit = 0;
2bd0ea18
NS
207 sb_inoalignmt = 0;
208 sb_unit = 0;
209 sb_width = 0;
2bd0ea18 210 pre_65_beta = 0;
add3cb90 211 ag_stride = 0;
2556c98b 212 thread_count = 1;
06fbdda9 213 report_interval = PROG_RPT_DEFAULT;
7c3e94a3 214 report_corrected = false;
2bd0ea18
NS
215
216 /*
217 * XXX have to add suboption processing here
218 * attributes, quotas, nlinks, aligned_inos, sb_fbits
219 */
7c3e94a3 220 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
2bd0ea18
NS
221 switch (c) {
222 case 'D':
223 dumpcore = 1;
224 break;
225 case 'o':
226 p = optarg;
227 while (*p != '\0') {
228 char *val;
229
ab870d0e 230 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
231 case ASSUME_XFS:
232 if (val)
233 noval('o', o_opts, ASSUME_XFS);
234 if (assume_xfs)
235 respec('o', o_opts, ASSUME_XFS);
236 assume_xfs = 1;
237 break;
238 case PRE_65_BETA:
239 if (val)
240 noval('o', o_opts, PRE_65_BETA);
241 if (pre_65_beta)
242 respec('o', o_opts,
243 PRE_65_BETA);
244 pre_65_beta = 1;
245 break;
9f38f08d 246 case IHASH_SIZE:
3a19fb7d
CH
247 do_warn(
248 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
249 break;
250 case BHASH_SIZE:
12be365e
BN
251 if (max_mem_specified)
252 do_abort(
3a19fb7d 253 _("-o bhash option cannot be used with -m option\n"));
5e656dbb 254 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 255 bhash_option_used = 1;
cb5b3ef4 256 break;
add3cb90 257 case AG_STRIDE:
5e656dbb 258 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 259 break;
d4dd6ab5
CH
260 case FORCE_GEO:
261 if (val)
262 noval('o', o_opts, FORCE_GEO);
263 if (force_geo)
264 respec('o', o_opts, FORCE_GEO);
265 force_geo = 1;
266 break;
364a126c
DC
267 case PHASE2_THREADS:
268 phase2_threads = (int)strtol(val, NULL, 0);
269 break;
2bd0ea18
NS
270 default:
271 unknown('o', val);
272 break;
273 }
274 }
275 break;
4af916f8
BN
276 case 'c':
277 p = optarg;
278 while (*p) {
279 char *val;
280
ab870d0e 281 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 282 case CONVERT_LAZY_COUNT:
5e656dbb 283 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
284 convert_lazy_count = 1;
285 break;
286 default:
287 unknown('c', val);
288 break;
289 }
290 }
291 break;
2bd0ea18
NS
292 case 'l':
293 log_name = optarg;
294 log_spec = 1;
295 break;
42a564ab
ES
296 case 'r':
297 rt_name = optarg;
298 rt_spec = 1;
299 break;
2bd0ea18
NS
300 case 'f':
301 isa_file = 1;
302 break;
12be365e
BN
303 case 'm':
304 if (bhash_option_used)
305 do_abort(_("-m option cannot be used with "
306 "-o bhash option\n"));
5e656dbb 307 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 308 break;
d321ceac
NS
309 case 'L':
310 zap_log = 1;
311 break;
2bd0ea18
NS
312 case 'n':
313 no_modify = 1;
314 break;
6089b6f0
NS
315 case 'd':
316 dangerously = 1;
317 break;
2bd0ea18 318 case 'v':
3b6ac903 319 verbose++;
2bd0ea18
NS
320 break;
321 case 'V':
507f4e33 322 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 323 exit(0);
cb5b3ef4 324 case 'P':
2556c98b 325 do_prefetch = 0;
3b6ac903 326 break;
06fbdda9 327 case 't':
5e656dbb 328 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 329 break;
7c3e94a3
JT
330 case 'e':
331 report_corrected = true;
332 break;
2bd0ea18
NS
333 case '?':
334 usage();
335 }
336 }
337
338 if (argc - optind != 1)
339 usage();
340
341 if ((fs_name = argv[optind]) == NULL)
342 usage();
7c3e94a3
JT
343
344 if (report_corrected && no_modify)
345 usage();
2bd0ea18
NS
346}
347
b1559967 348void __attribute__((noreturn))
2bd0ea18
NS
349do_error(char const *msg, ...)
350{
351 va_list args;
352
507f4e33 353 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
354
355 va_start(args, msg);
079afa09
CH
356 vfprintf(stderr, msg, args);
357 if (dumpcore)
358 abort();
359 exit(1);
2bd0ea18
NS
360}
361
362/*
363 * like do_error, only the error is internal, no system
364 * error so no oserror processing
365 */
b1559967 366void __attribute__((noreturn))
2bd0ea18
NS
367do_abort(char const *msg, ...)
368{
369 va_list args;
370
371 va_start(args, msg);
079afa09
CH
372 vfprintf(stderr, msg, args);
373 if (dumpcore)
374 abort();
375 exit(1);
2bd0ea18
NS
376}
377
378void
379do_warn(char const *msg, ...)
380{
381 va_list args;
382
383 fs_is_dirty = 1;
384
385 va_start(args, msg);
079afa09 386 vfprintf(stderr, msg, args);
2bd0ea18
NS
387 va_end(args);
388}
389
/*
 * Print a printf-style informational message on stderr.  No prefix is
 * added and, unlike do_warn(), fs_is_dirty is left untouched.
 */
void
do_log(char const *msg, ...)
{
	va_list args;

	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
}
401
8b8a6b02 402static void
2bd0ea18
NS
403calc_mkfs(xfs_mount_t *mp)
404{
405 xfs_agblock_t fino_bno;
406 int do_inoalign;
407
408 do_inoalign = mp->m_sinoalign;
409
410 /*
7b370905
BF
411 * Pre-calculate the geometry of ag 0. We know what it looks like
412 * because we know what mkfs does: 2 allocation btree roots (by block
413 * and by size), the inode allocation btree root, the free inode
414 * allocation btree root (if enabled) and some number of blocks to
415 * prefill the agfl.
de046644
DC
416 *
417 * Because the current shape of the btrees may differ from the current
418 * shape, we open code the mkfs freelist block count here. mkfs creates
419 * single level trees, so the calculation is pertty straight forward for
7ddb50f8 420 * the trees that use the AGFL.
2bd0ea18
NS
421 */
422 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
423 bcntbt_root = bnobt_root + 1;
424 inobt_root = bnobt_root + 2;
de046644 425 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
7b370905
BF
426 if (xfs_sb_version_hasfinobt(&mp->m_sb))
427 fino_bno++;
7ddb50f8
DW
428 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
429 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
0f94fa4b 430 fino_bno++;
7ddb50f8 431 }
18c44aa9
DW
432 if (xfs_sb_version_hasreflink(&mp->m_sb))
433 fino_bno++;
2bd0ea18 434
d4dd6ab5 435 /*
649bfa9a
CH
436 * If the log is allocated in the first allocation group we need to
437 * add the number of blocks used by the log to the above calculation.
438 *
439 * This can happens with filesystems that only have a single
440 * allocation group, or very odd geometries created by old mkfs
441 * versions on very small filesystems.
d4dd6ab5 442 */
649bfa9a
CH
443 if (mp->m_sb.sb_logstart &&
444 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
445
d4dd6ab5
CH
446 /*
447 * XXX(hch): verify that sb_logstart makes sense?
448 */
449 fino_bno += mp->m_sb.sb_logblocks;
450 }
451
2bd0ea18
NS
452 /*
453 * ditto the location of the first inode chunks in the fs ('/')
454 */
5e656dbb 455 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
2bd0ea18
NS
456 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
457 mp->m_sb.sb_unit), 0);
5e656dbb 458 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
2bd0ea18
NS
459 mp->m_sb.sb_inoalignmt > 1) {
460 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
461 roundup(fino_bno,
462 mp->m_sb.sb_inoalignmt),
463 0);
464 } else {
465 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
466 }
467
ff105f75 468 ASSERT(mp->m_ialloc_blks > 0);
2bd0ea18 469
ff105f75 470 if (mp->m_ialloc_blks > 1)
2bd0ea18
NS
471 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
472 else
473 last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
474
475 /*
476 * now the first 3 inodes in the system
477 */
478 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
479 do_warn(
5d1b7f0f 480_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
481 mp->m_sb.sb_rootino,
482 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
483 first_prealloc_ino);
2bd0ea18
NS
484
485 if (!no_modify)
486 do_warn(
5d1b7f0f 487 _("resetting superblock root inode pointer to %u\n"),
2bd0ea18
NS
488 first_prealloc_ino);
489 else
490 do_warn(
5d1b7f0f 491 _("would reset superblock root inode pointer to %u\n"),
2bd0ea18
NS
492 first_prealloc_ino);
493
494 /*
495 * just set the value -- safe since the superblock
496 * doesn't get flushed out if no_modify is set
497 */
498 mp->m_sb.sb_rootino = first_prealloc_ino;
499 }
500
501 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
502 do_warn(
5d1b7f0f 503_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
504 mp->m_sb.sb_rbmino,
505 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
506 first_prealloc_ino + 1);
2bd0ea18
NS
507
508 if (!no_modify)
509 do_warn(
5d1b7f0f 510 _("resetting superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
511 first_prealloc_ino + 1);
512 else
513 do_warn(
5d1b7f0f 514 _("would reset superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
515 first_prealloc_ino + 1);
516
517 /*
518 * just set the value -- safe since the superblock
519 * doesn't get flushed out if no_modify is set
520 */
521 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
522 }
523
524 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
525 do_warn(
5d1b7f0f
CH
526_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
527 mp->m_sb.sb_rsumino,
528 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
529 first_prealloc_ino + 2);
2bd0ea18
NS
530
531 if (!no_modify)
532 do_warn(
5d1b7f0f 533 _("resetting superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
534 first_prealloc_ino + 2);
535 else
536 do_warn(
5d1b7f0f 537 _("would reset superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
538 first_prealloc_ino + 2);
539
540 /*
541 * just set the value -- safe since the superblock
542 * doesn't get flushed out if no_modify is set
543 */
544 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
545 }
546
547}
548
1926558d
BF
549/*
550 * v5 superblock metadata track the LSN of last modification and thus require
551 * that the current LSN is always moving forward. The current LSN is reset if
552 * the log has been cleared, which puts the log behind parts of the filesystem
553 * on-disk and can disrupt log recovery.
554 *
555 * We have tracked the maximum LSN of every piece of metadata that has been read
556 * in via the read verifiers. Compare the max LSN with the log and if the log is
557 * behind, bump the cycle number and reformat the log.
558 */
559static void
560format_log_max_lsn(
561 struct xfs_mount *mp)
562{
563 struct xlog *log = mp->m_log;
564 int max_cycle;
565 int max_block;
566 int new_cycle;
567 xfs_daddr_t logstart;
568 xfs_daddr_t logblocks;
569 int logversion;
570
571 if (!xfs_sb_version_hascrc(&mp->m_sb))
572 return;
573
574 /*
575 * If the log is ahead of the highest metadata LSN we've seen, we're
576 * safe and there's nothing to do.
577 */
578 max_cycle = CYCLE_LSN(libxfs_max_lsn);
579 max_block = BLOCK_LSN(libxfs_max_lsn);
580 if (max_cycle < log->l_curr_cycle ||
581 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
582 return;
583
584 /*
585 * Going to the next cycle should be sufficient but we bump by a few
586 * counts to help cover any metadata LSNs we could have missed.
587 */
588 new_cycle = max_cycle + 3;
589 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
590 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
591 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
592
593 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
594 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
595
596 if (no_modify) {
597 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
598 return;
599 }
600
601 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
602 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
603 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 604 XLOG_FMT, new_cycle, true);
1926558d
BF
605}
606
4a32b9e9
DC
607/*
608 * mkfs increases the AG count for "multidisk" configurations, we want
609 * to target these for an increase in thread count. Hence check the superlock
610 * geometry information to determine if mkfs considered this a multidisk
611 * configuration.
612 */
613static bool
614is_multidisk_filesystem(
615 struct xfs_mount *mp)
616{
617 struct xfs_sb *sbp = &mp->m_sb;
618
619 /* High agcount filesystems are always considered "multidisk" */
620 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
621 return true;
622
623 /*
624 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
625 * multi-disk array, so we don't either.
626 */
627 if (!sbp->sb_unit)
628 return false;
629
630 ASSERT(sbp->sb_width);
631 return true;
632}
633
28a0a30f
ZL
634/*
635 * if the sector size of the filesystem we are trying to repair is
636 * smaller than that of the underlying filesystem (i.e. we are repairing
637 * an image), the we have to turn off direct IO because we cannot do IO
638 * smaller than the host filesystem's sector size.
639 */
640static void
641check_fs_vs_host_sectsize(
642 struct xfs_sb *sb)
643{
644 int fd;
645 long old_flags;
646 struct xfs_fsop_geom_v1 geom = { 0 };
647
648 fd = libxfs_device_to_fd(x.ddev);
649
650 if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
651 do_log(_("Cannot get host filesystem geometry.\n"
652 "Repair may fail if there is a sector size mismatch between\n"
653 "the image and the host filesystem.\n"));
654 geom.sectsize = BBSIZE;
655 }
656
657 if (sb->sb_sectsize < geom.sectsize) {
658 old_flags = fcntl(fd, F_GETFL, 0);
659 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
660 do_warn(_(
661 "Sector size on host filesystem larger than image sector size.\n"
662 "Cannot turn off direct IO, so exiting.\n"));
663 exit(1);
664 }
665 }
666}
667
2bd0ea18
NS
668int
669main(int argc, char **argv)
670{
2bd0ea18
NS
671 xfs_mount_t *temp_mp;
672 xfs_mount_t *mp;
5e656dbb 673 xfs_dsb_t *dsb;
2bd0ea18
NS
674 xfs_buf_t *sbp;
675 xfs_mount_t xfs_m;
1d6cb115 676 struct xlog log = {0};
06fbdda9 677 char *msgbuf;
88f364a9
DC
678 struct xfs_sb psb;
679 int rval;
2bd0ea18
NS
680
681 progname = basename(argv[0]);
507f4e33
NS
682 setlocale(LC_ALL, "");
683 bindtextdomain(PACKAGE, LOCALEDIR);
684 textdomain(PACKAGE);
beed0dc8 685 dinode_bmbt_translation_init();
2bd0ea18
NS
686
687 temp_mp = &xfs_m;
688 setbuf(stdout, NULL);
689
690 process_args(argc, argv);
d321ceac 691 xfs_init(&x);
2bd0ea18 692
2556c98b
BN
693 msgbuf = malloc(DURATION_BUF_SIZE);
694
06fbdda9
MV
695 timestamp(PHASE_START, 0, NULL);
696 timestamp(PHASE_END, 0, NULL);
697
28a0a30f
ZL
698 /* -f forces this, but let's be nice and autodetect it, as well. */
699 if (!isa_file) {
700 int fd = libxfs_device_to_fd(x.ddev);
701 struct stat statbuf;
702
703 if (fstat(fd, &statbuf) < 0)
704 do_warn(_("%s: couldn't stat \"%s\"\n"),
705 progname, fs_name);
706 else if (S_ISREG(statbuf.st_mode))
707 isa_file = 1;
708 }
709
710 if (isa_file) {
711 /* Best effort attempt to validate fs vs host sector size */
712 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
713 if (rval == XR_OK)
714 check_fs_vs_host_sectsize(&psb);
715 }
716
2bd0ea18
NS
717 /* do phase1 to make sure we have a superblock */
718 phase1(temp_mp);
06fbdda9 719 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
720
721 if (no_modify && primary_sb_modified) {
507f4e33
NS
722 do_warn(_("Primary superblock would have been modified.\n"
723 "Cannot proceed further in no_modify mode.\n"
724 "Exiting now.\n"));
2bd0ea18
NS
725 exit(1);
726 }
727
88f364a9
DC
728 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
729 if (rval != XR_OK) {
730 do_warn(_("Primary superblock bad after phase 1!\n"
731 "Exiting now.\n"));
732 exit(1);
733 }
2bd0ea18 734
f63fd268 735 /*
28a0a30f
ZL
736 * Now that we have completely validated the superblock, geometry may
737 * have changed; re-check geometry vs the host filesystem geometry
f63fd268 738 */
28a0a30f
ZL
739 if (isa_file)
740 check_fs_vs_host_sectsize(&psb);
88f364a9 741
1d6cb115
BF
742 /*
743 * Prepare the mount structure. Point the log reference to our local
744 * copy so it's available to the various phases. The log bits are
745 * initialized in phase 2.
746 */
88f364a9
DC
747 memset(&xfs_m, 0, sizeof(xfs_mount_t));
748 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
749
750 if (!mp) {
507f4e33
NS
751 fprintf(stderr,
752 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
753 progname);
754 exit(1);
755 }
1d6cb115 756 mp->m_log = &log;
2bd0ea18 757
23639f77
ES
758 /* Spit out function & line on these corruption macros */
759 if (verbose > 2)
760 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
761
2bd0ea18
NS
762 /*
763 * set XFS-independent status vars from the mount/sb structure
764 */
765 glob_agcount = mp->m_sb.sb_agcount;
766
767 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
5a707ca1 768 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
edf3f9d0 769 inodes_per_cluster = MAX(mp->m_sb.sb_inopblock,
ff105f75 770 mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
2bd0ea18 771
0cce4aa1
DC
772 /*
773 * Automatic striding for high agcount filesystems.
774 *
775 * More AGs indicates that the filesystem is either large or can handle
776 * more IO parallelism. Either way, we should try to process multiple
777 * AGs at a time in such a configuration to try to saturate the
778 * underlying storage and speed the repair process. Only do this if
779 * prefetching is enabled.
780 *
781 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
782 * to target these for an increase in thread count. Hence a stride value
783 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
784 * on such filesystems.
12b55baf
DC
785 *
786 * Limit the maximum thread count based on the available CPU power that
787 * is available. If we use too many threads, we might run out of memory
788 * and CPU power before we run out of IO concurrency. We limit to 8
789 * threads/CPU as this is enough threads to saturate a CPU on fast
790 * devices, yet few enough that it will saturate but won't overload slow
791 * devices.
4a32b9e9
DC
792 *
793 * Multidisk filesystems can handle more IO parallelism so we should try
794 * to process multiple AGs at a time in such a configuration to try to
795 * saturate the underlying storage and speed the repair process. Only do
796 * this if prefetching is enabled.
0cce4aa1 797 */
4a32b9e9
DC
798 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
799 /*
800 * For small agcount multidisk systems, just double the
801 * parallelism. For larger AG count filesystems (32 and above)
802 * use more parallelism, and linearly increase the parallelism
803 * with the number of AGs.
804 */
805 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
806 }
0cce4aa1 807
add3cb90 808 if (ag_stride) {
12b55baf
DC
809 int max_threads = platform_nproc() * 8;
810
2556c98b 811 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
812 while (thread_count > max_threads) {
813 ag_stride *= 2;
814 thread_count = (glob_agcount + ag_stride - 1) /
815 ag_stride;
816 }
817 if (thread_count > 0)
818 thread_init();
819 else {
820 thread_count = 1;
821 ag_stride = 0;
822 }
add3cb90
BN
823 }
824
2556c98b 825 if (ag_stride && report_interval) {
06fbdda9 826 init_progress_rpt();
06fbdda9
MV
827 if (msgbuf) {
828 do_log(_(" - reporting progress in intervals of %s\n"),
829 duration(report_interval, msgbuf));
06fbdda9
MV
830 }
831 }
832
2556c98b
BN
833 /*
834 * Adjust libxfs cache sizes based on system memory,
835 * filesystem size and inode count.
836 *
837 * We'll set the cache size based on 3/4s the memory minus
838 * space used by the inode AVL tree and block usage map.
839 *
840 * Inode AVL tree space is approximately 4 bytes per inode,
841 * block usage map is currently 1 byte for 2 blocks.
842 *
843 * We assume most blocks will be inode clusters.
844 *
845 * Calculations are done in kilobyte units.
846 */
847
12be365e 848 if (!bhash_option_used || max_mem_specified) {
2556c98b 849 unsigned long mem_used;
12be365e
BN
850 unsigned long max_mem;
851 struct rlimit rlim;
2556c98b 852
2556c98b 853 libxfs_bcache_purge();
2556c98b
BN
854 cache_destroy(libxfs_bcache);
855
856 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
857 (mp->m_sb.sb_dblocks >> (10 + 1)) +
858 50000; /* rough estimate of 50MB overhead */
859 max_mem = max_mem_specified ? max_mem_specified * 1024 :
860 libxfs_physmem() * 3 / 4;
861
862 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
863 rlim.rlim_cur != RLIM_INFINITY) {
864 rlim.rlim_cur = rlim.rlim_max;
865 setrlimit(RLIMIT_AS, &rlim);
866 /* use approximately 80% of rlimit to avoid overrun */
867 max_mem = MIN(max_mem, rlim.rlim_cur / 1280);
868 } else
869 max_mem = MIN(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
870
871 if (verbose > 1)
5d1b7f0f
CH
872 do_log(
873 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
874 max_mem, mp->m_sb.sb_icount,
875 mp->m_sb.sb_icount >> (10 - 2),
876 mp->m_sb.sb_dblocks,
877 mp->m_sb.sb_dblocks >> (10 + 1));
878
879 if (max_mem <= mem_used) {
0335a835
DC
880 if (max_mem_specified) {
881 do_abort(
882 _("Required memory for repair is greater that the maximum specified\n"
883 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 884 mem_used / 1024);
0335a835 885 }
70a4820f 886 do_log(
61510437
DC
887 _("Memory available for repair (%luMB) may not be sufficient.\n"
888 "At least %luMB is needed to repair this filesystem efficiently\n"
889 "If repair fails due to lack of memory, please\n"),
890 max_mem / 1024, mem_used / 1024);
891 if (do_prefetch)
70a4820f 892 do_log(
61510437
DC
893 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
894 else
70a4820f 895 do_log(
61510437
DC
896 _("increase system RAM and/or swap space to at least %luMB.\n"),
897 mem_used * 2 / 1024);
898
899 max_mem = mem_used;
2556c98b
BN
900 }
901
61510437
DC
902 max_mem -= mem_used;
903 if (max_mem >= (1 << 30))
904 max_mem = 1 << 30;
905 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
906 (mp->m_inode_cluster_size >> 10));
907 if (libxfs_bhash_size < 512)
908 libxfs_bhash_size = 512;
909
2556c98b
BN
910 if (verbose)
911 do_log(_(" - block cache size set to %d entries\n"),
912 libxfs_bhash_size * HASH_CACHE_RATIO);
913
ba9ecd40 914 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
915 &libxfs_bcache_operations);
916 }
917
2bd0ea18
NS
918 /*
919 * calculate what mkfs would do to this filesystem
920 */
921 calc_mkfs(mp);
922
923 /*
c1f7a46c 924 * initialize block alloc map
2bd0ea18 925 */
c1f7a46c
BN
926 init_bmaps(mp);
927 incore_ino_init(mp);
928 incore_ext_init(mp);
2d273771 929 rmaps_init(mp);
c1f7a46c
BN
930
931 /* initialize random globals now that we know the fs geometry */
932 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
933
934 if (parse_sb_version(&mp->m_sb)) {
935 do_warn(
507f4e33 936 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
937 return(1);
938 }
939
940 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 941 phase2(mp, phase2_threads);
06fbdda9 942 timestamp(PHASE_END, 2, NULL);
2bd0ea18 943
2556c98b
BN
944 if (do_prefetch)
945 init_prefetch(mp);
946
8100dd79 947 phase3(mp, phase2_threads);
06fbdda9 948 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
949
950 phase4(mp);
06fbdda9 951 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
952
953 if (no_modify)
507f4e33 954 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 955 else {
2bd0ea18 956 phase5(mp);
3b6ac903 957 }
06fbdda9 958 timestamp(PHASE_END, 5, NULL);
2bd0ea18 959
c1f7a46c
BN
960 /*
961 * Done with the block usage maps, toss them...
962 */
2d273771 963 rmaps_free(mp);
c1f7a46c
BN
964 free_bmaps(mp);
965
2bd0ea18
NS
966 if (!bad_ino_btree) {
967 phase6(mp);
06fbdda9 968 timestamp(PHASE_END, 6, NULL);
2bd0ea18 969
e161d4a8 970 phase7(mp, phase2_threads);
06fbdda9 971 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
972 } else {
973 do_warn(
507f4e33 974_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
975 }
976
0340d706 977 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
978 if (!no_modify) {
979 do_warn(
507f4e33 980_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
981 } else {
982 do_warn(
507f4e33 983_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
984 }
985 } else if (lost_quotas) {
986 if (!no_modify) {
987 do_warn(
507f4e33 988_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
989 } else {
990 do_warn(
507f4e33 991_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
992 }
993 } else {
994 if (lost_uquotino) {
995 if (!no_modify) {
996 do_warn(
507f4e33
NS
997_("Warning: user quota information was cleared.\n"
998 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
999 } else {
1000 do_warn(
507f4e33
NS
1001_("Warning: user quota information would be cleared.\n"
1002 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1003 }
1004 }
1005
b36eef04 1006 if (lost_gquotino) {
2bd0ea18
NS
1007 if (!no_modify) {
1008 do_warn(
507f4e33
NS
1009_("Warning: group quota information was cleared.\n"
1010 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1011 } else {
1012 do_warn(
507f4e33
NS
1013_("Warning: group quota information would be cleared.\n"
1014 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
1015 }
1016 }
1017
1018 if (lost_pquotino) {
1019 if (!no_modify) {
1020 do_warn(
1021_("Warning: project quota information was cleared.\n"
1022 "Project quotas can not be enforced until limit information is recreated.\n"));
1023 } else {
1024 do_warn(
1025_("Warning: project quota information would be cleared.\n"
1026 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1027 }
1028 }
1029 }
1030
2556c98b 1031 if (ag_stride && report_interval)
06fbdda9 1032 stop_progress_rpt();
9f38f08d 1033
2bd0ea18 1034 if (no_modify) {
1926558d
BF
1035 /*
1036 * Warn if the current LSN is problematic and the log requires a
1037 * reformat.
1038 */
1039 format_log_max_lsn(mp);
1040
2bd0ea18 1041 do_log(
507f4e33 1042 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1043 if (verbose)
06fbdda9 1044 summary_report();
2bd0ea18
NS
1045 if (fs_is_dirty)
1046 return(1);
1047
1048 return(0);
1049 }
1050
1051 /*
1052 * Clear the quota flags if they're on.
1053 */
1054 sbp = libxfs_getsb(mp, 0);
1055 if (!sbp)
507f4e33 1056 do_error(_("couldn't get superblock\n"));
2bd0ea18 1057
5e656dbb 1058 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1059
342aef1e 1060 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1061 do_warn(_("Note - quota info will be regenerated on next "
1062 "quota mount.\n"));
342aef1e 1063 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1064 }
1065
6bf4721d 1066 if (copied_sunit) {
2bd0ea18 1067 do_warn(
6bf4721d
ES
1068_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1069 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1070 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1071 }
2bd0ea18
NS
1072
1073 libxfs_writebuf(sbp, 0);
1074
2556c98b 1075 /*
1926558d
BF
1076 * Done. Flush all cached buffers and inodes first to ensure all
1077 * verifiers are run (where we discover the max metadata LSN), reformat
1078 * the log if necessary and unmount.
2556c98b
BN
1079 */
1080 libxfs_bcache_flush();
1926558d 1081 format_log_max_lsn(mp);
2bd0ea18 1082 libxfs_umount(mp);
1926558d 1083
d321ceac
NS
1084 if (x.rtdev)
1085 libxfs_device_close(x.rtdev);
1086 if (x.logdev && x.logdev != x.ddev)
1087 libxfs_device_close(x.logdev);
1088 libxfs_device_close(x.ddev);
2ce8bff5 1089 libxfs_destroy();
2bd0ea18 1090
06fbdda9
MV
1091 if (verbose)
1092 summary_report();
507f4e33 1093 do_log(_("done\n"));
3ae81520
ES
1094
1095 if (dangerously && !no_modify)
1096 do_warn(
1097_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1098
4c0a98ae
BN
1099 pftrace_done();
1100
0a223eb8
ES
1101 free(msgbuf);
1102
7c3e94a3
JT
1103 if (fs_is_dirty && report_corrected)
1104 return (4);
3b6ac903
MV
1105 return (0);
1106}