]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
xfs_repair: remove unused fs_attributes_allowed
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
2bd0ea18 1/*
da23017d
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
dfc130f3 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
dfc130f3 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
dfc130f3 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18
NS
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "libxlog.h"
12be365e 21#include <sys/resource.h>
4a32b9e9 22#include "xfs_multidisk.h"
2bd0ea18
NS
23#include "avl.h"
24#include "avl64.h"
25#include "globals.h"
26#include "versions.h"
27#include "agheader.h"
28#include "protos.h"
29#include "incore.h"
30#include "err_protos.h"
cb5b3ef4 31#include "prefetch.h"
3b6ac903 32#include "threads.h"
06fbdda9 33#include "progress.h"
beed0dc8 34#include "dinode.h"
9e0f480e
DW
35#include "slab.h"
36#include "rmap.h"
2bd0ea18
NS
37
38#define rounddown(x, y) (((x)/(y))*(y))
39
2bd0ea18
NS
40#define XR_MAX_SECT_SIZE (64 * 1024)
41
42/*
43 * option tables for getsubopt calls
44 */
45
/*
 * -o: user-supplied override options
 *
 * Each index macro below names the slot of the matching keyword in o_opts[],
 * which is the value getsubopt() hands back when that keyword is seen.
 */
#define ASSUME_XFS	0
#define PRE_65_BETA	1
#define IHASH_SIZE	2
#define BHASH_SIZE	3
#define AG_STRIDE	4
#define FORCE_GEO	5
#define PHASE2_THREADS	6

static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[PRE_65_BETA]		= "fs_is_pre_65_beta",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	NULL			/* terminator required by getsubopt() */
};
66
4af916f8
BN
/*
 * -c: conversion options
 *
 * As with the -o table, the index macro is the getsubopt() return value for
 * the keyword stored in that slot.
 */
#define CONVERT_LAZY_COUNT	0

static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	NULL			/* terminator required by getsubopt() */
};
75
76
/* Set once "-o bhash" has been parsed; mutually exclusive with -m. */
static int	bhash_option_used;

/* Value given to -m, in megabytes; zero means -m was not given. */
static long	max_mem_specified;

/*
 * Thread count handed to phase2(), phase3() and phase7(); overridden by
 * "-o phase2_threads".
 */
static int	phase2_threads = 32;

/* Set by -e: main() returns 4 instead of 0 when fs_is_dirty is set. */
static bool	report_corrected;
2556c98b 81
2bd0ea18
NS
82static void
83usage(void)
84{
4af916f8
BN
85 do_warn(_(
86"Usage: %s [options] device\n"
87"\n"
88"Options:\n"
89" -f The device is a file\n"
90" -L Force log zeroing. Do this as a last resort.\n"
91" -l logdev Specifies the device where the external log resides.\n"
92" -m maxmem Maximum amount of memory to be used in megabytes.\n"
93" -n No modify mode, just checks the filesystem for damage.\n"
7c3e94a3 94" (Cannot be used together with -e.)\n"
4af916f8
BN
95" -P Disables prefetching.\n"
96" -r rtdev Specifies the device where the realtime section resides.\n"
97" -v Verbose output.\n"
98" -c subopts Change filesystem parameters - use xfs_admin.\n"
99" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 100" -t interval Reporting interval in seconds.\n"
4af916f8 101" -d Repair dangerously.\n"
7c3e94a3
JT
102" -e Exit with a non-zero code if any errors were repaired.\n"
103" (Cannot be used together with -n.)\n"
4af916f8 104" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
105 exit(1);
106}
107
2bd0ea18
NS
108char *
109err_string(int err_code)
110{
507f4e33
NS
111 static char *err_message[XR_BAD_ERR_CODE];
112 static int done;
113
114 if (!done) {
115 err_message[XR_OK] = _("no error");
116 err_message[XR_BAD_MAGIC] = _("bad magic number");
117 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
118 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 119 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
120 err_message[XR_BAD_INPROGRESS] =
121 _("filesystem mkfs-in-progress bit set");
122 err_message[XR_BAD_FS_SIZE_DATA] =
123 _("inconsistent filesystem geometry information");
124 err_message[XR_BAD_INO_SIZE_DATA] =
125 _("bad inode size or inconsistent with number of inodes/block"),
126 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
127 err_message[XR_AGF_GEO_MISMATCH] =
128 _("AGF geometry info conflicts with filesystem geometry");
129 err_message[XR_AGI_GEO_MISMATCH] =
130 _("AGI geometry info conflicts with filesystem geometry");
131 err_message[XR_SB_GEO_MISMATCH] =
132 _("AG superblock geometry info conflicts with filesystem geometry");
133 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
134 err_message[XR_BAD_RT_GEO_DATA] =
135 _("inconsistent filesystem geometry in realtime filesystem component");
136 err_message[XR_BAD_INO_MAX_PCT] =
137 _("maximum indicated percentage of inodes > 100%");
138 err_message[XR_BAD_INO_ALIGN] =
139 _("inconsistent inode alignment value");
140 err_message[XR_INSUFF_SEC_SB] =
141 _("not enough secondary superblocks with matching geometry");
142 err_message[XR_BAD_SB_UNIT] =
143 _("bad stripe unit in superblock");
144 err_message[XR_BAD_SB_WIDTH] =
145 _("bad stripe width in superblock");
146 err_message[XR_BAD_SVN] =
147 _("bad shared version number in superblock");
88f364a9
DC
148 err_message[XR_BAD_CRC] =
149 _("bad CRC in superblock");
02b56f87
DW
150 err_message[XR_BAD_DIR_SIZE_DATA] =
151 _("inconsistent directory geometry information");
507f4e33
NS
152 done = 1;
153 }
154
2bd0ea18 155 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 156 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
157
158 return(err_message[err_code]);
159}
160
/*
 * Complain that suboption tbl[idx] of option -<opt> was given a value it
 * does not accept, then print the usage text (which exits).
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt);
	usage();
}
167
/*
 * Complain that option -<opt> was given more than once, then print the usage
 * text (which exits).  When a suboption table is supplied, the name of
 * suboption tbl[idx] is included in the message; tbl may be NULL for options
 * without suboptions.
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}
	do_warn(_("option respecified\n"));

	usage();
}
177
/*
 * Complain about an unrecognised suboption string s given to option -<opt>,
 * then print the usage text (which exits).
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s);

	usage();
}
184
185/*
186 * sets only the global argument flags and variables
187 */
8b8a6b02 188static void
2bd0ea18
NS
189process_args(int argc, char **argv)
190{
191 char *p;
192 int c;
193
194 log_spec = 0;
195 fs_is_dirty = 0;
196 verbose = 0;
197 no_modify = 0;
c781939c 198 dangerously = 0;
2bd0ea18 199 isa_file = 0;
d321ceac 200 zap_log = 0;
2bd0ea18 201 dumpcore = 0;
0f012a4c 202 full_ino_ex_data = 0;
2bd0ea18
NS
203 delete_attr_ok = 1;
204 force_geo = 0;
205 assume_xfs = 0;
6bf4721d 206 copied_sunit = 0;
2bd0ea18
NS
207 sb_inoalignmt = 0;
208 sb_unit = 0;
209 sb_width = 0;
9b1d68ec 210 fs_attributes2_allowed = 1;
2bd0ea18
NS
211 fs_quotas_allowed = 1;
212 fs_aligned_inodes_allowed = 1;
213 fs_sb_feature_bits_allowed = 1;
214 fs_has_extflgbit_allowed = 1;
215 pre_65_beta = 0;
216 fs_shared_allowed = 1;
add3cb90 217 ag_stride = 0;
2556c98b 218 thread_count = 1;
06fbdda9 219 report_interval = PROG_RPT_DEFAULT;
7c3e94a3 220 report_corrected = false;
2bd0ea18
NS
221
222 /*
223 * XXX have to add suboption processing here
224 * attributes, quotas, nlinks, aligned_inos, sb_fbits
225 */
7c3e94a3 226 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
2bd0ea18
NS
227 switch (c) {
228 case 'D':
229 dumpcore = 1;
230 break;
231 case 'o':
232 p = optarg;
233 while (*p != '\0') {
234 char *val;
235
ab870d0e 236 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
237 case ASSUME_XFS:
238 if (val)
239 noval('o', o_opts, ASSUME_XFS);
240 if (assume_xfs)
241 respec('o', o_opts, ASSUME_XFS);
242 assume_xfs = 1;
243 break;
244 case PRE_65_BETA:
245 if (val)
246 noval('o', o_opts, PRE_65_BETA);
247 if (pre_65_beta)
248 respec('o', o_opts,
249 PRE_65_BETA);
250 pre_65_beta = 1;
251 break;
9f38f08d 252 case IHASH_SIZE:
3a19fb7d
CH
253 do_warn(
254 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
255 break;
256 case BHASH_SIZE:
12be365e
BN
257 if (max_mem_specified)
258 do_abort(
3a19fb7d 259 _("-o bhash option cannot be used with -m option\n"));
5e656dbb 260 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 261 bhash_option_used = 1;
cb5b3ef4 262 break;
add3cb90 263 case AG_STRIDE:
5e656dbb 264 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 265 break;
d4dd6ab5
CH
266 case FORCE_GEO:
267 if (val)
268 noval('o', o_opts, FORCE_GEO);
269 if (force_geo)
270 respec('o', o_opts, FORCE_GEO);
271 force_geo = 1;
272 break;
364a126c
DC
273 case PHASE2_THREADS:
274 phase2_threads = (int)strtol(val, NULL, 0);
275 break;
2bd0ea18
NS
276 default:
277 unknown('o', val);
278 break;
279 }
280 }
281 break;
4af916f8
BN
282 case 'c':
283 p = optarg;
284 while (*p) {
285 char *val;
286
ab870d0e 287 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 288 case CONVERT_LAZY_COUNT:
5e656dbb 289 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
290 convert_lazy_count = 1;
291 break;
292 default:
293 unknown('c', val);
294 break;
295 }
296 }
297 break;
2bd0ea18
NS
298 case 'l':
299 log_name = optarg;
300 log_spec = 1;
301 break;
42a564ab
ES
302 case 'r':
303 rt_name = optarg;
304 rt_spec = 1;
305 break;
2bd0ea18
NS
306 case 'f':
307 isa_file = 1;
308 break;
12be365e
BN
309 case 'm':
310 if (bhash_option_used)
311 do_abort(_("-m option cannot be used with "
312 "-o bhash option\n"));
5e656dbb 313 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 314 break;
d321ceac
NS
315 case 'L':
316 zap_log = 1;
317 break;
2bd0ea18
NS
318 case 'n':
319 no_modify = 1;
320 break;
6089b6f0
NS
321 case 'd':
322 dangerously = 1;
323 break;
2bd0ea18 324 case 'v':
3b6ac903 325 verbose++;
2bd0ea18
NS
326 break;
327 case 'V':
507f4e33 328 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 329 exit(0);
cb5b3ef4 330 case 'P':
2556c98b 331 do_prefetch = 0;
3b6ac903 332 break;
06fbdda9 333 case 't':
5e656dbb 334 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 335 break;
7c3e94a3
JT
336 case 'e':
337 report_corrected = true;
338 break;
2bd0ea18
NS
339 case '?':
340 usage();
341 }
342 }
343
344 if (argc - optind != 1)
345 usage();
346
347 if ((fs_name = argv[optind]) == NULL)
348 usage();
7c3e94a3
JT
349
350 if (report_corrected && no_modify)
351 usage();
2bd0ea18
NS
352}
353
b1559967 354void __attribute__((noreturn))
2bd0ea18
NS
355do_error(char const *msg, ...)
356{
357 va_list args;
358
507f4e33 359 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
360
361 va_start(args, msg);
079afa09
CH
362 vfprintf(stderr, msg, args);
363 if (dumpcore)
364 abort();
365 exit(1);
2bd0ea18
NS
366}
367
368/*
369 * like do_error, only the error is internal, no system
370 * error so no oserror processing
371 */
b1559967 372void __attribute__((noreturn))
2bd0ea18
NS
373do_abort(char const *msg, ...)
374{
375 va_list args;
376
377 va_start(args, msg);
079afa09
CH
378 vfprintf(stderr, msg, args);
379 if (dumpcore)
380 abort();
381 exit(1);
2bd0ea18
NS
382}
383
384void
385do_warn(char const *msg, ...)
386{
387 va_list args;
388
389 fs_is_dirty = 1;
390
391 va_start(args, msg);
079afa09 392 vfprintf(stderr, msg, args);
2bd0ea18
NS
393 va_end(args);
394}
395
/*
 * Emit a printf-style informational message on stderr.  No prefix is added
 * and, unlike do_warn(), no global state is touched.
 */
void
do_log(char const *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
407
8b8a6b02 408static void
2bd0ea18
NS
409calc_mkfs(xfs_mount_t *mp)
410{
411 xfs_agblock_t fino_bno;
412 int do_inoalign;
413
414 do_inoalign = mp->m_sinoalign;
415
416 /*
7b370905
BF
417 * Pre-calculate the geometry of ag 0. We know what it looks like
418 * because we know what mkfs does: 2 allocation btree roots (by block
419 * and by size), the inode allocation btree root, the free inode
420 * allocation btree root (if enabled) and some number of blocks to
421 * prefill the agfl.
de046644
DC
422 *
423 * Because the current shape of the btrees may differ from the current
424 * shape, we open code the mkfs freelist block count here. mkfs creates
425 * single level trees, so the calculation is pertty straight forward for
7ddb50f8 426 * the trees that use the AGFL.
2bd0ea18
NS
427 */
428 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
429 bcntbt_root = bnobt_root + 1;
430 inobt_root = bnobt_root + 2;
de046644 431 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
7b370905
BF
432 if (xfs_sb_version_hasfinobt(&mp->m_sb))
433 fino_bno++;
7ddb50f8
DW
434 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
435 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
0f94fa4b 436 fino_bno++;
7ddb50f8 437 }
18c44aa9
DW
438 if (xfs_sb_version_hasreflink(&mp->m_sb))
439 fino_bno++;
2bd0ea18 440
d4dd6ab5 441 /*
649bfa9a
CH
442 * If the log is allocated in the first allocation group we need to
443 * add the number of blocks used by the log to the above calculation.
444 *
445 * This can happens with filesystems that only have a single
446 * allocation group, or very odd geometries created by old mkfs
447 * versions on very small filesystems.
d4dd6ab5 448 */
649bfa9a
CH
449 if (mp->m_sb.sb_logstart &&
450 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
451
d4dd6ab5
CH
452 /*
453 * XXX(hch): verify that sb_logstart makes sense?
454 */
455 fino_bno += mp->m_sb.sb_logblocks;
456 }
457
2bd0ea18
NS
458 /*
459 * ditto the location of the first inode chunks in the fs ('/')
460 */
5e656dbb 461 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
2bd0ea18
NS
462 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
463 mp->m_sb.sb_unit), 0);
5e656dbb 464 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
2bd0ea18
NS
465 mp->m_sb.sb_inoalignmt > 1) {
466 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
467 roundup(fino_bno,
468 mp->m_sb.sb_inoalignmt),
469 0);
470 } else {
471 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
472 }
473
ff105f75 474 ASSERT(mp->m_ialloc_blks > 0);
2bd0ea18 475
ff105f75 476 if (mp->m_ialloc_blks > 1)
2bd0ea18
NS
477 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
478 else
479 last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
480
481 /*
482 * now the first 3 inodes in the system
483 */
484 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
485 do_warn(
5d1b7f0f 486_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
487 mp->m_sb.sb_rootino,
488 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
489 first_prealloc_ino);
2bd0ea18
NS
490
491 if (!no_modify)
492 do_warn(
5d1b7f0f 493 _("resetting superblock root inode pointer to %u\n"),
2bd0ea18
NS
494 first_prealloc_ino);
495 else
496 do_warn(
5d1b7f0f 497 _("would reset superblock root inode pointer to %u\n"),
2bd0ea18
NS
498 first_prealloc_ino);
499
500 /*
501 * just set the value -- safe since the superblock
502 * doesn't get flushed out if no_modify is set
503 */
504 mp->m_sb.sb_rootino = first_prealloc_ino;
505 }
506
507 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
508 do_warn(
5d1b7f0f 509_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
510 mp->m_sb.sb_rbmino,
511 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
512 first_prealloc_ino + 1);
2bd0ea18
NS
513
514 if (!no_modify)
515 do_warn(
5d1b7f0f 516 _("resetting superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
517 first_prealloc_ino + 1);
518 else
519 do_warn(
5d1b7f0f 520 _("would reset superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
521 first_prealloc_ino + 1);
522
523 /*
524 * just set the value -- safe since the superblock
525 * doesn't get flushed out if no_modify is set
526 */
527 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
528 }
529
530 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
531 do_warn(
5d1b7f0f
CH
532_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
533 mp->m_sb.sb_rsumino,
534 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
535 first_prealloc_ino + 2);
2bd0ea18
NS
536
537 if (!no_modify)
538 do_warn(
5d1b7f0f 539 _("resetting superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
540 first_prealloc_ino + 2);
541 else
542 do_warn(
5d1b7f0f 543 _("would reset superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
544 first_prealloc_ino + 2);
545
546 /*
547 * just set the value -- safe since the superblock
548 * doesn't get flushed out if no_modify is set
549 */
550 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
551 }
552
553}
554
1926558d
BF
555/*
556 * v5 superblock metadata track the LSN of last modification and thus require
557 * that the current LSN is always moving forward. The current LSN is reset if
558 * the log has been cleared, which puts the log behind parts of the filesystem
559 * on-disk and can disrupt log recovery.
560 *
561 * We have tracked the maximum LSN of every piece of metadata that has been read
562 * in via the read verifiers. Compare the max LSN with the log and if the log is
563 * behind, bump the cycle number and reformat the log.
564 */
565static void
566format_log_max_lsn(
567 struct xfs_mount *mp)
568{
569 struct xlog *log = mp->m_log;
570 int max_cycle;
571 int max_block;
572 int new_cycle;
573 xfs_daddr_t logstart;
574 xfs_daddr_t logblocks;
575 int logversion;
576
577 if (!xfs_sb_version_hascrc(&mp->m_sb))
578 return;
579
580 /*
581 * If the log is ahead of the highest metadata LSN we've seen, we're
582 * safe and there's nothing to do.
583 */
584 max_cycle = CYCLE_LSN(libxfs_max_lsn);
585 max_block = BLOCK_LSN(libxfs_max_lsn);
586 if (max_cycle < log->l_curr_cycle ||
587 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
588 return;
589
590 /*
591 * Going to the next cycle should be sufficient but we bump by a few
592 * counts to help cover any metadata LSNs we could have missed.
593 */
594 new_cycle = max_cycle + 3;
595 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
596 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
597 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
598
599 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
600 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
601
602 if (no_modify) {
603 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
604 return;
605 }
606
607 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
608 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
609 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 610 XLOG_FMT, new_cycle, true);
1926558d
BF
611}
612
4a32b9e9
DC
613/*
614 * mkfs increases the AG count for "multidisk" configurations, we want
615 * to target these for an increase in thread count. Hence check the superlock
616 * geometry information to determine if mkfs considered this a multidisk
617 * configuration.
618 */
619static bool
620is_multidisk_filesystem(
621 struct xfs_mount *mp)
622{
623 struct xfs_sb *sbp = &mp->m_sb;
624
625 /* High agcount filesystems are always considered "multidisk" */
626 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
627 return true;
628
629 /*
630 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
631 * multi-disk array, so we don't either.
632 */
633 if (!sbp->sb_unit)
634 return false;
635
636 ASSERT(sbp->sb_width);
637 return true;
638}
639
28a0a30f
ZL
640/*
641 * if the sector size of the filesystem we are trying to repair is
642 * smaller than that of the underlying filesystem (i.e. we are repairing
643 * an image), the we have to turn off direct IO because we cannot do IO
644 * smaller than the host filesystem's sector size.
645 */
646static void
647check_fs_vs_host_sectsize(
648 struct xfs_sb *sb)
649{
650 int fd;
651 long old_flags;
652 struct xfs_fsop_geom_v1 geom = { 0 };
653
654 fd = libxfs_device_to_fd(x.ddev);
655
656 if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
657 do_log(_("Cannot get host filesystem geometry.\n"
658 "Repair may fail if there is a sector size mismatch between\n"
659 "the image and the host filesystem.\n"));
660 geom.sectsize = BBSIZE;
661 }
662
663 if (sb->sb_sectsize < geom.sectsize) {
664 old_flags = fcntl(fd, F_GETFL, 0);
665 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
666 do_warn(_(
667 "Sector size on host filesystem larger than image sector size.\n"
668 "Cannot turn off direct IO, so exiting.\n"));
669 exit(1);
670 }
671 }
672}
673
2bd0ea18
NS
674int
675main(int argc, char **argv)
676{
2bd0ea18
NS
677 xfs_mount_t *temp_mp;
678 xfs_mount_t *mp;
5e656dbb 679 xfs_dsb_t *dsb;
2bd0ea18
NS
680 xfs_buf_t *sbp;
681 xfs_mount_t xfs_m;
1d6cb115 682 struct xlog log = {0};
06fbdda9 683 char *msgbuf;
88f364a9
DC
684 struct xfs_sb psb;
685 int rval;
2bd0ea18
NS
686
687 progname = basename(argv[0]);
507f4e33
NS
688 setlocale(LC_ALL, "");
689 bindtextdomain(PACKAGE, LOCALEDIR);
690 textdomain(PACKAGE);
beed0dc8 691 dinode_bmbt_translation_init();
2bd0ea18
NS
692
693 temp_mp = &xfs_m;
694 setbuf(stdout, NULL);
695
696 process_args(argc, argv);
d321ceac 697 xfs_init(&x);
2bd0ea18 698
2556c98b
BN
699 msgbuf = malloc(DURATION_BUF_SIZE);
700
06fbdda9
MV
701 timestamp(PHASE_START, 0, NULL);
702 timestamp(PHASE_END, 0, NULL);
703
28a0a30f
ZL
704 /* -f forces this, but let's be nice and autodetect it, as well. */
705 if (!isa_file) {
706 int fd = libxfs_device_to_fd(x.ddev);
707 struct stat statbuf;
708
709 if (fstat(fd, &statbuf) < 0)
710 do_warn(_("%s: couldn't stat \"%s\"\n"),
711 progname, fs_name);
712 else if (S_ISREG(statbuf.st_mode))
713 isa_file = 1;
714 }
715
716 if (isa_file) {
717 /* Best effort attempt to validate fs vs host sector size */
718 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
719 if (rval == XR_OK)
720 check_fs_vs_host_sectsize(&psb);
721 }
722
2bd0ea18
NS
723 /* do phase1 to make sure we have a superblock */
724 phase1(temp_mp);
06fbdda9 725 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
726
727 if (no_modify && primary_sb_modified) {
507f4e33
NS
728 do_warn(_("Primary superblock would have been modified.\n"
729 "Cannot proceed further in no_modify mode.\n"
730 "Exiting now.\n"));
2bd0ea18
NS
731 exit(1);
732 }
733
88f364a9
DC
734 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
735 if (rval != XR_OK) {
736 do_warn(_("Primary superblock bad after phase 1!\n"
737 "Exiting now.\n"));
738 exit(1);
739 }
2bd0ea18 740
f63fd268 741 /*
28a0a30f
ZL
742 * Now that we have completely validated the superblock, geometry may
743 * have changed; re-check geometry vs the host filesystem geometry
f63fd268 744 */
28a0a30f
ZL
745 if (isa_file)
746 check_fs_vs_host_sectsize(&psb);
88f364a9 747
1d6cb115
BF
748 /*
749 * Prepare the mount structure. Point the log reference to our local
750 * copy so it's available to the various phases. The log bits are
751 * initialized in phase 2.
752 */
88f364a9
DC
753 memset(&xfs_m, 0, sizeof(xfs_mount_t));
754 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
755
756 if (!mp) {
507f4e33
NS
757 fprintf(stderr,
758 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
759 progname);
760 exit(1);
761 }
1d6cb115 762 mp->m_log = &log;
2bd0ea18 763
23639f77
ES
764 /* Spit out function & line on these corruption macros */
765 if (verbose > 2)
766 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
767
2bd0ea18
NS
768 /*
769 * set XFS-independent status vars from the mount/sb structure
770 */
771 glob_agcount = mp->m_sb.sb_agcount;
772
773 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
5a707ca1 774 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
edf3f9d0 775 inodes_per_cluster = MAX(mp->m_sb.sb_inopblock,
ff105f75 776 mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
2bd0ea18 777
0cce4aa1
DC
778 /*
779 * Automatic striding for high agcount filesystems.
780 *
781 * More AGs indicates that the filesystem is either large or can handle
782 * more IO parallelism. Either way, we should try to process multiple
783 * AGs at a time in such a configuration to try to saturate the
784 * underlying storage and speed the repair process. Only do this if
785 * prefetching is enabled.
786 *
787 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
788 * to target these for an increase in thread count. Hence a stride value
789 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
790 * on such filesystems.
12b55baf
DC
791 *
792 * Limit the maximum thread count based on the available CPU power that
793 * is available. If we use too many threads, we might run out of memory
794 * and CPU power before we run out of IO concurrency. We limit to 8
795 * threads/CPU as this is enough threads to saturate a CPU on fast
796 * devices, yet few enough that it will saturate but won't overload slow
797 * devices.
4a32b9e9
DC
798 *
799 * Multidisk filesystems can handle more IO parallelism so we should try
800 * to process multiple AGs at a time in such a configuration to try to
801 * saturate the underlying storage and speed the repair process. Only do
802 * this if prefetching is enabled.
0cce4aa1 803 */
4a32b9e9
DC
804 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
805 /*
806 * For small agcount multidisk systems, just double the
807 * parallelism. For larger AG count filesystems (32 and above)
808 * use more parallelism, and linearly increase the parallelism
809 * with the number of AGs.
810 */
811 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
812 }
0cce4aa1 813
add3cb90 814 if (ag_stride) {
12b55baf
DC
815 int max_threads = platform_nproc() * 8;
816
2556c98b 817 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
818 while (thread_count > max_threads) {
819 ag_stride *= 2;
820 thread_count = (glob_agcount + ag_stride - 1) /
821 ag_stride;
822 }
823 if (thread_count > 0)
824 thread_init();
825 else {
826 thread_count = 1;
827 ag_stride = 0;
828 }
add3cb90
BN
829 }
830
2556c98b 831 if (ag_stride && report_interval) {
06fbdda9 832 init_progress_rpt();
06fbdda9
MV
833 if (msgbuf) {
834 do_log(_(" - reporting progress in intervals of %s\n"),
835 duration(report_interval, msgbuf));
06fbdda9
MV
836 }
837 }
838
2556c98b
BN
839 /*
840 * Adjust libxfs cache sizes based on system memory,
841 * filesystem size and inode count.
842 *
843 * We'll set the cache size based on 3/4s the memory minus
844 * space used by the inode AVL tree and block usage map.
845 *
846 * Inode AVL tree space is approximately 4 bytes per inode,
847 * block usage map is currently 1 byte for 2 blocks.
848 *
849 * We assume most blocks will be inode clusters.
850 *
851 * Calculations are done in kilobyte units.
852 */
853
12be365e 854 if (!bhash_option_used || max_mem_specified) {
2556c98b 855 unsigned long mem_used;
12be365e
BN
856 unsigned long max_mem;
857 struct rlimit rlim;
2556c98b 858
2556c98b 859 libxfs_bcache_purge();
2556c98b
BN
860 cache_destroy(libxfs_bcache);
861
862 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
863 (mp->m_sb.sb_dblocks >> (10 + 1)) +
864 50000; /* rough estimate of 50MB overhead */
865 max_mem = max_mem_specified ? max_mem_specified * 1024 :
866 libxfs_physmem() * 3 / 4;
867
868 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
869 rlim.rlim_cur != RLIM_INFINITY) {
870 rlim.rlim_cur = rlim.rlim_max;
871 setrlimit(RLIMIT_AS, &rlim);
872 /* use approximately 80% of rlimit to avoid overrun */
873 max_mem = MIN(max_mem, rlim.rlim_cur / 1280);
874 } else
875 max_mem = MIN(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
876
877 if (verbose > 1)
5d1b7f0f
CH
878 do_log(
879 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
880 max_mem, mp->m_sb.sb_icount,
881 mp->m_sb.sb_icount >> (10 - 2),
882 mp->m_sb.sb_dblocks,
883 mp->m_sb.sb_dblocks >> (10 + 1));
884
885 if (max_mem <= mem_used) {
0335a835
DC
886 if (max_mem_specified) {
887 do_abort(
888 _("Required memory for repair is greater that the maximum specified\n"
889 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 890 mem_used / 1024);
0335a835 891 }
70a4820f 892 do_log(
61510437
DC
893 _("Memory available for repair (%luMB) may not be sufficient.\n"
894 "At least %luMB is needed to repair this filesystem efficiently\n"
895 "If repair fails due to lack of memory, please\n"),
896 max_mem / 1024, mem_used / 1024);
897 if (do_prefetch)
70a4820f 898 do_log(
61510437
DC
899 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
900 else
70a4820f 901 do_log(
61510437
DC
902 _("increase system RAM and/or swap space to at least %luMB.\n"),
903 mem_used * 2 / 1024);
904
905 max_mem = mem_used;
2556c98b
BN
906 }
907
61510437
DC
908 max_mem -= mem_used;
909 if (max_mem >= (1 << 30))
910 max_mem = 1 << 30;
911 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
912 (mp->m_inode_cluster_size >> 10));
913 if (libxfs_bhash_size < 512)
914 libxfs_bhash_size = 512;
915
2556c98b
BN
916 if (verbose)
917 do_log(_(" - block cache size set to %d entries\n"),
918 libxfs_bhash_size * HASH_CACHE_RATIO);
919
ba9ecd40 920 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
921 &libxfs_bcache_operations);
922 }
923
2bd0ea18
NS
924 /*
925 * calculate what mkfs would do to this filesystem
926 */
927 calc_mkfs(mp);
928
929 /*
c1f7a46c 930 * initialize block alloc map
2bd0ea18 931 */
c1f7a46c
BN
932 init_bmaps(mp);
933 incore_ino_init(mp);
934 incore_ext_init(mp);
2d273771 935 rmaps_init(mp);
c1f7a46c
BN
936
937 /* initialize random globals now that we know the fs geometry */
938 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
939
940 if (parse_sb_version(&mp->m_sb)) {
941 do_warn(
507f4e33 942 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
943 return(1);
944 }
945
946 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 947 phase2(mp, phase2_threads);
06fbdda9 948 timestamp(PHASE_END, 2, NULL);
2bd0ea18 949
2556c98b
BN
950 if (do_prefetch)
951 init_prefetch(mp);
952
8100dd79 953 phase3(mp, phase2_threads);
06fbdda9 954 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
955
956 phase4(mp);
06fbdda9 957 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
958
959 if (no_modify)
507f4e33 960 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 961 else {
2bd0ea18 962 phase5(mp);
3b6ac903 963 }
06fbdda9 964 timestamp(PHASE_END, 5, NULL);
2bd0ea18 965
c1f7a46c
BN
966 /*
967 * Done with the block usage maps, toss them...
968 */
2d273771 969 rmaps_free(mp);
c1f7a46c
BN
970 free_bmaps(mp);
971
2bd0ea18
NS
972 if (!bad_ino_btree) {
973 phase6(mp);
06fbdda9 974 timestamp(PHASE_END, 6, NULL);
2bd0ea18 975
e161d4a8 976 phase7(mp, phase2_threads);
06fbdda9 977 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
978 } else {
979 do_warn(
507f4e33 980_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
981 }
982
0340d706 983 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
984 if (!no_modify) {
985 do_warn(
507f4e33 986_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
987 } else {
988 do_warn(
507f4e33 989_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
990 }
991 } else if (lost_quotas) {
992 if (!no_modify) {
993 do_warn(
507f4e33 994_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
995 } else {
996 do_warn(
507f4e33 997_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
998 }
999 } else {
1000 if (lost_uquotino) {
1001 if (!no_modify) {
1002 do_warn(
507f4e33
NS
1003_("Warning: user quota information was cleared.\n"
1004 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1005 } else {
1006 do_warn(
507f4e33
NS
1007_("Warning: user quota information would be cleared.\n"
1008 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1009 }
1010 }
1011
b36eef04 1012 if (lost_gquotino) {
2bd0ea18
NS
1013 if (!no_modify) {
1014 do_warn(
507f4e33
NS
1015_("Warning: group quota information was cleared.\n"
1016 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
1017 } else {
1018 do_warn(
507f4e33
NS
1019_("Warning: group quota information would be cleared.\n"
1020 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
1021 }
1022 }
1023
1024 if (lost_pquotino) {
1025 if (!no_modify) {
1026 do_warn(
1027_("Warning: project quota information was cleared.\n"
1028 "Project quotas can not be enforced until limit information is recreated.\n"));
1029 } else {
1030 do_warn(
1031_("Warning: project quota information would be cleared.\n"
1032 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1033 }
1034 }
1035 }
1036
2556c98b 1037 if (ag_stride && report_interval)
06fbdda9 1038 stop_progress_rpt();
9f38f08d 1039
2bd0ea18 1040 if (no_modify) {
1926558d
BF
1041 /*
1042 * Warn if the current LSN is problematic and the log requires a
1043 * reformat.
1044 */
1045 format_log_max_lsn(mp);
1046
2bd0ea18 1047 do_log(
507f4e33 1048 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1049 if (verbose)
06fbdda9 1050 summary_report();
2bd0ea18
NS
1051 if (fs_is_dirty)
1052 return(1);
1053
1054 return(0);
1055 }
1056
1057 /*
1058 * Clear the quota flags if they're on.
1059 */
1060 sbp = libxfs_getsb(mp, 0);
1061 if (!sbp)
507f4e33 1062 do_error(_("couldn't get superblock\n"));
2bd0ea18 1063
5e656dbb 1064 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1065
342aef1e 1066 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1067 do_warn(_("Note - quota info will be regenerated on next "
1068 "quota mount.\n"));
342aef1e 1069 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1070 }
1071
6bf4721d 1072 if (copied_sunit) {
2bd0ea18 1073 do_warn(
6bf4721d
ES
1074_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1075 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1076 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1077 }
2bd0ea18
NS
1078
1079 libxfs_writebuf(sbp, 0);
1080
2556c98b 1081 /*
1926558d
BF
1082 * Done. Flush all cached buffers and inodes first to ensure all
1083 * verifiers are run (where we discover the max metadata LSN), reformat
1084 * the log if necessary and unmount.
2556c98b
BN
1085 */
1086 libxfs_bcache_flush();
1926558d 1087 format_log_max_lsn(mp);
2bd0ea18 1088 libxfs_umount(mp);
1926558d 1089
d321ceac
NS
1090 if (x.rtdev)
1091 libxfs_device_close(x.rtdev);
1092 if (x.logdev && x.logdev != x.ddev)
1093 libxfs_device_close(x.logdev);
1094 libxfs_device_close(x.ddev);
2ce8bff5 1095 libxfs_destroy();
2bd0ea18 1096
06fbdda9
MV
1097 if (verbose)
1098 summary_report();
507f4e33 1099 do_log(_("done\n"));
3ae81520
ES
1100
1101 if (dangerously && !no_modify)
1102 do_warn(
1103_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1104
4c0a98ae
BN
1105 pftrace_done();
1106
0a223eb8
ES
1107 free(msgbuf);
1108
7c3e94a3
JT
1109 if (fs_is_dirty && report_corrected)
1110 return (4);
3b6ac903
MV
1111 return (0);
1112}