]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - repair/xfs_repair.c
xfsprogs: replace [fl]stat64 by equivalent [fl]stat
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
CommitLineData
2bd0ea18 1/*
da23017d
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
dfc130f3 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
dfc130f3 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
dfc130f3 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18
NS
17 */
18
6b803e5a
CH
19#include "libxfs.h"
20#include "libxlog.h"
12be365e 21#include <sys/resource.h>
4a32b9e9 22#include "xfs_multidisk.h"
2bd0ea18
NS
23#include "avl.h"
24#include "avl64.h"
25#include "globals.h"
26#include "versions.h"
27#include "agheader.h"
28#include "protos.h"
29#include "incore.h"
30#include "err_protos.h"
cb5b3ef4 31#include "prefetch.h"
3b6ac903 32#include "threads.h"
06fbdda9 33#include "progress.h"
beed0dc8 34#include "dinode.h"
9e0f480e
DW
35#include "slab.h"
36#include "rmap.h"
2bd0ea18
NS
37
/* Round x down to a multiple of y; for integer operands the division truncates. */
#define rounddown(x, y)		(((x) / (y)) * (y))

/* 64 * 1024 = 65536 */
#define XR_MAX_SECT_SIZE	(64 * 1024)
41
42/*
43 * option tables for getsubopt calls
44 */
45
/*
 * -o: user-supplied override options
 *
 * Each macro below is the index of the matching name in o_opts[]; the
 * -o parser switches on the index that getsubopt() returns for a name.
 */
#define ASSUME_XFS	0
#define PRE_65_BETA	1
#define IHASH_SIZE	2
#define BHASH_SIZE	3
#define AG_STRIDE	4
#define FORCE_GEO	5
#define PHASE2_THREADS	6

static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[PRE_65_BETA]		= "fs_is_pre_65_beta",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	[PHASE2_THREADS + 1]	= NULL,		/* list terminator */
};
66
4af916f8
BN
/*
 * -c: conversion options
 *
 * CONVERT_LAZY_COUNT is the index of "lazycount" in c_opts[].
 */
#define CONVERT_LAZY_COUNT	0

static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]		= "lazycount",
	[CONVERT_LAZY_COUNT + 1]	= NULL,		/* list terminator */
};
75
76
/* Set to 1 once "-o bhash" has been parsed; -m refuses to run after that. */
static int	bhash_option_used;

/* Value given with -m, in megabytes; stays zero unless -m sets it. */
static long	max_mem_specified;

/*
 * Value handed to phase2(), phase3() and phase7(); "-o phase2_threads"
 * replaces the default of 32.
 */
static int	phase2_threads = 32;
2556c98b 80
2bd0ea18
NS
81static void
82usage(void)
83{
4af916f8
BN
84 do_warn(_(
85"Usage: %s [options] device\n"
86"\n"
87"Options:\n"
88" -f The device is a file\n"
89" -L Force log zeroing. Do this as a last resort.\n"
90" -l logdev Specifies the device where the external log resides.\n"
91" -m maxmem Maximum amount of memory to be used in megabytes.\n"
92" -n No modify mode, just checks the filesystem for damage.\n"
93" -P Disables prefetching.\n"
94" -r rtdev Specifies the device where the realtime section resides.\n"
95" -v Verbose output.\n"
96" -c subopts Change filesystem parameters - use xfs_admin.\n"
97" -o subopts Override default behaviour, refer to man page.\n"
79e106f0 98" -t interval Reporting interval in seconds.\n"
4af916f8
BN
99" -d Repair dangerously.\n"
100" -V Reports version and exits.\n"), progname);
2bd0ea18
NS
101 exit(1);
102}
103
2bd0ea18
NS
104char *
105err_string(int err_code)
106{
507f4e33
NS
107 static char *err_message[XR_BAD_ERR_CODE];
108 static int done;
109
110 if (!done) {
111 err_message[XR_OK] = _("no error");
112 err_message[XR_BAD_MAGIC] = _("bad magic number");
113 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
114 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
4af916f8 115 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
507f4e33
NS
116 err_message[XR_BAD_INPROGRESS] =
117 _("filesystem mkfs-in-progress bit set");
118 err_message[XR_BAD_FS_SIZE_DATA] =
119 _("inconsistent filesystem geometry information");
120 err_message[XR_BAD_INO_SIZE_DATA] =
121 _("bad inode size or inconsistent with number of inodes/block"),
122 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
123 err_message[XR_AGF_GEO_MISMATCH] =
124 _("AGF geometry info conflicts with filesystem geometry");
125 err_message[XR_AGI_GEO_MISMATCH] =
126 _("AGI geometry info conflicts with filesystem geometry");
127 err_message[XR_SB_GEO_MISMATCH] =
128 _("AG superblock geometry info conflicts with filesystem geometry");
129 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
130 err_message[XR_BAD_RT_GEO_DATA] =
131 _("inconsistent filesystem geometry in realtime filesystem component");
132 err_message[XR_BAD_INO_MAX_PCT] =
133 _("maximum indicated percentage of inodes > 100%");
134 err_message[XR_BAD_INO_ALIGN] =
135 _("inconsistent inode alignment value");
136 err_message[XR_INSUFF_SEC_SB] =
137 _("not enough secondary superblocks with matching geometry");
138 err_message[XR_BAD_SB_UNIT] =
139 _("bad stripe unit in superblock");
140 err_message[XR_BAD_SB_WIDTH] =
141 _("bad stripe width in superblock");
142 err_message[XR_BAD_SVN] =
143 _("bad shared version number in superblock");
88f364a9
DC
144 err_message[XR_BAD_CRC] =
145 _("bad CRC in superblock");
507f4e33
NS
146 done = 1;
147 }
148
2bd0ea18 149 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
507f4e33 150 do_abort(_("bad error code - %d\n"), err_code);
2bd0ea18
NS
151
152 return(err_message[err_code]);
153}
154
/*
 * Complain that suboption tbl[idx] of option -opt was given a value it
 * does not take, then show the usage text (usage() exits).
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*name = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, name);
	usage();
}
161
/*
 * Complain that option -opt (and, when tbl is non-NULL, its suboption
 * tbl[idx]) was given more than once, then show the usage text
 * (usage() exits).
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}
	do_warn(_("option respecified\n"));

	usage();
}
171
/*
 * Report an unrecognised suboption string s of option -opt, then show the
 * usage text (usage() exits).
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s);

	usage();
}
178
179/*
180 * sets only the global argument flags and variables
181 */
8b8a6b02 182static void
2bd0ea18
NS
183process_args(int argc, char **argv)
184{
185 char *p;
186 int c;
187
188 log_spec = 0;
189 fs_is_dirty = 0;
190 verbose = 0;
191 no_modify = 0;
c781939c 192 dangerously = 0;
2bd0ea18 193 isa_file = 0;
d321ceac 194 zap_log = 0;
2bd0ea18 195 dumpcore = 0;
0f012a4c 196 full_ino_ex_data = 0;
2bd0ea18
NS
197 delete_attr_ok = 1;
198 force_geo = 0;
199 assume_xfs = 0;
6bf4721d 200 copied_sunit = 0;
2bd0ea18
NS
201 sb_inoalignmt = 0;
202 sb_unit = 0;
203 sb_width = 0;
204 fs_attributes_allowed = 1;
9b1d68ec 205 fs_attributes2_allowed = 1;
2bd0ea18
NS
206 fs_quotas_allowed = 1;
207 fs_aligned_inodes_allowed = 1;
208 fs_sb_feature_bits_allowed = 1;
209 fs_has_extflgbit_allowed = 1;
210 pre_65_beta = 0;
211 fs_shared_allowed = 1;
add3cb90 212 ag_stride = 0;
2556c98b 213 thread_count = 1;
06fbdda9 214 report_interval = PROG_RPT_DEFAULT;
2bd0ea18
NS
215
216 /*
217 * XXX have to add suboption processing here
218 * attributes, quotas, nlinks, aligned_inos, sb_fbits
219 */
4af916f8 220 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPt:")) != EOF) {
2bd0ea18
NS
221 switch (c) {
222 case 'D':
223 dumpcore = 1;
224 break;
225 case 'o':
226 p = optarg;
227 while (*p != '\0') {
228 char *val;
229
ab870d0e 230 switch (getsubopt(&p, o_opts, &val)) {
2bd0ea18
NS
231 case ASSUME_XFS:
232 if (val)
233 noval('o', o_opts, ASSUME_XFS);
234 if (assume_xfs)
235 respec('o', o_opts, ASSUME_XFS);
236 assume_xfs = 1;
237 break;
238 case PRE_65_BETA:
239 if (val)
240 noval('o', o_opts, PRE_65_BETA);
241 if (pre_65_beta)
242 respec('o', o_opts,
243 PRE_65_BETA);
244 pre_65_beta = 1;
245 break;
9f38f08d 246 case IHASH_SIZE:
3a19fb7d
CH
247 do_warn(
248 _("-o ihash option has been removed and will be ignored\n"));
9f38f08d
MV
249 break;
250 case BHASH_SIZE:
12be365e
BN
251 if (max_mem_specified)
252 do_abort(
3a19fb7d 253 _("-o bhash option cannot be used with -m option\n"));
5e656dbb 254 libxfs_bhash_size = (int)strtol(val, NULL, 0);
2556c98b 255 bhash_option_used = 1;
cb5b3ef4 256 break;
add3cb90 257 case AG_STRIDE:
5e656dbb 258 ag_stride = (int)strtol(val, NULL, 0);
3b6ac903 259 break;
d4dd6ab5
CH
260 case FORCE_GEO:
261 if (val)
262 noval('o', o_opts, FORCE_GEO);
263 if (force_geo)
264 respec('o', o_opts, FORCE_GEO);
265 force_geo = 1;
266 break;
364a126c
DC
267 case PHASE2_THREADS:
268 phase2_threads = (int)strtol(val, NULL, 0);
269 break;
2bd0ea18
NS
270 default:
271 unknown('o', val);
272 break;
273 }
274 }
275 break;
4af916f8
BN
276 case 'c':
277 p = optarg;
278 while (*p) {
279 char *val;
280
ab870d0e 281 switch (getsubopt(&p, c_opts, &val)) {
4af916f8 282 case CONVERT_LAZY_COUNT:
5e656dbb 283 lazy_count = (int)strtol(val, NULL, 0);
4af916f8
BN
284 convert_lazy_count = 1;
285 break;
286 default:
287 unknown('c', val);
288 break;
289 }
290 }
291 break;
2bd0ea18
NS
292 case 'l':
293 log_name = optarg;
294 log_spec = 1;
295 break;
42a564ab
ES
296 case 'r':
297 rt_name = optarg;
298 rt_spec = 1;
299 break;
2bd0ea18
NS
300 case 'f':
301 isa_file = 1;
302 break;
12be365e
BN
303 case 'm':
304 if (bhash_option_used)
305 do_abort(_("-m option cannot be used with "
306 "-o bhash option\n"));
5e656dbb 307 max_mem_specified = strtol(optarg, NULL, 0);
12be365e 308 break;
d321ceac
NS
309 case 'L':
310 zap_log = 1;
311 break;
2bd0ea18
NS
312 case 'n':
313 no_modify = 1;
314 break;
6089b6f0
NS
315 case 'd':
316 dangerously = 1;
317 break;
2bd0ea18 318 case 'v':
3b6ac903 319 verbose++;
2bd0ea18
NS
320 break;
321 case 'V':
507f4e33 322 printf(_("%s version %s\n"), progname, VERSION);
3d98fe63 323 exit(0);
cb5b3ef4 324 case 'P':
2556c98b 325 do_prefetch = 0;
3b6ac903 326 break;
06fbdda9 327 case 't':
5e656dbb 328 report_interval = (int)strtol(optarg, NULL, 0);
06fbdda9 329 break;
2bd0ea18
NS
330 case '?':
331 usage();
332 }
333 }
334
335 if (argc - optind != 1)
336 usage();
337
338 if ((fs_name = argv[optind]) == NULL)
339 usage();
340}
341
b1559967 342void __attribute__((noreturn))
2bd0ea18
NS
343do_error(char const *msg, ...)
344{
345 va_list args;
346
507f4e33 347 fprintf(stderr, _("\nfatal error -- "));
2bd0ea18
NS
348
349 va_start(args, msg);
079afa09
CH
350 vfprintf(stderr, msg, args);
351 if (dumpcore)
352 abort();
353 exit(1);
2bd0ea18
NS
354}
355
356/*
357 * like do_error, only the error is internal, no system
358 * error so no oserror processing
359 */
b1559967 360void __attribute__((noreturn))
2bd0ea18
NS
361do_abort(char const *msg, ...)
362{
363 va_list args;
364
365 va_start(args, msg);
079afa09
CH
366 vfprintf(stderr, msg, args);
367 if (dumpcore)
368 abort();
369 exit(1);
2bd0ea18
NS
370}
371
372void
373do_warn(char const *msg, ...)
374{
375 va_list args;
376
377 fs_is_dirty = 1;
378
379 va_start(args, msg);
079afa09 380 vfprintf(stderr, msg, args);
2bd0ea18
NS
381 va_end(args);
382}
383
/*
 * Print a printf-style informational message to stderr.  Nothing is
 * added to the text and, unlike do_warn(), no state is modified.
 */
void
do_log(char const *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
395
8b8a6b02 396static void
2bd0ea18
NS
397calc_mkfs(xfs_mount_t *mp)
398{
399 xfs_agblock_t fino_bno;
400 int do_inoalign;
401
402 do_inoalign = mp->m_sinoalign;
403
404 /*
7b370905
BF
405 * Pre-calculate the geometry of ag 0. We know what it looks like
406 * because we know what mkfs does: 2 allocation btree roots (by block
407 * and by size), the inode allocation btree root, the free inode
408 * allocation btree root (if enabled) and some number of blocks to
409 * prefill the agfl.
de046644
DC
410 *
411 * Because the current shape of the btrees may differ from the current
412 * shape, we open code the mkfs freelist block count here. mkfs creates
413 * single level trees, so the calculation is pertty straight forward for
7ddb50f8 414 * the trees that use the AGFL.
2bd0ea18
NS
415 */
416 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
417 bcntbt_root = bnobt_root + 1;
418 inobt_root = bnobt_root + 2;
de046644 419 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
7b370905
BF
420 if (xfs_sb_version_hasfinobt(&mp->m_sb))
421 fino_bno++;
7ddb50f8
DW
422 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
423 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
0f94fa4b 424 fino_bno++;
7ddb50f8 425 }
18c44aa9
DW
426 if (xfs_sb_version_hasreflink(&mp->m_sb))
427 fino_bno++;
2bd0ea18 428
d4dd6ab5 429 /*
649bfa9a
CH
430 * If the log is allocated in the first allocation group we need to
431 * add the number of blocks used by the log to the above calculation.
432 *
433 * This can happens with filesystems that only have a single
434 * allocation group, or very odd geometries created by old mkfs
435 * versions on very small filesystems.
d4dd6ab5 436 */
649bfa9a
CH
437 if (mp->m_sb.sb_logstart &&
438 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
439
d4dd6ab5
CH
440 /*
441 * XXX(hch): verify that sb_logstart makes sense?
442 */
443 fino_bno += mp->m_sb.sb_logblocks;
444 }
445
2bd0ea18
NS
446 /*
447 * ditto the location of the first inode chunks in the fs ('/')
448 */
5e656dbb 449 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
2bd0ea18
NS
450 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
451 mp->m_sb.sb_unit), 0);
5e656dbb 452 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
2bd0ea18
NS
453 mp->m_sb.sb_inoalignmt > 1) {
454 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
455 roundup(fino_bno,
456 mp->m_sb.sb_inoalignmt),
457 0);
458 } else {
459 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
460 }
461
ff105f75 462 ASSERT(mp->m_ialloc_blks > 0);
2bd0ea18 463
ff105f75 464 if (mp->m_ialloc_blks > 1)
2bd0ea18
NS
465 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
466 else
467 last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
468
469 /*
470 * now the first 3 inodes in the system
471 */
472 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
473 do_warn(
5d1b7f0f 474_("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
475 mp->m_sb.sb_rootino,
476 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
477 first_prealloc_ino);
2bd0ea18
NS
478
479 if (!no_modify)
480 do_warn(
5d1b7f0f 481 _("resetting superblock root inode pointer to %u\n"),
2bd0ea18
NS
482 first_prealloc_ino);
483 else
484 do_warn(
5d1b7f0f 485 _("would reset superblock root inode pointer to %u\n"),
2bd0ea18
NS
486 first_prealloc_ino);
487
488 /*
489 * just set the value -- safe since the superblock
490 * doesn't get flushed out if no_modify is set
491 */
492 mp->m_sb.sb_rootino = first_prealloc_ino;
493 }
494
495 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
496 do_warn(
5d1b7f0f 497_("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
507f4e33
NS
498 mp->m_sb.sb_rbmino,
499 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
500 first_prealloc_ino + 1);
2bd0ea18
NS
501
502 if (!no_modify)
503 do_warn(
5d1b7f0f 504 _("resetting superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
505 first_prealloc_ino + 1);
506 else
507 do_warn(
5d1b7f0f 508 _("would reset superblock realtime bitmap ino pointer to %u\n"),
2bd0ea18
NS
509 first_prealloc_ino + 1);
510
511 /*
512 * just set the value -- safe since the superblock
513 * doesn't get flushed out if no_modify is set
514 */
515 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
516 }
517
518 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
519 do_warn(
5d1b7f0f
CH
520_("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
521 mp->m_sb.sb_rsumino,
522 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
523 first_prealloc_ino + 2);
2bd0ea18
NS
524
525 if (!no_modify)
526 do_warn(
5d1b7f0f 527 _("resetting superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
528 first_prealloc_ino + 2);
529 else
530 do_warn(
5d1b7f0f 531 _("would reset superblock realtime summary ino pointer to %u\n"),
2bd0ea18
NS
532 first_prealloc_ino + 2);
533
534 /*
535 * just set the value -- safe since the superblock
536 * doesn't get flushed out if no_modify is set
537 */
538 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
539 }
540
541}
542
1926558d
BF
543/*
544 * v5 superblock metadata track the LSN of last modification and thus require
545 * that the current LSN is always moving forward. The current LSN is reset if
546 * the log has been cleared, which puts the log behind parts of the filesystem
547 * on-disk and can disrupt log recovery.
548 *
549 * We have tracked the maximum LSN of every piece of metadata that has been read
550 * in via the read verifiers. Compare the max LSN with the log and if the log is
551 * behind, bump the cycle number and reformat the log.
552 */
553static void
554format_log_max_lsn(
555 struct xfs_mount *mp)
556{
557 struct xlog *log = mp->m_log;
558 int max_cycle;
559 int max_block;
560 int new_cycle;
561 xfs_daddr_t logstart;
562 xfs_daddr_t logblocks;
563 int logversion;
564
565 if (!xfs_sb_version_hascrc(&mp->m_sb))
566 return;
567
568 /*
569 * If the log is ahead of the highest metadata LSN we've seen, we're
570 * safe and there's nothing to do.
571 */
572 max_cycle = CYCLE_LSN(libxfs_max_lsn);
573 max_block = BLOCK_LSN(libxfs_max_lsn);
574 if (max_cycle < log->l_curr_cycle ||
575 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
576 return;
577
578 /*
579 * Going to the next cycle should be sufficient but we bump by a few
580 * counts to help cover any metadata LSNs we could have missed.
581 */
582 new_cycle = max_cycle + 3;
583 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
584 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
585 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
586
587 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
588 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
589
590 if (no_modify) {
591 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
592 return;
593 }
594
595 do_warn(_("Format log to cycle %d.\n"), new_cycle);
1c12a814
BF
596 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
597 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
571a78a7 598 XLOG_FMT, new_cycle, true);
1926558d
BF
599}
600
4a32b9e9
DC
601/*
602 * mkfs increases the AG count for "multidisk" configurations, we want
603 * to target these for an increase in thread count. Hence check the superlock
604 * geometry information to determine if mkfs considered this a multidisk
605 * configuration.
606 */
607static bool
608is_multidisk_filesystem(
609 struct xfs_mount *mp)
610{
611 struct xfs_sb *sbp = &mp->m_sb;
612
613 /* High agcount filesystems are always considered "multidisk" */
614 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
615 return true;
616
617 /*
618 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
619 * multi-disk array, so we don't either.
620 */
621 if (!sbp->sb_unit)
622 return false;
623
624 ASSERT(sbp->sb_width);
625 return true;
626}
627
2bd0ea18
NS
628int
629main(int argc, char **argv)
630{
2bd0ea18
NS
631 xfs_mount_t *temp_mp;
632 xfs_mount_t *mp;
5e656dbb 633 xfs_dsb_t *dsb;
2bd0ea18
NS
634 xfs_buf_t *sbp;
635 xfs_mount_t xfs_m;
1d6cb115 636 struct xlog log = {0};
06fbdda9 637 char *msgbuf;
88f364a9
DC
638 struct xfs_sb psb;
639 int rval;
2bd0ea18
NS
640
641 progname = basename(argv[0]);
507f4e33
NS
642 setlocale(LC_ALL, "");
643 bindtextdomain(PACKAGE, LOCALEDIR);
644 textdomain(PACKAGE);
beed0dc8 645 dinode_bmbt_translation_init();
2bd0ea18
NS
646
647 temp_mp = &xfs_m;
648 setbuf(stdout, NULL);
649
650 process_args(argc, argv);
d321ceac 651 xfs_init(&x);
2bd0ea18 652
2556c98b
BN
653 msgbuf = malloc(DURATION_BUF_SIZE);
654
06fbdda9
MV
655 timestamp(PHASE_START, 0, NULL);
656 timestamp(PHASE_END, 0, NULL);
657
2bd0ea18
NS
658 /* do phase1 to make sure we have a superblock */
659 phase1(temp_mp);
06fbdda9 660 timestamp(PHASE_END, 1, NULL);
2bd0ea18
NS
661
662 if (no_modify && primary_sb_modified) {
507f4e33
NS
663 do_warn(_("Primary superblock would have been modified.\n"
664 "Cannot proceed further in no_modify mode.\n"
665 "Exiting now.\n"));
2bd0ea18
NS
666 exit(1);
667 }
668
88f364a9
DC
669 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
670 if (rval != XR_OK) {
671 do_warn(_("Primary superblock bad after phase 1!\n"
672 "Exiting now.\n"));
673 exit(1);
674 }
2bd0ea18 675
76de6bb7
ES
676 /* -f forces this, but let's be nice and autodetect it, as well. */
677 if (!isa_file) {
678 int fd = libxfs_device_to_fd(x.ddev);
f594a0d1 679 struct stat statbuf;
76de6bb7 680
f594a0d1 681 if (fstat(fd, &statbuf) < 0)
76de6bb7
ES
682 do_warn(_("%s: couldn't stat \"%s\"\n"),
683 progname, fs_name);
684 else if (S_ISREG(statbuf.st_mode))
685 isa_file = 1;
686 }
687
f63fd268
DC
688 /*
689 * if the sector size of the filesystem we are trying to repair is
690 * smaller than that of the underlying filesystem (i.e. we are repairing
691 * an image), the we have to turn off direct IO because we cannot do IO
692 * smaller than the host filesystem's sector size.
693 */
694 if (isa_file) {
695 int fd = libxfs_device_to_fd(x.ddev);
696 struct xfs_fsop_geom_v1 geom = { 0 };
697
698 if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
699 do_warn(_("Cannot get host filesystem geometry.\n"
700 "Repair may fail if there is a sector size mismatch between\n"
701 "the image and the host filesystem.\n"));
702 geom.sectsize = BBSIZE;
703 }
704
88f364a9 705 if (psb.sb_sectsize < geom.sectsize) {
f63fd268
DC
706 long old_flags;
707
708 old_flags = fcntl(fd, F_GETFL, 0);
709 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
710 do_warn(_(
711 "Sector size on host filesystem larger than image sector size.\n"
712 "Cannot turn off direct IO, so exiting.\n"));
713 exit(1);
714 }
715 }
716 }
88f364a9 717
1d6cb115
BF
718 /*
719 * Prepare the mount structure. Point the log reference to our local
720 * copy so it's available to the various phases. The log bits are
721 * initialized in phase 2.
722 */
88f364a9
DC
723 memset(&xfs_m, 0, sizeof(xfs_mount_t));
724 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
2bd0ea18
NS
725
726 if (!mp) {
507f4e33
NS
727 fprintf(stderr,
728 _("%s: cannot repair this filesystem. Sorry.\n"),
2bd0ea18
NS
729 progname);
730 exit(1);
731 }
1d6cb115 732 mp->m_log = &log;
2bd0ea18 733
23639f77
ES
734 /* Spit out function & line on these corruption macros */
735 if (verbose > 2)
736 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
737
2bd0ea18
NS
738 /*
739 * set XFS-independent status vars from the mount/sb structure
740 */
741 glob_agcount = mp->m_sb.sb_agcount;
742
743 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
e0607266 744 max_symlink_blocks = libxfs_symlink_blocks(mp, MAXPATHLEN);
edf3f9d0 745 inodes_per_cluster = MAX(mp->m_sb.sb_inopblock,
ff105f75 746 mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
2bd0ea18 747
0cce4aa1
DC
748 /*
749 * Automatic striding for high agcount filesystems.
750 *
751 * More AGs indicates that the filesystem is either large or can handle
752 * more IO parallelism. Either way, we should try to process multiple
753 * AGs at a time in such a configuration to try to saturate the
754 * underlying storage and speed the repair process. Only do this if
755 * prefetching is enabled.
756 *
757 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
758 * to target these for an increase in thread count. Hence a stride value
759 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
760 * on such filesystems.
12b55baf
DC
761 *
762 * Limit the maximum thread count based on the available CPU power that
763 * is available. If we use too many threads, we might run out of memory
764 * and CPU power before we run out of IO concurrency. We limit to 8
765 * threads/CPU as this is enough threads to saturate a CPU on fast
766 * devices, yet few enough that it will saturate but won't overload slow
767 * devices.
4a32b9e9
DC
768 *
769 * Multidisk filesystems can handle more IO parallelism so we should try
770 * to process multiple AGs at a time in such a configuration to try to
771 * saturate the underlying storage and speed the repair process. Only do
772 * this if prefetching is enabled.
0cce4aa1 773 */
4a32b9e9
DC
774 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
775 /*
776 * For small agcount multidisk systems, just double the
777 * parallelism. For larger AG count filesystems (32 and above)
778 * use more parallelism, and linearly increase the parallelism
779 * with the number of AGs.
780 */
781 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
782 }
0cce4aa1 783
add3cb90 784 if (ag_stride) {
12b55baf
DC
785 int max_threads = platform_nproc() * 8;
786
2556c98b 787 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
12b55baf
DC
788 while (thread_count > max_threads) {
789 ag_stride *= 2;
790 thread_count = (glob_agcount + ag_stride - 1) /
791 ag_stride;
792 }
793 if (thread_count > 0)
794 thread_init();
795 else {
796 thread_count = 1;
797 ag_stride = 0;
798 }
add3cb90
BN
799 }
800
2556c98b 801 if (ag_stride && report_interval) {
06fbdda9 802 init_progress_rpt();
06fbdda9
MV
803 if (msgbuf) {
804 do_log(_(" - reporting progress in intervals of %s\n"),
805 duration(report_interval, msgbuf));
06fbdda9
MV
806 }
807 }
808
2556c98b
BN
809 /*
810 * Adjust libxfs cache sizes based on system memory,
811 * filesystem size and inode count.
812 *
813 * We'll set the cache size based on 3/4s the memory minus
814 * space used by the inode AVL tree and block usage map.
815 *
816 * Inode AVL tree space is approximately 4 bytes per inode,
817 * block usage map is currently 1 byte for 2 blocks.
818 *
819 * We assume most blocks will be inode clusters.
820 *
821 * Calculations are done in kilobyte units.
822 */
823
12be365e 824 if (!bhash_option_used || max_mem_specified) {
2556c98b 825 unsigned long mem_used;
12be365e
BN
826 unsigned long max_mem;
827 struct rlimit rlim;
2556c98b 828
2556c98b 829 libxfs_bcache_purge();
2556c98b
BN
830 cache_destroy(libxfs_bcache);
831
832 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
12be365e
BN
833 (mp->m_sb.sb_dblocks >> (10 + 1)) +
834 50000; /* rough estimate of 50MB overhead */
835 max_mem = max_mem_specified ? max_mem_specified * 1024 :
836 libxfs_physmem() * 3 / 4;
837
838 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
839 rlim.rlim_cur != RLIM_INFINITY) {
840 rlim.rlim_cur = rlim.rlim_max;
841 setrlimit(RLIMIT_AS, &rlim);
842 /* use approximately 80% of rlimit to avoid overrun */
843 max_mem = MIN(max_mem, rlim.rlim_cur / 1280);
844 } else
845 max_mem = MIN(max_mem, (LONG_MAX >> 10) + 1);
2556c98b
BN
846
847 if (verbose > 1)
5d1b7f0f
CH
848 do_log(
849 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
12be365e
BN
850 max_mem, mp->m_sb.sb_icount,
851 mp->m_sb.sb_icount >> (10 - 2),
852 mp->m_sb.sb_dblocks,
853 mp->m_sb.sb_dblocks >> (10 + 1));
854
855 if (max_mem <= mem_used) {
0335a835
DC
856 if (max_mem_specified) {
857 do_abort(
858 _("Required memory for repair is greater that the maximum specified\n"
859 "with the -m option. Please increase it to at least %lu.\n"),
12be365e 860 mem_used / 1024);
0335a835 861 }
70a4820f 862 do_log(
61510437
DC
863 _("Memory available for repair (%luMB) may not be sufficient.\n"
864 "At least %luMB is needed to repair this filesystem efficiently\n"
865 "If repair fails due to lack of memory, please\n"),
866 max_mem / 1024, mem_used / 1024);
867 if (do_prefetch)
70a4820f 868 do_log(
61510437
DC
869 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
870 else
70a4820f 871 do_log(
61510437
DC
872 _("increase system RAM and/or swap space to at least %luMB.\n"),
873 mem_used * 2 / 1024);
874
875 max_mem = mem_used;
2556c98b
BN
876 }
877
61510437
DC
878 max_mem -= mem_used;
879 if (max_mem >= (1 << 30))
880 max_mem = 1 << 30;
881 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
882 (mp->m_inode_cluster_size >> 10));
883 if (libxfs_bhash_size < 512)
884 libxfs_bhash_size = 512;
885
2556c98b
BN
886 if (verbose)
887 do_log(_(" - block cache size set to %d entries\n"),
888 libxfs_bhash_size * HASH_CACHE_RATIO);
889
ba9ecd40 890 libxfs_bcache = cache_init(0, libxfs_bhash_size,
2556c98b
BN
891 &libxfs_bcache_operations);
892 }
893
2bd0ea18
NS
894 /*
895 * calculate what mkfs would do to this filesystem
896 */
897 calc_mkfs(mp);
898
899 /*
c1f7a46c 900 * initialize block alloc map
2bd0ea18 901 */
c1f7a46c
BN
902 init_bmaps(mp);
903 incore_ino_init(mp);
904 incore_ext_init(mp);
2d273771 905 rmaps_init(mp);
c1f7a46c
BN
906
907 /* initialize random globals now that we know the fs geometry */
908 inodes_per_block = mp->m_sb.sb_inopblock;
2bd0ea18
NS
909
910 if (parse_sb_version(&mp->m_sb)) {
911 do_warn(
507f4e33 912 _("Found unsupported filesystem features. Exiting now.\n"));
2bd0ea18
NS
913 return(1);
914 }
915
916 /* make sure the per-ag freespace maps are ok so we can mount the fs */
364a126c 917 phase2(mp, phase2_threads);
06fbdda9 918 timestamp(PHASE_END, 2, NULL);
2bd0ea18 919
2556c98b
BN
920 if (do_prefetch)
921 init_prefetch(mp);
922
8100dd79 923 phase3(mp, phase2_threads);
06fbdda9 924 timestamp(PHASE_END, 3, NULL);
2bd0ea18
NS
925
926 phase4(mp);
06fbdda9 927 timestamp(PHASE_END, 4, NULL);
2bd0ea18
NS
928
929 if (no_modify)
507f4e33 930 printf(_("No modify flag set, skipping phase 5\n"));
3b6ac903 931 else {
2bd0ea18 932 phase5(mp);
3b6ac903 933 }
06fbdda9 934 timestamp(PHASE_END, 5, NULL);
2bd0ea18 935
c1f7a46c
BN
936 /*
937 * Done with the block usage maps, toss them...
938 */
2d273771 939 rmaps_free(mp);
c1f7a46c
BN
940 free_bmaps(mp);
941
2bd0ea18
NS
942 if (!bad_ino_btree) {
943 phase6(mp);
06fbdda9 944 timestamp(PHASE_END, 6, NULL);
2bd0ea18 945
e161d4a8 946 phase7(mp, phase2_threads);
06fbdda9 947 timestamp(PHASE_END, 7, NULL);
2bd0ea18
NS
948 } else {
949 do_warn(
507f4e33 950_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
2bd0ea18
NS
951 }
952
0340d706 953 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
2bd0ea18
NS
954 if (!no_modify) {
955 do_warn(
507f4e33 956_("Warning: no quota inodes were found. Quotas disabled.\n"));
2bd0ea18
NS
957 } else {
958 do_warn(
507f4e33 959_("Warning: no quota inodes were found. Quotas would be disabled.\n"));
2bd0ea18
NS
960 }
961 } else if (lost_quotas) {
962 if (!no_modify) {
963 do_warn(
507f4e33 964_("Warning: quota inodes were cleared. Quotas disabled.\n"));
2bd0ea18
NS
965 } else {
966 do_warn(
507f4e33 967_("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
2bd0ea18
NS
968 }
969 } else {
970 if (lost_uquotino) {
971 if (!no_modify) {
972 do_warn(
507f4e33
NS
973_("Warning: user quota information was cleared.\n"
974 "User quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
975 } else {
976 do_warn(
507f4e33
NS
977_("Warning: user quota information would be cleared.\n"
978 "User quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
979 }
980 }
981
b36eef04 982 if (lost_gquotino) {
2bd0ea18
NS
983 if (!no_modify) {
984 do_warn(
507f4e33
NS
985_("Warning: group quota information was cleared.\n"
986 "Group quotas can not be enforced until limit information is recreated.\n"));
2bd0ea18
NS
987 } else {
988 do_warn(
507f4e33
NS
989_("Warning: group quota information would be cleared.\n"
990 "Group quotas could not be enforced until limit information was recreated.\n"));
9b27bdbb
NS
991 }
992 }
993
994 if (lost_pquotino) {
995 if (!no_modify) {
996 do_warn(
997_("Warning: project quota information was cleared.\n"
998 "Project quotas can not be enforced until limit information is recreated.\n"));
999 } else {
1000 do_warn(
1001_("Warning: project quota information would be cleared.\n"
1002 "Project quotas could not be enforced until limit information was recreated.\n"));
2bd0ea18
NS
1003 }
1004 }
1005 }
1006
2556c98b 1007 if (ag_stride && report_interval)
06fbdda9 1008 stop_progress_rpt();
9f38f08d 1009
2bd0ea18 1010 if (no_modify) {
1926558d
BF
1011 /*
1012 * Warn if the current LSN is problematic and the log requires a
1013 * reformat.
1014 */
1015 format_log_max_lsn(mp);
1016
2bd0ea18 1017 do_log(
507f4e33 1018 _("No modify flag set, skipping filesystem flush and exiting.\n"));
3b6ac903 1019 if (verbose)
06fbdda9 1020 summary_report();
2bd0ea18
NS
1021 if (fs_is_dirty)
1022 return(1);
1023
1024 return(0);
1025 }
1026
1027 /*
1028 * Clear the quota flags if they're on.
1029 */
1030 sbp = libxfs_getsb(mp, 0);
1031 if (!sbp)
507f4e33 1032 do_error(_("couldn't get superblock\n"));
2bd0ea18 1033
5e656dbb 1034 dsb = XFS_BUF_TO_SBP(sbp);
2bd0ea18 1035
342aef1e 1036 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
5e656dbb
BN
1037 do_warn(_("Note - quota info will be regenerated on next "
1038 "quota mount.\n"));
342aef1e 1039 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
2bd0ea18
NS
1040 }
1041
6bf4721d 1042 if (copied_sunit) {
2bd0ea18 1043 do_warn(
6bf4721d
ES
1044_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1045 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
5e656dbb 1046 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
dfc130f3 1047 }
2bd0ea18
NS
1048
1049 libxfs_writebuf(sbp, 0);
1050
2556c98b 1051 /*
1926558d
BF
1052 * Done. Flush all cached buffers and inodes first to ensure all
1053 * verifiers are run (where we discover the max metadata LSN), reformat
1054 * the log if necessary and unmount.
2556c98b
BN
1055 */
1056 libxfs_bcache_flush();
1926558d 1057 format_log_max_lsn(mp);
2bd0ea18 1058 libxfs_umount(mp);
1926558d 1059
d321ceac
NS
1060 if (x.rtdev)
1061 libxfs_device_close(x.rtdev);
1062 if (x.logdev && x.logdev != x.ddev)
1063 libxfs_device_close(x.logdev);
1064 libxfs_device_close(x.ddev);
2bd0ea18 1065
06fbdda9
MV
1066 if (verbose)
1067 summary_report();
507f4e33 1068 do_log(_("done\n"));
3ae81520
ES
1069
1070 if (dangerously && !no_modify)
1071 do_warn(
1072_("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1073
4c0a98ae
BN
1074 pftrace_done();
1075
0a223eb8
ES
1076 free(msgbuf);
1077
3b6ac903
MV
1078 return (0);
1079}