]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/xfs_repair.c
3338a7b8c97fec254cb2a5a3b2998273e1812f09
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "libxlog.h"
9 #include <sys/resource.h>
10 #include "xfs_multidisk.h"
11 #include "avl.h"
12 #include "libfrog/avl64.h"
13 #include "globals.h"
14 #include "versions.h"
15 #include "agheader.h"
16 #include "protos.h"
17 #include "incore.h"
18 #include "err_protos.h"
19 #include "prefetch.h"
20 #include "threads.h"
21 #include "progress.h"
22 #include "dinode.h"
23 #include "slab.h"
24 #include "rmap.h"
25 #include "libfrog/fsgeom.h"
26 #include "libfrog/platform.h"
27
28 /*
29 * option tables for getsubopt calls
30 */
31
/*
 * -o: user-supplied override options
 *
 * The enum values index o_opts[], which is the NULL-terminated token
 * table handed to getsubopt().
 */
enum o_opt_nums {
	ASSUME_XFS,
	IHASH_SIZE,
	BHASH_SIZE,
	AG_STRIDE,
	FORCE_GEO,
	PHASE2_THREADS,
	O_MAX_OPTS,		/* number of -o suboptions; slot of the terminator */
};

static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	[O_MAX_OPTS]		= NULL,
};
54
/*
 * -c: conversion options
 *
 * The enum values index c_opts[], which is the NULL-terminated token
 * table handed to getsubopt().
 */
enum c_opt_nums {
	CONVERT_LAZY_COUNT,
	C_MAX_OPTS,		/* number of -c suboptions; slot of the terminator */
};

static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	[C_MAX_OPTS]		= NULL,
};
67
68
/* Set once "-o bhash=" has been parsed; mutually exclusive with -m. */
static int	bhash_option_used = 0;

/* Value given to -m, in megabytes; zero when -m was not used. */
static long	max_mem_specified = 0;

/* Thread count handed to phases 2, 3 and 7; "-o phase2_threads=" overrides. */
static int	phase2_threads = 32;

/* Set by -e: exit non-zero when the run flagged the filesystem as dirty. */
static bool	report_corrected = false;
73
74 static void
75 usage(void)
76 {
77 do_warn(_(
78 "Usage: %s [options] device\n"
79 "\n"
80 "Options:\n"
81 " -f The device is a file\n"
82 " -L Force log zeroing. Do this as a last resort.\n"
83 " -l logdev Specifies the device where the external log resides.\n"
84 " -m maxmem Maximum amount of memory to be used in megabytes.\n"
85 " -n No modify mode, just checks the filesystem for damage.\n"
86 " (Cannot be used together with -e.)\n"
87 " -P Disables prefetching.\n"
88 " -r rtdev Specifies the device where the realtime section resides.\n"
89 " -v Verbose output.\n"
90 " -c subopts Change filesystem parameters - use xfs_admin.\n"
91 " -o subopts Override default behaviour, refer to man page.\n"
92 " -t interval Reporting interval in seconds.\n"
93 " -d Repair dangerously.\n"
94 " -e Exit with a non-zero code if any errors were repaired.\n"
95 " (Cannot be used together with -n.)\n"
96 " -V Reports version and exits.\n"), progname);
97 exit(1);
98 }
99
100 char *
101 err_string(int err_code)
102 {
103 static char *err_message[XR_BAD_ERR_CODE];
104 static int done;
105
106 if (!done) {
107 err_message[XR_OK] = _("no error");
108 err_message[XR_BAD_MAGIC] = _("bad magic number");
109 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
110 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
111 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
112 err_message[XR_BAD_INPROGRESS] =
113 _("filesystem mkfs-in-progress bit set");
114 err_message[XR_BAD_FS_SIZE_DATA] =
115 _("inconsistent filesystem geometry information");
116 err_message[XR_BAD_INO_SIZE_DATA] =
117 _("bad inode size or inconsistent with number of inodes/block"),
118 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
119 err_message[XR_AGF_GEO_MISMATCH] =
120 _("AGF geometry info conflicts with filesystem geometry");
121 err_message[XR_AGI_GEO_MISMATCH] =
122 _("AGI geometry info conflicts with filesystem geometry");
123 err_message[XR_SB_GEO_MISMATCH] =
124 _("AG superblock geometry info conflicts with filesystem geometry");
125 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
126 err_message[XR_BAD_RT_GEO_DATA] =
127 _("inconsistent filesystem geometry in realtime filesystem component");
128 err_message[XR_BAD_INO_MAX_PCT] =
129 _("maximum indicated percentage of inodes > 100%");
130 err_message[XR_BAD_INO_ALIGN] =
131 _("inconsistent inode alignment value");
132 err_message[XR_INSUFF_SEC_SB] =
133 _("not enough secondary superblocks with matching geometry");
134 err_message[XR_BAD_SB_UNIT] =
135 _("bad stripe unit in superblock");
136 err_message[XR_BAD_SB_WIDTH] =
137 _("bad stripe width in superblock");
138 err_message[XR_BAD_SVN] =
139 _("bad shared version number in superblock");
140 err_message[XR_BAD_CRC] =
141 _("bad CRC in superblock");
142 err_message[XR_BAD_DIR_SIZE_DATA] =
143 _("inconsistent directory geometry information");
144 err_message[XR_BAD_LOG_GEOMETRY] =
145 _("inconsistent log geometry information");
146 done = 1;
147 }
148
149 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
150 do_abort(_("bad error code - %d\n"), err_code);
151
152 return(err_message[err_code]);
153 }
154
/*
 * Complain that suboption tbl[idx] of option -<opt> was given a value
 * although it does not accept one, then print the usage text.
 */
static void
noval(char opt, char *tbl[], int idx)
{
	const char	*name = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, name);
	usage();
}
161
162 static void
163 respec(char opt, char *tbl[], int idx)
164 {
165 do_warn("-%c ", opt);
166 if (tbl)
167 do_warn("%s ", tbl[idx]);
168 do_warn(_("option respecified\n"));
169 usage();
170 }
171
/*
 * Complain about an unrecognised suboption of option -<opt>, then print
 * the usage text.
 *
 * s is the offending token as reported by getsubopt().  It may be NULL:
 * what getsubopt() stores in its value pointer for an unknown token is
 * not the same across C libraries, and passing NULL to a %s conversion
 * is undefined, so an empty string is printed in that case.
 */
static void
unknown(char opt, char *s)
{
	do_warn(_("unknown option -%c %s\n"), opt, s ? s : "");
	usage();
}
178
179 /*
180 * sets only the global argument flags and variables
181 */
182 static void
183 process_args(int argc, char **argv)
184 {
185 char *p;
186 int c;
187
188 log_spec = 0;
189 fs_is_dirty = 0;
190 verbose = 0;
191 no_modify = 0;
192 dangerously = 0;
193 isa_file = 0;
194 zap_log = 0;
195 dumpcore = 0;
196 full_ino_ex_data = 0;
197 force_geo = 0;
198 assume_xfs = 0;
199 copied_sunit = 0;
200 sb_inoalignmt = 0;
201 sb_unit = 0;
202 sb_width = 0;
203 ag_stride = 0;
204 thread_count = 1;
205 report_interval = PROG_RPT_DEFAULT;
206 report_corrected = false;
207
208 /*
209 * XXX have to add suboption processing here
210 * attributes, quotas, nlinks, aligned_inos, sb_fbits
211 */
212 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF) {
213 switch (c) {
214 case 'D':
215 dumpcore = 1;
216 break;
217 case 'o':
218 p = optarg;
219 while (*p != '\0') {
220 char *val;
221
222 switch (getsubopt(&p, o_opts, &val)) {
223 case ASSUME_XFS:
224 if (val)
225 noval('o', o_opts, ASSUME_XFS);
226 if (assume_xfs)
227 respec('o', o_opts, ASSUME_XFS);
228 assume_xfs = 1;
229 break;
230 case IHASH_SIZE:
231 do_warn(
232 _("-o ihash option has been removed and will be ignored\n"));
233 break;
234 case BHASH_SIZE:
235 if (max_mem_specified)
236 do_abort(
237 _("-o bhash option cannot be used with -m option\n"));
238 if (!val)
239 do_abort(
240 _("-o bhash requires a parameter\n"));
241 libxfs_bhash_size = (int)strtol(val, NULL, 0);
242 bhash_option_used = 1;
243 break;
244 case AG_STRIDE:
245 if (!val)
246 do_abort(
247 _("-o ag_stride requires a parameter\n"));
248 ag_stride = (int)strtol(val, NULL, 0);
249 break;
250 case FORCE_GEO:
251 if (val)
252 noval('o', o_opts, FORCE_GEO);
253 if (force_geo)
254 respec('o', o_opts, FORCE_GEO);
255 force_geo = 1;
256 break;
257 case PHASE2_THREADS:
258 if (!val)
259 do_abort(
260 _("-o phase2_threads requires a parameter\n"));
261 phase2_threads = (int)strtol(val, NULL, 0);
262 break;
263 default:
264 unknown('o', val);
265 break;
266 }
267 }
268 break;
269 case 'c':
270 p = optarg;
271 while (*p) {
272 char *val;
273
274 switch (getsubopt(&p, c_opts, &val)) {
275 case CONVERT_LAZY_COUNT:
276 if (!val)
277 do_abort(
278 _("-c lazycount requires a parameter\n"));
279 lazy_count = (int)strtol(val, NULL, 0);
280 convert_lazy_count = 1;
281 break;
282 default:
283 unknown('c', val);
284 break;
285 }
286 }
287 break;
288 case 'l':
289 log_name = optarg;
290 log_spec = 1;
291 break;
292 case 'r':
293 rt_name = optarg;
294 rt_spec = 1;
295 break;
296 case 'f':
297 isa_file = 1;
298 break;
299 case 'm':
300 if (bhash_option_used)
301 do_abort(_("-m option cannot be used with "
302 "-o bhash option\n"));
303 max_mem_specified = strtol(optarg, NULL, 0);
304 break;
305 case 'L':
306 zap_log = 1;
307 break;
308 case 'n':
309 no_modify = 1;
310 break;
311 case 'd':
312 dangerously = 1;
313 break;
314 case 'v':
315 verbose++;
316 break;
317 case 'V':
318 printf(_("%s version %s\n"), progname, VERSION);
319 exit(0);
320 case 'P':
321 do_prefetch = 0;
322 break;
323 case 't':
324 report_interval = (int)strtol(optarg, NULL, 0);
325 break;
326 case 'e':
327 report_corrected = true;
328 break;
329 case '?':
330 usage();
331 }
332 }
333
334 if (argc - optind != 1)
335 usage();
336
337 if ((fs_name = argv[optind]) == NULL)
338 usage();
339
340 if (report_corrected && no_modify)
341 usage();
342 }
343
344 void __attribute__((noreturn))
345 do_error(char const *msg, ...)
346 {
347 va_list args;
348
349 fprintf(stderr, _("\nfatal error -- "));
350
351 va_start(args, msg);
352 vfprintf(stderr, msg, args);
353 if (dumpcore)
354 abort();
355 exit(1);
356 }
357
358 /*
359 * like do_error, only the error is internal, no system
360 * error so no oserror processing
361 */
362 void __attribute__((noreturn))
363 do_abort(char const *msg, ...)
364 {
365 va_list args;
366
367 va_start(args, msg);
368 vfprintf(stderr, msg, args);
369 if (dumpcore)
370 abort();
371 exit(1);
372 }
373
374 void
375 do_warn(char const *msg, ...)
376 {
377 va_list args;
378
379 fs_is_dirty = 1;
380
381 va_start(args, msg);
382 vfprintf(stderr, msg, args);
383 va_end(args);
384 }
385
/*
 * Print a printf-style informational message to stderr.  Nothing is
 * added to the message and no global state is touched.
 */
void
do_log(char const *msg, ...)
{
	va_list	ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
397
398 static void
399 calc_mkfs(xfs_mount_t *mp)
400 {
401 xfs_agblock_t fino_bno;
402 int do_inoalign;
403
404 do_inoalign = M_IGEO(mp)->ialloc_align;
405
406 /*
407 * Pre-calculate the geometry of ag 0. We know what it looks like
408 * because we know what mkfs does: 2 allocation btree roots (by block
409 * and by size), the inode allocation btree root, the free inode
410 * allocation btree root (if enabled) and some number of blocks to
411 * prefill the agfl.
412 *
413 * Because the current shape of the btrees may differ from the current
414 * shape, we open code the mkfs freelist block count here. mkfs creates
415 * single level trees, so the calculation is pertty straight forward for
416 * the trees that use the AGFL.
417 */
418 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
419 bcntbt_root = bnobt_root + 1;
420 inobt_root = bnobt_root + 2;
421 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
422 if (xfs_sb_version_hasfinobt(&mp->m_sb))
423 fino_bno++;
424 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
425 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
426 fino_bno++;
427 }
428 if (xfs_sb_version_hasreflink(&mp->m_sb))
429 fino_bno++;
430
431 /*
432 * If the log is allocated in the first allocation group we need to
433 * add the number of blocks used by the log to the above calculation.
434 *
435 * This can happens with filesystems that only have a single
436 * allocation group, or very odd geometries created by old mkfs
437 * versions on very small filesystems.
438 */
439 if (mp->m_sb.sb_logstart &&
440 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
441
442 /*
443 * XXX(hch): verify that sb_logstart makes sense?
444 */
445 fino_bno += mp->m_sb.sb_logblocks;
446 }
447
448 /*
449 * ditto the location of the first inode chunks in the fs ('/')
450 */
451 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
452 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, roundup(fino_bno,
453 mp->m_sb.sb_unit));
454 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
455 mp->m_sb.sb_inoalignmt > 1) {
456 first_prealloc_ino = XFS_AGB_TO_AGINO(mp,
457 roundup(fino_bno,
458 mp->m_sb.sb_inoalignmt));
459 } else {
460 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno);
461 }
462
463 ASSERT(M_IGEO(mp)->ialloc_blks > 0);
464
465 if (M_IGEO(mp)->ialloc_blks > 1)
466 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
467 else
468 last_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno + 1);
469
470 /*
471 * now the first 3 inodes in the system
472 */
473 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
474 do_warn(
475 _("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
476 mp->m_sb.sb_rootino,
477 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
478 first_prealloc_ino);
479
480 if (!no_modify)
481 do_warn(
482 _("resetting superblock root inode pointer to %u\n"),
483 first_prealloc_ino);
484 else
485 do_warn(
486 _("would reset superblock root inode pointer to %u\n"),
487 first_prealloc_ino);
488
489 /*
490 * just set the value -- safe since the superblock
491 * doesn't get flushed out if no_modify is set
492 */
493 mp->m_sb.sb_rootino = first_prealloc_ino;
494 }
495
496 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
497 do_warn(
498 _("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
499 mp->m_sb.sb_rbmino,
500 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
501 first_prealloc_ino + 1);
502
503 if (!no_modify)
504 do_warn(
505 _("resetting superblock realtime bitmap ino pointer to %u\n"),
506 first_prealloc_ino + 1);
507 else
508 do_warn(
509 _("would reset superblock realtime bitmap ino pointer to %u\n"),
510 first_prealloc_ino + 1);
511
512 /*
513 * just set the value -- safe since the superblock
514 * doesn't get flushed out if no_modify is set
515 */
516 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
517 }
518
519 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
520 do_warn(
521 _("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
522 mp->m_sb.sb_rsumino,
523 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
524 first_prealloc_ino + 2);
525
526 if (!no_modify)
527 do_warn(
528 _("resetting superblock realtime summary ino pointer to %u\n"),
529 first_prealloc_ino + 2);
530 else
531 do_warn(
532 _("would reset superblock realtime summary ino pointer to %u\n"),
533 first_prealloc_ino + 2);
534
535 /*
536 * just set the value -- safe since the superblock
537 * doesn't get flushed out if no_modify is set
538 */
539 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
540 }
541
542 }
543
544 /*
545 * v5 superblock metadata track the LSN of last modification and thus require
546 * that the current LSN is always moving forward. The current LSN is reset if
547 * the log has been cleared, which puts the log behind parts of the filesystem
548 * on-disk and can disrupt log recovery.
549 *
550 * We have tracked the maximum LSN of every piece of metadata that has been read
551 * in via the read verifiers. Compare the max LSN with the log and if the log is
552 * behind, bump the cycle number and reformat the log.
553 */
554 static void
555 format_log_max_lsn(
556 struct xfs_mount *mp)
557 {
558 struct xlog *log = mp->m_log;
559 int max_cycle;
560 int max_block;
561 int new_cycle;
562 xfs_daddr_t logstart;
563 xfs_daddr_t logblocks;
564 int logversion;
565
566 if (!xfs_sb_version_hascrc(&mp->m_sb))
567 return;
568
569 /*
570 * If the log is ahead of the highest metadata LSN we've seen, we're
571 * safe and there's nothing to do.
572 */
573 max_cycle = CYCLE_LSN(libxfs_max_lsn);
574 max_block = BLOCK_LSN(libxfs_max_lsn);
575 if (max_cycle < log->l_curr_cycle ||
576 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
577 return;
578
579 /*
580 * Going to the next cycle should be sufficient but we bump by a few
581 * counts to help cover any metadata LSNs we could have missed.
582 */
583 new_cycle = max_cycle + 3;
584 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
585 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
586 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
587
588 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
589 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
590
591 if (no_modify) {
592 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
593 return;
594 }
595
596 do_warn(_("Format log to cycle %d.\n"), new_cycle);
597 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
598 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
599 XLOG_FMT, new_cycle, true);
600 }
601
602 /*
603 * mkfs increases the AG count for "multidisk" configurations, we want
604 * to target these for an increase in thread count. Hence check the superlock
605 * geometry information to determine if mkfs considered this a multidisk
606 * configuration.
607 */
608 static bool
609 is_multidisk_filesystem(
610 struct xfs_mount *mp)
611 {
612 struct xfs_sb *sbp = &mp->m_sb;
613
614 /* High agcount filesystems are always considered "multidisk" */
615 if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
616 return true;
617
618 /*
619 * If it doesn't have a sunit/swidth, mkfs didn't consider it a
620 * multi-disk array, so we don't either.
621 */
622 if (!sbp->sb_unit)
623 return false;
624
625 ASSERT(sbp->sb_width);
626 return true;
627 }
628
629 /*
630 * if the sector size of the filesystem we are trying to repair is
631 * smaller than that of the underlying filesystem (i.e. we are repairing
632 * an image), the we have to turn off direct IO because we cannot do IO
633 * smaller than the host filesystem's sector size.
634 */
635 static void
636 check_fs_vs_host_sectsize(
637 struct xfs_sb *sb)
638 {
639 int fd, ret;
640 long old_flags;
641 struct xfs_fsop_geom geom = { 0 };
642
643 fd = libxfs_device_to_fd(x.ddev);
644
645 ret = xfrog_geometry(fd, &geom);
646 if (ret) {
647 do_log(_("Cannot get host filesystem geometry.\n"
648 "Repair may fail if there is a sector size mismatch between\n"
649 "the image and the host filesystem.\n"));
650 geom.sectsize = BBSIZE;
651 }
652
653 if (sb->sb_sectsize < geom.sectsize) {
654 old_flags = fcntl(fd, F_GETFL, 0);
655 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
656 do_warn(_(
657 "Sector size on host filesystem larger than image sector size.\n"
658 "Cannot turn off direct IO, so exiting.\n"));
659 exit(1);
660 }
661 }
662 }
663
664 int
665 main(int argc, char **argv)
666 {
667 xfs_mount_t *temp_mp;
668 xfs_mount_t *mp;
669 xfs_dsb_t *dsb;
670 xfs_buf_t *sbp;
671 xfs_mount_t xfs_m;
672 struct xlog log = {0};
673 char *msgbuf;
674 struct xfs_sb psb;
675 int rval;
676 struct xfs_ino_geometry *igeo;
677
678 progname = basename(argv[0]);
679 setlocale(LC_ALL, "");
680 bindtextdomain(PACKAGE, LOCALEDIR);
681 textdomain(PACKAGE);
682 dinode_bmbt_translation_init();
683
684 temp_mp = &xfs_m;
685 setbuf(stdout, NULL);
686
687 process_args(argc, argv);
688 xfs_init(&x);
689
690 msgbuf = malloc(DURATION_BUF_SIZE);
691
692 timestamp(PHASE_START, 0, NULL);
693 timestamp(PHASE_END, 0, NULL);
694
695 /* -f forces this, but let's be nice and autodetect it, as well. */
696 if (!isa_file) {
697 int fd = libxfs_device_to_fd(x.ddev);
698 struct stat statbuf;
699
700 if (fstat(fd, &statbuf) < 0)
701 do_warn(_("%s: couldn't stat \"%s\"\n"),
702 progname, fs_name);
703 else if (S_ISREG(statbuf.st_mode))
704 isa_file = 1;
705 }
706
707 if (isa_file) {
708 /* Best effort attempt to validate fs vs host sector size */
709 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
710 if (rval == XR_OK)
711 check_fs_vs_host_sectsize(&psb);
712 }
713
714 /* do phase1 to make sure we have a superblock */
715 phase1(temp_mp);
716 timestamp(PHASE_END, 1, NULL);
717
718 if (no_modify && primary_sb_modified) {
719 do_warn(_("Primary superblock would have been modified.\n"
720 "Cannot proceed further in no_modify mode.\n"
721 "Exiting now.\n"));
722 exit(1);
723 }
724
725 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
726 if (rval != XR_OK) {
727 do_warn(_("Primary superblock bad after phase 1!\n"
728 "Exiting now.\n"));
729 exit(1);
730 }
731
732 /*
733 * Now that we have completely validated the superblock, geometry may
734 * have changed; re-check geometry vs the host filesystem geometry
735 */
736 if (isa_file)
737 check_fs_vs_host_sectsize(&psb);
738
739 /*
740 * Prepare the mount structure. Point the log reference to our local
741 * copy so it's available to the various phases. The log bits are
742 * initialized in phase 2.
743 */
744 memset(&xfs_m, 0, sizeof(xfs_mount_t));
745 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
746
747 if (!mp) {
748 fprintf(stderr,
749 _("%s: cannot repair this filesystem. Sorry.\n"),
750 progname);
751 exit(1);
752 }
753 mp->m_log = &log;
754 igeo = M_IGEO(mp);
755
756 /* Spit out function & line on these corruption macros */
757 if (verbose > 2)
758 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
759
760 /*
761 * set XFS-independent status vars from the mount/sb structure
762 */
763 glob_agcount = mp->m_sb.sb_agcount;
764
765 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
766 max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
767
768 /*
769 * Automatic striding for high agcount filesystems.
770 *
771 * More AGs indicates that the filesystem is either large or can handle
772 * more IO parallelism. Either way, we should try to process multiple
773 * AGs at a time in such a configuration to try to saturate the
774 * underlying storage and speed the repair process. Only do this if
775 * prefetching is enabled.
776 *
777 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
778 * to target these for an increase in thread count. Hence a stride value
779 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
780 * on such filesystems.
781 *
782 * Limit the maximum thread count based on the available CPU power that
783 * is available. If we use too many threads, we might run out of memory
784 * and CPU power before we run out of IO concurrency. We limit to 8
785 * threads/CPU as this is enough threads to saturate a CPU on fast
786 * devices, yet few enough that it will saturate but won't overload slow
787 * devices.
788 *
789 * Multidisk filesystems can handle more IO parallelism so we should try
790 * to process multiple AGs at a time in such a configuration to try to
791 * saturate the underlying storage and speed the repair process. Only do
792 * this if prefetching is enabled.
793 */
794 if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
795 /*
796 * For small agcount multidisk systems, just double the
797 * parallelism. For larger AG count filesystems (32 and above)
798 * use more parallelism, and linearly increase the parallelism
799 * with the number of AGs.
800 */
801 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
802 }
803
804 if (ag_stride) {
805 int max_threads = platform_nproc() * 8;
806
807 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
808 while (thread_count > max_threads) {
809 ag_stride *= 2;
810 thread_count = (glob_agcount + ag_stride - 1) /
811 ag_stride;
812 }
813 if (thread_count > 0)
814 thread_init();
815 else {
816 thread_count = 1;
817 ag_stride = 0;
818 }
819 }
820
821 if (ag_stride && report_interval) {
822 init_progress_rpt();
823 if (msgbuf) {
824 do_log(_(" - reporting progress in intervals of %s\n"),
825 duration(report_interval, msgbuf));
826 }
827 }
828
829 /*
830 * Adjust libxfs cache sizes based on system memory,
831 * filesystem size and inode count.
832 *
833 * We'll set the cache size based on 3/4s the memory minus
834 * space used by the inode AVL tree and block usage map.
835 *
836 * Inode AVL tree space is approximately 4 bytes per inode,
837 * block usage map is currently 1 byte for 2 blocks.
838 *
839 * We assume most blocks will be inode clusters.
840 *
841 * Calculations are done in kilobyte units.
842 */
843
844 if (!bhash_option_used || max_mem_specified) {
845 unsigned long mem_used;
846 unsigned long max_mem;
847 struct rlimit rlim;
848
849 libxfs_bcache_purge();
850 cache_destroy(libxfs_bcache);
851
852 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
853 (mp->m_sb.sb_dblocks >> (10 + 1)) +
854 50000; /* rough estimate of 50MB overhead */
855 max_mem = max_mem_specified ? max_mem_specified * 1024 :
856 platform_physmem() * 3 / 4;
857
858 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
859 rlim.rlim_cur != RLIM_INFINITY) {
860 rlim.rlim_cur = rlim.rlim_max;
861 setrlimit(RLIMIT_AS, &rlim);
862 /* use approximately 80% of rlimit to avoid overrun */
863 max_mem = min(max_mem, rlim.rlim_cur / 1280);
864 } else
865 max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
866
867 if (verbose > 1)
868 do_log(
869 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
870 max_mem, mp->m_sb.sb_icount,
871 mp->m_sb.sb_icount >> (10 - 2),
872 mp->m_sb.sb_dblocks,
873 mp->m_sb.sb_dblocks >> (10 + 1));
874
875 if (max_mem <= mem_used) {
876 if (max_mem_specified) {
877 do_abort(
878 _("Required memory for repair is greater that the maximum specified\n"
879 "with the -m option. Please increase it to at least %lu.\n"),
880 mem_used / 1024);
881 }
882 do_log(
883 _("Memory available for repair (%luMB) may not be sufficient.\n"
884 "At least %luMB is needed to repair this filesystem efficiently\n"
885 "If repair fails due to lack of memory, please\n"),
886 max_mem / 1024, mem_used / 1024);
887 if (do_prefetch)
888 do_log(
889 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
890 else
891 do_log(
892 _("increase system RAM and/or swap space to at least %luMB.\n"),
893 mem_used * 2 / 1024);
894
895 max_mem = mem_used;
896 }
897
898 max_mem -= mem_used;
899 if (max_mem >= (1 << 30))
900 max_mem = 1 << 30;
901 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
902 (igeo->inode_cluster_size >> 10));
903 if (libxfs_bhash_size < 512)
904 libxfs_bhash_size = 512;
905
906 if (verbose)
907 do_log(_(" - block cache size set to %d entries\n"),
908 libxfs_bhash_size * HASH_CACHE_RATIO);
909
910 libxfs_bcache = cache_init(0, libxfs_bhash_size,
911 &libxfs_bcache_operations);
912 }
913
914 /*
915 * calculate what mkfs would do to this filesystem
916 */
917 calc_mkfs(mp);
918
919 /*
920 * initialize block alloc map
921 */
922 init_bmaps(mp);
923 incore_ino_init(mp);
924 incore_ext_init(mp);
925 rmaps_init(mp);
926
927 /* initialize random globals now that we know the fs geometry */
928 inodes_per_block = mp->m_sb.sb_inopblock;
929
930 if (parse_sb_version(&mp->m_sb)) {
931 do_warn(
932 _("Found unsupported filesystem features. Exiting now.\n"));
933 return(1);
934 }
935
936 /* make sure the per-ag freespace maps are ok so we can mount the fs */
937 phase2(mp, phase2_threads);
938 timestamp(PHASE_END, 2, NULL);
939
940 if (do_prefetch)
941 init_prefetch(mp);
942
943 phase3(mp, phase2_threads);
944 timestamp(PHASE_END, 3, NULL);
945
946 phase4(mp);
947 timestamp(PHASE_END, 4, NULL);
948
949 if (no_modify)
950 printf(_("No modify flag set, skipping phase 5\n"));
951 else {
952 phase5(mp);
953 }
954 timestamp(PHASE_END, 5, NULL);
955
956 /*
957 * Done with the block usage maps, toss them...
958 */
959 rmaps_free(mp);
960 free_bmaps(mp);
961
962 if (!bad_ino_btree) {
963 phase6(mp);
964 timestamp(PHASE_END, 6, NULL);
965
966 phase7(mp, phase2_threads);
967 timestamp(PHASE_END, 7, NULL);
968 } else {
969 do_warn(
970 _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
971 }
972
973 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
974 if (!no_modify) {
975 do_warn(
976 _("Warning: no quota inodes were found. Quotas disabled.\n"));
977 } else {
978 do_warn(
979 _("Warning: no quota inodes were found. Quotas would be disabled.\n"));
980 }
981 } else if (lost_quotas) {
982 if (!no_modify) {
983 do_warn(
984 _("Warning: quota inodes were cleared. Quotas disabled.\n"));
985 } else {
986 do_warn(
987 _("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
988 }
989 } else {
990 if (lost_uquotino) {
991 if (!no_modify) {
992 do_warn(
993 _("Warning: user quota information was cleared.\n"
994 "User quotas can not be enforced until limit information is recreated.\n"));
995 } else {
996 do_warn(
997 _("Warning: user quota information would be cleared.\n"
998 "User quotas could not be enforced until limit information was recreated.\n"));
999 }
1000 }
1001
1002 if (lost_gquotino) {
1003 if (!no_modify) {
1004 do_warn(
1005 _("Warning: group quota information was cleared.\n"
1006 "Group quotas can not be enforced until limit information is recreated.\n"));
1007 } else {
1008 do_warn(
1009 _("Warning: group quota information would be cleared.\n"
1010 "Group quotas could not be enforced until limit information was recreated.\n"));
1011 }
1012 }
1013
1014 if (lost_pquotino) {
1015 if (!no_modify) {
1016 do_warn(
1017 _("Warning: project quota information was cleared.\n"
1018 "Project quotas can not be enforced until limit information is recreated.\n"));
1019 } else {
1020 do_warn(
1021 _("Warning: project quota information would be cleared.\n"
1022 "Project quotas could not be enforced until limit information was recreated.\n"));
1023 }
1024 }
1025 }
1026
1027 if (ag_stride && report_interval)
1028 stop_progress_rpt();
1029
1030 if (no_modify) {
1031 /*
1032 * Warn if the current LSN is problematic and the log requires a
1033 * reformat.
1034 */
1035 format_log_max_lsn(mp);
1036
1037 do_log(
1038 _("No modify flag set, skipping filesystem flush and exiting.\n"));
1039 if (verbose)
1040 summary_report();
1041 if (fs_is_dirty)
1042 return(1);
1043
1044 return(0);
1045 }
1046
1047 /*
1048 * Clear the quota flags if they're on.
1049 */
1050 sbp = libxfs_getsb(mp);
1051 if (!sbp)
1052 do_error(_("couldn't get superblock\n"));
1053
1054 dsb = XFS_BUF_TO_SBP(sbp);
1055
1056 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
1057 do_warn(_("Note - quota info will be regenerated on next "
1058 "quota mount.\n"));
1059 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
1060 }
1061
1062 if (copied_sunit) {
1063 do_warn(
1064 _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1065 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
1066 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
1067 }
1068
1069 libxfs_writebuf(sbp, 0);
1070
1071 /*
1072 * Done. Flush all cached buffers and inodes first to ensure all
1073 * verifiers are run (where we discover the max metadata LSN), reformat
1074 * the log if necessary and unmount.
1075 */
1076 libxfs_bcache_flush();
1077 format_log_max_lsn(mp);
1078 libxfs_umount(mp);
1079
1080 if (x.rtdev)
1081 libxfs_device_close(x.rtdev);
1082 if (x.logdev && x.logdev != x.ddev)
1083 libxfs_device_close(x.logdev);
1084 libxfs_device_close(x.ddev);
1085 libxfs_destroy();
1086
1087 if (verbose)
1088 summary_report();
1089 do_log(_("done\n"));
1090
1091 if (dangerously && !no_modify)
1092 do_warn(
1093 _("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1094
1095 pftrace_done();
1096
1097 free(msgbuf);
1098
1099 if (fs_is_dirty && report_corrected)
1100 return (4);
1101 return (0);
1102 }