]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/xfs_repair.c
repair: parallelise uncertin inode processing in phase 3
[thirdparty/xfsprogs-dev.git] / repair / xfs_repair.c
1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "libxfs.h"
20 #include "libxlog.h"
21 #include <sys/resource.h>
22 #include "avl.h"
23 #include "avl64.h"
24 #include "globals.h"
25 #include "versions.h"
26 #include "agheader.h"
27 #include "protos.h"
28 #include "incore.h"
29 #include "err_protos.h"
30 #include "prefetch.h"
31 #include "threads.h"
32 #include "progress.h"
33 #include "dinode.h"
34
/*
 * Round x down to a multiple of y.  Relies on integer division
 * truncating, so both operands are expected to be integers.
 */
#define rounddown(x, y)		(((x) / (y)) * (y))

/* 64 KiB (65536 bytes); not referenced in the visible part of this file. */
#define XR_MAX_SECT_SIZE	(64 * 1024)
38
39 /*
40 * option tables for getsubopt calls
41 */
42
/*
 * -o: user-supplied override options
 *
 * Each index macro must match the position of its keyword in o_opts[],
 * because process_args() switches on the index returned by getsubopt().
 */
#define ASSUME_XFS	0
#define PRE_65_BETA	1
#define IHASH_SIZE	2
#define BHASH_SIZE	3
#define AG_STRIDE	4
#define FORCE_GEO	5
#define PHASE2_THREADS	6

static char *o_opts[] = {
	[ASSUME_XFS]		= "assume_xfs",
	[PRE_65_BETA]		= "fs_is_pre_65_beta",
	[IHASH_SIZE]		= "ihash",
	[BHASH_SIZE]		= "bhash",
	[AG_STRIDE]		= "ag_stride",
	[FORCE_GEO]		= "force_geometry",
	[PHASE2_THREADS]	= "phase2_threads",
	NULL			/* end-of-table marker */
};
63
/*
 * -c: conversion options
 *
 * As with the -o table, the index macro must match the keyword's position
 * in c_opts[].
 */
#define CONVERT_LAZY_COUNT	0

static char *c_opts[] = {
	[CONVERT_LAZY_COUNT]	= "lazycount",
	NULL			/* end-of-table marker */
};
72
73
/* Set to 1 once "-o bhash=" has been parsed; -m refuses to run after it. */
static int	bhash_option_used;

/* Value given to -m, in megabytes; stays 0 unless -m sets it. */
static long	max_mem_specified;

/*
 * Thread count passed to phase2(), phase3() and phase7(); can be
 * overridden with "-o phase2_threads=".
 */
static int	phase2_threads = 32;
77
78 static void
79 usage(void)
80 {
81 do_warn(_(
82 "Usage: %s [options] device\n"
83 "\n"
84 "Options:\n"
85 " -f The device is a file\n"
86 " -L Force log zeroing. Do this as a last resort.\n"
87 " -l logdev Specifies the device where the external log resides.\n"
88 " -m maxmem Maximum amount of memory to be used in megabytes.\n"
89 " -n No modify mode, just checks the filesystem for damage.\n"
90 " -P Disables prefetching.\n"
91 " -r rtdev Specifies the device where the realtime section resides.\n"
92 " -v Verbose output.\n"
93 " -c subopts Change filesystem parameters - use xfs_admin.\n"
94 " -o subopts Override default behaviour, refer to man page.\n"
95 " -t interval Reporting interval in seconds.\n"
96 " -d Repair dangerously.\n"
97 " -V Reports version and exits.\n"), progname);
98 exit(1);
99 }
100
101 char *
102 err_string(int err_code)
103 {
104 static char *err_message[XR_BAD_ERR_CODE];
105 static int done;
106
107 if (!done) {
108 err_message[XR_OK] = _("no error");
109 err_message[XR_BAD_MAGIC] = _("bad magic number");
110 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
111 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
112 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
113 err_message[XR_BAD_INPROGRESS] =
114 _("filesystem mkfs-in-progress bit set");
115 err_message[XR_BAD_FS_SIZE_DATA] =
116 _("inconsistent filesystem geometry information");
117 err_message[XR_BAD_INO_SIZE_DATA] =
118 _("bad inode size or inconsistent with number of inodes/block"),
119 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
120 err_message[XR_AGF_GEO_MISMATCH] =
121 _("AGF geometry info conflicts with filesystem geometry");
122 err_message[XR_AGI_GEO_MISMATCH] =
123 _("AGI geometry info conflicts with filesystem geometry");
124 err_message[XR_SB_GEO_MISMATCH] =
125 _("AG superblock geometry info conflicts with filesystem geometry");
126 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
127 err_message[XR_BAD_RT_GEO_DATA] =
128 _("inconsistent filesystem geometry in realtime filesystem component");
129 err_message[XR_BAD_INO_MAX_PCT] =
130 _("maximum indicated percentage of inodes > 100%");
131 err_message[XR_BAD_INO_ALIGN] =
132 _("inconsistent inode alignment value");
133 err_message[XR_INSUFF_SEC_SB] =
134 _("not enough secondary superblocks with matching geometry");
135 err_message[XR_BAD_SB_UNIT] =
136 _("bad stripe unit in superblock");
137 err_message[XR_BAD_SB_WIDTH] =
138 _("bad stripe width in superblock");
139 err_message[XR_BAD_SVN] =
140 _("bad shared version number in superblock");
141 err_message[XR_BAD_CRC] =
142 _("bad CRC in superblock");
143 done = 1;
144 }
145
146 if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
147 do_abort(_("bad error code - %d\n"), err_code);
148
149 return(err_message[err_code]);
150 }
151
/*
 * Complain that suboption tbl[idx] of option -opt was given a value
 * although it takes none, then print the usage text.  usage() exits,
 * so this does not return.
 */
static void
noval(char opt, char *tbl[], int idx)
{
	char	*subopt_name = tbl[idx];

	do_warn(_("-%c %s option cannot have a value\n"), opt, subopt_name);
	usage();
}
158
/*
 * Complain that option -opt (and, when a suboption table is supplied,
 * suboption tbl[idx]) was specified more than once, then print the usage
 * text.  usage() exits, so this does not return.
 */
static void
respec(char opt, char *tbl[], int idx)
{
	do_warn("-%c ", opt);

	/* tbl may be NULL for options that have no suboptions */
	if (tbl != NULL) {
		do_warn("%s ", tbl[idx]);
	}

	do_warn(_("option respecified\n"));
	usage();
}
168
/*
 * Report the unrecognised suboption text s of option -opt and print the
 * usage text.  usage() exits, so this does not return.
 */
static void
unknown(char opt, char *s)
{
	const char	*fmt = _("unknown option -%c %s\n");

	do_warn(fmt, opt, s);
	usage();
}
175
176 /*
177 * sets only the global argument flags and variables
178 */
179 static void
180 process_args(int argc, char **argv)
181 {
182 char *p;
183 int c;
184
185 log_spec = 0;
186 fs_is_dirty = 0;
187 verbose = 0;
188 no_modify = 0;
189 dangerously = 0;
190 isa_file = 0;
191 zap_log = 0;
192 dumpcore = 0;
193 full_ino_ex_data = 0;
194 delete_attr_ok = 1;
195 force_geo = 0;
196 assume_xfs = 0;
197 copied_sunit = 0;
198 sb_inoalignmt = 0;
199 sb_unit = 0;
200 sb_width = 0;
201 fs_attributes_allowed = 1;
202 fs_attributes2_allowed = 1;
203 fs_quotas_allowed = 1;
204 fs_aligned_inodes_allowed = 1;
205 fs_sb_feature_bits_allowed = 1;
206 fs_has_extflgbit_allowed = 1;
207 pre_65_beta = 0;
208 fs_shared_allowed = 1;
209 ag_stride = 0;
210 thread_count = 1;
211 report_interval = PROG_RPT_DEFAULT;
212
213 /*
214 * XXX have to add suboption processing here
215 * attributes, quotas, nlinks, aligned_inos, sb_fbits
216 */
217 while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPt:")) != EOF) {
218 switch (c) {
219 case 'D':
220 dumpcore = 1;
221 break;
222 case 'o':
223 p = optarg;
224 while (*p != '\0') {
225 char *val;
226
227 switch (getsubopt(&p, (constpp)o_opts, &val)) {
228 case ASSUME_XFS:
229 if (val)
230 noval('o', o_opts, ASSUME_XFS);
231 if (assume_xfs)
232 respec('o', o_opts, ASSUME_XFS);
233 assume_xfs = 1;
234 break;
235 case PRE_65_BETA:
236 if (val)
237 noval('o', o_opts, PRE_65_BETA);
238 if (pre_65_beta)
239 respec('o', o_opts,
240 PRE_65_BETA);
241 pre_65_beta = 1;
242 break;
243 case IHASH_SIZE:
244 do_warn(
245 _("-o ihash option has been removed and will be ignored\n"));
246 break;
247 case BHASH_SIZE:
248 if (max_mem_specified)
249 do_abort(
250 _("-o bhash option cannot be used with -m option\n"));
251 libxfs_bhash_size = (int)strtol(val, NULL, 0);
252 bhash_option_used = 1;
253 break;
254 case AG_STRIDE:
255 ag_stride = (int)strtol(val, NULL, 0);
256 break;
257 case FORCE_GEO:
258 if (val)
259 noval('o', o_opts, FORCE_GEO);
260 if (force_geo)
261 respec('o', o_opts, FORCE_GEO);
262 force_geo = 1;
263 break;
264 case PHASE2_THREADS:
265 phase2_threads = (int)strtol(val, NULL, 0);
266 break;
267 default:
268 unknown('o', val);
269 break;
270 }
271 }
272 break;
273 case 'c':
274 p = optarg;
275 while (*p) {
276 char *val;
277
278 switch (getsubopt(&p, (constpp)c_opts, &val)) {
279 case CONVERT_LAZY_COUNT:
280 lazy_count = (int)strtol(val, NULL, 0);
281 convert_lazy_count = 1;
282 break;
283 default:
284 unknown('c', val);
285 break;
286 }
287 }
288 break;
289 case 'l':
290 log_name = optarg;
291 log_spec = 1;
292 break;
293 case 'r':
294 rt_name = optarg;
295 rt_spec = 1;
296 break;
297 case 'f':
298 isa_file = 1;
299 break;
300 case 'm':
301 if (bhash_option_used)
302 do_abort(_("-m option cannot be used with "
303 "-o bhash option\n"));
304 max_mem_specified = strtol(optarg, NULL, 0);
305 break;
306 case 'L':
307 zap_log = 1;
308 break;
309 case 'n':
310 no_modify = 1;
311 break;
312 case 'd':
313 dangerously = 1;
314 break;
315 case 'v':
316 verbose++;
317 break;
318 case 'V':
319 printf(_("%s version %s\n"), progname, VERSION);
320 exit(0);
321 case 'P':
322 do_prefetch = 0;
323 break;
324 case 't':
325 report_interval = (int)strtol(optarg, NULL, 0);
326 break;
327 case '?':
328 usage();
329 }
330 }
331
332 if (argc - optind != 1)
333 usage();
334
335 if ((fs_name = argv[optind]) == NULL)
336 usage();
337 }
338
339 void __attribute__((noreturn))
340 do_error(char const *msg, ...)
341 {
342 va_list args;
343
344 fprintf(stderr, _("\nfatal error -- "));
345
346 va_start(args, msg);
347 vfprintf(stderr, msg, args);
348 if (dumpcore)
349 abort();
350 exit(1);
351 }
352
353 /*
354 * like do_error, only the error is internal, no system
355 * error so no oserror processing
356 */
357 void __attribute__((noreturn))
358 do_abort(char const *msg, ...)
359 {
360 va_list args;
361
362 va_start(args, msg);
363 vfprintf(stderr, msg, args);
364 if (dumpcore)
365 abort();
366 exit(1);
367 }
368
369 void
370 do_warn(char const *msg, ...)
371 {
372 va_list args;
373
374 fs_is_dirty = 1;
375
376 va_start(args, msg);
377 vfprintf(stderr, msg, args);
378 va_end(args);
379 }
380
/*
 * Print a printf-style informational message on stderr.  Unlike
 * do_warn() this touches no global state, and unlike do_error() it adds
 * no prefix to the message.
 */
void
do_log(char const *msg, ...)
{
	va_list		ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);
}
392
393 static void
394 calc_mkfs(xfs_mount_t *mp)
395 {
396 xfs_agblock_t fino_bno;
397 int do_inoalign;
398
399 do_inoalign = mp->m_sinoalign;
400
401 /*
402 * Pre-calculate the geometry of ag 0. We know what it looks like
403 * because we know what mkfs does: 2 allocation btree roots (by block
404 * and by size), the inode allocation btree root, the free inode
405 * allocation btree root (if enabled) and some number of blocks to
406 * prefill the agfl.
407 *
408 * Because the current shape of the btrees may differ from the current
409 * shape, we open code the mkfs freelist block count here. mkfs creates
410 * single level trees, so the calculation is pertty straight forward for
411 * the two trees that use the AGFL.
412 */
413 bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
414 bcntbt_root = bnobt_root + 1;
415 inobt_root = bnobt_root + 2;
416 fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
417 if (xfs_sb_version_hasfinobt(&mp->m_sb))
418 fino_bno++;
419
420 /*
421 * If the log is allocated in the first allocation group we need to
422 * add the number of blocks used by the log to the above calculation.
423 *
424 * This can happens with filesystems that only have a single
425 * allocation group, or very odd geometries created by old mkfs
426 * versions on very small filesystems.
427 */
428 if (mp->m_sb.sb_logstart &&
429 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
430
431 /*
432 * XXX(hch): verify that sb_logstart makes sense?
433 */
434 fino_bno += mp->m_sb.sb_logblocks;
435 }
436
437 /*
438 * ditto the location of the first inode chunks in the fs ('/')
439 */
440 if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign) {
441 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
442 mp->m_sb.sb_unit), 0);
443 } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
444 mp->m_sb.sb_inoalignmt > 1) {
445 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
446 roundup(fino_bno,
447 mp->m_sb.sb_inoalignmt),
448 0);
449 } else {
450 first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
451 }
452
453 ASSERT(mp->m_ialloc_blks > 0);
454
455 if (mp->m_ialloc_blks > 1)
456 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
457 else
458 last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
459
460 /*
461 * now the first 3 inodes in the system
462 */
463 if (mp->m_sb.sb_rootino != first_prealloc_ino) {
464 do_warn(
465 _("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
466 mp->m_sb.sb_rootino,
467 (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
468 first_prealloc_ino);
469
470 if (!no_modify)
471 do_warn(
472 _("resetting superblock root inode pointer to %u\n"),
473 first_prealloc_ino);
474 else
475 do_warn(
476 _("would reset superblock root inode pointer to %u\n"),
477 first_prealloc_ino);
478
479 /*
480 * just set the value -- safe since the superblock
481 * doesn't get flushed out if no_modify is set
482 */
483 mp->m_sb.sb_rootino = first_prealloc_ino;
484 }
485
486 if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) {
487 do_warn(
488 _("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
489 mp->m_sb.sb_rbmino,
490 (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
491 first_prealloc_ino + 1);
492
493 if (!no_modify)
494 do_warn(
495 _("resetting superblock realtime bitmap ino pointer to %u\n"),
496 first_prealloc_ino + 1);
497 else
498 do_warn(
499 _("would reset superblock realtime bitmap ino pointer to %u\n"),
500 first_prealloc_ino + 1);
501
502 /*
503 * just set the value -- safe since the superblock
504 * doesn't get flushed out if no_modify is set
505 */
506 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
507 }
508
509 if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) {
510 do_warn(
511 _("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
512 mp->m_sb.sb_rsumino,
513 (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
514 first_prealloc_ino + 2);
515
516 if (!no_modify)
517 do_warn(
518 _("resetting superblock realtime summary ino pointer to %u\n"),
519 first_prealloc_ino + 2);
520 else
521 do_warn(
522 _("would reset superblock realtime summary ino pointer to %u\n"),
523 first_prealloc_ino + 2);
524
525 /*
526 * just set the value -- safe since the superblock
527 * doesn't get flushed out if no_modify is set
528 */
529 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
530 }
531
532 }
533
534 /*
535 * v5 superblock metadata track the LSN of last modification and thus require
536 * that the current LSN is always moving forward. The current LSN is reset if
537 * the log has been cleared, which puts the log behind parts of the filesystem
538 * on-disk and can disrupt log recovery.
539 *
540 * We have tracked the maximum LSN of every piece of metadata that has been read
541 * in via the read verifiers. Compare the max LSN with the log and if the log is
542 * behind, bump the cycle number and reformat the log.
543 */
544 static void
545 format_log_max_lsn(
546 struct xfs_mount *mp)
547 {
548 struct xlog *log = mp->m_log;
549 int max_cycle;
550 int max_block;
551 int new_cycle;
552 xfs_daddr_t logstart;
553 xfs_daddr_t logblocks;
554 int logversion;
555
556 if (!xfs_sb_version_hascrc(&mp->m_sb))
557 return;
558
559 /*
560 * If the log is ahead of the highest metadata LSN we've seen, we're
561 * safe and there's nothing to do.
562 */
563 max_cycle = CYCLE_LSN(libxfs_max_lsn);
564 max_block = BLOCK_LSN(libxfs_max_lsn);
565 if (max_cycle < log->l_curr_cycle ||
566 (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
567 return;
568
569 /*
570 * Going to the next cycle should be sufficient but we bump by a few
571 * counts to help cover any metadata LSNs we could have missed.
572 */
573 new_cycle = max_cycle + 3;
574 logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
575 logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
576 logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
577
578 do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
579 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
580
581 if (no_modify) {
582 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
583 return;
584 }
585
586 do_warn(_("Format log to cycle %d.\n"), new_cycle);
587 libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
588 &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
589 XLOG_FMT, new_cycle, true);
590 }
591
592 int
593 main(int argc, char **argv)
594 {
595 xfs_mount_t *temp_mp;
596 xfs_mount_t *mp;
597 xfs_dsb_t *dsb;
598 xfs_buf_t *sbp;
599 xfs_mount_t xfs_m;
600 struct xlog log = {0};
601 char *msgbuf;
602 struct xfs_sb psb;
603 int rval;
604
605 progname = basename(argv[0]);
606 setlocale(LC_ALL, "");
607 bindtextdomain(PACKAGE, LOCALEDIR);
608 textdomain(PACKAGE);
609 dinode_bmbt_translation_init();
610
611 temp_mp = &xfs_m;
612 setbuf(stdout, NULL);
613
614 process_args(argc, argv);
615 xfs_init(&x);
616
617 msgbuf = malloc(DURATION_BUF_SIZE);
618
619 timestamp(PHASE_START, 0, NULL);
620 timestamp(PHASE_END, 0, NULL);
621
622 /* do phase1 to make sure we have a superblock */
623 phase1(temp_mp);
624 timestamp(PHASE_END, 1, NULL);
625
626 if (no_modify && primary_sb_modified) {
627 do_warn(_("Primary superblock would have been modified.\n"
628 "Cannot proceed further in no_modify mode.\n"
629 "Exiting now.\n"));
630 exit(1);
631 }
632
633 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
634 if (rval != XR_OK) {
635 do_warn(_("Primary superblock bad after phase 1!\n"
636 "Exiting now.\n"));
637 exit(1);
638 }
639
640 /* -f forces this, but let's be nice and autodetect it, as well. */
641 if (!isa_file) {
642 int fd = libxfs_device_to_fd(x.ddev);
643 struct stat64 statbuf;
644
645 if (fstat64(fd, &statbuf) < 0)
646 do_warn(_("%s: couldn't stat \"%s\"\n"),
647 progname, fs_name);
648 else if (S_ISREG(statbuf.st_mode))
649 isa_file = 1;
650 }
651
652 /*
653 * if the sector size of the filesystem we are trying to repair is
654 * smaller than that of the underlying filesystem (i.e. we are repairing
655 * an image), the we have to turn off direct IO because we cannot do IO
656 * smaller than the host filesystem's sector size.
657 */
658 if (isa_file) {
659 int fd = libxfs_device_to_fd(x.ddev);
660 struct xfs_fsop_geom_v1 geom = { 0 };
661
662 if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
663 do_warn(_("Cannot get host filesystem geometry.\n"
664 "Repair may fail if there is a sector size mismatch between\n"
665 "the image and the host filesystem.\n"));
666 geom.sectsize = BBSIZE;
667 }
668
669 if (psb.sb_sectsize < geom.sectsize) {
670 long old_flags;
671
672 old_flags = fcntl(fd, F_GETFL, 0);
673 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
674 do_warn(_(
675 "Sector size on host filesystem larger than image sector size.\n"
676 "Cannot turn off direct IO, so exiting.\n"));
677 exit(1);
678 }
679 }
680 }
681
682 /*
683 * Prepare the mount structure. Point the log reference to our local
684 * copy so it's available to the various phases. The log bits are
685 * initialized in phase 2.
686 */
687 memset(&xfs_m, 0, sizeof(xfs_mount_t));
688 mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
689
690 if (!mp) {
691 fprintf(stderr,
692 _("%s: cannot repair this filesystem. Sorry.\n"),
693 progname);
694 exit(1);
695 }
696 mp->m_log = &log;
697
698 /* Spit out function & line on these corruption macros */
699 if (verbose > 2)
700 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
701
702 /*
703 * set XFS-independent status vars from the mount/sb structure
704 */
705 glob_agcount = mp->m_sb.sb_agcount;
706
707 chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
708 max_symlink_blocks = libxfs_symlink_blocks(mp, MAXPATHLEN);
709 inodes_per_cluster = MAX(mp->m_sb.sb_inopblock,
710 mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
711
712 /*
713 * Automatic striding for high agcount filesystems.
714 *
715 * More AGs indicates that the filesystem is either large or can handle
716 * more IO parallelism. Either way, we should try to process multiple
717 * AGs at a time in such a configuration to try to saturate the
718 * underlying storage and speed the repair process. Only do this if
719 * prefetching is enabled.
720 *
721 * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
722 * to target these for an increase in thread count. Hence a stride value
723 * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
724 * on such filesystems.
725 *
726 * Limit the maximum thread count based on the available CPU power that
727 * is available. If we use too many threads, we might run out of memory
728 * and CPU power before we run out of IO concurrency. We limit to 8
729 * threads/CPU as this is enough threads to saturate a CPU on fast
730 * devices, yet few enough that it will saturate but won't overload slow
731 * devices.
732 */
733 if (!ag_stride && glob_agcount >= 16 && do_prefetch)
734 ag_stride = 15;
735
736 if (ag_stride) {
737 int max_threads = platform_nproc() * 8;
738
739 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
740 while (thread_count > max_threads) {
741 ag_stride *= 2;
742 thread_count = (glob_agcount + ag_stride - 1) /
743 ag_stride;
744 }
745 if (thread_count > 0)
746 thread_init();
747 else {
748 thread_count = 1;
749 ag_stride = 0;
750 }
751 }
752
753 if (ag_stride && report_interval) {
754 init_progress_rpt();
755 if (msgbuf) {
756 do_log(_(" - reporting progress in intervals of %s\n"),
757 duration(report_interval, msgbuf));
758 }
759 }
760
761 /*
762 * Adjust libxfs cache sizes based on system memory,
763 * filesystem size and inode count.
764 *
765 * We'll set the cache size based on 3/4s the memory minus
766 * space used by the inode AVL tree and block usage map.
767 *
768 * Inode AVL tree space is approximately 4 bytes per inode,
769 * block usage map is currently 1 byte for 2 blocks.
770 *
771 * We assume most blocks will be inode clusters.
772 *
773 * Calculations are done in kilobyte units.
774 */
775
776 if (!bhash_option_used || max_mem_specified) {
777 unsigned long mem_used;
778 unsigned long max_mem;
779 struct rlimit rlim;
780
781 libxfs_bcache_purge();
782 cache_destroy(libxfs_bcache);
783
784 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
785 (mp->m_sb.sb_dblocks >> (10 + 1)) +
786 50000; /* rough estimate of 50MB overhead */
787 max_mem = max_mem_specified ? max_mem_specified * 1024 :
788 libxfs_physmem() * 3 / 4;
789
790 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
791 rlim.rlim_cur != RLIM_INFINITY) {
792 rlim.rlim_cur = rlim.rlim_max;
793 setrlimit(RLIMIT_AS, &rlim);
794 /* use approximately 80% of rlimit to avoid overrun */
795 max_mem = MIN(max_mem, rlim.rlim_cur / 1280);
796 } else
797 max_mem = MIN(max_mem, (LONG_MAX >> 10) + 1);
798
799 if (verbose > 1)
800 do_log(
801 _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
802 max_mem, mp->m_sb.sb_icount,
803 mp->m_sb.sb_icount >> (10 - 2),
804 mp->m_sb.sb_dblocks,
805 mp->m_sb.sb_dblocks >> (10 + 1));
806
807 if (max_mem <= mem_used) {
808 if (max_mem_specified) {
809 do_abort(
810 _("Required memory for repair is greater that the maximum specified\n"
811 "with the -m option. Please increase it to at least %lu.\n"),
812 mem_used / 1024);
813 }
814 do_warn(
815 _("Memory available for repair (%luMB) may not be sufficient.\n"
816 "At least %luMB is needed to repair this filesystem efficiently\n"
817 "If repair fails due to lack of memory, please\n"),
818 max_mem / 1024, mem_used / 1024);
819 if (do_prefetch)
820 do_warn(
821 _("turn prefetching off (-P) to reduce the memory footprint.\n"));
822 else
823 do_warn(
824 _("increase system RAM and/or swap space to at least %luMB.\n"),
825 mem_used * 2 / 1024);
826
827 max_mem = mem_used;
828 }
829
830 max_mem -= mem_used;
831 if (max_mem >= (1 << 30))
832 max_mem = 1 << 30;
833 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
834 (mp->m_inode_cluster_size >> 10));
835 if (libxfs_bhash_size < 512)
836 libxfs_bhash_size = 512;
837
838 if (verbose)
839 do_log(_(" - block cache size set to %d entries\n"),
840 libxfs_bhash_size * HASH_CACHE_RATIO);
841
842 libxfs_bcache = cache_init(0, libxfs_bhash_size,
843 &libxfs_bcache_operations);
844 }
845
846 /*
847 * calculate what mkfs would do to this filesystem
848 */
849 calc_mkfs(mp);
850
851 /*
852 * initialize block alloc map
853 */
854 init_bmaps(mp);
855 incore_ino_init(mp);
856 incore_ext_init(mp);
857
858 /* initialize random globals now that we know the fs geometry */
859 inodes_per_block = mp->m_sb.sb_inopblock;
860
861 if (parse_sb_version(&mp->m_sb)) {
862 do_warn(
863 _("Found unsupported filesystem features. Exiting now.\n"));
864 return(1);
865 }
866
867 /* make sure the per-ag freespace maps are ok so we can mount the fs */
868 phase2(mp, phase2_threads);
869 timestamp(PHASE_END, 2, NULL);
870
871 if (do_prefetch)
872 init_prefetch(mp);
873
874 phase3(mp, phase2_threads);
875 timestamp(PHASE_END, 3, NULL);
876
877 phase4(mp);
878 timestamp(PHASE_END, 4, NULL);
879
880 if (no_modify)
881 printf(_("No modify flag set, skipping phase 5\n"));
882 else {
883 phase5(mp);
884 }
885 timestamp(PHASE_END, 5, NULL);
886
887 /*
888 * Done with the block usage maps, toss them...
889 */
890 free_bmaps(mp);
891
892 if (!bad_ino_btree) {
893 phase6(mp);
894 timestamp(PHASE_END, 6, NULL);
895
896 phase7(mp, phase2_threads);
897 timestamp(PHASE_END, 7, NULL);
898 } else {
899 do_warn(
900 _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
901 }
902
903 if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
904 if (!no_modify) {
905 do_warn(
906 _("Warning: no quota inodes were found. Quotas disabled.\n"));
907 } else {
908 do_warn(
909 _("Warning: no quota inodes were found. Quotas would be disabled.\n"));
910 }
911 } else if (lost_quotas) {
912 if (!no_modify) {
913 do_warn(
914 _("Warning: quota inodes were cleared. Quotas disabled.\n"));
915 } else {
916 do_warn(
917 _("Warning: quota inodes would be cleared. Quotas would be disabled.\n"));
918 }
919 } else {
920 if (lost_uquotino) {
921 if (!no_modify) {
922 do_warn(
923 _("Warning: user quota information was cleared.\n"
924 "User quotas can not be enforced until limit information is recreated.\n"));
925 } else {
926 do_warn(
927 _("Warning: user quota information would be cleared.\n"
928 "User quotas could not be enforced until limit information was recreated.\n"));
929 }
930 }
931
932 if (lost_gquotino) {
933 if (!no_modify) {
934 do_warn(
935 _("Warning: group quota information was cleared.\n"
936 "Group quotas can not be enforced until limit information is recreated.\n"));
937 } else {
938 do_warn(
939 _("Warning: group quota information would be cleared.\n"
940 "Group quotas could not be enforced until limit information was recreated.\n"));
941 }
942 }
943
944 if (lost_pquotino) {
945 if (!no_modify) {
946 do_warn(
947 _("Warning: project quota information was cleared.\n"
948 "Project quotas can not be enforced until limit information is recreated.\n"));
949 } else {
950 do_warn(
951 _("Warning: project quota information would be cleared.\n"
952 "Project quotas could not be enforced until limit information was recreated.\n"));
953 }
954 }
955 }
956
957 if (ag_stride && report_interval)
958 stop_progress_rpt();
959
960 if (no_modify) {
961 /*
962 * Warn if the current LSN is problematic and the log requires a
963 * reformat.
964 */
965 format_log_max_lsn(mp);
966
967 do_log(
968 _("No modify flag set, skipping filesystem flush and exiting.\n"));
969 if (verbose)
970 summary_report();
971 if (fs_is_dirty)
972 return(1);
973
974 return(0);
975 }
976
977 /*
978 * Clear the quota flags if they're on.
979 */
980 sbp = libxfs_getsb(mp, 0);
981 if (!sbp)
982 do_error(_("couldn't get superblock\n"));
983
984 dsb = XFS_BUF_TO_SBP(sbp);
985
986 if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
987 do_warn(_("Note - quota info will be regenerated on next "
988 "quota mount.\n"));
989 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
990 }
991
992 if (copied_sunit) {
993 do_warn(
994 _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
995 "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
996 be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
997 }
998
999 libxfs_writebuf(sbp, 0);
1000
1001 /*
1002 * Done. Flush all cached buffers and inodes first to ensure all
1003 * verifiers are run (where we discover the max metadata LSN), reformat
1004 * the log if necessary and unmount.
1005 */
1006 libxfs_bcache_flush();
1007 format_log_max_lsn(mp);
1008 libxfs_umount(mp);
1009
1010 if (x.rtdev)
1011 libxfs_device_close(x.rtdev);
1012 if (x.logdev && x.logdev != x.ddev)
1013 libxfs_device_close(x.logdev);
1014 libxfs_device_close(x.ddev);
1015
1016 if (verbose)
1017 summary_report();
1018 do_log(_("done\n"));
1019
1020 if (dangerously && !no_modify)
1021 do_warn(
1022 _("Repair of readonly mount complete. Immediate reboot encouraged.\n"));
1023
1024 pftrace_done();
1025
1026 free(msgbuf);
1027
1028 return (0);
1029 }