repair/xfs_repair.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "libxfs.h"
   8 #include "libxlog.h"
   9 #include <sys/resource.h>
  10 #include "xfs_multidisk.h"
  11 #include "avl.h"
  12 #include "avl64.h"
  13 #include "globals.h"
  14 #include "versions.h"
  15 #include "agheader.h"
  16 #include "protos.h"
  17 #include "incore.h"
  18 #include "err_protos.h"
  19 #include "prefetch.h"
  20 #include "threads.h"
  21 #include "progress.h"
  22 #include "dinode.h"
  23 #include "slab.h"
  24 #include "rmap.h"
  25
  26 /*
  27  * option tables for getsubopt calls
  28  */
  29
  30 /*
  31  * -o: user-supplied override options
  32  */
  33 enum o_opt_nums {
  34         ASSUME_XFS = 0,
  35         IHASH_SIZE,
  36         BHASH_SIZE,
  37         AG_STRIDE,
  38         FORCE_GEO,
  39         PHASE2_THREADS,
  40         O_MAX_OPTS,
  41 };
  42
  43 static char *o_opts[] = {
  44         [ASSUME_XFS]            = "assume_xfs",
  45         [IHASH_SIZE]            = "ihash",
  46         [BHASH_SIZE]            = "bhash",
  47         [AG_STRIDE]             = "ag_stride",
  48         [FORCE_GEO]             = "force_geometry",
  49         [PHASE2_THREADS]        = "phase2_threads",
  50         [O_MAX_OPTS]            = NULL,
  51 };
  52
  53 /*
  54  * -c: conversion options
  55  */
  56 enum c_opt_nums {
  57         CONVERT_LAZY_COUNT = 0,
  58         C_MAX_OPTS,
  59 };
  60
  61 static char *c_opts[] = {
  62         [CONVERT_LAZY_COUNT]    = "lazycount",
  63         [C_MAX_OPTS]            = NULL,
  64 };
  65
  66
  67 static int      bhash_option_used;
  68 static long     max_mem_specified;      /* in megabytes */
  69 static int      phase2_threads = 32;
  70 static bool     report_corrected;
  71
  72 static void
  73 usage(void)
  74 {
  75         do_warn(_(
  76 "Usage: %s [options] device\n"
  77 "\n"
  78 "Options:\n"
  79 "  -f           The device is a file\n"
  80 "  -L           Force log zeroing. Do this as a last resort.\n"
  81 "  -l logdev    Specifies the device where the external log resides.\n"
  82 "  -m maxmem    Maximum amount of memory to be used in megabytes.\n"
  83 "  -n           No modify mode, just checks the filesystem for damage.\n"
  84 "               (Cannot be used together with -e.)\n"
  85 "  -P           Disables prefetching.\n"
  86 "  -r rtdev     Specifies the device where the realtime section resides.\n"
  87 "  -v           Verbose output.\n"
  88 "  -c subopts   Change filesystem parameters - use xfs_admin.\n"
  89 "  -o subopts   Override default behaviour, refer to man page.\n"
  90 "  -t interval  Reporting interval in seconds.\n"
  91 "  -d           Repair dangerously.\n"
  92 "  -e           Exit with a non-zero code if any errors were repaired.\n"
  93 "               (Cannot be used together with -n.)\n"
  94 "  -V           Reports version and exits.\n"), progname);
  95         exit(1);
  96 }
  97
  98 char *
  99 err_string(int err_code)
 100 {
 101         static char *err_message[XR_BAD_ERR_CODE];
 102         static int done;
 103
 104         if (!done) {
 105                 err_message[XR_OK] = _("no error");
 106                 err_message[XR_BAD_MAGIC] = _("bad magic number");
 107                 err_message[XR_BAD_BLOCKSIZE] = _("bad blocksize field");
 108                 err_message[XR_BAD_BLOCKLOG] = _("bad blocksize log field");
 109                 err_message[XR_BAD_VERSION] = _("bad or unsupported version");
 110                 err_message[XR_BAD_INPROGRESS] =
 111                         _("filesystem mkfs-in-progress bit set");
 112                 err_message[XR_BAD_FS_SIZE_DATA] =
 113                         _("inconsistent filesystem geometry information");
 114                 err_message[XR_BAD_INO_SIZE_DATA] =
 115         _("bad inode size or inconsistent with number of inodes/block"),
 116                 err_message[XR_BAD_SECT_SIZE_DATA] = _("bad sector size");
 117                 err_message[XR_AGF_GEO_MISMATCH] =
 118         _("AGF geometry info conflicts with filesystem geometry");
 119                 err_message[XR_AGI_GEO_MISMATCH] =
 120         _("AGI geometry info conflicts with filesystem geometry");
 121                 err_message[XR_SB_GEO_MISMATCH] =
 122         _("AG superblock geometry info conflicts with filesystem geometry");
 123                 err_message[XR_EOF] = _("attempted to perform I/O beyond EOF");
 124                 err_message[XR_BAD_RT_GEO_DATA] =
 125         _("inconsistent filesystem geometry in realtime filesystem component");
 126                 err_message[XR_BAD_INO_MAX_PCT] =
 127                         _("maximum indicated percentage of inodes > 100%");
 128                 err_message[XR_BAD_INO_ALIGN] =
 129                         _("inconsistent inode alignment value");
 130                 err_message[XR_INSUFF_SEC_SB] =
 131         _("not enough secondary superblocks with matching geometry");
 132                 err_message[XR_BAD_SB_UNIT] =
 133                         _("bad stripe unit in superblock");
 134                 err_message[XR_BAD_SB_WIDTH] =
 135                         _("bad stripe width in superblock");
 136                 err_message[XR_BAD_SVN] =
 137                         _("bad shared version number in superblock");
 138                 err_message[XR_BAD_CRC] =
 139                         _("bad CRC in superblock");
 140                 err_message[XR_BAD_DIR_SIZE_DATA] =
 141                         _("inconsistent directory geometry information");
 142                 err_message[XR_BAD_LOG_GEOMETRY] =
 143                         _("inconsistent log geometry information");
 144                 done = 1;
 145         }
 146
 147         if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)
 148                 do_abort(_("bad error code - %d\n"), err_code);
 149
 150         return(err_message[err_code]);
 151 }
 152
 153 static void
 154 noval(char opt, char *tbl[], int idx)
 155 {
 156         do_warn(_("-%c %s option cannot have a value\n"), opt, tbl[idx]);
 157         usage();
 158 }
 159
 160 static void
 161 respec(char opt, char *tbl[], int idx)
 162 {
 163         do_warn("-%c ", opt);
 164         if (tbl)
 165                 do_warn("%s ", tbl[idx]);
 166         do_warn(_("option respecified\n"));
 167         usage();
 168 }
 169
 170 static void
 171 unknown(char opt, char *s)
 172 {
 173         do_warn(_("unknown option -%c %s\n"), opt, s);
 174         usage();
 175 }
 176
 177 /*
 178  * sets only the global argument flags and variables
 179  */
 180 static void
 181 process_args(int argc, char **argv)
 182 {
 183         char *p;
 184         int c;
 185
 186         log_spec = 0;
 187         fs_is_dirty = 0;
 188         verbose = 0;
 189         no_modify = 0;
 190         dangerously = 0;
 191         isa_file = 0;
 192         zap_log = 0;
 193         dumpcore = 0;
 194         full_ino_ex_data = 0;
 195         force_geo = 0;
 196         assume_xfs = 0;
 197         copied_sunit = 0;
 198         sb_inoalignmt = 0;
 199         sb_unit = 0;
 200         sb_width = 0;
 201         ag_stride = 0;
 202         thread_count = 1;
 203         report_interval = PROG_RPT_DEFAULT;
 204         report_corrected = false;
 205
 206         /*
 207          * XXX have to add suboption processing here
 208          * attributes, quotas, nlinks, aligned_inos, sb_fbits
 209          */
 210         while ((c = getopt(argc, argv, "c:o:fl:m:r:LnDvVdPet:")) != EOF)  {
 211                 switch (c) {
 212                 case 'D':
 213                         dumpcore = 1;
 214                         break;
 215                 case 'o':
 216                         p = optarg;
 217                         while (*p != '\0')  {
 218                                 char *val;
 219
 220                                 switch (getsubopt(&p, o_opts, &val))  {
 221                                 case ASSUME_XFS:
 222                                         if (val)
 223                                                 noval('o', o_opts, ASSUME_XFS);
 224                                         if (assume_xfs)
 225                                                 respec('o', o_opts, ASSUME_XFS);
 226                                         assume_xfs = 1;
 227                                         break;
 228                                 case IHASH_SIZE:
 229                                         do_warn(
 230                 _("-o ihash option has been removed and will be ignored\n"));
 231                                         break;
 232                                 case BHASH_SIZE:
 233                                         if (max_mem_specified)
 234                                                 do_abort(
 235                 _("-o bhash option cannot be used with -m option\n"));
 236                                         if (!val)
 237                                                 do_abort(
 238                 _("-o bhash requires a parameter\n"));
 239                                         libxfs_bhash_size = (int)strtol(val, NULL, 0);
 240                                         bhash_option_used = 1;
 241                                         break;
 242                                 case AG_STRIDE:
 243                                         if (!val)
 244                                                 do_abort(
 245                 _("-o ag_stride requires a parameter\n"));
 246                                         ag_stride = (int)strtol(val, NULL, 0);
 247                                         break;
 248                                 case FORCE_GEO:
 249                                         if (val)
 250                                                 noval('o', o_opts, FORCE_GEO);
 251                                         if (force_geo)
 252                                                 respec('o', o_opts, FORCE_GEO);
 253                                         force_geo = 1;
 254                                         break;
 255                                 case PHASE2_THREADS:
 256                                         if (!val)
 257                                                 do_abort(
 258                 _("-o phase2_threads requires a parameter\n"));
 259                                         phase2_threads = (int)strtol(val, NULL, 0);
 260                                         break;
 261                                 default:
 262                                         unknown('o', val);
 263                                         break;
 264                                 }
 265                         }
 266                         break;
 267                 case 'c':
 268                         p = optarg;
 269                         while (*p) {
 270                                 char *val;
 271
 272                                 switch (getsubopt(&p, c_opts, &val)) {
 273                                 case CONVERT_LAZY_COUNT:
 274                                         if (!val)
 275                                                 do_abort(
 276                 _("-c lazycount requires a parameter\n"));
 277                                         lazy_count = (int)strtol(val, NULL, 0);
 278                                         convert_lazy_count = 1;
 279                                         break;
 280                                 default:
 281                                         unknown('c', val);
 282                                         break;
 283                                 }
 284                         }
 285                         break;
 286                 case 'l':
 287                         log_name = optarg;
 288                         log_spec = 1;
 289                         break;
 290                 case 'r':
 291                         rt_name = optarg;
 292                         rt_spec = 1;
 293                         break;
 294                 case 'f':
 295                         isa_file = 1;
 296                         break;
 297                 case 'm':
 298                         if (bhash_option_used)
 299                                 do_abort(_("-m option cannot be used with "
 300                                                 "-o bhash option\n"));
 301                         max_mem_specified = strtol(optarg, NULL, 0);
 302                         break;
 303                 case 'L':
 304                         zap_log = 1;
 305                         break;
 306                 case 'n':
 307                         no_modify = 1;
 308                         break;
 309                 case 'd':
 310                         dangerously = 1;
 311                         break;
 312                 case 'v':
 313                         verbose++;
 314                         break;
 315                 case 'V':
 316                         printf(_("%s version %s\n"), progname, VERSION);
 317                         exit(0);
 318                 case 'P':
 319                         do_prefetch = 0;
 320                         break;
 321                 case 't':
 322                         report_interval = (int)strtol(optarg, NULL, 0);
 323                         break;
 324                 case 'e':
 325                         report_corrected = true;
 326                         break;
 327                 case '?':
 328                         usage();
 329                 }
 330         }
 331
 332         if (argc - optind != 1)
 333                 usage();
 334
 335         if ((fs_name = argv[optind]) == NULL)
 336                 usage();
 337
 338         if (report_corrected && no_modify)
 339                 usage();
 340 }
 341
 342 void __attribute__((noreturn))
 343 do_error(char const *msg, ...)
 344 {
 345         va_list args;
 346
 347         fprintf(stderr, _("\nfatal error -- "));
 348
 349         va_start(args, msg);
 350         vfprintf(stderr, msg, args);
 351         if (dumpcore)
 352                 abort();
 353         exit(1);
 354 }
 355
 356 /*
 357  * like do_error, only the error is internal, no system
 358  * error so no oserror processing
 359  */
 360 void __attribute__((noreturn))
 361 do_abort(char const *msg, ...)
 362 {
 363         va_list args;
 364
 365         va_start(args, msg);
 366         vfprintf(stderr, msg, args);
 367         if (dumpcore)
 368                 abort();
 369         exit(1);
 370 }
 371
 372 void
 373 do_warn(char const *msg, ...)
 374 {
 375         va_list args;
 376
 377         fs_is_dirty = 1;
 378
 379         va_start(args, msg);
 380         vfprintf(stderr, msg, args);
 381         va_end(args);
 382 }
 383
 384 /* no formatting */
 385
 386 void
 387 do_log(char const *msg, ...)
 388 {
 389         va_list args;
 390
 391         va_start(args, msg);
 392         vfprintf(stderr, msg, args);
 393         va_end(args);
 394 }
 395
 396 static void
 397 calc_mkfs(xfs_mount_t *mp)
 398 {
 399         xfs_agblock_t   fino_bno;
 400         int             do_inoalign;
 401
 402         do_inoalign = mp->m_sinoalign;
 403
 404         /*
 405          * Pre-calculate the geometry of ag 0. We know what it looks like
 406          * because we know what mkfs does: 2 allocation btree roots (by block
 407          * and by size), the inode allocation btree root, the free inode
 408          * allocation btree root (if enabled) and some number of blocks to
 409          * prefill the agfl.
 410          *
 411          * Because the current shape of the btrees may differ from the current
 412          * shape, we open code the mkfs freelist block count here. mkfs creates
 413          * single level trees, so the calculation is pertty straight forward for
 414          * the trees that use the AGFL.
 415          */
 416         bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
 417         bcntbt_root = bnobt_root + 1;
 418         inobt_root = bnobt_root + 2;
 419         fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
 420         if (xfs_sb_version_hasfinobt(&mp->m_sb))
 421                 fino_bno++;
 422         if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
 423                 fino_bno += min(2, mp->m_rmap_maxlevels); /* agfl blocks */
 424                 fino_bno++;
 425         }
 426         if (xfs_sb_version_hasreflink(&mp->m_sb))
 427                 fino_bno++;
 428
 429         /*
 430          * If the log is allocated in the first allocation group we need to
 431          * add the number of blocks used by the log to the above calculation.
 432          *
 433          * This can happens with filesystems that only have a single
 434          * allocation group, or very odd geometries created by old mkfs
 435          * versions on very small filesystems.
 436          */
 437         if (mp->m_sb.sb_logstart &&
 438             XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) {
 439
 440                 /*
 441                  * XXX(hch): verify that sb_logstart makes sense?
 442                  */
 443                  fino_bno += mp->m_sb.sb_logblocks;
 444         }
 445
 446         /*
 447          * ditto the location of the first inode chunks in the fs ('/')
 448          */
 449         if (xfs_sb_version_hasdalign(&mp->m_sb) && do_inoalign)  {
 450                 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, roundup(fino_bno,
 451                                         mp->m_sb.sb_unit));
 452         } else if (xfs_sb_version_hasalign(&mp->m_sb) &&
 453                                         mp->m_sb.sb_inoalignmt > 1)  {
 454                 first_prealloc_ino = XFS_AGB_TO_AGINO(mp,
 455                                         roundup(fino_bno,
 456                                                 mp->m_sb.sb_inoalignmt));
 457         } else  {
 458                 first_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno);
 459         }
 460
 461         ASSERT(mp->m_ialloc_blks > 0);
 462
 463         if (mp->m_ialloc_blks > 1)
 464                 last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
 465         else
 466                 last_prealloc_ino = XFS_AGB_TO_AGINO(mp, fino_bno + 1);
 467
 468         /*
 469          * now the first 3 inodes in the system
 470          */
 471         if (mp->m_sb.sb_rootino != first_prealloc_ino)  {
 472                 do_warn(
 473 _("sb root inode value %" PRIu64 " %sinconsistent with calculated value %u\n"),
 474                         mp->m_sb.sb_rootino,
 475                         (mp->m_sb.sb_rootino == NULLFSINO ? "(NULLFSINO) ":""),
 476                         first_prealloc_ino);
 477
 478                 if (!no_modify)
 479                         do_warn(
 480                 _("resetting superblock root inode pointer to %u\n"),
 481                                 first_prealloc_ino);
 482                 else
 483                         do_warn(
 484                 _("would reset superblock root inode pointer to %u\n"),
 485                                 first_prealloc_ino);
 486
 487                 /*
 488                  * just set the value -- safe since the superblock
 489                  * doesn't get flushed out if no_modify is set
 490                  */
 491                 mp->m_sb.sb_rootino = first_prealloc_ino;
 492         }
 493
 494         if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1)  {
 495                 do_warn(
 496 _("sb realtime bitmap inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
 497                         mp->m_sb.sb_rbmino,
 498                         (mp->m_sb.sb_rbmino == NULLFSINO ? "(NULLFSINO) ":""),
 499                         first_prealloc_ino + 1);
 500
 501                 if (!no_modify)
 502                         do_warn(
 503                 _("resetting superblock realtime bitmap ino pointer to %u\n"),
 504                                 first_prealloc_ino + 1);
 505                 else
 506                         do_warn(
 507                 _("would reset superblock realtime bitmap ino pointer to %u\n"),
 508                                 first_prealloc_ino + 1);
 509
 510                 /*
 511                  * just set the value -- safe since the superblock
 512                  * doesn't get flushed out if no_modify is set
 513                  */
 514                 mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
 515         }
 516
 517         if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2)  {
 518                 do_warn(
 519 _("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u\n"),
 520                         mp->m_sb.sb_rsumino,
 521                         (mp->m_sb.sb_rsumino == NULLFSINO ? "(NULLFSINO) ":""),
 522                         first_prealloc_ino + 2);
 523
 524                 if (!no_modify)
 525                         do_warn(
 526                 _("resetting superblock realtime summary ino pointer to %u\n"),
 527                                 first_prealloc_ino + 2);
 528                 else
 529                         do_warn(
 530                 _("would reset superblock realtime summary ino pointer to %u\n"),
 531                                 first_prealloc_ino + 2);
 532
 533                 /*
 534                  * just set the value -- safe since the superblock
 535                  * doesn't get flushed out if no_modify is set
 536                  */
 537                 mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
 538         }
 539
 540 }
 541
 542 /*
 543  * v5 superblock metadata track the LSN of last modification and thus require
 544  * that the current LSN is always moving forward. The current LSN is reset if
 545  * the log has been cleared, which puts the log behind parts of the filesystem
 546  * on-disk and can disrupt log recovery.
 547  *
 548  * We have tracked the maximum LSN of every piece of metadata that has been read
 549  * in via the read verifiers. Compare the max LSN with the log and if the log is
 550  * behind, bump the cycle number and reformat the log.
 551  */
 552 static void
 553 format_log_max_lsn(
 554         struct xfs_mount        *mp)
 555 {
 556         struct xlog             *log = mp->m_log;
 557         int                     max_cycle;
 558         int                     max_block;
 559         int                     new_cycle;
 560         xfs_daddr_t             logstart;
 561         xfs_daddr_t             logblocks;
 562         int                     logversion;
 563
 564         if (!xfs_sb_version_hascrc(&mp->m_sb))
 565                 return;
 566
 567         /*
 568          * If the log is ahead of the highest metadata LSN we've seen, we're
 569          * safe and there's nothing to do.
 570          */
 571         max_cycle = CYCLE_LSN(libxfs_max_lsn);
 572         max_block = BLOCK_LSN(libxfs_max_lsn);
 573         if (max_cycle < log->l_curr_cycle ||
 574             (max_cycle == log->l_curr_cycle && max_block < log->l_curr_block))
 575                 return;
 576
 577         /*
 578          * Going to the next cycle should be sufficient but we bump by a few
 579          * counts to help cover any metadata LSNs we could have missed.
 580          */
 581         new_cycle = max_cycle + 3;
 582         logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart);
 583         logblocks = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
 584         logversion = xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1;
 585
 586         do_warn(_("Maximum metadata LSN (%d:%d) is ahead of log (%d:%d).\n"),
 587                 max_cycle, max_block, log->l_curr_cycle, log->l_curr_block);
 588
 589         if (no_modify) {
 590                 do_warn(_("Would format log to cycle %d.\n"), new_cycle);
 591                 return;
 592         }
 593
 594         do_warn(_("Format log to cycle %d.\n"), new_cycle);
 595         libxfs_log_clear(log->l_dev, NULL, logstart, logblocks,
 596                          &mp->m_sb.sb_uuid, logversion, mp->m_sb.sb_logsunit,
 597                          XLOG_FMT, new_cycle, true);
 598 }
 599
 600 /*
 601  * mkfs increases the AG count for "multidisk" configurations, we want
 602  * to target these for an increase in thread count. Hence check the superlock
 603  * geometry information to determine if mkfs considered this a multidisk
 604  * configuration.
 605  */
 606 static bool
 607 is_multidisk_filesystem(
 608         struct xfs_mount        *mp)
 609 {
 610         struct xfs_sb           *sbp = &mp->m_sb;
 611
 612         /* High agcount filesystems are always considered "multidisk" */
 613         if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT)
 614                 return true;
 615
 616         /*
 617          * If it doesn't have a sunit/swidth, mkfs didn't consider it a
 618          * multi-disk array, so we don't either.
 619          */
 620         if (!sbp->sb_unit)
 621                 return false;
 622
 623         ASSERT(sbp->sb_width);
 624         return true;
 625 }
 626
 627 /*
 628  * if the sector size of the filesystem we are trying to repair is
 629  * smaller than that of the underlying filesystem (i.e. we are repairing
 630  * an image), the we have to turn off direct IO because we cannot do IO
 631  * smaller than the host filesystem's sector size.
 632  */
 633 static void
 634 check_fs_vs_host_sectsize(
 635         struct xfs_sb   *sb)
 636 {
 637         int     fd;
 638         long    old_flags;
 639         struct xfs_fsop_geom_v1 geom = { 0 };
 640
 641         fd = libxfs_device_to_fd(x.ddev);
 642
 643         if (ioctl(fd, XFS_IOC_FSGEOMETRY_V1, &geom) < 0) {
 644                 do_log(_("Cannot get host filesystem geometry.\n"
 645         "Repair may fail if there is a sector size mismatch between\n"
 646         "the image and the host filesystem.\n"));
 647                 geom.sectsize = BBSIZE;
 648         }
 649
 650         if (sb->sb_sectsize < geom.sectsize) {
 651                 old_flags = fcntl(fd, F_GETFL, 0);
 652                 if (fcntl(fd, F_SETFL, old_flags & ~O_DIRECT) < 0) {
 653                         do_warn(_(
 654         "Sector size on host filesystem larger than image sector size.\n"
 655         "Cannot turn off direct IO, so exiting.\n"));
 656                         exit(1);
 657                 }
 658         }
 659 }
 660
 661 int
 662 main(int argc, char **argv)
 663 {
 664         xfs_mount_t     *temp_mp;
 665         xfs_mount_t     *mp;
 666         xfs_dsb_t       *dsb;
 667         xfs_buf_t       *sbp;
 668         xfs_mount_t     xfs_m;
 669         struct xlog     log = {0};
 670         char            *msgbuf;
 671         struct xfs_sb   psb;
 672         int             rval;
 673
 674         progname = basename(argv[0]);
 675         setlocale(LC_ALL, "");
 676         bindtextdomain(PACKAGE, LOCALEDIR);
 677         textdomain(PACKAGE);
 678         dinode_bmbt_translation_init();
 679
 680         temp_mp = &xfs_m;
 681         setbuf(stdout, NULL);
 682
 683         process_args(argc, argv);
 684         xfs_init(&x);
 685
 686         msgbuf = malloc(DURATION_BUF_SIZE);
 687
 688         timestamp(PHASE_START, 0, NULL);
 689         timestamp(PHASE_END, 0, NULL);
 690
 691         /* -f forces this, but let's be nice and autodetect it, as well. */
 692         if (!isa_file) {
 693                 int             fd = libxfs_device_to_fd(x.ddev);
 694                 struct stat     statbuf;
 695
 696                 if (fstat(fd, &statbuf) < 0)
 697                         do_warn(_("%s: couldn't stat \"%s\"\n"),
 698                                 progname, fs_name);
 699                 else if (S_ISREG(statbuf.st_mode))
 700                         isa_file = 1;
 701         }
 702
 703         if (isa_file) {
 704                 /* Best effort attempt to validate fs vs host sector size */
 705                 rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
 706                 if (rval == XR_OK)
 707                         check_fs_vs_host_sectsize(&psb);
 708         }
 709
 710         /* do phase1 to make sure we have a superblock */
 711         phase1(temp_mp);
 712         timestamp(PHASE_END, 1, NULL);
 713
 714         if (no_modify && primary_sb_modified)  {
 715                 do_warn(_("Primary superblock would have been modified.\n"
 716                           "Cannot proceed further in no_modify mode.\n"
 717                           "Exiting now.\n"));
 718                 exit(1);
 719         }
 720
 721         rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0);
 722         if (rval != XR_OK) {
 723                 do_warn(_("Primary superblock bad after phase 1!\n"
 724                           "Exiting now.\n"));
 725                 exit(1);
 726         }
 727
 728         /*
 729          * Now that we have completely validated the superblock, geometry may
 730          * have changed; re-check geometry vs the host filesystem geometry
 731          */
 732         if (isa_file)
 733                 check_fs_vs_host_sectsize(&psb);
 734
 735         /*
 736          * Prepare the mount structure. Point the log reference to our local
 737          * copy so it's available to the various phases. The log bits are
 738          * initialized in phase 2.
 739          */
 740         memset(&xfs_m, 0, sizeof(xfs_mount_t));
 741         mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0);
 742
 743         if (!mp)  {
 744                 fprintf(stderr,
 745                         _("%s: cannot repair this filesystem.  Sorry.\n"),
 746                         progname);
 747                 exit(1);
 748         }
 749         mp->m_log = &log;
 750
 751         /* Spit out function & line on these corruption macros */
 752         if (verbose > 2)
 753                 mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
 754
 755         /*
 756          * set XFS-independent status vars from the mount/sb structure
 757          */
 758         glob_agcount = mp->m_sb.sb_agcount;
 759
 760         chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
 761         max_symlink_blocks = libxfs_symlink_blocks(mp, XFS_SYMLINK_MAXLEN);
 762         inodes_per_cluster = max(mp->m_sb.sb_inopblock,
 763                         mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog);
 764
 765         /*
 766          * Automatic striding for high agcount filesystems.
 767          *
 768          * More AGs indicates that the filesystem is either large or can handle
 769          * more IO parallelism. Either way, we should try to process multiple
 770          * AGs at a time in such a configuration to try to saturate the
 771          * underlying storage and speed the repair process. Only do this if
 772          * prefetching is enabled.
 773          *
 774          * Given mkfs defaults for 16AGs for "multidisk" configurations, we want
 775          * to target these for an increase in thread count. Hence a stride value
 776          * of 15 is chosen to ensure we get at least 2 AGs being scanned at once
 777          * on such filesystems.
 778          *
 779          * Limit the maximum thread count based on the available CPU power that
 780          * is available. If we use too many threads, we might run out of memory
 781          * and CPU power before we run out of IO concurrency. We limit to 8
 782          * threads/CPU as this is enough threads to saturate a CPU on fast
 783          * devices, yet few enough that it will saturate but won't overload slow
 784          * devices.
 785          *
 786          * Multidisk filesystems can handle more IO parallelism so we should try
 787          * to process multiple AGs at a time in such a configuration to try to
 788          * saturate the underlying storage and speed the repair process. Only do
 789          * this if prefetching is enabled.
 790          */
 791         if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) {
 792                 /*
 793                  * For small agcount multidisk systems, just double the
 794                  * parallelism. For larger AG count filesystems (32 and above)
 795                  * use more parallelism, and linearly increase the parallelism
 796                  * with the number of AGs.
 797                  */
 798                 ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1;
 799         }
 800
 801         if (ag_stride) {
 802                 int max_threads = platform_nproc() * 8;
 803
 804                 thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
 805                 while (thread_count > max_threads) {
 806                         ag_stride *= 2;
 807                         thread_count = (glob_agcount + ag_stride - 1) /
 808                                                                 ag_stride;
 809                 }
 810                 if (thread_count > 0)
 811                         thread_init();
 812                 else {
 813                         thread_count = 1;
 814                         ag_stride = 0;
 815                 }
 816         }
 817
 818         if (ag_stride && report_interval) {
 819                 init_progress_rpt();
 820                 if (msgbuf) {
 821                         do_log(_("        - reporting progress in intervals of %s\n"),
 822                         duration(report_interval, msgbuf));
 823                 }
 824         }
 825
 826         /*
 827          * Adjust libxfs cache sizes based on system memory,
 828          * filesystem size and inode count.
 829          *
 830          * We'll set the cache size based on 3/4 of the memory minus
 831          * space used by the inode AVL tree and block usage map.
 832          *
 833          * Inode AVL tree space is approximately 4 bytes per inode,
 834          * block usage map is currently 1 byte for 2 blocks.
 835          *
 836          * We assume most blocks will be inode clusters.
 837          *
 838          * Calculations are done in kilobyte units.
 839          */
 840
 841         if (!bhash_option_used || max_mem_specified) {
 842                 unsigned long   mem_used;
 843                 unsigned long   max_mem;
 844                 struct rlimit   rlim;
 845
 846                 libxfs_bcache_purge();
 847                 cache_destroy(libxfs_bcache);
 848
 849                 mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
 850                                         (mp->m_sb.sb_dblocks >> (10 + 1)) +
 851                                         50000;  /* rough estimate of 50MB overhead */
 852                 max_mem = max_mem_specified ? max_mem_specified * 1024 :
 853                                                 libxfs_physmem() * 3 / 4;
 854
 855                 if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
 856                                         rlim.rlim_cur != RLIM_INFINITY) {
 857                         rlim.rlim_cur = rlim.rlim_max;
 858                         setrlimit(RLIMIT_AS, &rlim);
 859                         /* use approximately 80% of rlimit to avoid overrun */
 860                         max_mem = min(max_mem, rlim.rlim_cur / 1280);
 861                 } else
 862                         max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
 863
 864                 if (verbose > 1)
 865                         do_log(
 866         _("        - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
 867                                 max_mem, mp->m_sb.sb_icount,
 868                                 mp->m_sb.sb_icount >> (10 - 2),
 869                                 mp->m_sb.sb_dblocks,
 870                                 mp->m_sb.sb_dblocks >> (10 + 1));
 871
 872                 if (max_mem <= mem_used) {
 873                         if (max_mem_specified) {
 874                                 do_abort(
 875         _("Required memory for repair is greater that the maximum specified\n"
 876           "with the -m option. Please increase it to at least %lu.\n"),
 877                                         mem_used / 1024);
 878                         }
 879                         do_log(
 880         _("Memory available for repair (%luMB) may not be sufficient.\n"
 881           "At least %luMB is needed to repair this filesystem efficiently\n"
 882           "If repair fails due to lack of memory, please\n"),
 883                                 max_mem / 1024, mem_used / 1024);
 884                         if (do_prefetch)
 885                                 do_log(
 886         _("turn prefetching off (-P) to reduce the memory footprint.\n"));
 887                         else
 888                                 do_log(
 889         _("increase system RAM and/or swap space to at least %luMB.\n"),
 890                         mem_used * 2 / 1024);
 891
 892                         max_mem = mem_used;
 893                 }
 894
 895                 max_mem -= mem_used;
 896                 if (max_mem >= (1 << 30))
 897                         max_mem = 1 << 30;
 898                 libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
 899                                 (mp->m_inode_cluster_size >> 10));
 900                 if (libxfs_bhash_size < 512)
 901                         libxfs_bhash_size = 512;
 902
 903                 if (verbose)
 904                         do_log(_("        - block cache size set to %d entries\n"),
 905                                 libxfs_bhash_size * HASH_CACHE_RATIO);
 906
 907                 libxfs_bcache = cache_init(0, libxfs_bhash_size,
 908                                                 &libxfs_bcache_operations);
 909         }
 910
 911         /*
 912          * calculate what mkfs would do to this filesystem
 913          */
 914         calc_mkfs(mp);
 915
 916         /*
 917          * initialize block alloc map
 918          */
 919         init_bmaps(mp);
 920         incore_ino_init(mp);
 921         incore_ext_init(mp);
 922         rmaps_init(mp);
 923
 924         /* initialize random globals now that we know the fs geometry */
 925         inodes_per_block = mp->m_sb.sb_inopblock;
 926
 927         if (parse_sb_version(&mp->m_sb))  {
 928                 do_warn(
 929         _("Found unsupported filesystem features.  Exiting now.\n"));
 930                 return(1);
 931         }
 932
 933         /* make sure the per-ag freespace maps are ok so we can mount the fs */
 934         phase2(mp, phase2_threads);
 935         timestamp(PHASE_END, 2, NULL);
 936
 937         if (do_prefetch)
 938                 init_prefetch(mp);
 939
 940         phase3(mp, phase2_threads);
 941         timestamp(PHASE_END, 3, NULL);
 942
 943         phase4(mp);
 944         timestamp(PHASE_END, 4, NULL);
 945
 946         if (no_modify)
 947                 printf(_("No modify flag set, skipping phase 5\n"));
 948         else {
 949                 phase5(mp);
 950         }
 951         timestamp(PHASE_END, 5, NULL);
 952
 953         /*
 954          * Done with the block usage maps, toss them...
 955          */
 956         rmaps_free(mp);
 957         free_bmaps(mp);
 958
 959         if (!bad_ino_btree)  {
 960                 phase6(mp);
 961                 timestamp(PHASE_END, 6, NULL);
 962
 963                 phase7(mp, phase2_threads);
 964                 timestamp(PHASE_END, 7, NULL);
 965         } else  {
 966                 do_warn(
 967 _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
 968         }
 969
 970         if (lost_quotas && !have_uquotino && !have_gquotino && !have_pquotino) {
 971                 if (!no_modify)  {
 972                         do_warn(
 973 _("Warning:  no quota inodes were found.  Quotas disabled.\n"));
 974                 } else  {
 975                         do_warn(
 976 _("Warning:  no quota inodes were found.  Quotas would be disabled.\n"));
 977                 }
 978         } else if (lost_quotas)  {
 979                 if (!no_modify)  {
 980                         do_warn(
 981 _("Warning:  quota inodes were cleared.  Quotas disabled.\n"));
 982                 } else  {
 983                         do_warn(
 984 _("Warning:  quota inodes would be cleared.  Quotas would be disabled.\n"));
 985                 }
 986         } else  {
 987                 if (lost_uquotino)  {
 988                         if (!no_modify)  {
 989                                 do_warn(
 990 _("Warning:  user quota information was cleared.\n"
 991   "User quotas can not be enforced until limit information is recreated.\n"));
 992                         } else  {
 993                                 do_warn(
 994 _("Warning:  user quota information would be cleared.\n"
 995   "User quotas could not be enforced until limit information was recreated.\n"));
 996                         }
 997                 }
 998
 999                 if (lost_gquotino)  {
1000                         if (!no_modify)  {
1001                                 do_warn(
1002 _("Warning:  group quota information was cleared.\n"
1003   "Group quotas can not be enforced until limit information is recreated.\n"));
1004                         } else  {
1005                                 do_warn(
1006 _("Warning:  group quota information would be cleared.\n"
1007   "Group quotas could not be enforced until limit information was recreated.\n"));
1008                         }
1009                 }
1010
1011                 if (lost_pquotino)  {
1012                         if (!no_modify)  {
1013                                 do_warn(
1014 _("Warning:  project quota information was cleared.\n"
1015   "Project quotas can not be enforced until limit information is recreated.\n"));
1016                         } else  {
1017                                 do_warn(
1018 _("Warning:  project quota information would be cleared.\n"
1019   "Project quotas could not be enforced until limit information was recreated.\n"));
1020                         }
1021                 }
1022         }
1023
1024         if (ag_stride && report_interval)
1025                 stop_progress_rpt();
1026
1027         if (no_modify)  {
1028                 /*
1029                  * Warn if the current LSN is problematic and the log requires a
1030                  * reformat.
1031                  */
1032                 format_log_max_lsn(mp);
1033
1034                 do_log(
1035         _("No modify flag set, skipping filesystem flush and exiting.\n"));
1036                 if (verbose)
1037                         summary_report();
1038                 if (fs_is_dirty)
1039                         return(1);
1040
1041                 return(0);
1042         }
1043
1044         /*
1045          * Clear the quota flags if they're on.
1046          */
1047         sbp = libxfs_getsb(mp, 0);
1048         if (!sbp)
1049                 do_error(_("couldn't get superblock\n"));
1050
1051         dsb = XFS_BUF_TO_SBP(sbp);
1052
1053         if (be16_to_cpu(dsb->sb_qflags) & XFS_ALL_QUOTA_CHKD) {
1054                 do_warn(_("Note - quota info will be regenerated on next "
1055                         "quota mount.\n"));
1056                 dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD);
1057         }
1058
1059         if (copied_sunit) {
1060                 do_warn(
1061 _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n"
1062   "Please reset with mount -o sunit=<value>,swidth=<value> if necessary\n"),
1063                         be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width));
1064         }
1065
1066         libxfs_writebuf(sbp, 0);
1067
1068         /*
1069          * Done. Flush all cached buffers and inodes first to ensure all
1070          * verifiers are run (where we discover the max metadata LSN), reformat
1071          * the log if necessary and unmount.
1072          */
1073         libxfs_bcache_flush();
1074         format_log_max_lsn(mp);
1075         libxfs_umount(mp);
1076
1077         if (x.rtdev)
1078                 libxfs_device_close(x.rtdev);
1079         if (x.logdev && x.logdev != x.ddev)
1080                 libxfs_device_close(x.logdev);
1081         libxfs_device_close(x.ddev);
1082         libxfs_destroy();
1083
1084         if (verbose)
1085                 summary_report();
1086         do_log(_("done\n"));
1087
1088         if (dangerously && !no_modify)
1089                 do_warn(
1090 _("Repair of readonly mount complete.  Immediate reboot encouraged.\n"));
1091
1092         pftrace_done();
1093
1094         free(msgbuf);
1095
1096         if (fs_is_dirty && report_corrected)
1097                 return (4);
1098         return (0);
1099 }