scrub/xfs_scrub.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Copyright (C) 2018 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  */
   6 #include "xfs.h"
   7 #include <pthread.h>
   8 #include <stdlib.h>
   9 #include <paths.h>
  10 #include <sys/time.h>
  11 #include <sys/resource.h>
  12 #include <sys/statvfs.h>
  13 #include "platform_defs.h"
  14 #include "input.h"
  15 #include "path.h"
  16 #include "xfs_scrub.h"
  17 #include "common.h"
  18 #include "unicrash.h"
  19 #include "progress.h"
  20
  21 /*
  22  * XFS Online Metadata Scrub (and Repair)
  23  *
  24  * The XFS scrubber uses custom XFS ioctls to probe more deeply into the
  25  * internals of the filesystem.  It takes advantage of scrubbing ioctls
  26  * to check all the records stored in a metadata object and to
  27  * cross-reference those records against the other filesystem metadata.
  28  *
  29  * After the program gathers command line arguments to figure out
  30  * exactly what the program is going to do, scrub execution is split up
  31  * into several separate phases:
  32  *
  33  * The "find geometry" phase queries XFS for the filesystem geometry.
  34  * The block devices for the data, realtime, and log devices are opened.
  35  * Kernel ioctls are test-queried to see if they actually work (the scrub
  36  * ioctl in particular), and any other filesystem-specific information
  37  * is gathered.
  38  *
  39  * In the "check internal metadata" phase, we call the metadata scrub
  40  * ioctl to check the filesystem's internal per-AG btrees.  This
  41  * includes the AG superblock, AGF, AGFL, and AGI headers, freespace
  42  * btrees, the regular and free inode btrees, the reverse mapping
  43  * btrees, and the reference counting btrees.  If the realtime device is
  44  * enabled, the realtime bitmap and reverse mapping btrees are checked.
  45  * Quotas, if enabled, are also checked in this phase.
  46  *
  47  * Each AG (and the realtime device) has its metadata checked in a
  48  * separate thread for better performance.  Errors in the internal
  49  * metadata can be fixed here prior to the inode scan; refer to the
  50  * section about the "repair filesystem" phase for more information.
  51  *
  52  * The "scan all inodes" phase uses BULKSTAT to scan all the inodes in
  53  * an AG in disk order.  The BULKSTAT information provides enough
  54  * information to construct a file handle that is used to check the
  55  * following parts of every file:
  56  *
  57  *  - The inode record
  58  *  - All three block forks (data, attr, CoW)
  59  *  - If it's a symlink, the symlink target.
  60  *  - If it's a directory, the directory entries.
  61  *  - All extended attributes
  62  *  - The parent pointer
  63  *
   64  * Multiple threads are started to check the inodes of each AG in
  65  * parallel.  Errors in file metadata can be fixed here; see the section
  66  * about the "repair filesystem" phase for more information.
  67  *
  68  * Next comes the (configurable) "repair filesystem" phase.  The user
  69  * can instruct this program to fix all problems encountered; to fix
  70  * only optimality problems and leave the corruptions; or not to touch
  71  * the filesystem at all.  Any metadata repairs that did not succeed in
  72  * the previous two phases are retried here; if there are uncorrectable
  73  * errors, xfs_scrub stops here.
  74  *
  75  * To perform the actual repairs (or optimizations), we iterate all the
  76  * items on the per-AG action item list and ask the kernel to repair
  77  * them.  Items which are successfully repaired are removed from the
  78  * list.  If an item is not acted upon successfully (or the kernel asks us
  79  * to try again), we retry the actions until there is nothing left to
  80  * fix or we fail to make forward progress.  In that event, the
  81  * unfinished items are recorded as errors.  If there are no errors at
  82  * this point, we call FSTRIM on the filesystem.
  83  *
  84  * The next phase is the "check directory tree" phase.  In this phase,
  85  * every directory is opened (via file handle) to confirm that each
  86  * directory is connected to the root.  Directory entries are checked
  87  * for ambiguous Unicode normalization mappings, which is to say that we
  88  * look for pairs of entries whose utf-8 strings normalize to the same
  89  * code point sequence and map to different inodes, because that could
  90  * be used to trick a user into opening the wrong file.  The names of
  91  * extended attributes are checked for Unicode normalization collisions.
  92  *
  93  * In the "verify data file integrity" phase, we employ GETFSMAP to read
  94  * the reverse-mappings of all AGs and issue direct-reads of the
  95  * underlying disk blocks.  We rely on the underlying storage to have
  96  * checksummed the data blocks appropriately.  Multiple threads are
  97  * started to check each AG in parallel; a separate thread pool is used
  98  * to handle the direct reads.
  99  *
 100  * In the "check summary counters" phase, use GETFSMAP to tally up the
 101  * blocks and BULKSTAT to tally up the inodes we saw and compare that to
 102  * the statfs output.  This gives the user a rough estimate of how
 103  * thorough the scrub was.
 104  */
 105
 106 /*
 107  * Known debug tweaks (pass -d and set the environment variable):
 108  * XFS_SCRUB_FORCE_ERROR        -- pretend all metadata is corrupt
 109  * XFS_SCRUB_FORCE_REPAIR       -- repair all metadata even if it's ok
 110  * XFS_SCRUB_NO_KERNEL          -- pretend there is no kernel ioctl
 111  * XFS_SCRUB_NO_SCSI_VERIFY     -- disable SCSI VERIFY (if present)
 112  * XFS_SCRUB_PHASE              -- run only this scrub phase
 113  * XFS_SCRUB_THREADS            -- start exactly this number of threads
 114  *
 115  * Available even in non-debug mode:
 116  * SERVICE_MODE                 -- compress all error codes to 1 for LSB
 117  *                                 service action compliance
 118  */
 119
/*
 * Program name; needed for libfrog error reports.  main() replaces the
 * default with basename(argv[0]).
 */
char                            *progname = "xfs_scrub";

/* Debug level; higher values mean more verbosity.  Bumped once per -d. */
unsigned int                    debug;

/* Display resource usage at the end of each phase?  Set by -T. */
static bool                     display_rusage;

/*
 * Background mode; higher values insert more pauses between scrub calls.
 * Bumped once per -b.
 */
unsigned int                    bg_mode;

/*
 * Maximum number of processors available to us.  main() fills this in
 * from sysconf(_SC_NPROCESSORS_ONLN) and clamps it to at least 1.
 */
int                             nproc;

/*
 * Number of threads we're allowed to use.  -b forces this to 1, and the
 * XFS_SCRUB_THREADS debug tweak overrides it.
 */
unsigned int                    nr_threads;

/* Verbose mode; if set (-v), print more information. */
bool                            verbose;

/* Should we scrub the data blocks?  Set by -x. */
static bool                     scrub_data;

/* Size of a memory page, as reported by sysconf(_SC_PAGESIZE). */
long                            page_size;

/* Should we FSTRIM after a successful run?  Cleared by -k. */
bool                            want_fstrim = true;

/*
 * If stdout/stderr are ttys, we can use richer terminal control.
 * main() sets these from isatty().
 */
bool                            stderr_isatty;
bool                            stdout_isatty;

/*
 * If we are running as a service, we need to be careful about what
 * error codes we return to the calling process.  main() sets this when
 * the SERVICE_MODE environment variable is present.
 */
bool                            is_service;
 159
/*
 * Process exit status.  Each nonzero value is a distinct bit; main() ORs
 * together the bits for every kind of problem it saw to build the return
 * code.  usage() exits with SCRUB_RET_SYNTAX directly.
 */
#define SCRUB_RET_SUCCESS       (0)     /* no problems left behind */
#define SCRUB_RET_CORRUPT       (1)     /* corruption remains on fs */
#define SCRUB_RET_UNOPTIMIZED   (2)     /* fs could be optimized */
#define SCRUB_RET_OPERROR       (4)     /* operational problems */
#define SCRUB_RET_SYNTAX        (8)     /* cmdline args rejected */
 165
 166 static void __attribute__((noreturn))
 167 usage(void)
 168 {
 169         fprintf(stderr, _("Usage: %s [OPTIONS] mountpoint\n"), progname);
 170         fprintf(stderr, "\n");
 171         fprintf(stderr, _("Options:\n"));
 172         fprintf(stderr, _("  -a count     Stop after this many errors are found.\n"));
 173         fprintf(stderr, _("  -b           Background mode.\n"));
 174         fprintf(stderr, _("  -C fd        Print progress information to this fd.\n"));
 175         fprintf(stderr, _("  -e behavior  What to do if errors are found.\n"));
 176         fprintf(stderr, _("  -k           Do not FITRIM the free space.\n"));
 177         fprintf(stderr, _("  -m path      Path to /etc/mtab.\n"));
 178         fprintf(stderr, _("  -n           Dry run.  Do not modify anything.\n"));
 179         fprintf(stderr, _("  -T           Display timing/usage information.\n"));
 180         fprintf(stderr, _("  -v           Verbose output.\n"));
 181         fprintf(stderr, _("  -V           Print version.\n"));
 182         fprintf(stderr, _("  -x           Scrub file data too.\n"));
 183
 184         exit(SCRUB_RET_SYNTAX);
 185 }
 186
 187 #ifndef RUSAGE_BOTH
 188 # define RUSAGE_BOTH            (-2)
 189 #endif
 190
 191 /* Get resource usage for ourselves and all children. */
 192 static int
 193 scrub_getrusage(
 194         struct rusage           *usage)
 195 {
 196         struct rusage           cusage;
 197         int                     err;
 198
 199         err = getrusage(RUSAGE_BOTH, usage);
 200         if (!err)
 201                 return err;
 202
 203         err = getrusage(RUSAGE_SELF, usage);
 204         if (err)
 205                 return err;
 206
 207         err = getrusage(RUSAGE_CHILDREN, &cusage);
 208         if (err)
 209                 return err;
 210
 211         usage->ru_minflt += cusage.ru_minflt;
 212         usage->ru_majflt += cusage.ru_majflt;
 213         usage->ru_nswap += cusage.ru_nswap;
 214         usage->ru_inblock += cusage.ru_inblock;
 215         usage->ru_oublock += cusage.ru_oublock;
 216         usage->ru_msgsnd += cusage.ru_msgsnd;
 217         usage->ru_msgrcv += cusage.ru_msgrcv;
 218         usage->ru_nsignals += cusage.ru_nsignals;
 219         usage->ru_nvcsw += cusage.ru_nvcsw;
 220         usage->ru_nivcsw += cusage.ru_nivcsw;
 221         return 0;
 222 }
 223
 224 /*
 225  * Scrub Phase Dispatch
 226  *
 227  * The operations of the scrub program are split up into several
 228  * different phases.  Each phase builds upon the metadata checked in the
 229  * previous phase, which is to say that we may skip phase (X + 1) if our
 230  * scans in phase (X) reveal corruption.  A phase may be skipped
 231  * entirely.
 232  */
 233
/*
 * Resource usage for each phase.  phase_start() fills one of these in
 * and phase_end() reports the difference against a fresh snapshot.
 */
struct phase_rusage {
        struct rusage           ruse;           /* rusage snapshot at phase start */
        struct timeval          time;           /* wall clock at phase start */
        unsigned long long      verified_bytes; /* NOTE(review): not referenced in this part of the file */
        void                    *brk_start;     /* sbrk(0) at phase start */
        const char              *descr;         /* phase description; may be NULL */
};
 242
/*
 * Operations for each phase.
 *
 * The two *_DUMMY_FN values are placeholders stored in ->fn for optional
 * phases.  run_scrub_phases() swaps in the real function when the phase
 * is enabled and skips any phase whose ->fn is still a placeholder.
 */
#define DATASCAN_DUMMY_FN       ((void *)1)
#define REPAIR_DUMMY_FN         ((void *)2)
struct phase_ops {
        /* Description printed when the phase starts. */
        char            *descr;
        /* Phase body; a false return aborts the scrub. */
        bool            (*fn)(struct scrub_ctx *);
        /*
         * Optional estimator.  Its three outputs are handed to
         * progress_init_phase() as max_work, work_threads and rshift.
         */
        bool            (*estimate_work)(struct scrub_ctx *, uint64_t *,
                                         unsigned int *, int *);
        /* Run even when XFS_SCRUB_PHASE selects a different phase. */
        bool            must_run;
};
 253
 254 /* Start tracking resource usage for a phase. */
 255 static bool
 256 phase_start(
 257         struct phase_rusage     *pi,
 258         unsigned int            phase,
 259         const char              *descr)
 260 {
 261         int                     error;
 262
 263         memset(pi, 0, sizeof(*pi));
 264         error = scrub_getrusage(&pi->ruse);
 265         if (error) {
 266                 perror(_("getrusage"));
 267                 return false;
 268         }
 269         pi->brk_start = sbrk(0);
 270
 271         error = gettimeofday(&pi->time, NULL);
 272         if (error) {
 273                 perror(_("gettimeofday"));
 274                 return false;
 275         }
 276
 277         pi->descr = descr;
 278         if ((verbose || display_rusage) && descr) {
 279                 fprintf(stdout, _("Phase %u: %s\n"), phase, descr);
 280                 fflush(stdout);
 281         }
 282         return true;
 283 }
 284
 285 /* Report usage stats. */
 286 static bool
 287 phase_end(
 288         struct phase_rusage     *pi,
 289         unsigned int            phase)
 290 {
 291         struct rusage           ruse_now;
 292 #ifdef HAVE_MALLINFO
 293         struct mallinfo         mall_now;
 294 #endif
 295         struct timeval          time_now;
 296         char                    phasebuf[DESCR_BUFSZ];
 297         double                  dt;
 298         unsigned long long      in, out;
 299         unsigned long long      io;
 300         double                  i, o, t;
 301         double                  din, dout, dtot;
 302         char                    *iu, *ou, *tu, *dinu, *doutu, *dtotu;
 303         int                     error;
 304
 305         if (!display_rusage)
 306                 return true;
 307
 308         error = gettimeofday(&time_now, NULL);
 309         if (error) {
 310                 perror(_("gettimeofday"));
 311                 return false;
 312         }
 313         dt = timeval_subtract(&time_now, &pi->time);
 314
 315         error = scrub_getrusage(&ruse_now);
 316         if (error) {
 317                 perror(_("getrusage"));
 318                 return false;
 319         }
 320
 321         if (phase)
 322                 snprintf(phasebuf, DESCR_BUFSZ, _("Phase %u: "), phase);
 323         else
 324                 phasebuf[0] = 0;
 325
 326 #define kbytes(x)       (((unsigned long)(x) + 1023) / 1024)
 327 #ifdef HAVE_MALLINFO
 328
 329         mall_now = mallinfo();
 330         fprintf(stdout, _("%sMemory used: %luk/%luk (%luk/%luk), "),
 331                 phasebuf,
 332                 kbytes(mall_now.arena), kbytes(mall_now.hblkhd),
 333                 kbytes(mall_now.uordblks), kbytes(mall_now.fordblks));
 334 #else
 335         fprintf(stdout, _("%sMemory used: %luk, "),
 336                 phasebuf,
 337                 (unsigned long) kbytes(((char *) sbrk(0)) -
 338                                        ((char *) pi->brk_start)));
 339 #endif
 340 #undef kbytes
 341
 342         fprintf(stdout, _("time: %5.2f/%5.2f/%5.2fs\n"),
 343                 timeval_subtract(&time_now, &pi->time),
 344                 timeval_subtract(&ruse_now.ru_utime, &pi->ruse.ru_utime),
 345                 timeval_subtract(&ruse_now.ru_stime, &pi->ruse.ru_stime));
 346
 347         /* I/O usage */
 348         in =  ((unsigned long long)ruse_now.ru_inblock -
 349                         pi->ruse.ru_inblock) << BBSHIFT;
 350         out = ((unsigned long long)ruse_now.ru_oublock -
 351                         pi->ruse.ru_oublock) << BBSHIFT;
 352         io = in + out;
 353         if (io) {
 354                 i = auto_space_units(in, &iu);
 355                 o = auto_space_units(out, &ou);
 356                 t = auto_space_units(io, &tu);
 357                 din = auto_space_units(in / dt, &dinu);
 358                 dout = auto_space_units(out / dt, &doutu);
 359                 dtot = auto_space_units(io / dt, &dtotu);
 360                 fprintf(stdout,
 361 _("%sI/O: %.1f%s in, %.1f%s out, %.1f%s tot\n"),
 362                         phasebuf, i, iu, o, ou, t, tu);
 363                 fprintf(stdout,
 364 _("%sI/O rate: %.1f%s/s in, %.1f%s/s out, %.1f%s/s tot\n"),
 365                         phasebuf, din, dinu, dout, doutu, dtot, dtotu);
 366         }
 367         fflush(stdout);
 368
 369         return true;
 370 }
 371
 372 /* Run all the phases of the scrubber. */
 373 static bool
 374 run_scrub_phases(
 375         struct scrub_ctx        *ctx,
 376         FILE                    *progress_fp)
 377 {
 378         struct phase_ops phases[] =
 379         {
 380                 {
 381                         .descr = _("Find filesystem geometry."),
 382                         .fn = xfs_setup_fs,
 383                         .must_run = true,
 384                 },
 385                 {
 386                         .descr = _("Check internal metadata."),
 387                         .fn = xfs_scan_metadata,
 388                         .estimate_work = xfs_estimate_metadata_work,
 389                 },
 390                 {
 391                         .descr = _("Scan all inodes."),
 392                         .fn = xfs_scan_inodes,
 393                         .estimate_work = xfs_estimate_inodes_work,
 394                 },
 395                 {
 396                         .descr = _("Defer filesystem repairs."),
 397                         .fn = REPAIR_DUMMY_FN,
 398                         .estimate_work = xfs_estimate_repair_work,
 399                 },
 400                 {
 401                         .descr = _("Check directory tree."),
 402                         .fn = xfs_scan_connections,
 403                         .estimate_work = xfs_estimate_inodes_work,
 404                 },
 405                 {
 406                         .descr = _("Verify data file integrity."),
 407                         .fn = DATASCAN_DUMMY_FN,
 408                         .estimate_work = xfs_estimate_verify_work,
 409                 },
 410                 {
 411                         .descr = _("Check summary counters."),
 412                         .fn = xfs_scan_summary,
 413                         .must_run = true,
 414                 },
 415                 {
 416                         NULL
 417                 },
 418         };
 419         struct phase_rusage     pi;
 420         struct phase_ops        *sp;
 421         uint64_t                max_work;
 422         bool                    moveon = true;
 423         unsigned int            debug_phase = 0;
 424         unsigned int            phase;
 425         int                     rshift;
 426
 427         if (debug_tweak_on("XFS_SCRUB_PHASE"))
 428                 debug_phase = atoi(getenv("XFS_SCRUB_PHASE"));
 429
 430         /* Run all phases of the scrub tool. */
 431         for (phase = 1, sp = phases; sp->fn; sp++, phase++) {
 432                 /* Turn on certain phases if user said to. */
 433                 if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) {
 434                         sp->fn = xfs_scan_blocks;
 435                 } else if (sp->fn == REPAIR_DUMMY_FN &&
 436                            ctx->mode == SCRUB_MODE_REPAIR) {
 437                         sp->descr = _("Repair filesystem.");
 438                         sp->fn = xfs_repair_fs;
 439                         sp->must_run = true;
 440                 }
 441
 442                 /* Skip certain phases unless they're turned on. */
 443                 if (sp->fn == REPAIR_DUMMY_FN ||
 444                     sp->fn == DATASCAN_DUMMY_FN)
 445                         continue;
 446
 447                 /* Allow debug users to force a particular phase. */
 448                 if (debug_phase && phase != debug_phase && !sp->must_run)
 449                         continue;
 450
 451                 /* Run this phase. */
 452                 moveon = phase_start(&pi, phase, sp->descr);
 453                 if (!moveon)
 454                         break;
 455                 if (sp->estimate_work) {
 456                         unsigned int            work_threads;
 457
 458                         moveon = sp->estimate_work(ctx, &max_work,
 459                                         &work_threads, &rshift);
 460                         if (!moveon)
 461                                 break;
 462                         moveon = progress_init_phase(ctx, progress_fp, phase,
 463                                         max_work, rshift, work_threads);
 464                 } else {
 465                         moveon = progress_init_phase(ctx, NULL, phase, 0, 0, 0);
 466                 }
 467                 if (!moveon)
 468                         break;
 469                 moveon = sp->fn(ctx);
 470                 if (!moveon) {
 471                         str_info(ctx, ctx->mntpoint,
 472 _("Scrub aborted after phase %d."),
 473                                         phase);
 474                         break;
 475                 }
 476                 progress_end_phase();
 477                 moveon = phase_end(&pi, phase);
 478                 if (!moveon)
 479                         break;
 480
 481                 /* Too many errors? */
 482                 moveon = !xfs_scrub_excessive_errors(ctx);
 483                 if (!moveon)
 484                         break;
 485         }
 486
 487         return moveon;
 488 }
 489
 490 static void
 491 report_modifications(
 492         struct scrub_ctx        *ctx)
 493 {
 494         if (ctx->repairs == 0 && ctx->preens == 0)
 495                 return;
 496
 497         if (ctx->repairs && ctx->preens)
 498                 fprintf(stdout,
 499 _("%s: repairs made: %llu; optimizations made: %llu.\n"),
 500                                 ctx->mntpoint, ctx->repairs, ctx->preens);
 501         else if (ctx->preens == 0)
 502                 fprintf(stdout,
 503 _("%s: repairs made: %llu.\n"),
 504                                 ctx->mntpoint, ctx->repairs);
 505         else if (ctx->repairs == 0)
 506                 fprintf(stdout,
 507 _("%s: optimizations made: %llu.\n"),
 508                                 ctx->mntpoint, ctx->preens);
 509 }
 510
 511 static void
 512 report_outcome(
 513         struct scrub_ctx        *ctx)
 514 {
 515         unsigned long long      total_errors;
 516
 517         total_errors = ctx->errors_found + ctx->runtime_errors;
 518
 519         if (total_errors == 0 && ctx->warnings_found == 0) {
 520                 log_info(ctx, _("No errors found."));
 521                 return;
 522         }
 523
 524         if (total_errors == 0) {
 525                 fprintf(stderr, _("%s: warnings found: %llu\n"), ctx->mntpoint,
 526                                 ctx->warnings_found);
 527                 log_warn(ctx, _("warnings found: %llu"), ctx->warnings_found);
 528         } else if (ctx->warnings_found == 0) {
 529                 fprintf(stderr, _("%s: errors found: %llu\n"), ctx->mntpoint,
 530                                 total_errors);
 531                 log_err(ctx, _("errors found: %llu"), total_errors);
 532         } else {
 533                 fprintf(stderr, _("%s: errors found: %llu; warnings found: %llu\n"),
 534                                 ctx->mntpoint, total_errors,
 535                                 ctx->warnings_found);
 536                 log_err(ctx, _("errors found: %llu; warnings found: %llu"),
 537                                 total_errors, ctx->warnings_found);
 538         }
 539
 540         /*
 541          * Don't advise the user to run repair unless we were successful in
 542          * setting up the scrub and we actually saw corruptions.  Warnings
 543          * are not corruptions.
 544          */
 545         if (ctx->scrub_setup_succeeded && total_errors > 0) {
 546                 char            *msg;
 547
 548                 if (ctx->mode == SCRUB_MODE_DRY_RUN)
 549                         msg = _("%s: Re-run xfs_scrub without -n.\n");
 550                 else
 551                         msg = _("%s: Unmount and run xfs_repair.\n");
 552
 553                 fprintf(stderr, msg, ctx->mntpoint);
 554         }
 555 }
 556
 557 int
 558 main(
 559         int                     argc,
 560         char                    **argv)
 561 {
 562         struct scrub_ctx        ctx = {0};
 563         struct phase_rusage     all_pi;
 564         char                    *mtab = NULL;
 565         FILE                    *progress_fp = NULL;
 566         struct fs_path          *fsp;
 567         bool                    moveon = true;
 568         int                     c;
 569         int                     fd;
 570         int                     ret = SCRUB_RET_SUCCESS;
 571
 572         fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n");
 573
 574         progname = basename(argv[0]);
 575         setlocale(LC_ALL, "");
 576         bindtextdomain(PACKAGE, LOCALEDIR);
 577         textdomain(PACKAGE);
 578
 579         pthread_mutex_init(&ctx.lock, NULL);
 580         ctx.mode = SCRUB_MODE_REPAIR;
 581         ctx.error_action = ERRORS_CONTINUE;
 582         while ((c = getopt(argc, argv, "a:bC:de:km:nTvxV")) != EOF) {
 583                 switch (c) {
 584                 case 'a':
 585                         ctx.max_errors = cvt_u64(optarg, 10);
 586                         if (errno) {
 587                                 perror(optarg);
 588                                 usage();
 589                         }
 590                         break;
 591                 case 'b':
 592                         nr_threads = 1;
 593                         bg_mode++;
 594                         break;
 595                 case 'C':
 596                         errno = 0;
 597                         fd = cvt_u32(optarg, 10);
 598                         if (errno) {
 599                                 perror(optarg);
 600                                 usage();
 601                         }
 602                         progress_fp = fdopen(fd, "w");
 603                         if (!progress_fp) {
 604                                 perror(optarg);
 605                                 usage();
 606                         }
 607                         break;
 608                 case 'd':
 609                         debug++;
 610                         break;
 611                 case 'e':
 612                         if (!strcmp("continue", optarg))
 613                                 ctx.error_action = ERRORS_CONTINUE;
 614                         else if (!strcmp("shutdown", optarg))
 615                                 ctx.error_action = ERRORS_SHUTDOWN;
 616                         else {
 617                                 fprintf(stderr,
 618         _("Unknown error behavior \"%s\".\n"),
 619                                                 optarg);
 620                                 usage();
 621                         }
 622                         break;
 623                 case 'k':
 624                         want_fstrim = false;
 625                         break;
 626                 case 'm':
 627                         mtab = optarg;
 628                         break;
 629                 case 'n':
 630                         ctx.mode = SCRUB_MODE_DRY_RUN;
 631                         break;
 632                 case 'T':
 633                         display_rusage = true;
 634                         break;
 635                 case 'v':
 636                         verbose = true;
 637                         break;
 638                 case 'V':
 639                         fprintf(stdout, _("%s version %s\n"), progname,
 640                                         VERSION);
 641                         fflush(stdout);
 642                         return SCRUB_RET_SUCCESS;
 643                 case 'x':
 644                         scrub_data = true;
 645                         break;
 646                 case '?':
 647                         /* fall through */
 648                 default:
 649                         usage();
 650                 }
 651         }
 652
 653         /* Override thread count if debugger */
 654         if (debug_tweak_on("XFS_SCRUB_THREADS")) {
 655                 unsigned int    x;
 656
 657                 x = cvt_u32(getenv("XFS_SCRUB_THREADS"), 10);
 658                 if (errno) {
 659                         perror("nr_threads");
 660                         usage();
 661                 }
 662                 nr_threads = x;
 663         }
 664
 665         if (optind != argc - 1)
 666                 usage();
 667
 668         ctx.mntpoint = argv[optind];
 669
 670         stdout_isatty = isatty(STDOUT_FILENO);
 671         stderr_isatty = isatty(STDERR_FILENO);
 672
 673         /* If interactive, start the progress bar. */
 674         if (stdout_isatty && !progress_fp)
 675                 progress_fp = fdopen(1, "w+");
 676
 677         if (getenv("SERVICE_MODE"))
 678                 is_service = true;
 679
 680         /* Initialize overall phase stats. */
 681         moveon = phase_start(&all_pi, 0, NULL);
 682         if (!moveon)
 683                 return SCRUB_RET_OPERROR;
 684
 685         /* Find the mount record for the passed-in argument. */
 686         if (stat(argv[optind], &ctx.mnt_sb) < 0) {
 687                 fprintf(stderr,
 688                         _("%s: could not stat: %s: %s\n"),
 689                         progname, argv[optind], strerror(errno));
 690                 ctx.runtime_errors++;
 691                 goto out;
 692         }
 693
 694         /*
 695          * If the user did not specify an explicit mount table, try to use
 696          * /proc/mounts if it is available, else /etc/mtab.  We prefer
 697          * /proc/mounts because it is kernel controlled, while /etc/mtab
 698          * may contain garbage that userspace tools like pam_mounts wrote
 699          * into it.
 700          */
 701         if (!mtab) {
 702                 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
 703                         mtab = _PATH_PROC_MOUNTS;
 704                 else
 705                         mtab = _PATH_MOUNTED;
 706         }
 707
 708         fs_table_initialise(0, NULL, 0, NULL);
 709         fsp = fs_table_lookup_mount(ctx.mntpoint);
 710         if (!fsp) {
 711                 fprintf(stderr, _("%s: Not a XFS mount point.\n"),
 712                                 ctx.mntpoint);
 713                 ret |= SCRUB_RET_SYNTAX;
 714                 goto out;
 715         }
 716         memcpy(&ctx.fsinfo, fsp, sizeof(struct fs_path));
 717
 718         /* How many CPUs? */
 719         nproc = sysconf(_SC_NPROCESSORS_ONLN);
 720         if (nproc < 1)
 721                 nproc = 1;
 722
 723         /* Set up a page-aligned buffer for read verification. */
 724         page_size = sysconf(_SC_PAGESIZE);
 725         if (page_size < 0) {
 726                 str_errno(&ctx, ctx.mntpoint);
 727                 goto out;
 728         }
 729
 730         if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
 731                 ctx.mode = SCRUB_MODE_REPAIR;
 732
 733         /* Scrub a filesystem. */
 734         moveon = run_scrub_phases(&ctx, progress_fp);
 735         if (!moveon && ctx.runtime_errors == 0)
 736                 ctx.runtime_errors++;
 737
 738         /*
 739          * Excessive errors will cause the scrub phases to bail out early.
 740          * We don't want every thread yelling that into the output, so check
 741          * if we hit the threshold and tell the user *once*.
 742          */
 743         if (xfs_scrub_excessive_errors(&ctx))
 744                 str_info(&ctx, ctx.mntpoint, _("Too many errors; aborting."));
 745
 746         if (debug_tweak_on("XFS_SCRUB_FORCE_ERROR"))
 747                 str_error(&ctx, ctx.mntpoint, _("Injecting error."));
 748
 749         /* Clean up scan data. */
 750         moveon = xfs_cleanup_fs(&ctx);
 751         if (!moveon && ctx.runtime_errors == 0)
 752                 ctx.runtime_errors++;
 753
 754 out:
 755         report_modifications(&ctx);
 756         report_outcome(&ctx);
 757
 758         if (ctx.errors_found) {
 759                 if (ctx.error_action == ERRORS_SHUTDOWN)
 760                         xfs_shutdown_fs(&ctx);
 761                 ret |= SCRUB_RET_CORRUPT;
 762         }
 763         if (ctx.warnings_found)
 764                 ret |= SCRUB_RET_UNOPTIMIZED;
 765         if (ctx.runtime_errors)
 766                 ret |= SCRUB_RET_OPERROR;
 767         phase_end(&all_pi, 0);
 768         if (progress_fp)
 769                 fclose(progress_fp);
 770
 771         /*
 772          * If we're being run as a service, the return code must fit the LSB
 773          * init script action error guidelines, which is to say that we
 774          * compress all errors to 1 ("generic or unspecified error", LSB 5.0
 775          * section 22.2) and hope the admin will scan the log for what
 776          * actually happened.
 777          *
 778          * We have to sleep 2 seconds here because journald uses the pid to
 779          * connect our log messages to the systemd service.  This is critical
 780          * for capturing all the log messages if the scrub fails, because the
 781          * fail service uses the service name to gather log messages for the
 782          * error report.
 783          *
 784          * Note: We don't count a lack of kernel support as a service failure
 785          * because we haven't determined that there's anything wrong with the
 786          * filesystem.
 787          */
 788         if (is_service) {
 789                 sleep(2);
 790                 if (!ctx.scrub_setup_succeeded)
 791                         return 0;
 792                 if (ret != SCRUB_RET_SUCCESS)
 793                         return 1;
 794         }
 795
 796         return ret;
 797 }