scrub/xfs_scrub.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Copyright (C) 2018 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  */
   6 #include "xfs.h"
   7 #include <pthread.h>
   8 #include <stdlib.h>
   9 #include <paths.h>
  10 #include <sys/time.h>
  11 #include <sys/resource.h>
  12 #include <sys/statvfs.h>
  13 #include "platform_defs.h"
  14 #include "input.h"
  15 #include "libfrog/paths.h"
  16 #include "xfs_scrub.h"
  17 #include "common.h"
  18 #include "descr.h"
  19 #include "unicrash.h"
  20 #include "progress.h"
  21
  22 /*
  23  * XFS Online Metadata Scrub (and Repair)
  24  *
  25  * The XFS scrubber uses custom XFS ioctls to probe more deeply into the
  26  * internals of the filesystem.  It takes advantage of scrubbing ioctls
  27  * to check all the records stored in a metadata object and to
  28  * cross-reference those records against the other filesystem metadata.
  29  *
  30  * After the program gathers command line arguments to figure out
  31  * exactly what the program is going to do, scrub execution is split up
  32  * into several separate phases:
  33  *
  34  * The "find geometry" phase queries XFS for the filesystem geometry.
  35  * The block devices for the data, realtime, and log devices are opened.
  36  * Kernel ioctls are test-queried to see if they actually work (the scrub
  37  * ioctl in particular), and any other filesystem-specific information
  38  * is gathered.
  39  *
  40  * In the "check internal metadata" phase, we call the metadata scrub
  41  * ioctl to check the filesystem's internal per-AG btrees.  This
  42  * includes the AG superblock, AGF, AGFL, and AGI headers, freespace
  43  * btrees, the regular and free inode btrees, the reverse mapping
  44  * btrees, and the reference counting btrees.  If the realtime device is
  45  * enabled, the realtime bitmap and reverse mapping btrees are checked.
  46  * Quotas, if enabled, are also checked in this phase.
  47  *
  48  * Each AG (and the realtime device) has its metadata checked in a
  49  * separate thread for better performance.  Errors in the internal
  50  * metadata can be fixed here prior to the inode scan; refer to the
  51  * section about the "repair filesystem" phase for more information.
  52  *
  53  * The "scan all inodes" phase uses BULKSTAT to scan all the inodes in
  54  * an AG in disk order.  The BULKSTAT information provides enough
  55  * information to construct a file handle that is used to check the
  56  * following parts of every file:
  57  *
  58  *  - The inode record
  59  *  - All three block forks (data, attr, CoW)
  60  *  - If it's a symlink, the symlink target.
  61  *  - If it's a directory, the directory entries.
  62  *  - All extended attributes
  63  *  - The parent pointer
  64  *
 * Multiple threads are started to check the inodes of each AG in
  66  * parallel.  Errors in file metadata can be fixed here; see the section
  67  * about the "repair filesystem" phase for more information.
  68  *
  69  * Next comes the (configurable) "repair filesystem" phase.  The user
  70  * can instruct this program to fix all problems encountered; to fix
  71  * only optimality problems and leave the corruptions; or not to touch
  72  * the filesystem at all.  Any metadata repairs that did not succeed in
  73  * the previous two phases are retried here; if there are uncorrectable
  74  * errors, xfs_scrub stops here.
  75  *
  76  * To perform the actual repairs (or optimizations), we iterate all the
  77  * items on the per-AG action item list and ask the kernel to repair
  78  * them.  Items which are successfully repaired are removed from the
  79  * list.  If an item is not acted upon successfully (or the kernel asks us
  80  * to try again), we retry the actions until there is nothing left to
  81  * fix or we fail to make forward progress.  In that event, the
  82  * unfinished items are recorded as errors.  If there are no errors at
  83  * this point, we call FSTRIM on the filesystem.
  84  *
  85  * The next phase is the "check directory tree" phase.  In this phase,
  86  * every directory is opened (via file handle) to confirm that each
  87  * directory is connected to the root.  Directory entries are checked
  88  * for ambiguous Unicode normalization mappings, which is to say that we
  89  * look for pairs of entries whose utf-8 strings normalize to the same
  90  * code point sequence and map to different inodes, because that could
  91  * be used to trick a user into opening the wrong file.  The names of
  92  * extended attributes are checked for Unicode normalization collisions.
  93  *
  94  * In the "verify data file integrity" phase, we employ GETFSMAP to read
  95  * the reverse-mappings of all AGs and issue direct-reads of the
  96  * underlying disk blocks.  We rely on the underlying storage to have
  97  * checksummed the data blocks appropriately.  Multiple threads are
  98  * started to check each AG in parallel; a separate thread pool is used
  99  * to handle the direct reads.
 100  *
 101  * In the "check summary counters" phase, use GETFSMAP to tally up the
 102  * blocks and BULKSTAT to tally up the inodes we saw and compare that to
 103  * the statfs output.  This gives the user a rough estimate of how
 104  * thorough the scrub was.
 105  */
 106
 107 /*
 108  * Known debug tweaks (pass -d and set the environment variable):
 109  * XFS_SCRUB_FORCE_ERROR        -- pretend all metadata is corrupt
 110  * XFS_SCRUB_FORCE_REPAIR       -- repair all metadata even if it's ok
 111  * XFS_SCRUB_NO_KERNEL          -- pretend there is no kernel ioctl
 112  * XFS_SCRUB_NO_SCSI_VERIFY     -- disable SCSI VERIFY (if present)
 113  * XFS_SCRUB_PHASE              -- run only this scrub phase
 114  * XFS_SCRUB_THREADS            -- start exactly this number of threads
 115  * XFS_SCRUB_DISK_ERROR_INTERVAL-- simulate a disk error every this many bytes
 116  * XFS_SCRUB_DISK_VERIFY_SKIP   -- pretend disk verify read calls succeeded
 117  *
 118  * Available even in non-debug mode:
 119  * SERVICE_MODE                 -- compress all error codes to 1 for LSB
 120  *                                 service action compliance
 121  */
 122
/*
 * Program name; needed for libfrog error reports.  main() replaces the
 * default with basename(argv[0]).
 */
char                            *progname = "xfs_scrub";

/* Debug level; incremented once for every -d on the command line. */
unsigned int                    debug;

/* Display resource usage at the end of each phase?  Set by -T. */
static bool                     display_rusage;

/*
 * Background mode; higher values insert more pauses between scrub calls.
 * Incremented once for every -b on the command line.
 */
unsigned int                    bg_mode;

/*
 * Number of threads we're allowed to use.  Set to 1 by -b, and overridden
 * by the XFS_SCRUB_THREADS debug tweak when that is enabled.
 */
unsigned int                    force_nr_threads;

/* Verbose output requested?  Set by -v. */
bool                            verbose;

/* Should we scrub the data blocks?  Set by -x; enables the data scan phase. */
static bool                     scrub_data;

/* Size of a memory page, as reported by sysconf(_SC_PAGESIZE) in main(). */
long                            page_size;

/* Should we FSTRIM after a successful run?  Cleared by -k. */
bool                            want_fstrim = true;

/*
 * If stdout/stderr are ttys, we can use richer terminal control.  Both are
 * filled in from isatty() in main().
 */
bool                            stderr_isatty;
bool                            stdout_isatty;

/*
 * If we are running as a service, we need to be careful about what
 * error codes we return to the calling process.  Set in main() when the
 * SERVICE_MODE environment variable is present.
 */
bool                            is_service;

/*
 * Exit status bits.  main() ORs these together, so a single exit code can
 * report several kinds of problem at once.
 */
#define SCRUB_RET_SUCCESS       (0)     /* no problems left behind */
#define SCRUB_RET_CORRUPT       (1)     /* corruption remains on fs */
#define SCRUB_RET_UNOPTIMIZED   (2)     /* fs could be optimized */
#define SCRUB_RET_OPERROR       (4)     /* operational problems */
#define SCRUB_RET_SYNTAX        (8)     /* cmdline args rejected */

 165
 166 static void __attribute__((noreturn))
 167 usage(void)
 168 {
 169         fprintf(stderr, _("Usage: %s [OPTIONS] mountpoint\n"), progname);
 170         fprintf(stderr, "\n");
 171         fprintf(stderr, _("Options:\n"));
 172         fprintf(stderr, _("  -a count     Stop after this many errors are found.\n"));
 173         fprintf(stderr, _("  -b           Background mode.\n"));
 174         fprintf(stderr, _("  -C fd        Print progress information to this fd.\n"));
 175         fprintf(stderr, _("  -e behavior  What to do if errors are found.\n"));
 176         fprintf(stderr, _("  -k           Do not FITRIM the free space.\n"));
 177         fprintf(stderr, _("  -m path      Path to /etc/mtab.\n"));
 178         fprintf(stderr, _("  -n           Dry run.  Do not modify anything.\n"));
 179         fprintf(stderr, _("  -T           Display timing/usage information.\n"));
 180         fprintf(stderr, _("  -v           Verbose output.\n"));
 181         fprintf(stderr, _("  -V           Print version.\n"));
 182         fprintf(stderr, _("  -x           Scrub file data too.\n"));
 183
 184         exit(SCRUB_RET_SYNTAX);
 185 }
 186
 187 #ifndef RUSAGE_BOTH
 188 # define RUSAGE_BOTH            (-2)
 189 #endif
 190
 191 /* Get resource usage for ourselves and all children. */
 192 static int
 193 scrub_getrusage(
 194         struct rusage           *usage)
 195 {
 196         struct rusage           cusage;
 197         int                     err;
 198
 199         err = getrusage(RUSAGE_BOTH, usage);
 200         if (!err)
 201                 return err;
 202
 203         err = getrusage(RUSAGE_SELF, usage);
 204         if (err)
 205                 return err;
 206
 207         err = getrusage(RUSAGE_CHILDREN, &cusage);
 208         if (err)
 209                 return err;
 210
 211         usage->ru_minflt += cusage.ru_minflt;
 212         usage->ru_majflt += cusage.ru_majflt;
 213         usage->ru_nswap += cusage.ru_nswap;
 214         usage->ru_inblock += cusage.ru_inblock;
 215         usage->ru_oublock += cusage.ru_oublock;
 216         usage->ru_msgsnd += cusage.ru_msgsnd;
 217         usage->ru_msgrcv += cusage.ru_msgrcv;
 218         usage->ru_nsignals += cusage.ru_nsignals;
 219         usage->ru_nvcsw += cusage.ru_nvcsw;
 220         usage->ru_nivcsw += cusage.ru_nivcsw;
 221         return 0;
 222 }
 223
 224 /*
 225  * Scrub Phase Dispatch
 226  *
 227  * The operations of the scrub program are split up into several
 228  * different phases.  Each phase builds upon the metadata checked in the
 229  * previous phase, which is to say that we may skip phase (X + 1) if our
 230  * scans in phase (X) reveal corruption.  A phase may be skipped
 231  * entirely.
 232  */
 233
/*
 * Resource usage for each phase.  phase_start() zeroes the structure and
 * records the baseline; phase_end() reports the difference between the
 * baseline and the values it samples at that time.
 */
struct phase_rusage {
        /* Resource usage sampled by scrub_getrusage() when the phase began. */
        struct rusage           ruse;
        /* Wall clock time (gettimeofday) when the phase began. */
        struct timeval          time;
        /* Zeroed by phase_start(); not otherwise referenced in the code visible here. */
        unsigned long long      verified_bytes;
        /* Program break (sbrk(0)) when the phase began. */
        void                    *brk_start;
        /* Description handed to phase_start(); may be NULL. */
        const char              *descr;
};
 242
/*
 * Operations for each phase.
 *
 * The two *_DUMMY_FN values are placeholder function pointers.
 * run_scrub_phases() swaps them for the real phase function when the
 * corresponding phase is enabled, and skips any phase whose fn is still a
 * placeholder.
 */
#define DATASCAN_DUMMY_FN       ((void *)1)
#define REPAIR_DUMMY_FN         ((void *)2)
struct phase_ops {
        /* Description passed to phase_start() for this phase. */
        char            *descr;
        /* Phase body; a nonzero return aborts the scrub. */
        int             (*fn)(struct scrub_ctx *ctx);
        /*
         * Optional.  When set, it is called before fn, and the item count,
         * thread count and rshift it reports are handed to
         * progress_init_phase().
         */
        int             (*estimate_work)(struct scrub_ctx *ctx, uint64_t *items,
                                         unsigned int *threads, int *rshift);
        /* Run even if the XFS_SCRUB_PHASE debug tweak selects another phase. */
        bool            must_run;
};
 253
 254 /* Start tracking resource usage for a phase. */
 255 static int
 256 phase_start(
 257         struct phase_rusage     *pi,
 258         unsigned int            phase,
 259         const char              *descr)
 260 {
 261         int                     error;
 262
 263         memset(pi, 0, sizeof(*pi));
 264         error = scrub_getrusage(&pi->ruse);
 265         if (error) {
 266                 perror(_("getrusage"));
 267                 return error;
 268         }
 269         pi->brk_start = sbrk(0);
 270
 271         error = gettimeofday(&pi->time, NULL);
 272         if (error) {
 273                 perror(_("gettimeofday"));
 274                 return error;
 275         }
 276
 277         pi->descr = descr;
 278         if ((verbose || display_rusage) && descr) {
 279                 fprintf(stdout, _("Phase %u: %s\n"), phase, descr);
 280                 fflush(stdout);
 281         }
 282         return error;
 283 }
 284
 285 static inline unsigned long long
 286 kbytes(unsigned long long x)
 287 {
 288         return (x + 1023) / 1024;
 289 }
 290
 291 static void
 292 report_mem_usage(
 293         const char                      *phase,
 294         const struct phase_rusage       *pi)
 295 {
 296 #if defined(HAVE_MALLINFO2) || defined(HAVE_MALLINFO)
 297 # ifdef HAVE_MALLINFO2
 298         struct mallinfo2                mall_now = mallinfo2();
 299 # else
 300         struct mallinfo                 mall_now = mallinfo();
 301 # endif
 302         fprintf(stdout, _("%sMemory used: %lluk/%lluk (%lluk/%lluk), "),
 303                 phase,
 304                 kbytes(mall_now.arena), kbytes(mall_now.hblkhd),
 305                 kbytes(mall_now.uordblks), kbytes(mall_now.fordblks));
 306 #else
 307         fprintf(stdout, _("%sMemory used: %lluk, "),
 308                 phase,
 309                 kbytes(((char *) sbrk(0)) - ((char *) pi->brk_start)));
 310 #endif
 311 }
 312
 313 /* Report usage stats. */
 314 static int
 315 phase_end(
 316         struct phase_rusage     *pi,
 317         unsigned int            phase)
 318 {
 319         struct rusage           ruse_now;
 320         struct timeval          time_now;
 321         char                    phasebuf[DESCR_BUFSZ];
 322         double                  dt;
 323         unsigned long long      in, out;
 324         unsigned long long      io;
 325         double                  i, o, t;
 326         double                  din, dout, dtot;
 327         char                    *iu, *ou, *tu, *dinu, *doutu, *dtotu;
 328         int                     error;
 329
 330         if (!display_rusage)
 331                 return 0;
 332
 333         error = gettimeofday(&time_now, NULL);
 334         if (error) {
 335                 perror(_("gettimeofday"));
 336                 return error;
 337         }
 338         dt = timeval_subtract(&time_now, &pi->time);
 339
 340         error = scrub_getrusage(&ruse_now);
 341         if (error) {
 342                 perror(_("getrusage"));
 343                 return error;
 344         }
 345
 346         if (phase)
 347                 snprintf(phasebuf, DESCR_BUFSZ, _("Phase %u: "), phase);
 348         else
 349                 phasebuf[0] = 0;
 350
 351         report_mem_usage(phasebuf, pi);
 352
 353         fprintf(stdout, _("time: %5.2f/%5.2f/%5.2fs\n"),
 354                 timeval_subtract(&time_now, &pi->time),
 355                 timeval_subtract(&ruse_now.ru_utime, &pi->ruse.ru_utime),
 356                 timeval_subtract(&ruse_now.ru_stime, &pi->ruse.ru_stime));
 357
 358         /* I/O usage */
 359         in =  ((unsigned long long)ruse_now.ru_inblock -
 360                         pi->ruse.ru_inblock) << BBSHIFT;
 361         out = ((unsigned long long)ruse_now.ru_oublock -
 362                         pi->ruse.ru_oublock) << BBSHIFT;
 363         io = in + out;
 364         if (io) {
 365                 i = auto_space_units(in, &iu);
 366                 o = auto_space_units(out, &ou);
 367                 t = auto_space_units(io, &tu);
 368                 din = auto_space_units(in / dt, &dinu);
 369                 dout = auto_space_units(out / dt, &doutu);
 370                 dtot = auto_space_units(io / dt, &dtotu);
 371                 fprintf(stdout,
 372 _("%sI/O: %.1f%s in, %.1f%s out, %.1f%s tot\n"),
 373                         phasebuf, i, iu, o, ou, t, tu);
 374                 fprintf(stdout,
 375 _("%sI/O rate: %.1f%s/s in, %.1f%s/s out, %.1f%s/s tot\n"),
 376                         phasebuf, din, dinu, dout, doutu, dtot, dtotu);
 377         }
 378         fflush(stdout);
 379
 380         return 0;
 381 }
 382
 383 /* Run all the phases of the scrubber. */
 384 static bool
 385 run_scrub_phases(
 386         struct scrub_ctx        *ctx,
 387         FILE                    *progress_fp)
 388 {
 389         struct phase_ops phases[] =
 390         {
 391                 {
 392                         .descr = _("Find filesystem geometry."),
 393                         .fn = phase1_func,
 394                         .must_run = true,
 395                 },
 396                 {
 397                         .descr = _("Check internal metadata."),
 398                         .fn = phase2_func,
 399                         .estimate_work = phase2_estimate,
 400                 },
 401                 {
 402                         .descr = _("Scan all inodes."),
 403                         .fn = phase3_func,
 404                         .estimate_work = phase3_estimate,
 405                 },
 406                 {
 407                         .descr = _("Defer filesystem repairs."),
 408                         .fn = REPAIR_DUMMY_FN,
 409                         .estimate_work = phase4_estimate,
 410                 },
 411                 {
 412                         .descr = _("Check directory tree."),
 413                         .fn = phase5_func,
 414                         .estimate_work = phase5_estimate,
 415                 },
 416                 {
 417                         .descr = _("Verify data file integrity."),
 418                         .fn = DATASCAN_DUMMY_FN,
 419                         .estimate_work = phase6_estimate,
 420                 },
 421                 {
 422                         .descr = _("Check summary counters."),
 423                         .fn = phase7_func,
 424                         .must_run = true,
 425                 },
 426                 {
 427                         NULL
 428                 },
 429         };
 430         struct phase_rusage     pi;
 431         struct phase_ops        *sp;
 432         uint64_t                max_work;
 433         unsigned int            debug_phase = 0;
 434         unsigned int            phase;
 435         int                     rshift;
 436         int                     ret = 0;
 437
 438         if (debug_tweak_on("XFS_SCRUB_PHASE"))
 439                 debug_phase = atoi(getenv("XFS_SCRUB_PHASE"));
 440
 441         /* Run all phases of the scrub tool. */
 442         for (phase = 1, sp = phases; sp->fn; sp++, phase++) {
 443                 /* Turn on certain phases if user said to. */
 444                 if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) {
 445                         sp->fn = phase6_func;
 446                 } else if (sp->fn == REPAIR_DUMMY_FN &&
 447                            ctx->mode == SCRUB_MODE_REPAIR) {
 448                         sp->descr = _("Repair filesystem.");
 449                         sp->fn = phase4_func;
 450                         sp->must_run = true;
 451                 }
 452
 453                 /* Skip certain phases unless they're turned on. */
 454                 if (sp->fn == REPAIR_DUMMY_FN ||
 455                     sp->fn == DATASCAN_DUMMY_FN)
 456                         continue;
 457
 458                 /* Allow debug users to force a particular phase. */
 459                 if (debug_phase && phase != debug_phase && !sp->must_run)
 460                         continue;
 461
 462                 /* Run this phase. */
 463                 ret = phase_start(&pi, phase, sp->descr);
 464                 if (ret)
 465                         break;
 466                 if (sp->estimate_work) {
 467                         unsigned int            work_threads;
 468
 469                         ret = sp->estimate_work(ctx, &max_work,
 470                                         &work_threads, &rshift);
 471                         if (ret)
 472                                 break;
 473
 474                         /*
 475                          * The thread that starts the worker threads is also
 476                          * allowed to contribute to the progress counters and
 477                          * whatever other per-thread data we need to allocate.
 478                          */
 479                         work_threads++;
 480                         ret = progress_init_phase(ctx, progress_fp, phase,
 481                                         max_work, rshift, work_threads);
 482                         if (ret)
 483                                 break;
 484                         ret = descr_init_phase(ctx, work_threads);
 485                 } else {
 486                         ret = progress_init_phase(ctx, NULL, phase, 0, 0, 0);
 487                         if (ret)
 488                                 break;
 489                         ret = descr_init_phase(ctx, 1);
 490                 }
 491                 if (ret)
 492                         break;
 493                 ret = sp->fn(ctx);
 494                 if (ret) {
 495                         str_info(ctx, ctx->mntpoint,
 496 _("Scrub aborted after phase %d."),
 497                                         phase);
 498                         break;
 499                 }
 500                 progress_end_phase();
 501                 descr_end_phase();
 502                 ret = phase_end(&pi, phase);
 503                 if (ret)
 504                         break;
 505
 506                 /* Too many errors? */
 507                 if (scrub_excessive_errors(ctx)) {
 508                         ret = ECANCELED;
 509                         break;
 510                 }
 511         }
 512
 513         return ret;
 514 }
 515
 516 static void
 517 report_modifications(
 518         struct scrub_ctx        *ctx)
 519 {
 520         if (ctx->repairs == 0 && ctx->preens == 0)
 521                 return;
 522
 523         if (ctx->repairs && ctx->preens)
 524                 fprintf(stdout,
 525 _("%s: repairs made: %llu; optimizations made: %llu.\n"),
 526                                 ctx->mntpoint, ctx->repairs, ctx->preens);
 527         else if (ctx->preens == 0)
 528                 fprintf(stdout,
 529 _("%s: repairs made: %llu.\n"),
 530                                 ctx->mntpoint, ctx->repairs);
 531         else if (ctx->repairs == 0)
 532                 fprintf(stdout,
 533 _("%s: optimizations made: %llu.\n"),
 534                                 ctx->mntpoint, ctx->preens);
 535 }
 536
 537 static void
 538 report_outcome(
 539         struct scrub_ctx        *ctx)
 540 {
 541         unsigned long long      actionable_errors;
 542
 543         actionable_errors = ctx->corruptions_found + ctx->runtime_errors;
 544
 545         if (actionable_errors == 0 &&
 546             ctx->unfixable_errors == 0 &&
 547             ctx->warnings_found == 0) {
 548                 log_info(ctx, _("No problems found."));
 549                 return;
 550         }
 551
 552         if (ctx->unfixable_errors) {
 553                 fprintf(stderr, _("%s: unfixable errors found: %llu\n"),
 554                                 ctx->mntpoint, ctx->unfixable_errors);
 555                 log_err(ctx, _("unfixable errors found: %llu"),
 556                                 ctx->unfixable_errors);
 557         }
 558
 559         if (ctx->corruptions_found > 0) {
 560                 fprintf(stderr, _("%s: corruptions found: %llu\n"),
 561                                 ctx->mntpoint, ctx->corruptions_found);
 562                 log_err(ctx, _("corruptions found: %llu"),
 563                                 ctx->corruptions_found);
 564         }
 565
 566         if (ctx->runtime_errors > 0) {
 567                 fprintf(stderr, _("%s: operational errors found: %llu\n"),
 568                                 ctx->mntpoint, ctx->runtime_errors);
 569                 log_err(ctx, _("operational errors found: %llu"),
 570                                 ctx->runtime_errors);
 571         }
 572
 573         if (ctx->warnings_found > 0) {
 574                 fprintf(stderr, _("%s: warnings found: %llu\n"), ctx->mntpoint,
 575                                 ctx->warnings_found);
 576                 log_warn(ctx, _("warnings found: %llu"), ctx->warnings_found);
 577         }
 578
 579         /*
 580          * Don't advise the user to run repair unless we were successful in
 581          * setting up the scrub and we actually saw corruptions.  Warnings
 582          * are not corruptions.
 583          */
 584         if (ctx->scrub_setup_succeeded && actionable_errors > 0) {
 585                 char            *msg;
 586
 587                 if (ctx->mode == SCRUB_MODE_DRY_RUN)
 588                         msg = _("%s: Re-run xfs_scrub without -n.\n");
 589                 else
 590                         msg = _("%s: Unmount and run xfs_repair.\n");
 591
 592                 fprintf(stderr, msg, ctx->mntpoint);
 593         }
 594 }
 595
 596 /* Compile-time features discoverable via version strings */
 597 #ifdef HAVE_LIBICU
 598 # define XFS_SCRUB_HAVE_UNICODE "+"
 599 #else
 600 # define XFS_SCRUB_HAVE_UNICODE "-"
 601 #endif
 602
 603 int
 604 main(
 605         int                     argc,
 606         char                    **argv)
 607 {
 608         struct scrub_ctx        ctx = {0};
 609         struct phase_rusage     all_pi;
 610         char                    *mtab = NULL;
 611         FILE                    *progress_fp = NULL;
 612         struct fs_path          *fsp;
 613         int                     vflag = 0;
 614         int                     c;
 615         int                     fd;
 616         int                     ret = SCRUB_RET_SUCCESS;
 617         int                     error;
 618
 619         fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n");
 620
 621         progname = basename(argv[0]);
 622         setlocale(LC_ALL, "");
 623         bindtextdomain(PACKAGE, LOCALEDIR);
 624         textdomain(PACKAGE);
 625         if (unicrash_load()) {
 626                 fprintf(stderr,
 627         _("%s: couldn't initialize Unicode library.\n"),
 628                                 progname);
 629                 goto out;
 630         }
 631
 632         pthread_mutex_init(&ctx.lock, NULL);
 633         ctx.mode = SCRUB_MODE_REPAIR;
 634         ctx.error_action = ERRORS_CONTINUE;
 635         while ((c = getopt(argc, argv, "a:bC:de:km:nTvxV")) != EOF) {
 636                 switch (c) {
 637                 case 'a':
 638                         ctx.max_errors = cvt_u64(optarg, 10);
 639                         if (errno) {
 640                                 perror(optarg);
 641                                 usage();
 642                         }
 643                         break;
 644                 case 'b':
 645                         force_nr_threads = 1;
 646                         bg_mode++;
 647                         break;
 648                 case 'C':
 649                         errno = 0;
 650                         fd = cvt_u32(optarg, 10);
 651                         if (errno) {
 652                                 perror(optarg);
 653                                 usage();
 654                         }
 655                         progress_fp = fdopen(fd, "w");
 656                         if (!progress_fp) {
 657                                 perror(optarg);
 658                                 usage();
 659                         }
 660                         break;
 661                 case 'd':
 662                         debug++;
 663                         break;
 664                 case 'e':
 665                         if (!strcmp("continue", optarg))
 666                                 ctx.error_action = ERRORS_CONTINUE;
 667                         else if (!strcmp("shutdown", optarg))
 668                                 ctx.error_action = ERRORS_SHUTDOWN;
 669                         else {
 670                                 fprintf(stderr,
 671         _("Unknown error behavior \"%s\".\n"),
 672                                                 optarg);
 673                                 usage();
 674                         }
 675                         break;
 676                 case 'k':
 677                         want_fstrim = false;
 678                         break;
 679                 case 'm':
 680                         mtab = optarg;
 681                         break;
 682                 case 'n':
 683                         ctx.mode = SCRUB_MODE_DRY_RUN;
 684                         break;
 685                 case 'T':
 686                         display_rusage = true;
 687                         break;
 688                 case 'v':
 689                         verbose = true;
 690                         break;
 691                 case 'V':
 692                         vflag++;
 693                         break;
 694                 case 'x':
 695                         scrub_data = true;
 696                         break;
 697                 default:
 698                         usage();
 699                 }
 700         }
 701
 702         if (vflag) {
 703                 if (vflag == 1)
 704                         fprintf(stdout, _("%s version %s\n"),
 705                                         progname, VERSION);
 706                 else
 707                         fprintf(stdout, _("%s version %s %sUnicode\n"),
 708                                         progname, VERSION,
 709                                         XFS_SCRUB_HAVE_UNICODE);
 710                 fflush(stdout);
 711                 return SCRUB_RET_SUCCESS;
 712         }
 713
 714         /* Override thread count if debugger */
 715         if (debug_tweak_on("XFS_SCRUB_THREADS")) {
 716                 unsigned int    x;
 717
 718                 x = cvt_u32(getenv("XFS_SCRUB_THREADS"), 10);
 719                 if (errno) {
 720                         perror("nr_threads");
 721                         usage();
 722                 }
 723                 force_nr_threads = x;
 724         }
 725
 726         if (optind != argc - 1)
 727                 usage();
 728
 729         ctx.mntpoint = argv[optind];
 730
 731         stdout_isatty = isatty(STDOUT_FILENO);
 732         stderr_isatty = isatty(STDERR_FILENO);
 733
 734         /* If interactive, start the progress bar. */
 735         if (stdout_isatty && !progress_fp)
 736                 progress_fp = fdopen(1, "w+");
 737
 738         if (getenv("SERVICE_MODE"))
 739                 is_service = true;
 740
 741         /* Initialize overall phase stats. */
 742         error = phase_start(&all_pi, 0, NULL);
 743         if (error)
 744                 return SCRUB_RET_OPERROR;
 745
 746         /* Find the mount record for the passed-in argument. */
 747         if (stat(argv[optind], &ctx.mnt_sb) < 0) {
 748                 fprintf(stderr,
 749                         _("%s: could not stat: %s: %s\n"),
 750                         progname, argv[optind], strerror(errno));
 751                 ctx.runtime_errors++;
 752                 goto out;
 753         }
 754
 755         /*
 756          * If the user did not specify an explicit mount table, try to use
 757          * /proc/mounts if it is available, else /etc/mtab.  We prefer
 758          * /proc/mounts because it is kernel controlled, while /etc/mtab
 759          * may contain garbage that userspace tools like pam_mounts wrote
 760          * into it.
 761          */
 762         if (!mtab) {
 763                 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
 764                         mtab = _PATH_PROC_MOUNTS;
 765                 else
 766                         mtab = _PATH_MOUNTED;
 767         }
 768
 769         fs_table_initialise(0, NULL, 0, NULL);
 770         fsp = fs_table_lookup_mount(ctx.mntpoint);
 771         if (!fsp) {
 772                 fprintf(stderr, _("%s: Not a XFS mount point.\n"),
 773                                 ctx.mntpoint);
 774                 ret |= SCRUB_RET_SYNTAX;
 775                 goto out;
 776         }
 777         memcpy(&ctx.fsinfo, fsp, sizeof(struct fs_path));
 778
 779         /* Set up a page-aligned buffer for read verification. */
 780         page_size = sysconf(_SC_PAGESIZE);
 781         if (page_size < 0) {
 782                 str_errno(&ctx, ctx.mntpoint);
 783                 goto out;
 784         }
 785
 786         if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
 787                 ctx.mode = SCRUB_MODE_REPAIR;
 788
 789         /* Scrub a filesystem. */
 790         error = run_scrub_phases(&ctx, progress_fp);
 791         if (error && ctx.runtime_errors == 0)
 792                 ctx.runtime_errors++;
 793
 794         /*
 795          * Excessive errors will cause the scrub phases to bail out early.
 796          * We don't want every thread yelling that into the output, so check
 797          * if we hit the threshold and tell the user *once*.
 798          */
 799         if (scrub_excessive_errors(&ctx))
 800                 str_info(&ctx, ctx.mntpoint, _("Too many errors; aborting."));
 801
 802         if (debug_tweak_on("XFS_SCRUB_FORCE_ERROR"))
 803                 str_info(&ctx, ctx.mntpoint, _("Injecting error."));
 804
 805         /* Clean up scan data. */
 806         error = scrub_cleanup(&ctx);
 807         if (error && ctx.runtime_errors == 0)
 808                 ctx.runtime_errors++;
 809
 810 out:
 811         report_modifications(&ctx);
 812         report_outcome(&ctx);
 813
 814         if (ctx.corruptions_found) {
 815                 if (ctx.error_action == ERRORS_SHUTDOWN)
 816                         xfs_shutdown_fs(&ctx);
 817                 ret |= SCRUB_RET_CORRUPT;
 818         }
 819         if (ctx.warnings_found)
 820                 ret |= SCRUB_RET_UNOPTIMIZED;
 821         if (ctx.runtime_errors)
 822                 ret |= SCRUB_RET_OPERROR;
 823         phase_end(&all_pi, 0);
 824         if (progress_fp)
 825                 fclose(progress_fp);
 826         unicrash_unload();
 827
 828         /*
 829          * If we're being run as a service, the return code must fit the LSB
 830          * init script action error guidelines, which is to say that we
 831          * compress all errors to 1 ("generic or unspecified error", LSB 5.0
 832          * section 22.2) and hope the admin will scan the log for what
 833          * actually happened.
 834          *
 835          * We have to sleep 2 seconds here because journald uses the pid to
 836          * connect our log messages to the systemd service.  This is critical
 837          * for capturing all the log messages if the scrub fails, because the
 838          * fail service uses the service name to gather log messages for the
 839          * error report.
 840          *
 841          * Note: We don't count a lack of kernel support as a service failure
 842          * because we haven't determined that there's anything wrong with the
 843          * filesystem.
 844          */
 845         if (is_service) {
 846                 sleep(2);
 847                 if (!ctx.scrub_setup_succeeded)
 848                         return 0;
 849                 if (ret != SCRUB_RET_SUCCESS)
 850                         return 1;
 851         }
 852
 853         return ret;
 854 }