]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/xfs_scrub.c
7ee02b669f1426335f1d81f83b279dbdd927cf99
[thirdparty/xfsprogs-dev.git] / scrub / xfs_scrub.c
1 /*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20 #include <stdio.h>
21 #include <pthread.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/statvfs.h>
27 #include "platform_defs.h"
28 #include "xfs.h"
29 #include "xfs_fs.h"
30 #include "input.h"
31 #include "path.h"
32 #include "xfs_scrub.h"
33 #include "common.h"
34 #include "unicrash.h"
35 #include "progress.h"
36
37 /*
38 * XFS Online Metadata Scrub (and Repair)
39 *
40 * The XFS scrubber uses custom XFS ioctls to probe more deeply into the
41 * internals of the filesystem. It takes advantage of scrubbing ioctls
42 * to check all the records stored in a metadata object and to
43 * cross-reference those records against the other filesystem metadata.
44 *
45 * After the program gathers command line arguments to figure out
46 * exactly what the program is going to do, scrub execution is split up
47 * into several separate phases:
48 *
49 * The "find geometry" phase queries XFS for the filesystem geometry.
50 * The block devices for the data, realtime, and log devices are opened.
51 * Kernel ioctls are test-queried to see if they actually work (the scrub
52 * ioctl in particular), and any other filesystem-specific information
53 * is gathered.
54 *
55 * In the "check internal metadata" phase, we call the metadata scrub
56 * ioctl to check the filesystem's internal per-AG btrees. This
57 * includes the AG superblock, AGF, AGFL, and AGI headers, freespace
58 * btrees, the regular and free inode btrees, the reverse mapping
59 * btrees, and the reference counting btrees. If the realtime device is
60 * enabled, the realtime bitmap and reverse mapping btrees are checked.
61 * Quotas, if enabled, are also checked in this phase.
62 *
63 * Each AG (and the realtime device) has its metadata checked in a
64 * separate thread for better performance. Errors in the internal
65 * metadata can be fixed here prior to the inode scan; refer to the
66 * section about the "repair filesystem" phase for more information.
67 *
68 * The "scan all inodes" phase uses BULKSTAT to scan all the inodes in
69 * an AG in disk order. The BULKSTAT information provides enough
70 * information to construct a file handle that is used to check the
71 * following parts of every file:
72 *
73 * - The inode record
74 * - All three block forks (data, attr, CoW)
75 * - If it's a symlink, the symlink target.
76 * - If it's a directory, the directory entries.
77 * - All extended attributes
78 * - The parent pointer
79 *
80 * Multiple threads are started to check the inodes of each AG in
81 * parallel. Errors in file metadata can be fixed here; see the section
82 * about the "repair filesystem" phase for more information.
83 *
84 * Next comes the (configurable) "repair filesystem" phase. The user
85 * can instruct this program to fix all problems encountered; to fix
86 * only optimality problems and leave the corruptions; or not to touch
87 * the filesystem at all. Any metadata repairs that did not succeed in
88 * the previous two phases are retried here; if there are uncorrectable
89 * errors, xfs_scrub stops here.
90 *
91 * The next phase is the "check directory tree" phase. In this phase,
92 * every directory is opened (via file handle) to confirm that each
93 * directory is connected to the root. Directory entries are checked
94 * for ambiguous Unicode normalization mappings, which is to say that we
95 * look for pairs of entries whose utf-8 strings normalize to the same
96 * code point sequence and map to different inodes, because that could
97 * be used to trick a user into opening the wrong file. The names of
98 * extended attributes are checked for Unicode normalization collisions.
99 *
100 * In the "verify data file integrity" phase, we employ GETFSMAP to read
101 * the reverse-mappings of all AGs and issue direct-reads of the
102 * underlying disk blocks. We rely on the underlying storage to have
103 * checksummed the data blocks appropriately. Multiple threads are
104 * started to check each AG in parallel; a separate thread pool is used
105 * to handle the direct reads.
106 *
107 * In the "check summary counters" phase, use GETFSMAP to tally up the
108 * blocks and BULKSTAT to tally up the inodes we saw and compare that to
109 * the statfs output. This gives the user a rough estimate of how
110 * thorough the scrub was.
111 */
112
113 /*
114 * Known debug tweaks (pass -d and set the environment variable):
115 * XFS_SCRUB_FORCE_ERROR -- pretend all metadata is corrupt
116 * XFS_SCRUB_FORCE_REPAIR -- repair all metadata even if it's ok
117 * XFS_SCRUB_NO_KERNEL -- pretend there is no kernel ioctl
118 * XFS_SCRUB_NO_SCSI_VERIFY -- disable SCSI VERIFY (if present)
119 * XFS_SCRUB_PHASE -- run only this scrub phase
120 * XFS_SCRUB_THREADS -- start exactly this number of threads
121 *
122 * Available even in non-debug mode:
123 * SERVICE_MODE -- compress all error codes to 1 for LSB
124 * service action compliance
125 */
126
/* Name this program reports itself as; libfrog error reports need it. */
char				*progname = "xfs_scrub";

/* Debug level, bumped once per -d; larger means more verbose. */
unsigned int			debug;

/* Set by -T: print resource usage when each phase finishes. */
static bool			display_rusage;

/*
 * Background mode, bumped once per -b; larger values insert more pauses
 * between scrub calls.
 */
unsigned int			bg_mode;

/* Processor count; main() fills this in from sysconf(). */
int				nproc;

/* How many threads we may use (-b or XFS_SCRUB_THREADS set this). */
unsigned int			nr_threads;

/* Set by -v: print more information. */
bool				verbose;

/* Set by -x: also scrub file data blocks. */
static bool			scrub_data;

/* Memory page size; main() fills this in from sysconf(). */
long				page_size;

/* FSTRIM after a successful run?  Cleared by -k. */
bool				want_fstrim = true;

/* Whether stderr/stdout are ttys, allowing richer terminal control. */
bool				stderr_isatty;
bool				stdout_isatty;

/*
 * True when SERVICE_MODE is set in the environment.  Services must be
 * careful about which exit codes they hand back to the caller.
 */
bool				is_service;
166
/* Exit status bits; main() ORs these together into its return value. */
#define SCRUB_RET_SUCCESS	(0)		/* no problems left behind */
#define SCRUB_RET_CORRUPT	(1 << 0)	/* corruption remains on fs */
#define SCRUB_RET_UNOPTIMIZED	(1 << 1)	/* fs could be optimized */
#define SCRUB_RET_OPERROR	(1 << 2)	/* operational problems */
#define SCRUB_RET_SYNTAX	(1 << 3)	/* cmdline args rejected */
172
173 static void __attribute__((noreturn))
174 usage(void)
175 {
176 fprintf(stderr, _("Usage: %s [OPTIONS] mountpoint | device\n"), progname);
177 fprintf(stderr, "\n");
178 fprintf(stderr, _("Options:\n"));
179 fprintf(stderr, _(" -a count Stop after this many errors are found.\n"));
180 fprintf(stderr, _(" -b Background mode.\n"));
181 fprintf(stderr, _(" -C fd Print progress information to this fd.\n"));
182 fprintf(stderr, _(" -e behavior What to do if errors are found.\n"));
183 fprintf(stderr, _(" -k Do not FITRIM the free space.\n"));
184 fprintf(stderr, _(" -m path Path to /etc/mtab.\n"));
185 fprintf(stderr, _(" -n Dry run. Do not modify anything.\n"));
186 fprintf(stderr, _(" -T Display timing/usage information.\n"));
187 fprintf(stderr, _(" -v Verbose output.\n"));
188 fprintf(stderr, _(" -V Print version.\n"));
189 fprintf(stderr, _(" -x Scrub file data too.\n"));
190
191 exit(SCRUB_RET_SYNTAX);
192 }
193
#ifndef RUSAGE_BOTH
# define RUSAGE_BOTH		(-2)
#endif

/*
 * Collect resource usage for this process and its children.
 *
 * A single RUSAGE_BOTH query is tried first.  If that call fails we
 * query RUSAGE_SELF into @usage and then add the RUSAGE_CHILDREN event
 * counters on top of it.  Only the counters listed below are summed;
 * every other field (including ru_utime and ru_stime) keeps its
 * RUSAGE_SELF value.
 *
 * Returns 0 on success, or the nonzero getrusage() return value.
 */
static int
scrub_getrusage(
	struct rusage		*usage)
{
	struct rusage		kids;
	int			ret;

	if (getrusage(RUSAGE_BOTH, usage) == 0)
		return 0;

	ret = getrusage(RUSAGE_SELF, usage);
	if (ret != 0)
		return ret;

	ret = getrusage(RUSAGE_CHILDREN, &kids);
	if (ret != 0)
		return ret;

	/* Page faults and swaps. */
	usage->ru_minflt += kids.ru_minflt;
	usage->ru_majflt += kids.ru_majflt;
	usage->ru_nswap += kids.ru_nswap;

	/* Block I/O. */
	usage->ru_inblock += kids.ru_inblock;
	usage->ru_oublock += kids.ru_oublock;

	/* Messages and signals. */
	usage->ru_msgsnd += kids.ru_msgsnd;
	usage->ru_msgrcv += kids.ru_msgrcv;
	usage->ru_nsignals += kids.ru_nsignals;

	/* Context switches. */
	usage->ru_nvcsw += kids.ru_nvcsw;
	usage->ru_nivcsw += kids.ru_nivcsw;

	return 0;
}
230
231 /*
232 * Scrub Phase Dispatch
233 *
234 * The operations of the scrub program are split up into several
235 * different phases. Each phase builds upon the metadata checked in the
236 * previous phase, which is to say that we may skip phase (X + 1) if our
237 * scans in phase (X) reveal corruption. A phase may be skipped
238 * entirely.
239 */
240
/*
 * Snapshot taken by phase_start(); phase_end() compares against it to
 * report what a phase consumed.
 */
struct phase_rusage {
	struct rusage		ruse;		/* counters at phase start */
	struct timeval		time;		/* wall clock at phase start */
	unsigned long long	verified_bytes;
	void			*brk_start;	/* sbrk(0) at phase start */
	const char		*descr;		/* phase description, or NULL */
};
249
/*
 * Per-phase operations.
 *
 * The two *_DUMMY_FN values are placeholder ->fn markers for optional
 * phases; run_scrub_phases() either swaps in the real function or
 * skips the phase.
 */
#define DATASCAN_DUMMY_FN	((void *)1)
#define REPAIR_DUMMY_FN		((void *)2)
struct phase_ops {
	/* Description printed when the phase starts. */
	char		*descr;

	/* Runs the phase; a false return aborts the scrub. */
	bool		(*fn)(struct scrub_ctx *);

	/* Optional: estimates the work so progress can be reported. */
	bool		(*estimate_work)(struct scrub_ctx *, uint64_t *,
					 unsigned int *, int *);

	/* Run even when XFS_SCRUB_PHASE selects a different phase. */
	bool		must_run;
};
260
261 /* Start tracking resource usage for a phase. */
262 static bool
263 phase_start(
264 struct phase_rusage *pi,
265 unsigned int phase,
266 const char *descr)
267 {
268 int error;
269
270 memset(pi, 0, sizeof(*pi));
271 error = scrub_getrusage(&pi->ruse);
272 if (error) {
273 perror(_("getrusage"));
274 return false;
275 }
276 pi->brk_start = sbrk(0);
277
278 error = gettimeofday(&pi->time, NULL);
279 if (error) {
280 perror(_("gettimeofday"));
281 return false;
282 }
283
284 pi->descr = descr;
285 if ((verbose || display_rusage) && descr) {
286 fprintf(stdout, _("Phase %u: %s\n"), phase, descr);
287 fflush(stdout);
288 }
289 return true;
290 }
291
292 /* Report usage stats. */
293 static bool
294 phase_end(
295 struct phase_rusage *pi,
296 unsigned int phase)
297 {
298 struct rusage ruse_now;
299 #ifdef HAVE_MALLINFO
300 struct mallinfo mall_now;
301 #endif
302 struct timeval time_now;
303 char phasebuf[DESCR_BUFSZ];
304 double dt;
305 unsigned long long in, out;
306 unsigned long long io;
307 double i, o, t;
308 double din, dout, dtot;
309 char *iu, *ou, *tu, *dinu, *doutu, *dtotu;
310 int error;
311
312 if (!display_rusage)
313 return true;
314
315 error = gettimeofday(&time_now, NULL);
316 if (error) {
317 perror(_("gettimeofday"));
318 return false;
319 }
320 dt = timeval_subtract(&time_now, &pi->time);
321
322 error = scrub_getrusage(&ruse_now);
323 if (error) {
324 perror(_("getrusage"));
325 return false;
326 }
327
328 if (phase)
329 snprintf(phasebuf, DESCR_BUFSZ, _("Phase %u: "), phase);
330 else
331 phasebuf[0] = 0;
332
333 #define kbytes(x) (((unsigned long)(x) + 1023) / 1024)
334 #ifdef HAVE_MALLINFO
335
336 mall_now = mallinfo();
337 fprintf(stdout, _("%sMemory used: %luk/%luk (%luk/%luk), "),
338 phasebuf,
339 kbytes(mall_now.arena), kbytes(mall_now.hblkhd),
340 kbytes(mall_now.uordblks), kbytes(mall_now.fordblks));
341 #else
342 fprintf(stdout, _("%sMemory used: %luk, "),
343 phasebuf,
344 (unsigned long) kbytes(((char *) sbrk(0)) -
345 ((char *) pi->brk_start)));
346 #endif
347 #undef kbytes
348
349 fprintf(stdout, _("time: %5.2f/%5.2f/%5.2fs\n"),
350 timeval_subtract(&time_now, &pi->time),
351 timeval_subtract(&ruse_now.ru_utime, &pi->ruse.ru_utime),
352 timeval_subtract(&ruse_now.ru_stime, &pi->ruse.ru_stime));
353
354 /* I/O usage */
355 in = ((unsigned long long)ruse_now.ru_inblock -
356 pi->ruse.ru_inblock) << BBSHIFT;
357 out = ((unsigned long long)ruse_now.ru_oublock -
358 pi->ruse.ru_oublock) << BBSHIFT;
359 io = in + out;
360 if (io) {
361 i = auto_space_units(in, &iu);
362 o = auto_space_units(out, &ou);
363 t = auto_space_units(io, &tu);
364 din = auto_space_units(in / dt, &dinu);
365 dout = auto_space_units(out / dt, &doutu);
366 dtot = auto_space_units(io / dt, &dtotu);
367 fprintf(stdout,
368 _("%sI/O: %.1f%s in, %.1f%s out, %.1f%s tot\n"),
369 phasebuf, i, iu, o, ou, t, tu);
370 fprintf(stdout,
371 _("%sI/O rate: %.1f%s/s in, %.1f%s/s out, %.1f%s/s tot\n"),
372 phasebuf, din, dinu, dout, doutu, dtot, dtotu);
373 }
374 fflush(stdout);
375
376 return true;
377 }
378
379 /* Run all the phases of the scrubber. */
380 static bool
381 run_scrub_phases(
382 struct scrub_ctx *ctx,
383 FILE *progress_fp)
384 {
385 struct phase_ops phases[] =
386 {
387 {
388 .descr = _("Find filesystem geometry."),
389 .fn = xfs_setup_fs,
390 .must_run = true,
391 },
392 {
393 .descr = _("Check internal metadata."),
394 .fn = xfs_scan_metadata,
395 .estimate_work = xfs_estimate_metadata_work,
396 },
397 {
398 .descr = _("Scan all inodes."),
399 .fn = xfs_scan_inodes,
400 .estimate_work = xfs_estimate_inodes_work,
401 },
402 {
403 .descr = _("Defer filesystem repairs."),
404 .fn = REPAIR_DUMMY_FN,
405 .estimate_work = xfs_estimate_repair_work,
406 },
407 {
408 .descr = _("Check directory tree."),
409 .fn = xfs_scan_connections,
410 .estimate_work = xfs_estimate_inodes_work,
411 },
412 {
413 .descr = _("Verify data file integrity."),
414 .fn = DATASCAN_DUMMY_FN,
415 .estimate_work = xfs_estimate_verify_work,
416 },
417 {
418 .descr = _("Check summary counters."),
419 .fn = xfs_scan_summary,
420 .must_run = true,
421 },
422 {
423 NULL
424 },
425 };
426 struct phase_rusage pi;
427 struct phase_ops *sp;
428 uint64_t max_work;
429 bool moveon = true;
430 unsigned int debug_phase = 0;
431 unsigned int phase;
432 unsigned int nr_threads;
433 int rshift;
434
435 if (debug && debug_tweak_on("XFS_SCRUB_PHASE"))
436 debug_phase = atoi(getenv("XFS_SCRUB_PHASE"));
437
438 /* Run all phases of the scrub tool. */
439 for (phase = 1, sp = phases; sp->fn; sp++, phase++) {
440 /* Turn on certain phases if user said to. */
441 if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) {
442 sp->fn = xfs_scan_blocks;
443 } else if (sp->fn == REPAIR_DUMMY_FN &&
444 ctx->mode == SCRUB_MODE_REPAIR) {
445 sp->descr = _("Repair filesystem.");
446 sp->fn = xfs_repair_fs;
447 sp->must_run = true;
448 }
449
450 /* Skip certain phases unless they're turned on. */
451 if (sp->fn == REPAIR_DUMMY_FN ||
452 sp->fn == DATASCAN_DUMMY_FN)
453 continue;
454
455 /* Allow debug users to force a particular phase. */
456 if (debug_phase && phase != debug_phase && !sp->must_run)
457 continue;
458
459 /* Run this phase. */
460 moveon = phase_start(&pi, phase, sp->descr);
461 if (!moveon)
462 break;
463 if (sp->estimate_work) {
464 moveon = sp->estimate_work(ctx, &max_work, &nr_threads,
465 &rshift);
466 if (!moveon)
467 break;
468 moveon = progress_init_phase(ctx, progress_fp, phase,
469 max_work, rshift, nr_threads);
470 } else {
471 moveon = progress_init_phase(ctx, NULL, phase, 0, 0, 0);
472 }
473 if (!moveon)
474 break;
475 moveon = sp->fn(ctx);
476 if (!moveon) {
477 str_info(ctx, ctx->mntpoint,
478 _("Scrub aborted after phase %d."),
479 phase);
480 break;
481 }
482 progress_end_phase();
483 moveon = phase_end(&pi, phase);
484 if (!moveon)
485 break;
486
487 /* Too many errors? */
488 moveon = !xfs_scrub_excessive_errors(ctx);
489 if (!moveon)
490 break;
491 }
492
493 return moveon;
494 }
495
496 static void
497 report_outcome(
498 struct scrub_ctx *ctx)
499 {
500 unsigned long long total_errors;
501
502 total_errors = ctx->errors_found + ctx->runtime_errors;
503
504 if (total_errors == 0 && ctx->warnings_found == 0) {
505 log_info(ctx, _("No errors found."));
506 return;
507 }
508
509 if (total_errors == 0) {
510 fprintf(stderr, _("%s: warnings found: %llu\n"), ctx->mntpoint,
511 ctx->warnings_found);
512 log_warn(ctx, _("warnings found: %llu"), ctx->warnings_found);
513 } else if (ctx->warnings_found == 0) {
514 fprintf(stderr, _("%s: errors found: %llu\n"), ctx->mntpoint,
515 total_errors);
516 log_err(ctx, _("errors found: %llu"), total_errors);
517 } else {
518 fprintf(stderr, _("%s: errors found: %llu; warnings found: %llu\n"),
519 ctx->mntpoint, total_errors,
520 ctx->warnings_found);
521 log_err(ctx, _("errors found: %llu; warnings found: %llu"),
522 total_errors, ctx->warnings_found);
523 }
524
525 /*
526 * Don't advise the user to run repair unless we were successful in
527 * setting up the scrub and we actually saw corruptions. Warnings
528 * are not corruptions.
529 */
530 if (ctx->scrub_setup_succeeded && total_errors > 0)
531 fprintf(stderr, _("%s: Unmount and run xfs_repair.\n"),
532 ctx->mntpoint);
533 }
534
535 int
536 main(
537 int argc,
538 char **argv)
539 {
540 struct scrub_ctx ctx = {0};
541 struct phase_rusage all_pi;
542 char *mtab = NULL;
543 FILE *progress_fp = NULL;
544 bool moveon = true;
545 bool ismnt;
546 int c;
547 int fd;
548 int ret = SCRUB_RET_SUCCESS;
549
550 fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n");
551
552 progname = basename(argv[0]);
553 setlocale(LC_ALL, "");
554 bindtextdomain(PACKAGE, LOCALEDIR);
555 textdomain(PACKAGE);
556
557 pthread_mutex_init(&ctx.lock, NULL);
558 ctx.mode = SCRUB_MODE_REPAIR;
559 ctx.error_action = ERRORS_CONTINUE;
560 while ((c = getopt(argc, argv, "a:bC:de:km:nTvxV")) != EOF) {
561 switch (c) {
562 case 'a':
563 ctx.max_errors = cvt_u64(optarg, 10);
564 if (errno) {
565 perror(optarg);
566 usage();
567 }
568 break;
569 case 'b':
570 nr_threads = 1;
571 bg_mode++;
572 break;
573 case 'C':
574 errno = 0;
575 fd = cvt_u32(optarg, 10);
576 if (errno) {
577 perror(optarg);
578 usage();
579 }
580 progress_fp = fdopen(fd, "w");
581 if (!progress_fp) {
582 perror(optarg);
583 usage();
584 }
585 break;
586 case 'd':
587 debug++;
588 break;
589 case 'e':
590 if (!strcmp("continue", optarg))
591 ctx.error_action = ERRORS_CONTINUE;
592 else if (!strcmp("shutdown", optarg))
593 ctx.error_action = ERRORS_SHUTDOWN;
594 else {
595 fprintf(stderr,
596 _("Unknown error behavior \"%s\".\n"),
597 optarg);
598 usage();
599 }
600 break;
601 case 'k':
602 want_fstrim = false;
603 break;
604 case 'm':
605 mtab = optarg;
606 break;
607 case 'n':
608 ctx.mode = SCRUB_MODE_DRY_RUN;
609 break;
610 case 'T':
611 display_rusage = true;
612 break;
613 case 'v':
614 verbose = true;
615 break;
616 case 'V':
617 fprintf(stdout, _("%s version %s\n"), progname,
618 VERSION);
619 fflush(stdout);
620 return SCRUB_RET_SUCCESS;
621 case 'x':
622 scrub_data = true;
623 break;
624 case '?':
625 /* fall through */
626 default:
627 usage();
628 }
629 }
630
631 /* Override thread count if debugger */
632 if (debug_tweak_on("XFS_SCRUB_THREADS")) {
633 unsigned int x;
634
635 x = cvt_u32(getenv("XFS_SCRUB_THREADS"), 10);
636 if (errno) {
637 perror("nr_threads");
638 usage();
639 }
640 nr_threads = x;
641 }
642
643 if (optind != argc - 1)
644 usage();
645
646 ctx.mntpoint = strdup(argv[optind]);
647
648 stdout_isatty = isatty(STDOUT_FILENO);
649 stderr_isatty = isatty(STDERR_FILENO);
650
651 /* If interactive, start the progress bar. */
652 if (stdout_isatty && !progress_fp)
653 progress_fp = fdopen(1, "w+");
654
655 if (getenv("SERVICE_MODE"))
656 is_service = true;
657
658 /* Initialize overall phase stats. */
659 moveon = phase_start(&all_pi, 0, NULL);
660 if (!moveon)
661 return SCRUB_RET_OPERROR;
662
663 /* Find the mount record for the passed-in argument. */
664 if (stat(argv[optind], &ctx.mnt_sb) < 0) {
665 fprintf(stderr,
666 _("%s: could not stat: %s: %s\n"),
667 progname, argv[optind], strerror(errno));
668 ctx.runtime_errors++;
669 goto out;
670 }
671
672 /*
673 * If the user did not specify an explicit mount table, try to use
674 * /proc/mounts if it is available, else /etc/mtab. We prefer
675 * /proc/mounts because it is kernel controlled, while /etc/mtab
676 * may contain garbage that userspace tools like pam_mounts wrote
677 * into it.
678 */
679 if (!mtab) {
680 if (access(_PATH_PROC_MOUNTS, R_OK) == 0)
681 mtab = _PATH_PROC_MOUNTS;
682 else
683 mtab = _PATH_MOUNTED;
684 }
685
686 ismnt = find_mountpoint(mtab, &ctx);
687 if (!ismnt) {
688 fprintf(stderr,
689 _("%s: Not a XFS mount point or block device.\n"),
690 ctx.mntpoint);
691 ret |= SCRUB_RET_SYNTAX;
692 goto out;
693 }
694
695 /* How many CPUs? */
696 nproc = sysconf(_SC_NPROCESSORS_ONLN);
697 if (nproc < 1)
698 nproc = 1;
699
700 /* Set up a page-aligned buffer for read verification. */
701 page_size = sysconf(_SC_PAGESIZE);
702 if (page_size < 0) {
703 str_errno(&ctx, ctx.mntpoint);
704 goto out;
705 }
706
707 if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
708 ctx.mode = SCRUB_MODE_REPAIR;
709
710 /* Scrub a filesystem. */
711 moveon = run_scrub_phases(&ctx, progress_fp);
712 if (!moveon && ctx.runtime_errors == 0)
713 ctx.runtime_errors++;
714
715 /*
716 * Excessive errors will cause the scrub phases to bail out early.
717 * We don't want every thread yelling that into the output, so check
718 * if we hit the threshold and tell the user *once*.
719 */
720 if (xfs_scrub_excessive_errors(&ctx))
721 str_info(&ctx, ctx.mntpoint, _("Too many errors; aborting."));
722
723 if (debug_tweak_on("XFS_SCRUB_FORCE_ERROR"))
724 str_error(&ctx, ctx.mntpoint, _("Injecting error."));
725
726 /* Clean up scan data. */
727 moveon = xfs_cleanup_fs(&ctx);
728 if (!moveon && ctx.runtime_errors == 0)
729 ctx.runtime_errors++;
730
731 out:
732 report_outcome(&ctx);
733
734 if (ctx.errors_found) {
735 if (ctx.error_action == ERRORS_SHUTDOWN)
736 xfs_shutdown_fs(&ctx);
737 ret |= SCRUB_RET_CORRUPT;
738 }
739 if (ctx.warnings_found)
740 ret |= SCRUB_RET_UNOPTIMIZED;
741 if (ctx.runtime_errors)
742 ret |= SCRUB_RET_OPERROR;
743 phase_end(&all_pi, 0);
744 if (progress_fp)
745 fclose(progress_fp);
746 free(ctx.blkdev);
747 free(ctx.mntpoint);
748
749 /*
750 * If we're being run as a service, the return code must fit the LSB
751 * init script action error guidelines, which is to say that we
752 * compress all errors to 1 ("generic or unspecified error", LSB 5.0
753 * section 22.2) and hope the admin will scan the log for what
754 * actually happened.
755 *
756 * We have to sleep 2 seconds here because journald uses the pid to
757 * connect our log messages to the systemd service. This is critical
758 * for capturing all the log messages if the scrub fails, because the
759 * fail service uses the service name to gather log messages for the
760 * error report.
761 */
762 if (is_service) {
763 sleep(2);
764 if (ret != SCRUB_RET_SUCCESS)
765 return 1;
766 }
767
768 return ret;
769 }