]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
95b1e505 DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
95b1e505 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
95b1e505 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
f0585fce | 7 | #include <pthread.h> |
828105d1 | 8 | #include <stdlib.h> |
e2bc34de | 9 | #include <paths.h> |
173a0283 DW |
10 | #include <sys/time.h> |
11 | #include <sys/resource.h> | |
50a573a7 | 12 | #include <sys/statvfs.h> |
828105d1 | 13 | #include "platform_defs.h" |
828105d1 | 14 | #include "input.h" |
50a573a7 | 15 | #include "path.h" |
95b1e505 | 16 | #include "xfs_scrub.h" |
828105d1 | 17 | #include "common.h" |
4bbed4ec | 18 | #include "unicrash.h" |
ed60d210 | 19 | #include "progress.h" |
95b1e505 DW |
20 | |
21 | /* | |
22 | * XFS Online Metadata Scrub (and Repair) | |
23 | * | |
24 | * The XFS scrubber uses custom XFS ioctls to probe more deeply into the | |
25 | * internals of the filesystem. It takes advantage of scrubbing ioctls | |
26 | * to check all the records stored in a metadata object and to | |
27 | * cross-reference those records against the other filesystem metadata. | |
28 | * | |
29 | * After the program gathers command line arguments to figure out | |
30 | * exactly what the program is going to do, scrub execution is split up | |
31 | * into several separate phases: | |
32 | * | |
33 | * The "find geometry" phase queries XFS for the filesystem geometry. | |
34 | * The block devices for the data, realtime, and log devices are opened. | |
35 | * Kernel ioctls are test-queried to see if they actually work (the scrub | |
36 | * ioctl in particular), and any other filesystem-specific information | |
37 | * is gathered. | |
38 | * | |
39 | * In the "check internal metadata" phase, we call the metadata scrub | |
40 | * ioctl to check the filesystem's internal per-AG btrees. This | |
41 | * includes the AG superblock, AGF, AGFL, and AGI headers, freespace | |
42 | * btrees, the regular and free inode btrees, the reverse mapping | |
43 | * btrees, and the reference counting btrees. If the realtime device is | |
44 | * enabled, the realtime bitmap and reverse mapping btrees are checked. | |
45 | * Quotas, if enabled, are also checked in this phase. | |
46 | * | |
47 | * Each AG (and the realtime device) has its metadata checked in a | |
48 | * separate thread for better performance. Errors in the internal | |
49 | * metadata can be fixed here prior to the inode scan; refer to the | |
50 | * section about the "repair filesystem" phase for more information. | |
51 | * | |
52 | * The "scan all inodes" phase uses BULKSTAT to scan all the inodes in | |
53 | * an AG in disk order. The BULKSTAT information provides enough | |
54 | * information to construct a file handle that is used to check the | |
55 | * following parts of every file: | |
56 | * | |
57 | * - The inode record | |
58 | * - All three block forks (data, attr, CoW) | |
59 | * - If it's a symlink, the symlink target. | |
60 | * - If it's a directory, the directory entries. | |
61 | * - All extended attributes | |
62 | * - The parent pointer | |
63 | * | |
64 | * Multiple threads are started to check the inodes of each AG in | |
65 | * parallel. Errors in file metadata can be fixed here; see the section | |
66 | * about the "repair filesystem" phase for more information. | |
67 | * | |
68 | * Next comes the (configurable) "repair filesystem" phase. The user | |
69 | * can instruct this program to fix all problems encountered; to fix | |
70 | * only optimality problems and leave the corruptions; or not to touch | |
71 | * the filesystem at all. Any metadata repairs that did not succeed in | |
72 | * the previous two phases are retried here; if there are uncorrectable | |
73 | * errors, xfs_scrub stops here. | |
74 | * | |
ee310b0c DW |
75 | * To perform the actual repairs (or optimizations), we iterate all the |
76 | * items on the per-AG action item list and ask the kernel to repair | |
77 | * them. Items which are successfully repaired are removed from the | |
78 | * list. If an item is not acted upon successfully (or the kernel asks us | |
79 | * to try again), we retry the actions until there is nothing left to | |
80 | * fix or we fail to make forward progress. In that event, the | |
81 | * unfinished items are recorded as errors. If there are no errors at | |
82 | * this point, we call FSTRIM on the filesystem. | |
83 | * | |
95b1e505 DW |
84 | * The next phase is the "check directory tree" phase. In this phase, |
85 | * every directory is opened (via file handle) to confirm that each | |
86 | * directory is connected to the root. Directory entries are checked | |
87 | * for ambiguous Unicode normalization mappings, which is to say that we | |
88 | * look for pairs of entries whose utf-8 strings normalize to the same | |
89 | * code point sequence and map to different inodes, because that could | |
90 | * be used to trick a user into opening the wrong file. The names of | |
91 | * extended attributes are checked for Unicode normalization collisions. | |
92 | * | |
93 | * In the "verify data file integrity" phase, we employ GETFSMAP to read | |
94 | * the reverse-mappings of all AGs and issue direct-reads of the | |
95 | * underlying disk blocks. We rely on the underlying storage to have | |
96 | * checksummed the data blocks appropriately. Multiple threads are | |
97 | * started to check each AG in parallel; a separate thread pool is used | |
98 | * to handle the direct reads. | |
99 | * | |
100 | * In the "check summary counters" phase, we use GETFSMAP to tally up the | |
101 | * blocks and BULKSTAT to tally up the inodes we saw and compare that to | |
102 | * the statfs output. This gives the user a rough estimate of how | |
103 | * thorough the scrub was. | |
104 | */ | |
105 | ||
828105d1 DW |
106 | /* |
107 | * Known debug tweaks (pass -d and set the environment variable): | |
108 | * XFS_SCRUB_FORCE_ERROR -- pretend all metadata is corrupt | |
109 | * XFS_SCRUB_FORCE_REPAIR -- repair all metadata even if it's ok | |
110 | * XFS_SCRUB_NO_KERNEL -- pretend there is no kernel ioctl | |
111 | * XFS_SCRUB_NO_SCSI_VERIFY -- disable SCSI VERIFY (if present) | |
112 | * XFS_SCRUB_PHASE -- run only this scrub phase | |
113 | * XFS_SCRUB_THREADS -- start exactly this number of threads | |
824b5807 DW |
114 | * |
115 | * Available even in non-debug mode: | |
116 | * SERVICE_MODE -- compress all error codes to 1 for LSB | |
117 | * service action compliance | |
828105d1 DW |
118 | */ |
119 | ||
95b1e505 DW |
/* Name this program reports itself as; libfrog error reports need it. */
char				*progname = "xfs_scrub";

/* Debugging level; each increment makes the output more verbose. */
unsigned int			debug;

/* Print resource usage statistics when each phase finishes? */
static bool			display_rusage;

/* Background mode level; larger values add more pauses between scrub calls. */
unsigned int			bg_mode;

/* Upper bound on the number of processors available to us. */
int				nproc;

/* How many threads we are permitted to use. */
unsigned int			nr_threads;

/* Verbose mode; when set, more information is printed. */
bool				verbose;

/* Scrub the file data blocks as well? */
static bool			scrub_data;

/* Size of a memory page. */
long				page_size;

/* Issue FSTRIM after a successful run? */
bool				want_fstrim = true;

/* Set when stdout/stderr are ttys, which permits richer terminal control. */
bool				stderr_isatty;
bool				stdout_isatty;

/*
 * Set when we run as a service; in that case we must be careful about
 * which error codes are returned to the calling process.
 */
bool				is_service;
824b5807 | 159 | |
828105d1 DW |
/* Exit status bits; each value is a distinct bit so they can be ORed. */
enum {
	SCRUB_RET_SUCCESS	= 0,	/* no problems left behind */
	SCRUB_RET_CORRUPT	= 1,	/* corruption remains on fs */
	SCRUB_RET_UNOPTIMIZED	= 2,	/* fs could be optimized */
	SCRUB_RET_OPERROR	= 4,	/* operational problems */
	SCRUB_RET_SYNTAX	= 8,	/* cmdline args rejected */
};
165 | ||
166 | static void __attribute__((noreturn)) | |
167 | usage(void) | |
168 | { | |
680eacaa | 169 | fprintf(stderr, _("Usage: %s [OPTIONS] mountpoint\n"), progname); |
828105d1 DW |
170 | fprintf(stderr, "\n"); |
171 | fprintf(stderr, _("Options:\n")); | |
172 | fprintf(stderr, _(" -a count Stop after this many errors are found.\n")); | |
173 | fprintf(stderr, _(" -b Background mode.\n")); | |
ed60d210 | 174 | fprintf(stderr, _(" -C fd Print progress information to this fd.\n")); |
828105d1 | 175 | fprintf(stderr, _(" -e behavior What to do if errors are found.\n")); |
7e36bc0f | 176 | fprintf(stderr, _(" -k Do not FITRIM the free space.\n")); |
828105d1 DW |
177 | fprintf(stderr, _(" -m path Path to /etc/mtab.\n")); |
178 | fprintf(stderr, _(" -n Dry run. Do not modify anything.\n")); | |
179 | fprintf(stderr, _(" -T Display timing/usage information.\n")); | |
180 | fprintf(stderr, _(" -v Verbose output.\n")); | |
181 | fprintf(stderr, _(" -V Print version.\n")); | |
182 | fprintf(stderr, _(" -x Scrub file data too.\n")); | |
828105d1 DW |
183 | |
184 | exit(SCRUB_RET_SYNTAX); | |
185 | } | |
186 | ||
173a0283 DW |
#ifndef RUSAGE_BOTH
# define RUSAGE_BOTH		(-2)
#endif

/* Add the time value @src to @dst, keeping tv_usec below one second. */
static void
scrub_timeval_add(
	struct timeval		*dst,
	const struct timeval	*src)
{
	dst->tv_sec += src->tv_sec;
	dst->tv_usec += src->tv_usec;
	if (dst->tv_usec >= 1000000) {
		dst->tv_sec++;
		dst->tv_usec -= 1000000;
	}
}

/*
 * Get resource usage for ourselves and all children.
 *
 * RUSAGE_BOTH is tried first.  If that call fails, RUSAGE_SELF and
 * RUSAGE_CHILDREN are queried separately and combined into @usage: CPU
 * times and event counters are summed and the larger peak RSS is kept, so
 * that the fallback reports the same quantities a combined query would.
 *
 * Returns 0 on success, or the nonzero getrusage() return value if either
 * of the fallback queries fails.
 */
static int
scrub_getrusage(
	struct rusage		*usage)
{
	struct rusage		cusage;
	int			err;

	err = getrusage(RUSAGE_BOTH, usage);
	if (!err)
		return err;

	err = getrusage(RUSAGE_SELF, usage);
	if (err)
		return err;

	err = getrusage(RUSAGE_CHILDREN, &cusage);
	if (err)
		return err;

	/* CPU time consumed by the children counts towards the total too. */
	scrub_timeval_add(&usage->ru_utime, &cusage.ru_utime);
	scrub_timeval_add(&usage->ru_stime, &cusage.ru_stime);

	/* Peak RSS is a high-water mark, not a counter; keep the larger. */
	if (cusage.ru_maxrss > usage->ru_maxrss)
		usage->ru_maxrss = cusage.ru_maxrss;

	usage->ru_minflt += cusage.ru_minflt;
	usage->ru_majflt += cusage.ru_majflt;
	usage->ru_nswap += cusage.ru_nswap;
	usage->ru_inblock += cusage.ru_inblock;
	usage->ru_oublock += cusage.ru_oublock;
	usage->ru_msgsnd += cusage.ru_msgsnd;
	usage->ru_msgrcv += cusage.ru_msgrcv;
	usage->ru_nsignals += cusage.ru_nsignals;
	usage->ru_nvcsw += cusage.ru_nvcsw;
	usage->ru_nivcsw += cusage.ru_nivcsw;
	return 0;
}
223 | ||
224 | /* | |
225 | * Scrub Phase Dispatch | |
226 | * | |
227 | * The operations of the scrub program are split up into several | |
228 | * different phases. Each phase builds upon the metadata checked in the | |
229 | * previous phase, which is to say that we may skip phase (X + 1) if our | |
230 | * scans in phase (X) reveal corruption. A phase may be skipped | |
231 | * entirely. | |
232 | */ | |
233 | ||
/* Resource usage snapshot recorded when a phase starts. */
struct phase_rusage {
	/* getrusage() counters sampled at phase start */
	struct rusage		ruse;
	/* gettimeofday() timestamp sampled at phase start */
	struct timeval		time;
	/* zeroed at phase start; not otherwise referenced in this file */
	unsigned long long	verified_bytes;
	/* sbrk(0) sampled at phase start */
	void			*brk_start;
	/* description of the phase; NULL for the whole-program record */
	const char		*descr;
};
242 | ||
/*
 * Operations for each phase.
 *
 * The two DUMMY_FN values are placeholders stored in ->fn for phases that
 * only run when the user asked for them; they are compared against, never
 * called.
 */
#define DATASCAN_DUMMY_FN	((void *)1)
#define REPAIR_DUMMY_FN		((void *)2)
struct phase_ops {
	/* description printed when the phase starts */
	char		*descr;
	/* the phase itself; a false return aborts the scrub */
	bool		(*fn)(struct scrub_ctx *);
	/* optional; fills in the arguments for progress_init_phase() */
	bool		(*estimate_work)(struct scrub_ctx *, uint64_t *,
					 unsigned int *, int *);
	/* run this phase even when XFS_SCRUB_PHASE selects another one */
	bool		must_run;
};
253 | ||
254 | /* Start tracking resource usage for a phase. */ | |
255 | static bool | |
256 | phase_start( | |
257 | struct phase_rusage *pi, | |
258 | unsigned int phase, | |
259 | const char *descr) | |
260 | { | |
261 | int error; | |
262 | ||
263 | memset(pi, 0, sizeof(*pi)); | |
264 | error = scrub_getrusage(&pi->ruse); | |
265 | if (error) { | |
266 | perror(_("getrusage")); | |
267 | return false; | |
268 | } | |
269 | pi->brk_start = sbrk(0); | |
270 | ||
271 | error = gettimeofday(&pi->time, NULL); | |
272 | if (error) { | |
273 | perror(_("gettimeofday")); | |
274 | return false; | |
275 | } | |
276 | ||
277 | pi->descr = descr; | |
278 | if ((verbose || display_rusage) && descr) { | |
279 | fprintf(stdout, _("Phase %u: %s\n"), phase, descr); | |
280 | fflush(stdout); | |
281 | } | |
282 | return true; | |
283 | } | |
284 | ||
285 | /* Report usage stats. */ | |
286 | static bool | |
287 | phase_end( | |
288 | struct phase_rusage *pi, | |
289 | unsigned int phase) | |
290 | { | |
291 | struct rusage ruse_now; | |
292 | #ifdef HAVE_MALLINFO | |
293 | struct mallinfo mall_now; | |
294 | #endif | |
295 | struct timeval time_now; | |
296 | char phasebuf[DESCR_BUFSZ]; | |
297 | double dt; | |
298 | unsigned long long in, out; | |
299 | unsigned long long io; | |
300 | double i, o, t; | |
301 | double din, dout, dtot; | |
302 | char *iu, *ou, *tu, *dinu, *doutu, *dtotu; | |
303 | int error; | |
304 | ||
305 | if (!display_rusage) | |
306 | return true; | |
307 | ||
308 | error = gettimeofday(&time_now, NULL); | |
309 | if (error) { | |
310 | perror(_("gettimeofday")); | |
311 | return false; | |
312 | } | |
313 | dt = timeval_subtract(&time_now, &pi->time); | |
314 | ||
315 | error = scrub_getrusage(&ruse_now); | |
316 | if (error) { | |
317 | perror(_("getrusage")); | |
318 | return false; | |
319 | } | |
320 | ||
321 | if (phase) | |
322 | snprintf(phasebuf, DESCR_BUFSZ, _("Phase %u: "), phase); | |
323 | else | |
324 | phasebuf[0] = 0; | |
325 | ||
326 | #define kbytes(x) (((unsigned long)(x) + 1023) / 1024) | |
327 | #ifdef HAVE_MALLINFO | |
328 | ||
329 | mall_now = mallinfo(); | |
330 | fprintf(stdout, _("%sMemory used: %luk/%luk (%luk/%luk), "), | |
331 | phasebuf, | |
332 | kbytes(mall_now.arena), kbytes(mall_now.hblkhd), | |
333 | kbytes(mall_now.uordblks), kbytes(mall_now.fordblks)); | |
334 | #else | |
335 | fprintf(stdout, _("%sMemory used: %luk, "), | |
336 | phasebuf, | |
337 | (unsigned long) kbytes(((char *) sbrk(0)) - | |
338 | ((char *) pi->brk_start))); | |
339 | #endif | |
340 | #undef kbytes | |
341 | ||
342 | fprintf(stdout, _("time: %5.2f/%5.2f/%5.2fs\n"), | |
343 | timeval_subtract(&time_now, &pi->time), | |
344 | timeval_subtract(&ruse_now.ru_utime, &pi->ruse.ru_utime), | |
345 | timeval_subtract(&ruse_now.ru_stime, &pi->ruse.ru_stime)); | |
346 | ||
347 | /* I/O usage */ | |
348 | in = ((unsigned long long)ruse_now.ru_inblock - | |
349 | pi->ruse.ru_inblock) << BBSHIFT; | |
350 | out = ((unsigned long long)ruse_now.ru_oublock - | |
351 | pi->ruse.ru_oublock) << BBSHIFT; | |
352 | io = in + out; | |
353 | if (io) { | |
354 | i = auto_space_units(in, &iu); | |
355 | o = auto_space_units(out, &ou); | |
356 | t = auto_space_units(io, &tu); | |
357 | din = auto_space_units(in / dt, &dinu); | |
358 | dout = auto_space_units(out / dt, &doutu); | |
359 | dtot = auto_space_units(io / dt, &dtotu); | |
360 | fprintf(stdout, | |
361 | _("%sI/O: %.1f%s in, %.1f%s out, %.1f%s tot\n"), | |
362 | phasebuf, i, iu, o, ou, t, tu); | |
363 | fprintf(stdout, | |
364 | _("%sI/O rate: %.1f%s/s in, %.1f%s/s out, %.1f%s/s tot\n"), | |
365 | phasebuf, din, dinu, dout, doutu, dtot, dtotu); | |
366 | } | |
367 | fflush(stdout); | |
368 | ||
369 | return true; | |
370 | } | |
371 | ||
372 | /* Run all the phases of the scrubber. */ | |
373 | static bool | |
374 | run_scrub_phases( | |
ed60d210 DW |
375 | struct scrub_ctx *ctx, |
376 | FILE *progress_fp) | |
173a0283 DW |
377 | { |
378 | struct phase_ops phases[] = | |
379 | { | |
380 | { | |
381 | .descr = _("Find filesystem geometry."), | |
50a573a7 DW |
382 | .fn = xfs_setup_fs, |
383 | .must_run = true, | |
173a0283 DW |
384 | }, |
385 | { | |
386 | .descr = _("Check internal metadata."), | |
e758ad01 | 387 | .fn = xfs_scan_metadata, |
ed60d210 | 388 | .estimate_work = xfs_estimate_metadata_work, |
173a0283 DW |
389 | }, |
390 | { | |
391 | .descr = _("Scan all inodes."), | |
fa16b376 | 392 | .fn = xfs_scan_inodes, |
ed60d210 | 393 | .estimate_work = xfs_estimate_inodes_work, |
173a0283 DW |
394 | }, |
395 | { | |
396 | .descr = _("Defer filesystem repairs."), | |
397 | .fn = REPAIR_DUMMY_FN, | |
ed60d210 | 398 | .estimate_work = xfs_estimate_repair_work, |
173a0283 DW |
399 | }, |
400 | { | |
401 | .descr = _("Check directory tree."), | |
c4892e76 | 402 | .fn = xfs_scan_connections, |
ed60d210 | 403 | .estimate_work = xfs_estimate_inodes_work, |
173a0283 DW |
404 | }, |
405 | { | |
406 | .descr = _("Verify data file integrity."), | |
407 | .fn = DATASCAN_DUMMY_FN, | |
ed60d210 | 408 | .estimate_work = xfs_estimate_verify_work, |
173a0283 DW |
409 | }, |
410 | { | |
411 | .descr = _("Check summary counters."), | |
698c6c7c DW |
412 | .fn = xfs_scan_summary, |
413 | .must_run = true, | |
173a0283 DW |
414 | }, |
415 | { | |
416 | NULL | |
417 | }, | |
418 | }; | |
419 | struct phase_rusage pi; | |
420 | struct phase_ops *sp; | |
ed60d210 | 421 | uint64_t max_work; |
173a0283 DW |
422 | bool moveon = true; |
423 | unsigned int debug_phase = 0; | |
424 | unsigned int phase; | |
ed60d210 DW |
425 | unsigned int nr_threads; |
426 | int rshift; | |
173a0283 DW |
427 | |
428 | if (debug && debug_tweak_on("XFS_SCRUB_PHASE")) | |
429 | debug_phase = atoi(getenv("XFS_SCRUB_PHASE")); | |
430 | ||
431 | /* Run all phases of the scrub tool. */ | |
432 | for (phase = 1, sp = phases; sp->fn; sp++, phase++) { | |
b364a9c0 | 433 | /* Turn on certain phases if user said to. */ |
7e36bc0f | 434 | if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) { |
b364a9c0 | 435 | sp->fn = xfs_scan_blocks; |
1658224d DW |
436 | } else if (sp->fn == REPAIR_DUMMY_FN && |
437 | ctx->mode == SCRUB_MODE_REPAIR) { | |
438 | sp->descr = _("Repair filesystem."); | |
439 | sp->fn = xfs_repair_fs; | |
440 | sp->must_run = true; | |
7e36bc0f | 441 | } |
b364a9c0 | 442 | |
173a0283 DW |
443 | /* Skip certain phases unless they're turned on. */ |
444 | if (sp->fn == REPAIR_DUMMY_FN || | |
445 | sp->fn == DATASCAN_DUMMY_FN) | |
446 | continue; | |
447 | ||
448 | /* Allow debug users to force a particular phase. */ | |
449 | if (debug_phase && phase != debug_phase && !sp->must_run) | |
450 | continue; | |
451 | ||
452 | /* Run this phase. */ | |
453 | moveon = phase_start(&pi, phase, sp->descr); | |
ed60d210 DW |
454 | if (!moveon) |
455 | break; | |
456 | if (sp->estimate_work) { | |
457 | moveon = sp->estimate_work(ctx, &max_work, &nr_threads, | |
458 | &rshift); | |
459 | if (!moveon) | |
460 | break; | |
461 | moveon = progress_init_phase(ctx, progress_fp, phase, | |
462 | max_work, rshift, nr_threads); | |
463 | } else { | |
464 | moveon = progress_init_phase(ctx, NULL, phase, 0, 0, 0); | |
465 | } | |
173a0283 DW |
466 | if (!moveon) |
467 | break; | |
468 | moveon = sp->fn(ctx); | |
469 | if (!moveon) { | |
470 | str_info(ctx, ctx->mntpoint, | |
471 | _("Scrub aborted after phase %d."), | |
472 | phase); | |
473 | break; | |
474 | } | |
ed60d210 | 475 | progress_end_phase(); |
173a0283 DW |
476 | moveon = phase_end(&pi, phase); |
477 | if (!moveon) | |
478 | break; | |
479 | ||
480 | /* Too many errors? */ | |
481 | moveon = !xfs_scrub_excessive_errors(ctx); | |
482 | if (!moveon) | |
483 | break; | |
484 | } | |
485 | ||
486 | return moveon; | |
487 | } | |
488 | ||
ee310b0c DW |
489 | static void |
490 | report_modifications( | |
491 | struct scrub_ctx *ctx) | |
492 | { | |
493 | if (ctx->repairs == 0 && ctx->preens == 0) | |
494 | return; | |
495 | ||
496 | if (ctx->repairs && ctx->preens) | |
497 | fprintf(stdout, | |
498 | _("%s: repairs made: %llu; optimizations made: %llu.\n"), | |
499 | ctx->mntpoint, ctx->repairs, ctx->preens); | |
500 | else if (ctx->preens == 0) | |
501 | fprintf(stdout, | |
502 | _("%s: repairs made: %llu.\n"), | |
503 | ctx->mntpoint, ctx->repairs); | |
504 | else if (ctx->repairs == 0) | |
505 | fprintf(stdout, | |
506 | _("%s: optimizations made: %llu.\n"), | |
507 | ctx->mntpoint, ctx->preens); | |
508 | } | |
509 | ||
5454c2bf DW |
510 | static void |
511 | report_outcome( | |
512 | struct scrub_ctx *ctx) | |
513 | { | |
514 | unsigned long long total_errors; | |
515 | ||
516 | total_errors = ctx->errors_found + ctx->runtime_errors; | |
517 | ||
7c309151 DW |
518 | if (total_errors == 0 && ctx->warnings_found == 0) { |
519 | log_info(ctx, _("No errors found.")); | |
5454c2bf | 520 | return; |
7c309151 | 521 | } |
5454c2bf | 522 | |
7c309151 | 523 | if (total_errors == 0) { |
5454c2bf DW |
524 | fprintf(stderr, _("%s: warnings found: %llu\n"), ctx->mntpoint, |
525 | ctx->warnings_found); | |
7c309151 DW |
526 | log_warn(ctx, _("warnings found: %llu"), ctx->warnings_found); |
527 | } else if (ctx->warnings_found == 0) { | |
5454c2bf DW |
528 | fprintf(stderr, _("%s: errors found: %llu\n"), ctx->mntpoint, |
529 | total_errors); | |
7c309151 DW |
530 | log_err(ctx, _("errors found: %llu"), total_errors); |
531 | } else { | |
5454c2bf DW |
532 | fprintf(stderr, _("%s: errors found: %llu; warnings found: %llu\n"), |
533 | ctx->mntpoint, total_errors, | |
534 | ctx->warnings_found); | |
7c309151 DW |
535 | log_err(ctx, _("errors found: %llu; warnings found: %llu"), |
536 | total_errors, ctx->warnings_found); | |
537 | } | |
538 | ||
c767c5ae DW |
539 | /* |
540 | * Don't advise the user to run repair unless we were successful in | |
541 | * setting up the scrub and we actually saw corruptions. Warnings | |
542 | * are not corruptions. | |
543 | */ | |
ee310b0c DW |
544 | if (ctx->scrub_setup_succeeded && total_errors > 0) { |
545 | char *msg; | |
546 | ||
547 | if (ctx->mode == SCRUB_MODE_DRY_RUN) | |
548 | msg = _("%s: Re-run xfs_scrub without -n.\n"); | |
549 | else | |
550 | msg = _("%s: Unmount and run xfs_repair.\n"); | |
551 | ||
552 | fprintf(stderr, msg, ctx->mntpoint); | |
553 | } | |
5454c2bf DW |
554 | } |
555 | ||
95b1e505 DW |
556 | int |
557 | main( | |
558 | int argc, | |
559 | char **argv) | |
560 | { | |
828105d1 | 561 | struct scrub_ctx ctx = {0}; |
173a0283 | 562 | struct phase_rusage all_pi; |
828105d1 | 563 | char *mtab = NULL; |
ed60d210 | 564 | FILE *progress_fp = NULL; |
680eacaa | 565 | struct fs_path *fsp; |
828105d1 DW |
566 | bool moveon = true; |
567 | int c; | |
ed60d210 | 568 | int fd; |
828105d1 DW |
569 | int ret = SCRUB_RET_SUCCESS; |
570 | ||
95b1e505 | 571 | fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n"); |
828105d1 DW |
572 | |
573 | progname = basename(argv[0]); | |
574 | setlocale(LC_ALL, ""); | |
575 | bindtextdomain(PACKAGE, LOCALEDIR); | |
576 | textdomain(PACKAGE); | |
577 | ||
578 | pthread_mutex_init(&ctx.lock, NULL); | |
1658224d | 579 | ctx.mode = SCRUB_MODE_REPAIR; |
828105d1 | 580 | ctx.error_action = ERRORS_CONTINUE; |
1658224d | 581 | while ((c = getopt(argc, argv, "a:bC:de:km:nTvxV")) != EOF) { |
828105d1 DW |
582 | switch (c) { |
583 | case 'a': | |
584 | ctx.max_errors = cvt_u64(optarg, 10); | |
585 | if (errno) { | |
586 | perror(optarg); | |
587 | usage(); | |
588 | } | |
589 | break; | |
590 | case 'b': | |
591 | nr_threads = 1; | |
592 | bg_mode++; | |
593 | break; | |
ed60d210 DW |
594 | case 'C': |
595 | errno = 0; | |
596 | fd = cvt_u32(optarg, 10); | |
597 | if (errno) { | |
598 | perror(optarg); | |
599 | usage(); | |
600 | } | |
601 | progress_fp = fdopen(fd, "w"); | |
602 | if (!progress_fp) { | |
603 | perror(optarg); | |
604 | usage(); | |
605 | } | |
606 | break; | |
828105d1 DW |
607 | case 'd': |
608 | debug++; | |
609 | break; | |
610 | case 'e': | |
611 | if (!strcmp("continue", optarg)) | |
612 | ctx.error_action = ERRORS_CONTINUE; | |
613 | else if (!strcmp("shutdown", optarg)) | |
614 | ctx.error_action = ERRORS_SHUTDOWN; | |
615 | else { | |
616 | fprintf(stderr, | |
617 | _("Unknown error behavior \"%s\".\n"), | |
618 | optarg); | |
619 | usage(); | |
620 | } | |
621 | break; | |
7e36bc0f DW |
622 | case 'k': |
623 | want_fstrim = false; | |
624 | break; | |
828105d1 DW |
625 | case 'm': |
626 | mtab = optarg; | |
627 | break; | |
628 | case 'n': | |
828105d1 DW |
629 | ctx.mode = SCRUB_MODE_DRY_RUN; |
630 | break; | |
631 | case 'T': | |
632 | display_rusage = true; | |
633 | break; | |
634 | case 'v': | |
635 | verbose = true; | |
636 | break; | |
637 | case 'V': | |
638 | fprintf(stdout, _("%s version %s\n"), progname, | |
639 | VERSION); | |
640 | fflush(stdout); | |
641 | return SCRUB_RET_SUCCESS; | |
642 | case 'x': | |
643 | scrub_data = true; | |
644 | break; | |
828105d1 DW |
645 | case '?': |
646 | /* fall through */ | |
647 | default: | |
648 | usage(); | |
649 | } | |
650 | } | |
651 | ||
652 | /* Override thread count if debugger */ | |
653 | if (debug_tweak_on("XFS_SCRUB_THREADS")) { | |
654 | unsigned int x; | |
655 | ||
656 | x = cvt_u32(getenv("XFS_SCRUB_THREADS"), 10); | |
657 | if (errno) { | |
658 | perror("nr_threads"); | |
659 | usage(); | |
660 | } | |
661 | nr_threads = x; | |
662 | } | |
663 | ||
664 | if (optind != argc - 1) | |
665 | usage(); | |
666 | ||
680eacaa | 667 | ctx.mntpoint = argv[optind]; |
828105d1 | 668 | |
ed60d210 DW |
669 | stdout_isatty = isatty(STDOUT_FILENO); |
670 | stderr_isatty = isatty(STDERR_FILENO); | |
671 | ||
672 | /* If interactive, start the progress bar. */ | |
673 | if (stdout_isatty && !progress_fp) | |
674 | progress_fp = fdopen(1, "w+"); | |
675 | ||
824b5807 DW |
676 | if (getenv("SERVICE_MODE")) |
677 | is_service = true; | |
678 | ||
2e4959c1 DW |
679 | /* Initialize overall phase stats. */ |
680 | moveon = phase_start(&all_pi, 0, NULL); | |
681 | if (!moveon) | |
682 | return SCRUB_RET_OPERROR; | |
683 | ||
50a573a7 DW |
684 | /* Find the mount record for the passed-in argument. */ |
685 | if (stat(argv[optind], &ctx.mnt_sb) < 0) { | |
686 | fprintf(stderr, | |
687 | _("%s: could not stat: %s: %s\n"), | |
688 | progname, argv[optind], strerror(errno)); | |
689 | ctx.runtime_errors++; | |
690 | goto out; | |
691 | } | |
692 | ||
828105d1 DW |
693 | /* |
694 | * If the user did not specify an explicit mount table, try to use | |
695 | * /proc/mounts if it is available, else /etc/mtab. We prefer | |
696 | * /proc/mounts because it is kernel controlled, while /etc/mtab | |
697 | * may contain garbage that userspace tools like pam_mounts wrote | |
698 | * into it. | |
699 | */ | |
700 | if (!mtab) { | |
701 | if (access(_PATH_PROC_MOUNTS, R_OK) == 0) | |
702 | mtab = _PATH_PROC_MOUNTS; | |
703 | else | |
704 | mtab = _PATH_MOUNTED; | |
705 | } | |
706 | ||
680eacaa DW |
707 | fs_table_initialise(0, NULL, 0, NULL); |
708 | fsp = fs_table_lookup_mount(ctx.mntpoint); | |
709 | if (!fsp) { | |
710 | fprintf(stderr, _("%s: Not a XFS mount point.\n"), | |
711 | ctx.mntpoint); | |
50a573a7 DW |
712 | ret |= SCRUB_RET_SYNTAX; |
713 | goto out; | |
714 | } | |
680eacaa | 715 | memcpy(&ctx.fsinfo, fsp, sizeof(struct fs_path)); |
50a573a7 | 716 | |
828105d1 DW |
717 | /* How many CPUs? */ |
718 | nproc = sysconf(_SC_NPROCESSORS_ONLN); | |
719 | if (nproc < 1) | |
720 | nproc = 1; | |
721 | ||
722 | /* Set up a page-aligned buffer for read verification. */ | |
723 | page_size = sysconf(_SC_PAGESIZE); | |
724 | if (page_size < 0) { | |
725 | str_errno(&ctx, ctx.mntpoint); | |
726 | goto out; | |
727 | } | |
728 | ||
729 | if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR")) | |
730 | ctx.mode = SCRUB_MODE_REPAIR; | |
731 | ||
173a0283 | 732 | /* Scrub a filesystem. */ |
ed60d210 | 733 | moveon = run_scrub_phases(&ctx, progress_fp); |
173a0283 DW |
734 | if (!moveon && ctx.runtime_errors == 0) |
735 | ctx.runtime_errors++; | |
736 | ||
737 | /* | |
738 | * Excessive errors will cause the scrub phases to bail out early. | |
739 | * We don't want every thread yelling that into the output, so check | |
740 | * if we hit the threshold and tell the user *once*. | |
741 | */ | |
828105d1 DW |
742 | if (xfs_scrub_excessive_errors(&ctx)) |
743 | str_info(&ctx, ctx.mntpoint, _("Too many errors; aborting.")); | |
744 | ||
745 | if (debug_tweak_on("XFS_SCRUB_FORCE_ERROR")) | |
746 | str_error(&ctx, ctx.mntpoint, _("Injecting error.")); | |
747 | ||
50a573a7 DW |
748 | /* Clean up scan data. */ |
749 | moveon = xfs_cleanup_fs(&ctx); | |
750 | if (!moveon && ctx.runtime_errors == 0) | |
751 | ctx.runtime_errors++; | |
752 | ||
828105d1 | 753 | out: |
ee310b0c | 754 | report_modifications(&ctx); |
5454c2bf DW |
755 | report_outcome(&ctx); |
756 | ||
50a573a7 DW |
757 | if (ctx.errors_found) { |
758 | if (ctx.error_action == ERRORS_SHUTDOWN) | |
759 | xfs_shutdown_fs(&ctx); | |
828105d1 | 760 | ret |= SCRUB_RET_CORRUPT; |
50a573a7 | 761 | } |
828105d1 DW |
762 | if (ctx.warnings_found) |
763 | ret |= SCRUB_RET_UNOPTIMIZED; | |
764 | if (ctx.runtime_errors) | |
765 | ret |= SCRUB_RET_OPERROR; | |
173a0283 | 766 | phase_end(&all_pi, 0); |
ed60d210 DW |
767 | if (progress_fp) |
768 | fclose(progress_fp); | |
828105d1 | 769 | |
824b5807 DW |
770 | /* |
771 | * If we're being run as a service, the return code must fit the LSB | |
772 | * init script action error guidelines, which is to say that we | |
773 | * compress all errors to 1 ("generic or unspecified error", LSB 5.0 | |
774 | * section 22.2) and hope the admin will scan the log for what | |
775 | * actually happened. | |
776 | * | |
777 | * We have to sleep 2 seconds here because journald uses the pid to | |
778 | * connect our log messages to the systemd service. This is critical | |
779 | * for capturing all the log messages if the scrub fails, because the | |
780 | * fail service uses the service name to gather log messages for the | |
781 | * error report. | |
782 | */ | |
783 | if (is_service) { | |
784 | sleep(2); | |
785 | if (ret != SCRUB_RET_SUCCESS) | |
786 | return 1; | |
787 | } | |
788 | ||
828105d1 | 789 | return ret; |
95b1e505 | 790 | } |