]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
95b1e505 DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
95b1e505 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
95b1e505 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
f0585fce | 7 | #include <pthread.h> |
828105d1 | 8 | #include <stdlib.h> |
e2bc34de | 9 | #include <paths.h> |
173a0283 DW |
10 | #include <sys/time.h> |
11 | #include <sys/resource.h> | |
50a573a7 | 12 | #include <sys/statvfs.h> |
828105d1 | 13 | #include "platform_defs.h" |
828105d1 | 14 | #include "input.h" |
42b4c8e8 | 15 | #include "libfrog/paths.h" |
95b1e505 | 16 | #include "xfs_scrub.h" |
828105d1 | 17 | #include "common.h" |
b3f76f94 | 18 | #include "descr.h" |
4bbed4ec | 19 | #include "unicrash.h" |
ed60d210 | 20 | #include "progress.h" |
95b1e505 DW |
21 | |
22 | /* | |
23 | * XFS Online Metadata Scrub (and Repair) | |
24 | * | |
25 | * The XFS scrubber uses custom XFS ioctls to probe more deeply into the | |
26 | * internals of the filesystem. It takes advantage of scrubbing ioctls | |
27 | * to check all the records stored in a metadata object and to | |
28 | * cross-reference those records against the other filesystem metadata. | |
29 | * | |
30 | * After the program gathers command line arguments to figure out | |
31 | * exactly what the program is going to do, scrub execution is split up | |
32 | * into several separate phases: | |
33 | * | |
34 | * The "find geometry" phase queries XFS for the filesystem geometry. | |
35 | * The block devices for the data, realtime, and log devices are opened. | |
36 | * Kernel ioctls are test-queried to see if they actually work (the scrub | |
37 | * ioctl in particular), and any other filesystem-specific information | |
38 | * is gathered. | |
39 | * | |
40 | * In the "check internal metadata" phase, we call the metadata scrub | |
41 | * ioctl to check the filesystem's internal per-AG btrees. This | |
42 | * includes the AG superblock, AGF, AGFL, and AGI headers, freespace | |
43 | * btrees, the regular and free inode btrees, the reverse mapping | |
44 | * btrees, and the reference counting btrees. If the realtime device is | |
45 | * enabled, the realtime bitmap and reverse mapping btrees are checked. | |
46 | * Quotas, if enabled, are also checked in this phase. | |
47 | * | |
48 | * Each AG (and the realtime device) has its metadata checked in a | |
49 | * separate thread for better performance. Errors in the internal | |
50 | * metadata can be fixed here prior to the inode scan; refer to the | |
51 | * section about the "repair filesystem" phase for more information. | |
52 | * | |
53 | * The "scan all inodes" phase uses BULKSTAT to scan all the inodes in | |
54 | * an AG in disk order. The BULKSTAT information provides enough | |
55 | * information to construct a file handle that is used to check the | |
56 | * following parts of every file: | |
57 | * | |
58 | * - The inode record | |
59 | * - All three block forks (data, attr, CoW) | |
60 | * - If it's a symlink, the symlink target. | |
61 | * - If it's a directory, the directory entries. | |
62 | * - All extended attributes | |
63 | * - The parent pointer | |
64 | * | |
65 | * Multiple threads are started to check the inodes of each AG in | |
66 | * parallel. Errors in file metadata can be fixed here; see the section | |
67 | * about the "repair filesystem" phase for more information. | |
68 | * | |
69 | * Next comes the (configurable) "repair filesystem" phase. The user | |
70 | * can instruct this program to fix all problems encountered; to fix | |
71 | * only optimality problems and leave the corruptions; or not to touch | |
72 | * the filesystem at all. Any metadata repairs that did not succeed in | |
73 | * the previous two phases are retried here; if there are uncorrectable | |
74 | * errors, xfs_scrub stops here. | |
75 | * | |
ee310b0c DW |
76 | * To perform the actual repairs (or optimizations), we iterate all the |
77 | * items on the per-AG action item list and ask the kernel to repair | |
78 | * them. Items which are successfully repaired are removed from the | |
79 | * list. If an item is not acted upon successfully (or the kernel asks us | |
80 | * to try again), we retry the actions until there is nothing left to | |
81 | * fix or we fail to make forward progress. In that event, the | |
82 | * unfinished items are recorded as errors. If there are no errors at | |
83 | * this point, we call FSTRIM on the filesystem. | |
84 | * | |
95b1e505 DW |
85 | * The next phase is the "check directory tree" phase. In this phase, |
86 | * every directory is opened (via file handle) to confirm that each | |
87 | * directory is connected to the root. Directory entries are checked | |
88 | * for ambiguous Unicode normalization mappings, which is to say that we | |
89 | * look for pairs of entries whose utf-8 strings normalize to the same | |
90 | * code point sequence and map to different inodes, because that could | |
91 | * be used to trick a user into opening the wrong file. The names of | |
92 | * extended attributes are checked for Unicode normalization collisions. | |
93 | * | |
94 | * In the "verify data file integrity" phase, we employ GETFSMAP to read | |
95 | * the reverse-mappings of all AGs and issue direct-reads of the | |
96 | * underlying disk blocks. We rely on the underlying storage to have | |
97 | * checksummed the data blocks appropriately. Multiple threads are | |
98 | * started to check each AG in parallel; a separate thread pool is used | |
99 | * to handle the direct reads. | |
100 | * | |
101 | * In the "check summary counters" phase, use GETFSMAP to tally up the | |
102 | * blocks and BULKSTAT to tally up the inodes we saw and compare that to | |
103 | * the statfs output. This gives the user a rough estimate of how | |
104 | * thorough the scrub was. | |
105 | */ | |
106 | ||
828105d1 DW |
107 | /* |
108 | * Known debug tweaks (pass -d and set the environment variable): | |
109 | * XFS_SCRUB_FORCE_ERROR -- pretend all metadata is corrupt | |
110 | * XFS_SCRUB_FORCE_REPAIR -- repair all metadata even if it's ok | |
111 | * XFS_SCRUB_NO_KERNEL -- pretend there is no kernel ioctl | |
112 | * XFS_SCRUB_NO_SCSI_VERIFY -- disable SCSI VERIFY (if present) | |
113 | * XFS_SCRUB_PHASE -- run only this scrub phase | |
114 | * XFS_SCRUB_THREADS -- start exactly this number of threads | |
cac2b8b0 DW |
115 | * XFS_SCRUB_DISK_ERROR_INTERVAL -- simulate a disk error every this many bytes |
116 | * XFS_SCRUB_DISK_VERIFY_SKIP -- pretend disk verify read calls succeeded | |
824b5807 DW |
117 | * |
118 | * Available even in non-debug mode: | |
119 | * SERVICE_MODE -- compress all error codes to 1 for LSB | |
120 | * service action compliance | |
828105d1 DW |
121 | */ |
122 | ||
95b1e505 DW |
/* Name of this program, used as a prefix in libfrog error reports. */
char			*progname = "xfs_scrub";

/* Debug level; each -d on the command line raises it by one. */
unsigned int		debug;

/* Print resource usage when each phase finishes (-T)? */
static bool		display_rusage;

/*
 * Background mode level (-b); higher values insert more pauses between
 * scrub calls.
 */
unsigned int		bg_mode;

/* Number of threads we're allowed to use; zero means no override. */
unsigned int		force_nr_threads;

/* Verbose output requested (-v)? */
bool			verbose;

/* Also scrub file data blocks (-x)? */
static bool		scrub_data;

/* Size of a memory page, as reported by sysconf(). */
long			page_size;

/* FSTRIM the filesystem after a successful run?  Cleared by -k. */
bool			want_fstrim = true;

/* If stdout/stderr are ttys, we can use richer terminal control. */
bool			stderr_isatty;
bool			stdout_isatty;

/*
 * Set when SERVICE_MODE is present in the environment.  When running as
 * a service we need to be careful about which exit codes are handed back
 * to the calling process.
 */
bool			is_service;

/* Exit status bits; main() ORs these together to form the return code. */
#define SCRUB_RET_SUCCESS	(0)	/* no problems left behind */
#define SCRUB_RET_CORRUPT	(1)	/* corruption remains on fs */
#define SCRUB_RET_UNOPTIMIZED	(2)	/* fs could be optimized */
#define SCRUB_RET_OPERROR	(4)	/* operational problems */
#define SCRUB_RET_SYNTAX	(8)	/* cmdline args rejected */
165 | ||
166 | static void __attribute__((noreturn)) | |
167 | usage(void) | |
168 | { | |
680eacaa | 169 | fprintf(stderr, _("Usage: %s [OPTIONS] mountpoint\n"), progname); |
828105d1 DW |
170 | fprintf(stderr, "\n"); |
171 | fprintf(stderr, _("Options:\n")); | |
172 | fprintf(stderr, _(" -a count Stop after this many errors are found.\n")); | |
173 | fprintf(stderr, _(" -b Background mode.\n")); | |
ed60d210 | 174 | fprintf(stderr, _(" -C fd Print progress information to this fd.\n")); |
828105d1 | 175 | fprintf(stderr, _(" -e behavior What to do if errors are found.\n")); |
7e36bc0f | 176 | fprintf(stderr, _(" -k Do not FITRIM the free space.\n")); |
828105d1 DW |
177 | fprintf(stderr, _(" -m path Path to /etc/mtab.\n")); |
178 | fprintf(stderr, _(" -n Dry run. Do not modify anything.\n")); | |
179 | fprintf(stderr, _(" -T Display timing/usage information.\n")); | |
180 | fprintf(stderr, _(" -v Verbose output.\n")); | |
181 | fprintf(stderr, _(" -V Print version.\n")); | |
182 | fprintf(stderr, _(" -x Scrub file data too.\n")); | |
828105d1 DW |
183 | |
184 | exit(SCRUB_RET_SYNTAX); | |
185 | } | |
186 | ||
173a0283 DW |
#ifndef RUSAGE_BOTH
# define RUSAGE_BOTH		(-2)
#endif

/*
 * Get resource usage for ourselves and all children.
 *
 * First try a single getrusage(RUSAGE_BOTH) call.  If that fails, read
 * RUSAGE_SELF into @usage and add the fault, swap, block I/O, message,
 * signal and context switch counters from RUSAGE_CHILDREN on top.
 *
 * Returns 0 on success, or the nonzero getrusage() return value.
 *
 * NOTE(review): on the fallback path the time fields (ru_utime and
 * ru_stime) and the memory size fields are not merged, so they describe
 * this process only -- confirm that this is intended.
 */
static int
scrub_getrusage(
	struct rusage		*usage)
{
	struct rusage		children;
	int			ret;

	/* One-shot query; nothing more to do if it works. */
	if (getrusage(RUSAGE_BOTH, usage) == 0)
		return 0;

	ret = getrusage(RUSAGE_SELF, usage);
	if (ret != 0)
		return ret;

	ret = getrusage(RUSAGE_CHILDREN, &children);
	if (ret != 0)
		return ret;

	/* Fold the children's counters into our own. */
	usage->ru_minflt   += children.ru_minflt;
	usage->ru_majflt   += children.ru_majflt;
	usage->ru_nswap    += children.ru_nswap;
	usage->ru_inblock  += children.ru_inblock;
	usage->ru_oublock  += children.ru_oublock;
	usage->ru_msgsnd   += children.ru_msgsnd;
	usage->ru_msgrcv   += children.ru_msgrcv;
	usage->ru_nsignals += children.ru_nsignals;
	usage->ru_nvcsw    += children.ru_nvcsw;
	usage->ru_nivcsw   += children.ru_nivcsw;
	return 0;
}
223 | ||
224 | /* | |
225 | * Scrub Phase Dispatch | |
226 | * | |
227 | * The operations of the scrub program are split up into several | |
228 | * different phases. Each phase builds upon the metadata checked in the | |
229 | * previous phase, which is to say that we may skip phase (X + 1) if our | |
230 | * scans in phase (X) reveal corruption. A phase may be skipped | |
231 | * entirely. | |
232 | */ | |
233 | ||
/* Resource usage for each phase; filled in by phase_start(). */
struct phase_rusage {
	struct rusage		ruse;		/* usage when the phase began */
	struct timeval		time;		/* wall clock when it began */
	unsigned long long	verified_bytes;
	void			*brk_start;	/* sbrk(0) when it began */
	const char		*descr;		/* phase description, or NULL */
};

/*
 * Operations for each phase.
 *
 * The two dummy values below are placeholders for ->fn.  They mark the
 * optional data scan and repair phases, which run_scrub_phases() either
 * swaps for the real phase function or skips.
 */
#define DATASCAN_DUMMY_FN	((void *)1)
#define REPAIR_DUMMY_FN		((void *)2)
struct phase_ops {
	/* Description printed when the phase starts. */
	char		*descr;

	/* Run the phase; a nonzero return aborts the scrub. */
	int		(*fn)(struct scrub_ctx *ctx);

	/* Optional: estimate work items and threads for progress reports. */
	int		(*estimate_work)(struct scrub_ctx *ctx, uint64_t *items,
					 unsigned int *threads, int *rshift);

	/* Run this phase even if XFS_SCRUB_PHASE selects a different one. */
	bool		must_run;
};
253 | ||
254 | /* Start tracking resource usage for a phase. */ | |
64dabc9f | 255 | static int |
173a0283 DW |
256 | phase_start( |
257 | struct phase_rusage *pi, | |
258 | unsigned int phase, | |
259 | const char *descr) | |
260 | { | |
261 | int error; | |
262 | ||
263 | memset(pi, 0, sizeof(*pi)); | |
264 | error = scrub_getrusage(&pi->ruse); | |
265 | if (error) { | |
266 | perror(_("getrusage")); | |
64dabc9f | 267 | return error; |
173a0283 DW |
268 | } |
269 | pi->brk_start = sbrk(0); | |
270 | ||
271 | error = gettimeofday(&pi->time, NULL); | |
272 | if (error) { | |
273 | perror(_("gettimeofday")); | |
64dabc9f | 274 | return error; |
173a0283 DW |
275 | } |
276 | ||
277 | pi->descr = descr; | |
278 | if ((verbose || display_rusage) && descr) { | |
279 | fprintf(stdout, _("Phase %u: %s\n"), phase, descr); | |
280 | fflush(stdout); | |
281 | } | |
64dabc9f | 282 | return error; |
173a0283 DW |
283 | } |
284 | ||
285 | /* Report usage stats. */ | |
64dabc9f | 286 | static int |
173a0283 DW |
287 | phase_end( |
288 | struct phase_rusage *pi, | |
289 | unsigned int phase) | |
290 | { | |
291 | struct rusage ruse_now; | |
292 | #ifdef HAVE_MALLINFO | |
293 | struct mallinfo mall_now; | |
294 | #endif | |
295 | struct timeval time_now; | |
296 | char phasebuf[DESCR_BUFSZ]; | |
297 | double dt; | |
298 | unsigned long long in, out; | |
299 | unsigned long long io; | |
300 | double i, o, t; | |
301 | double din, dout, dtot; | |
302 | char *iu, *ou, *tu, *dinu, *doutu, *dtotu; | |
303 | int error; | |
304 | ||
305 | if (!display_rusage) | |
64dabc9f | 306 | return 0; |
173a0283 DW |
307 | |
308 | error = gettimeofday(&time_now, NULL); | |
309 | if (error) { | |
310 | perror(_("gettimeofday")); | |
64dabc9f | 311 | return error; |
173a0283 DW |
312 | } |
313 | dt = timeval_subtract(&time_now, &pi->time); | |
314 | ||
315 | error = scrub_getrusage(&ruse_now); | |
316 | if (error) { | |
317 | perror(_("getrusage")); | |
64dabc9f | 318 | return error; |
173a0283 DW |
319 | } |
320 | ||
321 | if (phase) | |
322 | snprintf(phasebuf, DESCR_BUFSZ, _("Phase %u: "), phase); | |
323 | else | |
324 | phasebuf[0] = 0; | |
325 | ||
326 | #define kbytes(x) (((unsigned long)(x) + 1023) / 1024) | |
327 | #ifdef HAVE_MALLINFO | |
328 | ||
329 | mall_now = mallinfo(); | |
330 | fprintf(stdout, _("%sMemory used: %luk/%luk (%luk/%luk), "), | |
331 | phasebuf, | |
332 | kbytes(mall_now.arena), kbytes(mall_now.hblkhd), | |
333 | kbytes(mall_now.uordblks), kbytes(mall_now.fordblks)); | |
334 | #else | |
335 | fprintf(stdout, _("%sMemory used: %luk, "), | |
336 | phasebuf, | |
337 | (unsigned long) kbytes(((char *) sbrk(0)) - | |
338 | ((char *) pi->brk_start))); | |
339 | #endif | |
340 | #undef kbytes | |
341 | ||
342 | fprintf(stdout, _("time: %5.2f/%5.2f/%5.2fs\n"), | |
343 | timeval_subtract(&time_now, &pi->time), | |
344 | timeval_subtract(&ruse_now.ru_utime, &pi->ruse.ru_utime), | |
345 | timeval_subtract(&ruse_now.ru_stime, &pi->ruse.ru_stime)); | |
346 | ||
347 | /* I/O usage */ | |
348 | in = ((unsigned long long)ruse_now.ru_inblock - | |
349 | pi->ruse.ru_inblock) << BBSHIFT; | |
350 | out = ((unsigned long long)ruse_now.ru_oublock - | |
351 | pi->ruse.ru_oublock) << BBSHIFT; | |
352 | io = in + out; | |
353 | if (io) { | |
354 | i = auto_space_units(in, &iu); | |
355 | o = auto_space_units(out, &ou); | |
356 | t = auto_space_units(io, &tu); | |
357 | din = auto_space_units(in / dt, &dinu); | |
358 | dout = auto_space_units(out / dt, &doutu); | |
359 | dtot = auto_space_units(io / dt, &dtotu); | |
360 | fprintf(stdout, | |
361 | _("%sI/O: %.1f%s in, %.1f%s out, %.1f%s tot\n"), | |
362 | phasebuf, i, iu, o, ou, t, tu); | |
363 | fprintf(stdout, | |
364 | _("%sI/O rate: %.1f%s/s in, %.1f%s/s out, %.1f%s/s tot\n"), | |
365 | phasebuf, din, dinu, dout, doutu, dtot, dtotu); | |
366 | } | |
367 | fflush(stdout); | |
368 | ||
64dabc9f | 369 | return 0; |
173a0283 DW |
370 | } |
371 | ||
372 | /* Run all the phases of the scrubber. */ | |
373 | static bool | |
374 | run_scrub_phases( | |
ed60d210 DW |
375 | struct scrub_ctx *ctx, |
376 | FILE *progress_fp) | |
173a0283 DW |
377 | { |
378 | struct phase_ops phases[] = | |
379 | { | |
380 | { | |
381 | .descr = _("Find filesystem geometry."), | |
64dabc9f | 382 | .fn = phase1_func, |
50a573a7 | 383 | .must_run = true, |
173a0283 DW |
384 | }, |
385 | { | |
386 | .descr = _("Check internal metadata."), | |
64dabc9f DW |
387 | .fn = phase2_func, |
388 | .estimate_work = phase2_estimate, | |
173a0283 DW |
389 | }, |
390 | { | |
391 | .descr = _("Scan all inodes."), | |
64dabc9f DW |
392 | .fn = phase3_func, |
393 | .estimate_work = phase3_estimate, | |
173a0283 DW |
394 | }, |
395 | { | |
396 | .descr = _("Defer filesystem repairs."), | |
397 | .fn = REPAIR_DUMMY_FN, | |
64dabc9f | 398 | .estimate_work = phase4_estimate, |
173a0283 DW |
399 | }, |
400 | { | |
401 | .descr = _("Check directory tree."), | |
64dabc9f DW |
402 | .fn = phase5_func, |
403 | .estimate_work = phase5_estimate, | |
173a0283 DW |
404 | }, |
405 | { | |
406 | .descr = _("Verify data file integrity."), | |
407 | .fn = DATASCAN_DUMMY_FN, | |
64dabc9f | 408 | .estimate_work = phase6_estimate, |
173a0283 DW |
409 | }, |
410 | { | |
411 | .descr = _("Check summary counters."), | |
64dabc9f | 412 | .fn = phase7_func, |
698c6c7c | 413 | .must_run = true, |
173a0283 DW |
414 | }, |
415 | { | |
416 | NULL | |
417 | }, | |
418 | }; | |
419 | struct phase_rusage pi; | |
420 | struct phase_ops *sp; | |
ed60d210 | 421 | uint64_t max_work; |
173a0283 DW |
422 | unsigned int debug_phase = 0; |
423 | unsigned int phase; | |
ed60d210 | 424 | int rshift; |
c3387fb8 | 425 | int ret = 0; |
173a0283 | 426 | |
ea019515 | 427 | if (debug_tweak_on("XFS_SCRUB_PHASE")) |
173a0283 DW |
428 | debug_phase = atoi(getenv("XFS_SCRUB_PHASE")); |
429 | ||
430 | /* Run all phases of the scrub tool. */ | |
431 | for (phase = 1, sp = phases; sp->fn; sp++, phase++) { | |
b364a9c0 | 432 | /* Turn on certain phases if user said to. */ |
7e36bc0f | 433 | if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) { |
64dabc9f | 434 | sp->fn = phase6_func; |
1658224d DW |
435 | } else if (sp->fn == REPAIR_DUMMY_FN && |
436 | ctx->mode == SCRUB_MODE_REPAIR) { | |
437 | sp->descr = _("Repair filesystem."); | |
64dabc9f | 438 | sp->fn = phase4_func; |
1658224d | 439 | sp->must_run = true; |
7e36bc0f | 440 | } |
b364a9c0 | 441 | |
173a0283 DW |
442 | /* Skip certain phases unless they're turned on. */ |
443 | if (sp->fn == REPAIR_DUMMY_FN || | |
444 | sp->fn == DATASCAN_DUMMY_FN) | |
445 | continue; | |
446 | ||
447 | /* Allow debug users to force a particular phase. */ | |
448 | if (debug_phase && phase != debug_phase && !sp->must_run) | |
449 | continue; | |
450 | ||
451 | /* Run this phase. */ | |
64dabc9f DW |
452 | ret = phase_start(&pi, phase, sp->descr); |
453 | if (ret) | |
ed60d210 DW |
454 | break; |
455 | if (sp->estimate_work) { | |
4ace28d7 ES |
456 | unsigned int work_threads; |
457 | ||
64dabc9f | 458 | ret = sp->estimate_work(ctx, &max_work, |
4ace28d7 | 459 | &work_threads, &rshift); |
64dabc9f | 460 | if (ret) |
ed60d210 | 461 | break; |
51c94053 DW |
462 | |
463 | /* | |
464 | * The thread that starts the worker threads is also | |
465 | * allowed to contribute to the progress counters and | |
466 | * whatever other per-thread data we need to allocate. | |
467 | */ | |
468 | work_threads++; | |
d86e83b8 | 469 | ret = progress_init_phase(ctx, progress_fp, phase, |
4ace28d7 | 470 | max_work, rshift, work_threads); |
64dabc9f | 471 | if (ret) |
b3f76f94 | 472 | break; |
64dabc9f | 473 | ret = descr_init_phase(ctx, work_threads); |
ed60d210 | 474 | } else { |
d86e83b8 | 475 | ret = progress_init_phase(ctx, NULL, phase, 0, 0, 0); |
64dabc9f | 476 | if (ret) |
b3f76f94 | 477 | break; |
64dabc9f | 478 | ret = descr_init_phase(ctx, 1); |
ed60d210 | 479 | } |
64dabc9f | 480 | if (ret) |
173a0283 | 481 | break; |
64dabc9f DW |
482 | ret = sp->fn(ctx); |
483 | if (ret) { | |
173a0283 DW |
484 | str_info(ctx, ctx->mntpoint, |
485 | _("Scrub aborted after phase %d."), | |
486 | phase); | |
487 | break; | |
488 | } | |
ed60d210 | 489 | progress_end_phase(); |
b3f76f94 | 490 | descr_end_phase(); |
64dabc9f DW |
491 | ret = phase_end(&pi, phase); |
492 | if (ret) | |
173a0283 DW |
493 | break; |
494 | ||
495 | /* Too many errors? */ | |
273165cc | 496 | if (scrub_excessive_errors(ctx)) { |
64dabc9f | 497 | ret = ECANCELED; |
173a0283 | 498 | break; |
64dabc9f | 499 | } |
173a0283 DW |
500 | } |
501 | ||
64dabc9f | 502 | return ret; |
173a0283 DW |
503 | } |
504 | ||
ee310b0c DW |
505 | static void |
506 | report_modifications( | |
507 | struct scrub_ctx *ctx) | |
508 | { | |
509 | if (ctx->repairs == 0 && ctx->preens == 0) | |
510 | return; | |
511 | ||
512 | if (ctx->repairs && ctx->preens) | |
513 | fprintf(stdout, | |
514 | _("%s: repairs made: %llu; optimizations made: %llu.\n"), | |
515 | ctx->mntpoint, ctx->repairs, ctx->preens); | |
516 | else if (ctx->preens == 0) | |
517 | fprintf(stdout, | |
518 | _("%s: repairs made: %llu.\n"), | |
519 | ctx->mntpoint, ctx->repairs); | |
520 | else if (ctx->repairs == 0) | |
521 | fprintf(stdout, | |
522 | _("%s: optimizations made: %llu.\n"), | |
523 | ctx->mntpoint, ctx->preens); | |
524 | } | |
525 | ||
5454c2bf DW |
526 | static void |
527 | report_outcome( | |
528 | struct scrub_ctx *ctx) | |
529 | { | |
49e05cb0 | 530 | unsigned long long actionable_errors; |
5454c2bf | 531 | |
49e05cb0 | 532 | actionable_errors = ctx->corruptions_found + ctx->runtime_errors; |
5454c2bf | 533 | |
49e05cb0 DW |
534 | if (actionable_errors == 0 && |
535 | ctx->unfixable_errors == 0 && | |
536 | ctx->warnings_found == 0) { | |
5155653f | 537 | log_info(ctx, _("No problems found.")); |
5454c2bf | 538 | return; |
7c309151 | 539 | } |
5454c2bf | 540 | |
49e05cb0 DW |
541 | if (ctx->unfixable_errors) { |
542 | fprintf(stderr, _("%s: unfixable errors found: %llu\n"), | |
543 | ctx->mntpoint, ctx->unfixable_errors); | |
544 | log_err(ctx, _("unfixable errors found: %llu"), | |
545 | ctx->unfixable_errors); | |
546 | } | |
547 | ||
abc2e70d DW |
548 | if (ctx->corruptions_found > 0) { |
549 | fprintf(stderr, _("%s: corruptions found: %llu\n"), | |
550 | ctx->mntpoint, ctx->corruptions_found); | |
551 | log_err(ctx, _("corruptions found: %llu"), | |
552 | ctx->corruptions_found); | |
553 | } | |
554 | ||
555 | if (ctx->runtime_errors > 0) { | |
556 | fprintf(stderr, _("%s: operational errors found: %llu\n"), | |
557 | ctx->mntpoint, ctx->runtime_errors); | |
558 | log_err(ctx, _("operational errors found: %llu"), | |
559 | ctx->runtime_errors); | |
5155653f DW |
560 | } |
561 | ||
562 | if (ctx->warnings_found > 0) { | |
563 | fprintf(stderr, _("%s: warnings found: %llu\n"), ctx->mntpoint, | |
5454c2bf | 564 | ctx->warnings_found); |
5155653f | 565 | log_warn(ctx, _("warnings found: %llu"), ctx->warnings_found); |
7c309151 DW |
566 | } |
567 | ||
c767c5ae DW |
568 | /* |
569 | * Don't advise the user to run repair unless we were successful in | |
570 | * setting up the scrub and we actually saw corruptions. Warnings | |
571 | * are not corruptions. | |
572 | */ | |
49e05cb0 | 573 | if (ctx->scrub_setup_succeeded && actionable_errors > 0) { |
ee310b0c DW |
574 | char *msg; |
575 | ||
576 | if (ctx->mode == SCRUB_MODE_DRY_RUN) | |
577 | msg = _("%s: Re-run xfs_scrub without -n.\n"); | |
578 | else | |
579 | msg = _("%s: Unmount and run xfs_repair.\n"); | |
580 | ||
581 | fprintf(stderr, msg, ctx->mntpoint); | |
582 | } | |
5454c2bf DW |
583 | } |
584 | ||
95b1e505 DW |
585 | int |
586 | main( | |
587 | int argc, | |
588 | char **argv) | |
589 | { | |
828105d1 | 590 | struct scrub_ctx ctx = {0}; |
173a0283 | 591 | struct phase_rusage all_pi; |
828105d1 | 592 | char *mtab = NULL; |
ed60d210 | 593 | FILE *progress_fp = NULL; |
680eacaa | 594 | struct fs_path *fsp; |
828105d1 | 595 | int c; |
ed60d210 | 596 | int fd; |
828105d1 | 597 | int ret = SCRUB_RET_SUCCESS; |
35b65bcf | 598 | int error; |
828105d1 | 599 | |
95b1e505 | 600 | fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n"); |
828105d1 DW |
601 | |
602 | progname = basename(argv[0]); | |
603 | setlocale(LC_ALL, ""); | |
604 | bindtextdomain(PACKAGE, LOCALEDIR); | |
605 | textdomain(PACKAGE); | |
606 | ||
607 | pthread_mutex_init(&ctx.lock, NULL); | |
1658224d | 608 | ctx.mode = SCRUB_MODE_REPAIR; |
828105d1 | 609 | ctx.error_action = ERRORS_CONTINUE; |
1658224d | 610 | while ((c = getopt(argc, argv, "a:bC:de:km:nTvxV")) != EOF) { |
828105d1 DW |
611 | switch (c) { |
612 | case 'a': | |
613 | ctx.max_errors = cvt_u64(optarg, 10); | |
614 | if (errno) { | |
615 | perror(optarg); | |
616 | usage(); | |
617 | } | |
618 | break; | |
619 | case 'b': | |
32c6cc09 | 620 | force_nr_threads = 1; |
828105d1 DW |
621 | bg_mode++; |
622 | break; | |
ed60d210 DW |
623 | case 'C': |
624 | errno = 0; | |
625 | fd = cvt_u32(optarg, 10); | |
626 | if (errno) { | |
627 | perror(optarg); | |
628 | usage(); | |
629 | } | |
630 | progress_fp = fdopen(fd, "w"); | |
631 | if (!progress_fp) { | |
632 | perror(optarg); | |
633 | usage(); | |
634 | } | |
635 | break; | |
828105d1 DW |
636 | case 'd': |
637 | debug++; | |
638 | break; | |
639 | case 'e': | |
640 | if (!strcmp("continue", optarg)) | |
641 | ctx.error_action = ERRORS_CONTINUE; | |
642 | else if (!strcmp("shutdown", optarg)) | |
643 | ctx.error_action = ERRORS_SHUTDOWN; | |
644 | else { | |
645 | fprintf(stderr, | |
646 | _("Unknown error behavior \"%s\".\n"), | |
647 | optarg); | |
648 | usage(); | |
649 | } | |
650 | break; | |
7e36bc0f DW |
651 | case 'k': |
652 | want_fstrim = false; | |
653 | break; | |
828105d1 DW |
654 | case 'm': |
655 | mtab = optarg; | |
656 | break; | |
657 | case 'n': | |
828105d1 DW |
658 | ctx.mode = SCRUB_MODE_DRY_RUN; |
659 | break; | |
660 | case 'T': | |
661 | display_rusage = true; | |
662 | break; | |
663 | case 'v': | |
664 | verbose = true; | |
665 | break; | |
666 | case 'V': | |
667 | fprintf(stdout, _("%s version %s\n"), progname, | |
668 | VERSION); | |
669 | fflush(stdout); | |
670 | return SCRUB_RET_SUCCESS; | |
671 | case 'x': | |
672 | scrub_data = true; | |
673 | break; | |
828105d1 DW |
674 | default: |
675 | usage(); | |
676 | } | |
677 | } | |
678 | ||
679 | /* Override thread count if debugger */ | |
680 | if (debug_tweak_on("XFS_SCRUB_THREADS")) { | |
681 | unsigned int x; | |
682 | ||
683 | x = cvt_u32(getenv("XFS_SCRUB_THREADS"), 10); | |
684 | if (errno) { | |
685 | perror("nr_threads"); | |
686 | usage(); | |
687 | } | |
32c6cc09 | 688 | force_nr_threads = x; |
828105d1 DW |
689 | } |
690 | ||
691 | if (optind != argc - 1) | |
692 | usage(); | |
693 | ||
680eacaa | 694 | ctx.mntpoint = argv[optind]; |
828105d1 | 695 | |
ed60d210 DW |
696 | stdout_isatty = isatty(STDOUT_FILENO); |
697 | stderr_isatty = isatty(STDERR_FILENO); | |
698 | ||
699 | /* If interactive, start the progress bar. */ | |
700 | if (stdout_isatty && !progress_fp) | |
701 | progress_fp = fdopen(1, "w+"); | |
702 | ||
824b5807 DW |
703 | if (getenv("SERVICE_MODE")) |
704 | is_service = true; | |
705 | ||
2e4959c1 | 706 | /* Initialize overall phase stats. */ |
64dabc9f DW |
707 | error = phase_start(&all_pi, 0, NULL); |
708 | if (error) | |
2e4959c1 DW |
709 | return SCRUB_RET_OPERROR; |
710 | ||
50a573a7 DW |
711 | /* Find the mount record for the passed-in argument. */ |
712 | if (stat(argv[optind], &ctx.mnt_sb) < 0) { | |
713 | fprintf(stderr, | |
714 | _("%s: could not stat: %s: %s\n"), | |
715 | progname, argv[optind], strerror(errno)); | |
716 | ctx.runtime_errors++; | |
717 | goto out; | |
718 | } | |
719 | ||
828105d1 DW |
720 | /* |
721 | * If the user did not specify an explicit mount table, try to use | |
722 | * /proc/mounts if it is available, else /etc/mtab. We prefer | |
723 | * /proc/mounts because it is kernel controlled, while /etc/mtab | |
724 | * may contain garbage that userspace tools like pam_mounts wrote | |
725 | * into it. | |
726 | */ | |
727 | if (!mtab) { | |
728 | if (access(_PATH_PROC_MOUNTS, R_OK) == 0) | |
729 | mtab = _PATH_PROC_MOUNTS; | |
730 | else | |
731 | mtab = _PATH_MOUNTED; | |
732 | } | |
733 | ||
680eacaa DW |
734 | fs_table_initialise(0, NULL, 0, NULL); |
735 | fsp = fs_table_lookup_mount(ctx.mntpoint); | |
736 | if (!fsp) { | |
737 | fprintf(stderr, _("%s: Not a XFS mount point.\n"), | |
738 | ctx.mntpoint); | |
50a573a7 DW |
739 | ret |= SCRUB_RET_SYNTAX; |
740 | goto out; | |
741 | } | |
680eacaa | 742 | memcpy(&ctx.fsinfo, fsp, sizeof(struct fs_path)); |
50a573a7 | 743 | |
828105d1 DW |
744 | /* Set up a page-aligned buffer for read verification. */ |
745 | page_size = sysconf(_SC_PAGESIZE); | |
746 | if (page_size < 0) { | |
747 | str_errno(&ctx, ctx.mntpoint); | |
748 | goto out; | |
749 | } | |
750 | ||
751 | if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR")) | |
752 | ctx.mode = SCRUB_MODE_REPAIR; | |
753 | ||
173a0283 | 754 | /* Scrub a filesystem. */ |
64dabc9f DW |
755 | error = run_scrub_phases(&ctx, progress_fp); |
756 | if (error && ctx.runtime_errors == 0) | |
173a0283 DW |
757 | ctx.runtime_errors++; |
758 | ||
759 | /* | |
760 | * Excessive errors will cause the scrub phases to bail out early. | |
761 | * We don't want every thread yelling that into the output, so check | |
762 | * if we hit the threshold and tell the user *once*. | |
763 | */ | |
273165cc | 764 | if (scrub_excessive_errors(&ctx)) |
828105d1 DW |
765 | str_info(&ctx, ctx.mntpoint, _("Too many errors; aborting.")); |
766 | ||
767 | if (debug_tweak_on("XFS_SCRUB_FORCE_ERROR")) | |
e98616ba | 768 | str_info(&ctx, ctx.mntpoint, _("Injecting error.")); |
828105d1 | 769 | |
50a573a7 | 770 | /* Clean up scan data. */ |
35b65bcf DW |
771 | error = scrub_cleanup(&ctx); |
772 | if (error && ctx.runtime_errors == 0) | |
50a573a7 DW |
773 | ctx.runtime_errors++; |
774 | ||
828105d1 | 775 | out: |
ee310b0c | 776 | report_modifications(&ctx); |
5454c2bf DW |
777 | report_outcome(&ctx); |
778 | ||
abc2e70d | 779 | if (ctx.corruptions_found) { |
50a573a7 DW |
780 | if (ctx.error_action == ERRORS_SHUTDOWN) |
781 | xfs_shutdown_fs(&ctx); | |
828105d1 | 782 | ret |= SCRUB_RET_CORRUPT; |
50a573a7 | 783 | } |
828105d1 DW |
784 | if (ctx.warnings_found) |
785 | ret |= SCRUB_RET_UNOPTIMIZED; | |
786 | if (ctx.runtime_errors) | |
787 | ret |= SCRUB_RET_OPERROR; | |
173a0283 | 788 | phase_end(&all_pi, 0); |
ed60d210 DW |
789 | if (progress_fp) |
790 | fclose(progress_fp); | |
828105d1 | 791 | |
824b5807 DW |
792 | /* |
793 | * If we're being run as a service, the return code must fit the LSB | |
794 | * init script action error guidelines, which is to say that we | |
795 | * compress all errors to 1 ("generic or unspecified error", LSB 5.0 | |
796 | * section 22.2) and hope the admin will scan the log for what | |
797 | * actually happened. | |
798 | * | |
799 | * We have to sleep 2 seconds here because journald uses the pid to | |
800 | * connect our log messages to the systemd service. This is critical | |
801 | * for capturing all the log messages if the scrub fails, because the | |
802 | * fail service uses the service name to gather log messages for the | |
803 | * error report. | |
f6302b0f DW |
804 | * |
805 | * Note: We don't count a lack of kernel support as a service failure | |
806 | * because we haven't determined that there's anything wrong with the | |
807 | * filesystem. | |
824b5807 DW |
808 | */ |
809 | if (is_service) { | |
810 | sleep(2); | |
f6302b0f DW |
811 | if (!ctx.scrub_setup_succeeded) |
812 | return 0; | |
824b5807 DW |
813 | if (ret != SCRUB_RET_SUCCESS) |
814 | return 1; | |
815 | } | |
816 | ||
828105d1 | 817 | return ret; |
95b1e505 | 818 | } |