1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
11 #include <sys/types.h>
12 #include <sys/statvfs.h>
14 #include "libfrog/paths.h"
15 #include "libfrog/fsgeom.h"
16 #include "libfrog/scrub.h"
17 #include "xfs_scrub.h"
21 #include "xfs_errortag.h"
24 /* Online scrub and repair wrappers. */
26 /* Format a scrub description. */
/*
 * Writes a human-readable label for the object described by *meta into buf,
 * bounded by buflen.  The scrubber table entry is looked up as
 * xfrog_scrubbers[meta->sm_type] and the visible case labels format it as:
 *   - XFROG_SCRUB_TYPE_AGHEADER / XFROG_SCRUB_TYPE_PERAG: snprintf() with the
 *     translated "AG %u %s" format, starting with meta->sm_agno;
 *   - XFROG_SCRUB_TYPE_INODE: scrub_render_ino_descr() with meta->sm_ino,
 *     meta->sm_gen and a "%s" format;
 *   - XFROG_SCRUB_TYPE_FS: snprintf() of the translated sc->descr only.
 * NOTE(review): this listing elides several lines (function name line, the
 * buf and buflen parameter declarations, the switch header, the trailing
 * arguments of the first two calls, and whatever XFROG_SCRUB_TYPE_NONE does).
 * Presumably this is format_scrub_descr(), which is called elsewhere in this
 * file as (ctx, buf, DESCR_BUFSZ, meta) -- confirm against the full source.
 */
29 struct scrub_ctx
*ctx
,
32 struct xfs_scrub_metadata
*meta
)
34 const struct xfrog_scrub_descr
*sc
= &xfrog_scrubbers
[meta
->sm_type
];
37 case XFROG_SCRUB_TYPE_AGHEADER
:
38 case XFROG_SCRUB_TYPE_PERAG
:
39 snprintf(buf
, buflen
, _("AG %u %s"), meta
->sm_agno
,
42 case XFROG_SCRUB_TYPE_INODE
:
43 scrub_render_ino_descr(ctx
, buf
, buflen
,
44 meta
->sm_ino
, meta
->sm_gen
, "%s",
47 case XFROG_SCRUB_TYPE_FS
:
48 snprintf(buf
, buflen
, _("%s"), _(sc
->descr
));
50 case XFROG_SCRUB_TYPE_NONE
:
56 /* Predicates for scrub flag state. */
58 static inline bool is_corrupt(struct xfs_scrub_metadata
*sm
)
60 return sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
;
63 static inline bool is_unoptimized(struct xfs_scrub_metadata
*sm
)
65 return sm
->sm_flags
& XFS_SCRUB_OFLAG_PREEN
;
68 static inline bool xref_failed(struct xfs_scrub_metadata
*sm
)
70 return sm
->sm_flags
& XFS_SCRUB_OFLAG_XFAIL
;
73 static inline bool xref_disagrees(struct xfs_scrub_metadata
*sm
)
75 return sm
->sm_flags
& XFS_SCRUB_OFLAG_XCORRUPT
;
78 static inline bool is_incomplete(struct xfs_scrub_metadata
*sm
)
80 return sm
->sm_flags
& XFS_SCRUB_OFLAG_INCOMPLETE
;
83 static inline bool is_suspicious(struct xfs_scrub_metadata
*sm
)
85 return sm
->sm_flags
& XFS_SCRUB_OFLAG_WARNING
;
/*
 * Should we fix it?  True when the result is flagged corrupt or when
 * cross-referencing found a discrepancy.
 */
static inline bool
needs_repair(
	struct xfs_scrub_metadata	*sm)
{
	if (is_corrupt(sm))
		return true;
	return xref_disagrees(sm);
}
94 /* Warn about strange circumstances after scrub. */
/*
 * Reports non-fatal conditions found in *meta, each message labelled with
 * descr:
 *   - is_incomplete(meta): str_info() "Check incomplete."
 *   - is_suspicious(meta): "Possibly suspect metadata." -- both a str_info()
 *     and a str_warn() call with this text are visible;
 *   - xref_failed(meta):   str_info() "Cross-referencing failed."
 * NOTE(review): this listing elides the return type, the declaration of the
 * descr parameter (callers in this file pass three arguments: ctx, a
 * description buffer, meta) and the lines that choose between str_info() and
 * str_warn() for the suspicious case -- confirm against the full source.
 */
96 xfs_scrub_warn_incomplete_scrub(
97 struct scrub_ctx
*ctx
,
99 struct xfs_scrub_metadata
*meta
)
101 if (is_incomplete(meta
))
102 str_info(ctx
, descr
, _("Check incomplete."));
104 if (is_suspicious(meta
)) {
106 str_info(ctx
, descr
, _("Possibly suspect metadata."));
108 str_warn(ctx
, descr
, _("Possibly suspect metadata."));
111 if (xref_failed(meta
))
112 str_info(ctx
, descr
, _("Cross-referencing failed."));
115 /* Do a read-only check of some metadata. */
/*
 * Returns an enum check_outcome.  Visible flow:
 *   1. Asserts that the XFS_SCRUB_NO_KERNEL debug tweak is off and that
 *      meta->sm_type is below XFS_SCRUB_TYPE_NR, then formats a description
 *      of the object into a DESCR_BUFSZ stack buffer.
 *   2. Calls xfrog_scrub_metadata(&ctx->mnt, meta).  When the
 *      XFS_SCRUB_FORCE_REPAIR debug tweak is on and the call reported no
 *      error, XFS_SCRUB_OFLAG_CORRUPT is OR-ed into meta->sm_flags.
 *   3. Error handling is only visible through its comments and messages
 *      (skip missing metadata, abort on shutdown / I/O error / low memory,
 *      report "Kernel bug" via str_liberror(), otherwise operational error).
 *   4. While tries is below 10, an incomplete result, or a cross-reference
 *      discrepancy without corruption, enters a retry branch.
 *   5. xfs_scrub_warn_incomplete_scrub() reports incomplete or suspicious
 *      results.
 *   6. Corruption or cross-reference discrepancy: if ctx->mode is below
 *      SCRUB_MODE_REPAIR, str_corrupt() "Repairs are required.".
 *   7. Optimizable metadata when ctx->mode is not SCRUB_MODE_REPAIR: either
 *      an "Optimization is possible." message, or
 *      ctx->preen_triggers[meta->sm_type] is set to true.  That flag is
 *      tested once without ctx->lock and tested again with ctx->lock held
 *      before being set.
 * NOTE(review): the function name line, the third parameter (callers in this
 * file pass a bool as third argument), the declarations of error and code,
 * the errno switch labels, every return statement and the retry jump are
 * elided from this listing; presumably this is xfs_check_metadata() as
 * called elsewhere in this file -- confirm against the full source.
 * NOTE(review): step 6 spells out is_corrupt() || xref_disagrees(), which is
 * the same test needs_repair() performs.
 */
116 static enum check_outcome
118 struct scrub_ctx
*ctx
,
119 struct xfs_scrub_metadata
*meta
,
122 char buf
[DESCR_BUFSZ
];
123 unsigned int tries
= 0;
127 assert(!debug_tweak_on("XFS_SCRUB_NO_KERNEL"));
128 assert(meta
->sm_type
< XFS_SCRUB_TYPE_NR
);
129 format_scrub_descr(ctx
, buf
, DESCR_BUFSZ
, meta
);
131 dbg_printf("check %s flags %xh\n", buf
, meta
->sm_flags
);
133 error
= xfrog_scrub_metadata(&ctx
->mnt
, meta
);
134 if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error
)
135 meta
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
140 /* Metadata not present, just skip it. */
143 /* FS already crashed, give up. */
145 _("Filesystem is shut down, aborting."));
149 /* Abort on I/O errors or insufficient memory. */
157 * The first two should never escape the kernel,
158 * and the other two should be reported via sm_flags.
160 str_liberror(ctx
, code
, _("Kernel bug"));
163 /* Operational error. */
170 * If the kernel says the test was incomplete or that there was
171 * a cross-referencing discrepancy but no obvious corruption,
172 * we'll try the scan again, just in case the fs was busy.
173 * Only retry so many times.
175 if (tries
< 10 && (is_incomplete(meta
) ||
176 (xref_disagrees(meta
) && !is_corrupt(meta
)))) {
181 /* Complain about incomplete or suspicious metadata. */
182 xfs_scrub_warn_incomplete_scrub(ctx
, buf
, meta
);
185 * If we need repairs or there were discrepancies, schedule a
186 * repair if desired, otherwise complain.
188 if (is_corrupt(meta
) || xref_disagrees(meta
)) {
189 if (ctx
->mode
< SCRUB_MODE_REPAIR
) {
190 str_corrupt(ctx
, buf
,
191 _("Repairs are required."));
199 * If we could optimize, schedule a repair if desired,
200 * otherwise complain.
202 if (is_unoptimized(meta
)) {
203 if (ctx
->mode
!= SCRUB_MODE_REPAIR
) {
205 /* AG or FS metadata, always warn. */
207 _("Optimization is possible."));
208 } else if (!ctx
->preen_triggers
[meta
->sm_type
]) {
209 /* File metadata, only warn once per type. */
210 pthread_mutex_lock(&ctx
->lock
);
211 if (!ctx
->preen_triggers
[meta
->sm_type
])
212 ctx
->preen_triggers
[meta
->sm_type
] = true;
213 pthread_mutex_unlock(&ctx
->lock
);
221 /* Everything is ok. */
225 /* Bulk-notify user about things that could be optimized. */
/*
 * Walks i over [0, XFS_SCRUB_TYPE_NR).  Each iteration takes ctx->lock; if
 * ctx->preen_triggers[i] is set it is cleared, the lock is released, and
 * only then is str_info() called on ctx->mntpoint with
 * "Optimizations of %s are possible." and the translated
 * xfrog_scrubbers[i].descr.  A second unlock call is visible for the path
 * where the flag was not set.
 * NOTE(review): the return type, the declaration of i and the else/closing
 * lines are elided from this listing -- confirm against the full source.
 */
227 xfs_scrub_report_preen_triggers(
228 struct scrub_ctx
*ctx
)
232 for (i
= 0; i
< XFS_SCRUB_TYPE_NR
; i
++) {
233 pthread_mutex_lock(&ctx
->lock
);
234 if (ctx
->preen_triggers
[i
]) {
235 ctx
->preen_triggers
[i
] = false;
236 pthread_mutex_unlock(&ctx
->lock
);
237 str_info(ctx
, ctx
->mntpoint
,
238 _("Optimizations of %s are possible."), _(xfrog_scrubbers
[i
].descr
));
240 pthread_mutex_unlock(&ctx
->lock
);
245 /* Save a scrub context for later repairs. */
/*
 * Allocates a struct action_item with malloc(), zeroes it with memset(),
 * copies meta->sm_type and meta->sm_flags into it, and then, depending on
 * xfrog_scrubbers[meta->sm_type].type, records either meta->sm_agno
 * (AGHEADER / PERAG) or meta->sm_ino plus meta->sm_gen (INODE).  The item is
 * handed to xfs_action_list_add(alist, aitem).  A str_errno() call labelled
 * "repair list" is visible after the allocation.
 * NOTE(review): the return type, the allocation-failure test, the break /
 * default lines of the switch and the return statements are elided from
 * this listing; callers in this file test the result with "!", so a
 * truth-valued return is presumed -- confirm against the full source.
 * NOTE(review): malloc() followed by memset(..., 0, ...) could be a single
 * calloc() call.
 */
247 xfs_scrub_save_repair(
248 struct scrub_ctx
*ctx
,
249 struct xfs_action_list
*alist
,
250 struct xfs_scrub_metadata
*meta
)
252 struct action_item
*aitem
;
254 /* Schedule this item for later repairs. */
255 aitem
= malloc(sizeof(struct action_item
));
257 str_errno(ctx
, _("repair list"));
260 memset(aitem
, 0, sizeof(*aitem
));
261 aitem
->type
= meta
->sm_type
;
262 aitem
->flags
= meta
->sm_flags
;
263 switch (xfrog_scrubbers
[meta
->sm_type
].type
) {
264 case XFROG_SCRUB_TYPE_AGHEADER
:
265 case XFROG_SCRUB_TYPE_PERAG
:
266 aitem
->agno
= meta
->sm_agno
;
268 case XFROG_SCRUB_TYPE_INODE
:
269 aitem
->ino
= meta
->sm_ino
;
270 aitem
->gen
= meta
->sm_gen
;
276 xfs_action_list_add(alist
, aitem
);
280 /* Scrub a single XFS_SCRUB_TYPE_*, saving corruption reports for later. */
/*
 * Builds a struct xfs_scrub_metadata on the stack, runs
 * xfs_check_metadata(ctx, &meta, false) on it, and on one of the outcomes
 * passes the result to xfs_scrub_save_repair(ctx, alist, &meta), whose
 * failure is tested with "!".
 * NOTE(review): the function name line, two parameters between ctx and
 * alist, the initializer of meta and the whole outcome dispatch are elided
 * from this listing.  Presumably this is xfs_scrub_meta_type(), which is
 * called elsewhere in this file as (ctx, type, agno, alist) -- confirm
 * against the full source.
 */
283 struct scrub_ctx
*ctx
,
286 struct xfs_action_list
*alist
)
288 struct xfs_scrub_metadata meta
= {
292 enum check_outcome fix
;
296 /* Check the item. */
297 fix
= xfs_check_metadata(ctx
, &meta
, false);
304 if (!xfs_scrub_save_repair(ctx
, alist
, &meta
))
310 /* CHECK_RETRY should never happen. */
316 * Scrub all metadata types that are assigned to the given XFROG_SCRUB_TYPE_*,
317 * saving corruption reports for later. This should not be used for
318 * XFROG_SCRUB_TYPE_INODE or for checking summary metadata.
/*
 * Walks the xfrog_scrubbers[] table with type running over
 * [0, XFS_SCRUB_TYPE_NR) and sc advancing in step.  Two filters are visible:
 * one on sc->type differing from scrub_type and one on sc->flags having
 * XFROG_SCRUB_DESCR_SUMMARY set.  Remaining entries are passed to
 * xfs_scrub_meta_type(ctx, type, agno, alist) and the result is stored in
 * ret.
 * NOTE(review): the comment delimiters around the three lines above, the
 * function name line, the agno parameter, the declarations of type and ret,
 * and the statements taken by both filters and after the call are elided
 * from this listing.  Presumably this is xfs_scrub_all_types(), which is
 * called elsewhere in this file as (ctx, scrub type, agno, alist) -- confirm
 * against the full source.
 */
322 struct scrub_ctx
*ctx
,
323 enum xfrog_scrub_type scrub_type
,
325 struct xfs_action_list
*alist
)
327 const struct xfrog_scrub_descr
*sc
;
330 sc
= xfrog_scrubbers
;
331 for (type
= 0; type
< XFS_SCRUB_TYPE_NR
; type
++, sc
++) {
334 if (sc
->type
!= scrub_type
)
336 if (sc
->flags
& XFROG_SCRUB_DESCR_SUMMARY
)
339 ret
= xfs_scrub_meta_type(ctx
, type
, agno
, alist
);
348 * Scrub primary superblock. This will be useful if we ever need to hook
349 * a filesystem-wide pre-scrub activity off of the sb 0 scrubber (which
350 * currently does nothing).
/*
 * Calls xfs_scrub_meta_type(ctx, XFS_SCRUB_TYPE_SB, 0, alist) and stores the
 * result in ret.
 * NOTE(review): the comment delimiters around the three lines above, the
 * return type, the declaration of ret and the return statement are elided
 * from this listing -- confirm against the full source.
 */
353 xfs_scrub_primary_super(
354 struct scrub_ctx
*ctx
,
355 struct xfs_action_list
*alist
)
359 ret
= xfs_scrub_meta_type(ctx
, XFS_SCRUB_TYPE_SB
, 0, alist
);
363 /* Scrub each AG's header blocks. */
/*
 * Thin wrapper: returns
 * xfs_scrub_all_types(ctx, XFROG_SCRUB_TYPE_AGHEADER, agno, alist).
 * NOTE(review): the return type and the declaration of the agno parameter
 * are elided from this listing -- confirm against the full source.
 */
365 xfs_scrub_ag_headers(
366 struct scrub_ctx
*ctx
,
368 struct xfs_action_list
*alist
)
370 return xfs_scrub_all_types(ctx
, XFROG_SCRUB_TYPE_AGHEADER
, agno
, alist
);
373 /* Scrub each AG's metadata btrees. */
/*
 * Thin wrapper: returns
 * xfs_scrub_all_types(ctx, XFROG_SCRUB_TYPE_PERAG, agno, alist).
 * NOTE(review): the return type and the declaration of the agno parameter
 * are elided from this listing -- confirm against the full source.
 */
375 xfs_scrub_ag_metadata(
376 struct scrub_ctx
*ctx
,
378 struct xfs_action_list
*alist
)
380 return xfs_scrub_all_types(ctx
, XFROG_SCRUB_TYPE_PERAG
, agno
, alist
);
383 /* Scrub whole-FS metadata btrees. */
/*
 * Thin wrapper: returns
 * xfs_scrub_all_types(ctx, XFROG_SCRUB_TYPE_FS, 0, alist), i.e. the AG
 * number argument is fixed at 0.
 * NOTE(review): the return type line is elided from this listing.
 */
385 xfs_scrub_fs_metadata(
386 struct scrub_ctx
*ctx
,
387 struct xfs_action_list
*alist
)
389 return xfs_scrub_all_types(ctx
, XFROG_SCRUB_TYPE_FS
, 0, alist
);
392 /* Scrub FS summary metadata. */
/*
 * Calls xfs_scrub_meta_type(ctx, XFS_SCRUB_TYPE_FSCOUNTERS, 0, alist) and
 * stores the result in ret.
 * NOTE(review): the return type, the declaration of ret and the return
 * statement are elided from this listing -- confirm against the full source.
 */
394 xfs_scrub_fs_summary(
395 struct scrub_ctx
*ctx
,
396 struct xfs_action_list
*alist
)
400 ret
= xfs_scrub_meta_type(ctx
, XFS_SCRUB_TYPE_FSCOUNTERS
, 0, alist
);
404 /* How many items do we have to check? */
/*
 * Accumulates an unsigned estimate, starting at 0, while walking the
 * xfrog_scrubbers[] table over [0, XFS_SCRUB_TYPE_NR).  For AGHEADER and
 * PERAG entries ctx->mnt.fsgeom.agcount is added to the estimate; a case for
 * XFROG_SCRUB_TYPE_FS is also present.
 * NOTE(review): the return type, the declaration of type, the switch header,
 * the body of the XFROG_SCRUB_TYPE_FS case and the return statement are
 * elided from this listing -- confirm against the full source.
 */
406 xfs_scrub_estimate_ag_work(
407 struct scrub_ctx
*ctx
)
409 const struct xfrog_scrub_descr
*sc
;
411 unsigned int estimate
= 0;
413 sc
= xfrog_scrubbers
;
414 for (type
= 0; type
< XFS_SCRUB_TYPE_NR
; type
++, sc
++) {
416 case XFROG_SCRUB_TYPE_AGHEADER
:
417 case XFROG_SCRUB_TYPE_PERAG
:
418 estimate
+= ctx
->mnt
.fsgeom
.agcount
;
420 case XFROG_SCRUB_TYPE_FS
:
430 /* Scrub inode metadata. */
/*
 * Asserts that type is below XFS_SCRUB_TYPE_NR and that
 * xfrog_scrubbers[type].type is XFROG_SCRUB_TYPE_INODE, then runs
 * xfs_check_metadata(ctx, &meta, true) on a zero-initialized
 * struct xfs_scrub_metadata.  The CHECK_ABORT and CHECK_DONE outcomes are
 * handled separately; otherwise the function returns
 * xfs_scrub_save_repair(ctx, alist, &meta).
 * NOTE(review): the function name line, the parameters between ctx and
 * alist, the assignments that fill in meta and the statements taken for
 * CHECK_ABORT / CHECK_DONE are elided from this listing.  Presumably this is
 * __xfs_scrub_file(), which is called elsewhere in this file as
 * (ctx, ino, gen, type, alist) -- confirm against the full source.
 */
433 struct scrub_ctx
*ctx
,
437 struct xfs_action_list
*alist
)
439 struct xfs_scrub_metadata meta
= {0};
440 enum check_outcome fix
;
442 assert(type
< XFS_SCRUB_TYPE_NR
);
443 assert(xfrog_scrubbers
[type
].type
== XFROG_SCRUB_TYPE_INODE
);
449 /* Scrub the piece of metadata. */
450 fix
= xfs_check_metadata(ctx
, &meta
, true);
451 if (fix
== CHECK_ABORT
)
453 if (fix
== CHECK_DONE
)
456 return xfs_scrub_save_repair(ctx
, alist
, &meta
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_INODE, alist).
 * NOTE(review): the return type and the declarations of the ino and gen
 * parameters are elided from this listing -- confirm against the full source.
 */
460 xfs_scrub_inode_fields(
461 struct scrub_ctx
*ctx
,
464 struct xfs_action_list
*alist
)
466 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_INODE
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_BMBTD, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
471 struct scrub_ctx
*ctx
,
474 struct xfs_action_list
*alist
)
476 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTD
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_BMBTA, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
481 struct scrub_ctx
*ctx
,
484 struct xfs_action_list
*alist
)
486 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTA
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_BMBTC, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
491 struct scrub_ctx
*ctx
,
494 struct xfs_action_list
*alist
)
496 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTC
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_DIR, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
501 struct scrub_ctx
*ctx
,
504 struct xfs_action_list
*alist
)
506 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_DIR
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_XATTR, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
511 struct scrub_ctx
*ctx
,
514 struct xfs_action_list
*alist
)
516 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_XATTR
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_SYMLINK, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
521 struct scrub_ctx
*ctx
,
524 struct xfs_action_list
*alist
)
526 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_SYMLINK
, alist
);
/*
 * Thin wrapper: returns
 * __xfs_scrub_file(ctx, ino, gen, XFS_SCRUB_TYPE_PARENT, alist).
 * NOTE(review): the function name line, the return type and the ino / gen
 * parameter declarations are elided from this listing -- confirm against the
 * full source.
 */
531 struct scrub_ctx
*ctx
,
534 struct xfs_action_list
*alist
)
536 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_PARENT
, alist
);
539 /* Test the availability of a kernel scrub command. */
/*
 * Visible flow:
 *   - the XFS_SCRUB_NO_KERNEL debug tweak is tested first;
 *   - when the XFS_SCRUB_FORCE_REPAIR debug tweak is on and the
 *     function-static flag injected is still false, an
 *     XFS_IOC_ERROR_INJECTION ioctl is issued on ctx->mnt.fd with errtag
 *     XFS_ERRTAG_FORCE_SCRUB_REPAIR;
 *   - XFS_SCRUB_IFLAG_REPAIR is OR-ed into meta.sm_flags on one path, and
 *     xfrog_scrub_metadata(&ctx->mnt, &meta) performs the probe;
 *   - failures are reported with str_info() on ctx->mntpoint: read-only
 *     mount, norecovery mount (ENOTRECOVERABLE), facility not detected
 *     (printed only when debug or verbose is set), or strerror(errno).
 * NOTE(review): the function name line, the type and repair parameters, the
 * declaration of error, the assignment of meta.sm_type, most case labels and
 * every return statement are elided from this listing.  Presumably this is
 * __xfs_scrub_test(), which is called elsewhere in this file as
 * (ctx, type, repair) -- confirm against the full source.
 * NOTE(review): injected is function-static state and no locking is visible
 * around it here; only the fd and errtag members of inject are visibly
 * assigned before the ioctl -- verify both against the full source.
 */
542 struct scrub_ctx
*ctx
,
546 struct xfs_scrub_metadata meta
= {0};
547 struct xfs_error_injection inject
;
548 static bool injected
;
551 if (debug_tweak_on("XFS_SCRUB_NO_KERNEL"))
553 if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !injected
) {
554 inject
.fd
= ctx
->mnt
.fd
;
555 inject
.errtag
= XFS_ERRTAG_FORCE_SCRUB_REPAIR
;
556 error
= ioctl(ctx
->mnt
.fd
, XFS_IOC_ERROR_INJECTION
, &inject
);
563 meta
.sm_flags
|= XFS_SCRUB_IFLAG_REPAIR
;
564 error
= xfrog_scrub_metadata(&ctx
->mnt
, &meta
);
569 str_info(ctx
, ctx
->mntpoint
,
570 _("Filesystem is mounted read-only; cannot proceed."));
572 case ENOTRECOVERABLE
:
573 str_info(ctx
, ctx
->mntpoint
,
574 _("Filesystem is mounted norecovery; cannot proceed."));
578 if (debug
|| verbose
)
579 str_info(ctx
, ctx
->mntpoint
,
580 _("Kernel %s %s facility not detected."),
581 _(xfrog_scrubbers
[type
].descr
),
582 repair
? _("repair") : _("scrub"));
585 /* Scrubber says not present on this fs; that's fine. */
588 str_info(ctx
, ctx
->mntpoint
, "%s", strerror(errno
));
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_PROBE, false).
 * NOTE(review): the return type line is elided from this listing.
 */
594 xfs_can_scrub_fs_metadata(
595 struct scrub_ctx
*ctx
)
597 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PROBE
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_INODE, false).
 * NOTE(review): the function name and return type lines are elided from this
 * listing -- confirm against the full source.
 */
602 struct scrub_ctx
*ctx
)
604 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_INODE
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_BMBTD, false).
 * NOTE(review): the function name and return type lines are elided from this
 * listing -- confirm against the full source.
 */
609 struct scrub_ctx
*ctx
)
611 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_BMBTD
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_DIR, false).
 * NOTE(review): the function name and return type lines are elided from this
 * listing -- confirm against the full source.
 */
616 struct scrub_ctx
*ctx
)
618 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_DIR
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_XATTR, false).
 * NOTE(review): the function name and return type lines are elided from this
 * listing -- confirm against the full source.
 */
623 struct scrub_ctx
*ctx
)
625 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_XATTR
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_SYMLINK, false).
 * NOTE(review): the return type line is elided from this listing.
 */
629 xfs_can_scrub_symlink(
630 struct scrub_ctx
*ctx
)
632 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_SYMLINK
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_PARENT, false).
 * NOTE(review): the return type line is elided from this listing.
 */
636 xfs_can_scrub_parent(
637 struct scrub_ctx
*ctx
)
639 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PARENT
, false);
/*
 * Thin wrapper: returns __xfs_scrub_test(ctx, XFS_SCRUB_TYPE_PROBE, true);
 * this is the only visible caller that passes true as the third argument.
 * NOTE(review): the function name and return type lines are elided from this
 * listing -- confirm against the full source.
 */
644 struct scrub_ctx
*ctx
)
646 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PROBE
, true);
649 /* General repair routines. */
651 /* Repair some metadata. */
/*
 * Visible flow:
 *   1. Asserts that aitem->type is below XFS_SCRUB_TYPE_NR and that the
 *      XFS_SCRUB_NO_KERNEL debug tweak is off.
 *   2. Rebuilds a struct xfs_scrub_metadata from the action item: sm_type
 *      from aitem->type, sm_flags from aitem->flags with
 *      XFS_SCRUB_IFLAG_REPAIR added, and either sm_agno (AGHEADER / PERAG)
 *      or sm_ino plus sm_gen (INODE) depending on the scrubber table entry.
 *   3. A non-corrupt item combined with XRM_REPAIR_ONLY in repair_flags
 *      takes an early path.
 *   4. Copies meta into oldm before the call so the pre-repair flags stay
 *      available, formats a description, and announces "Attempting repair."
 *      when needs_repair(&meta) holds, or "Attempting optimization." when
 *      debug or verbose is set.
 *   5. Calls xfrog_scrub_metadata(&ctx->mnt, &meta).  The error handling is
 *      mostly visible through its comments and messages: busy filesystem,
 *      shut-down filesystem, kernel unable to repair, read-only filesystem,
 *      metadata not present, low resources during a preen, and a generic
 *      operational error; several of these test XRM_COMPLAIN_IF_UNFIXED.
 *   6. With XRM_COMPLAIN_IF_UNFIXED set, xfs_scrub_warn_incomplete_scrub()
 *      reports on the new flags.  If needs_repair(&meta) still holds the
 *      item is either requeued or reported with
 *      "Repair unsuccessful; offline repair required.".
 *   7. Otherwise record_repair() "Repairs successful." is used when oldm
 *      needed repair, and record_preen() "Optimization successful." is also
 *      visible.
 * NOTE(review): the function name line, the return type, one parameter
 * between ctx and aitem, the declaration of error, the errno switch labels
 * and every return statement are elided from this listing -- confirm against
 * the full source.
 * NOTE(review): step 3 tests is_corrupt() only, whereas the rest of the
 * function uses needs_repair(), which also covers xref_disagrees(); confirm
 * whether items flagged only with XFS_SCRUB_OFLAG_XCORRUPT are meant to be
 * skipped under XRM_REPAIR_ONLY.
 */
654 struct scrub_ctx
*ctx
,
656 struct action_item
*aitem
,
657 unsigned int repair_flags
)
659 char buf
[DESCR_BUFSZ
];
660 struct xfs_scrub_metadata meta
= { 0 };
661 struct xfs_scrub_metadata oldm
;
664 assert(aitem
->type
< XFS_SCRUB_TYPE_NR
);
665 assert(!debug_tweak_on("XFS_SCRUB_NO_KERNEL"));
666 meta
.sm_type
= aitem
->type
;
667 meta
.sm_flags
= aitem
->flags
| XFS_SCRUB_IFLAG_REPAIR
;
668 switch (xfrog_scrubbers
[aitem
->type
].type
) {
669 case XFROG_SCRUB_TYPE_AGHEADER
:
670 case XFROG_SCRUB_TYPE_PERAG
:
671 meta
.sm_agno
= aitem
->agno
;
673 case XFROG_SCRUB_TYPE_INODE
:
674 meta
.sm_ino
= aitem
->ino
;
675 meta
.sm_gen
= aitem
->gen
;
681 if (!is_corrupt(&meta
) && (repair_flags
& XRM_REPAIR_ONLY
))
684 memcpy(&oldm
, &meta
, sizeof(oldm
));
685 format_scrub_descr(ctx
, buf
, DESCR_BUFSZ
, &meta
);
687 if (needs_repair(&meta
))
688 str_info(ctx
, buf
, _("Attempting repair."));
689 else if (debug
|| verbose
)
690 str_info(ctx
, buf
, _("Attempting optimization."));
692 error
= xfrog_scrub_metadata(&ctx
->mnt
, &meta
);
697 /* Filesystem is busy, try again later. */
698 if (debug
|| verbose
)
700 _("Filesystem is busy, deferring repair."));
703 /* Filesystem is already shut down, abort. */
705 _("Filesystem is shut down, aborting."));
710 * If we're in no-complain mode, requeue the check for
711 * later. It's possible that an error in another
712 * component caused us to flag an error in this
713 * component. Even if the kernel didn't think it
714 * could fix this, it's at least worth trying the scan
715 * again to see if another repair fixed it.
717 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
720 * If we forced repairs or this is a preen, don't
721 * error out if the kernel doesn't know how to fix.
723 if (is_unoptimized(&oldm
) ||
724 debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
728 /* Kernel doesn't know how to repair this? */
729 str_corrupt(ctx
, buf
,
730 _("Don't know how to fix; offline repair required."));
733 /* Read-only filesystem, can't fix. */
734 if (verbose
|| debug
|| needs_repair(&oldm
))
736 _("Read-only filesystem; cannot make changes."));
739 /* Metadata not present, just skip it. */
743 /* Don't care if preen fails due to low resources. */
744 if (is_unoptimized(&oldm
) && !needs_repair(&oldm
))
749 * Operational error. If the caller doesn't want us
750 * to complain about repair failures, tell the caller
751 * to requeue the repair for later and don't say a
752 * thing. Otherwise, print error and bail out.
754 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
760 if (repair_flags
& XRM_COMPLAIN_IF_UNFIXED
)
761 xfs_scrub_warn_incomplete_scrub(ctx
, buf
, &meta
);
762 if (needs_repair(&meta
)) {
764 * Still broken; if we've been told not to complain then we
765 * just requeue this and try again later. Otherwise we
766 * log the error loudly and don't try again.
768 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
770 str_corrupt(ctx
, buf
,
771 _("Repair unsuccessful; offline repair required."));
773 /* Clean operation, no corruption detected. */
774 if (needs_repair(&oldm
))
775 record_repair(ctx
, buf
, _("Repairs successful."));
777 record_preen(ctx
, buf
, _("Optimization successful."));