1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
11 #include <sys/types.h>
12 #include <sys/statvfs.h>
14 #include "libfrog/paths.h"
15 #include "xfs_scrub.h"
19 #include "xfs_errortag.h"
22 /* Online scrub and repair wrappers. */
24 /* Type info and names for the scrub types. */
26 ST_NONE
, /* disabled */
27 ST_AGHEADER
, /* per-AG header */
28 ST_PERAG
, /* per-AG metadata */
29 ST_FS
, /* per-FS metadata */
30 ST_INODE
, /* per-inode metadata */
37 /* These must correspond to XFS_SCRUB_TYPE_ */
38 static const struct scrub_descr scrubbers
[XFS_SCRUB_TYPE_NR
] = {
39 [XFS_SCRUB_TYPE_PROBE
] =
40 {"metadata", ST_NONE
},
42 {"superblock", ST_AGHEADER
},
43 [XFS_SCRUB_TYPE_AGF
] =
44 {"free space header", ST_AGHEADER
},
45 [XFS_SCRUB_TYPE_AGFL
] =
46 {"free list", ST_AGHEADER
},
47 [XFS_SCRUB_TYPE_AGI
] =
48 {"inode header", ST_AGHEADER
},
49 [XFS_SCRUB_TYPE_BNOBT
] =
50 {"freesp by block btree", ST_PERAG
},
51 [XFS_SCRUB_TYPE_CNTBT
] =
52 {"freesp by length btree", ST_PERAG
},
53 [XFS_SCRUB_TYPE_INOBT
] =
54 {"inode btree", ST_PERAG
},
55 [XFS_SCRUB_TYPE_FINOBT
] =
56 {"free inode btree", ST_PERAG
},
57 [XFS_SCRUB_TYPE_RMAPBT
] =
58 {"reverse mapping btree", ST_PERAG
},
59 [XFS_SCRUB_TYPE_REFCNTBT
] =
60 {"reference count btree", ST_PERAG
},
61 [XFS_SCRUB_TYPE_INODE
] =
62 {"inode record", ST_INODE
},
63 [XFS_SCRUB_TYPE_BMBTD
] =
64 {"data block map", ST_INODE
},
65 [XFS_SCRUB_TYPE_BMBTA
] =
66 {"attr block map", ST_INODE
},
67 [XFS_SCRUB_TYPE_BMBTC
] =
68 {"CoW block map", ST_INODE
},
69 [XFS_SCRUB_TYPE_DIR
] =
70 {"directory entries", ST_INODE
},
71 [XFS_SCRUB_TYPE_XATTR
] =
72 {"extended attributes", ST_INODE
},
73 [XFS_SCRUB_TYPE_SYMLINK
] =
74 {"symbolic link", ST_INODE
},
75 [XFS_SCRUB_TYPE_PARENT
] =
76 {"parent pointer", ST_INODE
},
77 [XFS_SCRUB_TYPE_RTBITMAP
] =
78 {"realtime bitmap", ST_FS
},
79 [XFS_SCRUB_TYPE_RTSUM
] =
80 {"realtime summary", ST_FS
},
81 [XFS_SCRUB_TYPE_UQUOTA
] =
82 {"user quotas", ST_FS
},
83 [XFS_SCRUB_TYPE_GQUOTA
] =
84 {"group quotas", ST_FS
},
85 [XFS_SCRUB_TYPE_PQUOTA
] =
86 {"project quotas", ST_FS
},
89 /* Format a scrub description. */
94 struct xfs_scrub_metadata
*meta
,
95 const struct scrub_descr
*sc
)
100 snprintf(buf
, buflen
, _("AG %u %s"), meta
->sm_agno
,
104 snprintf(buf
, buflen
, _("Inode %"PRIu64
" %s"),
105 (uint64_t)meta
->sm_ino
, _(sc
->name
));
108 snprintf(buf
, buflen
, _("%s"), _(sc
->name
));
116 /* Predicates for scrub flag state. */
118 static inline bool is_corrupt(struct xfs_scrub_metadata
*sm
)
120 return sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
;
123 static inline bool is_unoptimized(struct xfs_scrub_metadata
*sm
)
125 return sm
->sm_flags
& XFS_SCRUB_OFLAG_PREEN
;
128 static inline bool xref_failed(struct xfs_scrub_metadata
*sm
)
130 return sm
->sm_flags
& XFS_SCRUB_OFLAG_XFAIL
;
133 static inline bool xref_disagrees(struct xfs_scrub_metadata
*sm
)
135 return sm
->sm_flags
& XFS_SCRUB_OFLAG_XCORRUPT
;
138 static inline bool is_incomplete(struct xfs_scrub_metadata
*sm
)
140 return sm
->sm_flags
& XFS_SCRUB_OFLAG_INCOMPLETE
;
143 static inline bool is_suspicious(struct xfs_scrub_metadata
*sm
)
145 return sm
->sm_flags
& XFS_SCRUB_OFLAG_WARNING
;
148 /* Should we fix it? */
149 static inline bool needs_repair(struct xfs_scrub_metadata
*sm
)
151 return is_corrupt(sm
) || xref_disagrees(sm
);
154 /*
 * Warn about strange circumstances after scrub.
 *
 * Looks at the result flags in meta and logs against descr:
 *  - is_incomplete(): str_info() with "Check incomplete."
 *  - is_suspicious(): "Possibly suspect metadata."; both a str_info() and a
 *    str_warn() call with that text are present.
 *    NOTE(review): presumably only one of them runs per call; the
 *    condition selecting between them is not visible in this excerpt.
 *  - xref_failed(): str_info() with "Cross-referencing failed."
 *
 * NOTE(review): descr is used below but its declaration is not visible in
 * this excerpt.
 */
156 xfs_scrub_warn_incomplete_scrub(
157 struct scrub_ctx
*ctx
,
159 struct xfs_scrub_metadata
*meta
)
161 if (is_incomplete(meta
))
162 str_info(ctx
, descr
, _("Check incomplete."));
164 if (is_suspicious(meta
)) {
166 str_info(ctx
, descr
, _("Possibly suspect metadata."));
168 str_warn(ctx
, descr
, _("Possibly suspect metadata."));
171 if (xref_failed(meta
))
172 str_info(ctx
, descr
, _("Cross-referencing failed."));
175 /* Do a read-only check of some metadata. */
176 static enum check_outcome
178 struct scrub_ctx
*ctx
,
179 struct xfs_scrub_metadata
*meta
,
182 char buf
[DESCR_BUFSZ
];
183 unsigned int tries
= 0;
187 assert(!debug_tweak_on("XFS_SCRUB_NO_KERNEL"));
188 assert(meta
->sm_type
< XFS_SCRUB_TYPE_NR
);
189 format_scrub_descr(buf
, DESCR_BUFSZ
, meta
, &scrubbers
[meta
->sm_type
]);
191 dbg_printf("check %s flags %xh\n", buf
, meta
->sm_flags
);
193 error
= ioctl(ctx
->mnt
.fd
, XFS_IOC_SCRUB_METADATA
, meta
);
194 if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error
)
195 meta
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
200 /* Metadata not present, just skip it. */
203 /* FS already crashed, give up. */
205 _("Filesystem is shut down, aborting."));
209 /* Abort on I/O errors or insufficient memory. */
/*
 * The first two should never escape the kernel,
 * and the other two should be reported via sm_flags.
 */
221 _("Kernel bug! errno=%d"), code
);
224 /* Operational error. */
/*
 * If the kernel says the test was incomplete or that there was
 * a cross-referencing discrepancy but no obvious corruption,
 * we'll try the scan again, just in case the fs was busy.
 * Only retry so many times.
 */
236 if (tries
< 10 && (is_incomplete(meta
) ||
237 (xref_disagrees(meta
) && !is_corrupt(meta
)))) {
242 /* Complain about incomplete or suspicious metadata. */
243 xfs_scrub_warn_incomplete_scrub(ctx
, buf
, meta
);
/*
 * If we need repairs or there were discrepancies, schedule a
 * repair if desired, otherwise complain.
 */
249 if (is_corrupt(meta
) || xref_disagrees(meta
)) {
250 if (ctx
->mode
< SCRUB_MODE_REPAIR
) {
252 _("Repairs are required."));
/*
 * If we could optimize, schedule a repair if desired,
 * otherwise complain.
 */
263 if (is_unoptimized(meta
)) {
264 if (ctx
->mode
!= SCRUB_MODE_REPAIR
) {
266 /* AG or FS metadata, always warn. */
268 _("Optimization is possible."));
269 } else if (!ctx
->preen_triggers
[meta
->sm_type
]) {
270 /* File metadata, only warn once per type. */
271 pthread_mutex_lock(&ctx
->lock
);
272 if (!ctx
->preen_triggers
[meta
->sm_type
])
273 ctx
->preen_triggers
[meta
->sm_type
] = true;
274 pthread_mutex_unlock(&ctx
->lock
);
282 /* Everything is ok. */
286 /*
 * Bulk-notify user about things that could be optimized.
 *
 * For every scrub type index below XFS_SCRUB_TYPE_NR, takes ctx->lock and
 * tests ctx->preen_triggers[i].  A set trigger is cleared and the lock is
 * released before str_info() reports "Optimizations of %s are possible."
 * for scrubbers[i].name against ctx->mntpoint, so the message is emitted
 * without the lock held.
 *
 * NOTE(review): the second pthread_mutex_unlock() presumably sits on the
 * trigger-not-set path; that branch keyword, the declaration of i and the
 * return type are not visible in this excerpt.
 */
288 xfs_scrub_report_preen_triggers(
289 struct scrub_ctx
*ctx
)
293 for (i
= 0; i
< XFS_SCRUB_TYPE_NR
; i
++) {
294 pthread_mutex_lock(&ctx
->lock
);
295 if (ctx
->preen_triggers
[i
]) {
296 ctx
->preen_triggers
[i
] = false;
297 pthread_mutex_unlock(&ctx
->lock
);
298 str_info(ctx
, ctx
->mntpoint
,
299 _("Optimizations of %s are possible."), scrubbers
[i
].name
);
301 pthread_mutex_unlock(&ctx
->lock
);
306 /*
 * Save a scrub context for later repairs.
 *
 * Allocates a struct action_item, zeroes it, copies the scrub type and
 * flags from meta, fills in either the AG number or the inode number and
 * generation depending on scrubbers[meta->sm_type].type, and hands the
 * item to xfs_action_list_add() on alist.
 *
 * NOTE(review): the str_errno() call labelled "repair list" is presumably
 * the report for a failed malloc(); the guarding check, the case labels of
 * the switch and the return statements are not visible in this excerpt.
 * Callers in this file test the result with `!`.
 */
308 xfs_scrub_save_repair(
309 struct scrub_ctx
*ctx
,
310 struct xfs_action_list
*alist
,
311 struct xfs_scrub_metadata
*meta
)
313 struct action_item
*aitem
;
315 /* Schedule this item for later repairs. */
316 aitem
= malloc(sizeof(struct action_item
));
318 str_errno(ctx
, _("repair list"));
/* Start from an all-zero item so fields not assigned below stay zero. */
321 memset(aitem
, 0, sizeof(*aitem
));
322 aitem
->type
= meta
->sm_type
;
323 aitem
->flags
= meta
->sm_flags
;
/* Which location fields get copied depends on the scrub class. */
324 switch (scrubbers
[meta
->sm_type
].type
) {
327 aitem
->agno
= meta
->sm_agno
;
330 aitem
->ino
= meta
->sm_ino
;
331 aitem
->gen
= meta
->sm_gen
;
337 xfs_action_list_add(alist
, aitem
);
341 /* Scrub metadata, saving corruption reports for later. */
344 struct scrub_ctx
*ctx
,
345 enum scrub_type scrub_type
,
347 struct xfs_action_list
*alist
)
349 struct xfs_scrub_metadata meta
= {0};
350 const struct scrub_descr
*sc
;
351 enum check_outcome fix
;
355 for (type
= 0; type
< XFS_SCRUB_TYPE_NR
; type
++, sc
++) {
356 if (sc
->type
!= scrub_type
)
364 /* Check the item. */
365 fix
= xfs_check_metadata(ctx
, &meta
, false);
371 if (!xfs_scrub_save_repair(ctx
, alist
, &meta
))
/*
 * Scrub primary superblock. This will be useful if we ever need to hook
 * a filesystem-wide pre-scrub activity off of the sb 0 scrubber (which
 * currently does nothing).
 */
/*
 * Builds a scrub request whose sm_type is XFS_SCRUB_TYPE_SB, runs it through
 * xfs_check_metadata() with a final argument of false, and may pass the
 * resulting metadata to xfs_scrub_save_repair() on alist.
 *
 * NOTE(review): how the outcome stored in fix gates the save, any further
 * initializer fields of meta, and the value returned are not visible in
 * this excerpt.
 */
391 xfs_scrub_primary_super(
392 struct scrub_ctx
*ctx
,
393 struct xfs_action_list
*alist
)
395 struct xfs_scrub_metadata meta
= {
396 .sm_type
= XFS_SCRUB_TYPE_SB
,
398 enum check_outcome fix
;
400 /* Check the item. */
401 fix
= xfs_check_metadata(ctx
, &meta
, false);
406 if (!xfs_scrub_save_repair(ctx
, alist
, &meta
))
419 /*
 * Scrub each AG's header blocks.
 *
 * Delegates to xfs_scrub_metadata() with the ST_AGHEADER class, forwarding
 * ctx, agno and alist, and returns that call's result unchanged.
 *
 * NOTE(review): the declaration of agno and the return type are not
 * visible in this excerpt.
 */
421 xfs_scrub_ag_headers(
422 struct scrub_ctx
*ctx
,
424 struct xfs_action_list
*alist
)
426 return xfs_scrub_metadata(ctx
, ST_AGHEADER
, agno
, alist
);
429 /*
 * Scrub each AG's metadata btrees.
 *
 * Delegates to xfs_scrub_metadata() with the ST_PERAG class, forwarding
 * ctx, agno and alist, and returns that call's result unchanged.
 *
 * NOTE(review): the declaration of agno and the return type are not
 * visible in this excerpt.
 */
431 xfs_scrub_ag_metadata(
432 struct scrub_ctx
*ctx
,
434 struct xfs_action_list
*alist
)
436 return xfs_scrub_metadata(ctx
, ST_PERAG
, agno
, alist
);
439 /*
 * Scrub whole-FS metadata btrees.
 *
 * Delegates to xfs_scrub_metadata() with the ST_FS class and returns that
 * call's result unchanged.  A literal 0 is passed in the argument position
 * that the per-AG wrappers in this file use for agno.
 */
441 xfs_scrub_fs_metadata(
442 struct scrub_ctx
*ctx
,
443 struct xfs_action_list
*alist
)
445 return xfs_scrub_metadata(ctx
, ST_FS
, 0, alist
);
448 /*
 * How many items do we have to check?
 *
 * Steps type and the descriptor cursor sc together across
 * XFS_SCRUB_TYPE_NR entries, growing estimate (which starts at 0) by
 * ctx->mnt.fsgeom.agcount.
 *
 * NOTE(review): the initial value of sc, the test deciding which entries
 * contribute, any other increments, and the return statement are not
 * visible in this excerpt.
 */
450 xfs_scrub_estimate_ag_work(
451 struct scrub_ctx
*ctx
)
453 const struct scrub_descr
*sc
;
455 unsigned int estimate
= 0;
458 for (type
= 0; type
< XFS_SCRUB_TYPE_NR
; type
++, sc
++) {
462 estimate
+= ctx
->mnt
.fsgeom
.agcount
;
474 /* Scrub inode metadata. */
477 struct scrub_ctx
*ctx
,
481 struct xfs_action_list
*alist
)
483 struct xfs_scrub_metadata meta
= {0};
484 enum check_outcome fix
;
486 assert(type
< XFS_SCRUB_TYPE_NR
);
487 assert(scrubbers
[type
].type
== ST_INODE
);
493 /* Scrub the piece of metadata. */
494 fix
= xfs_check_metadata(ctx
, &meta
, true);
495 if (fix
== CHECK_ABORT
)
497 if (fix
== CHECK_DONE
)
500 return xfs_scrub_save_repair(ctx
, alist
, &meta
);
/*
 * Scrub the XFS_SCRUB_TYPE_INODE metadata of one inode by delegating to
 * __xfs_scrub_file() with ctx, ino, gen and alist; that call's result is
 * returned unchanged.
 *
 * NOTE(review): the declarations of ino and gen and the return type are
 * not visible in this excerpt.
 */
504 xfs_scrub_inode_fields(
505 struct scrub_ctx
*ctx
,
508 struct xfs_action_list
*alist
)
510 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_INODE
, alist
);
515 struct scrub_ctx
*ctx
,
518 struct xfs_action_list
*alist
)
520 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTD
, alist
);
525 struct scrub_ctx
*ctx
,
528 struct xfs_action_list
*alist
)
530 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTA
, alist
);
535 struct scrub_ctx
*ctx
,
538 struct xfs_action_list
*alist
)
540 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_BMBTC
, alist
);
545 struct scrub_ctx
*ctx
,
548 struct xfs_action_list
*alist
)
550 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_DIR
, alist
);
555 struct scrub_ctx
*ctx
,
558 struct xfs_action_list
*alist
)
560 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_XATTR
, alist
);
565 struct scrub_ctx
*ctx
,
568 struct xfs_action_list
*alist
)
570 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_SYMLINK
, alist
);
575 struct scrub_ctx
*ctx
,
578 struct xfs_action_list
*alist
)
580 return __xfs_scrub_file(ctx
, ino
, gen
, XFS_SCRUB_TYPE_PARENT
, alist
);
583 /* Test the availability of a kernel scrub command. */
586 struct scrub_ctx
*ctx
,
590 struct xfs_scrub_metadata meta
= {0};
591 struct xfs_error_injection inject
;
592 static bool injected
;
595 if (debug_tweak_on("XFS_SCRUB_NO_KERNEL"))
597 if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !injected
) {
598 inject
.fd
= ctx
->mnt
.fd
;
599 inject
.errtag
= XFS_ERRTAG_FORCE_SCRUB_REPAIR
;
600 error
= ioctl(ctx
->mnt
.fd
, XFS_IOC_ERROR_INJECTION
, &inject
);
607 meta
.sm_flags
|= XFS_SCRUB_IFLAG_REPAIR
;
608 error
= ioctl(ctx
->mnt
.fd
, XFS_IOC_SCRUB_METADATA
, &meta
);
613 str_info(ctx
, ctx
->mntpoint
,
614 _("Filesystem is mounted read-only; cannot proceed."));
616 case ENOTRECOVERABLE
:
617 str_info(ctx
, ctx
->mntpoint
,
618 _("Filesystem is mounted norecovery; cannot proceed."));
622 if (debug
|| verbose
)
623 str_info(ctx
, ctx
->mntpoint
,
624 _("Kernel %s %s facility not detected."),
625 _(scrubbers
[type
].name
),
626 repair
? _("repair") : _("scrub"));
629 /* Scrubber says not present on this fs; that's fine. */
632 str_info(ctx
, ctx
->mntpoint
, "%s", strerror(errno
));
/*
 * Returns the result of __xfs_scrub_test() for XFS_SCRUB_TYPE_PROBE with a
 * final argument of false.
 *
 * NOTE(review): the final argument presumably selects the scrub (rather
 * than repair) variant of the test; the callee's signature is not visible
 * in this excerpt.
 */
638 xfs_can_scrub_fs_metadata(
639 struct scrub_ctx
*ctx
)
641 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PROBE
, false);
646 struct scrub_ctx
*ctx
)
648 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_INODE
, false);
653 struct scrub_ctx
*ctx
)
655 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_BMBTD
, false);
660 struct scrub_ctx
*ctx
)
662 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_DIR
, false);
667 struct scrub_ctx
*ctx
)
669 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_XATTR
, false);
/*
 * Returns the result of __xfs_scrub_test() for XFS_SCRUB_TYPE_SYMLINK with
 * a final argument of false.
 *
 * NOTE(review): the final argument presumably selects the scrub (rather
 * than repair) variant of the test; the callee's signature is not visible
 * in this excerpt.
 */
673 xfs_can_scrub_symlink(
674 struct scrub_ctx
*ctx
)
676 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_SYMLINK
, false);
/*
 * Returns the result of __xfs_scrub_test() for XFS_SCRUB_TYPE_PARENT with
 * a final argument of false.
 *
 * NOTE(review): the final argument presumably selects the scrub (rather
 * than repair) variant of the test; the callee's signature is not visible
 * in this excerpt.
 */
680 xfs_can_scrub_parent(
681 struct scrub_ctx
*ctx
)
683 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PARENT
, false);
688 struct scrub_ctx
*ctx
)
690 return __xfs_scrub_test(ctx
, XFS_SCRUB_TYPE_PROBE
, true);
693 /* General repair routines. */
695 /* Repair some metadata. */
698 struct scrub_ctx
*ctx
,
700 struct action_item
*aitem
,
701 unsigned int repair_flags
)
703 char buf
[DESCR_BUFSZ
];
704 struct xfs_scrub_metadata meta
= { 0 };
705 struct xfs_scrub_metadata oldm
;
708 assert(aitem
->type
< XFS_SCRUB_TYPE_NR
);
709 assert(!debug_tweak_on("XFS_SCRUB_NO_KERNEL"));
710 meta
.sm_type
= aitem
->type
;
711 meta
.sm_flags
= aitem
->flags
| XFS_SCRUB_IFLAG_REPAIR
;
712 switch (scrubbers
[aitem
->type
].type
) {
715 meta
.sm_agno
= aitem
->agno
;
718 meta
.sm_ino
= aitem
->ino
;
719 meta
.sm_gen
= aitem
->gen
;
725 if (!is_corrupt(&meta
) && (repair_flags
& XRM_REPAIR_ONLY
))
728 memcpy(&oldm
, &meta
, sizeof(oldm
));
729 format_scrub_descr(buf
, DESCR_BUFSZ
, &meta
, &scrubbers
[meta
.sm_type
]);
731 if (needs_repair(&meta
))
732 str_info(ctx
, buf
, _("Attempting repair."));
733 else if (debug
|| verbose
)
734 str_info(ctx
, buf
, _("Attempting optimization."));
736 error
= ioctl(fd
, XFS_IOC_SCRUB_METADATA
, &meta
);
741 /* Filesystem is busy, try again later. */
742 if (debug
|| verbose
)
744 _("Filesystem is busy, deferring repair."));
747 /* Filesystem is already shut down, abort. */
749 _("Filesystem is shut down, aborting."));
/*
 * If we're in no-complain mode, requeue the check for
 * later. It's possible that an error in another
 * component caused us to flag an error in this
 * component. Even if the kernel didn't think it
 * could fix this, it's at least worth trying the scan
 * again to see if another repair fixed it.
 */
761 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
/*
 * If we forced repairs or this is a preen, don't
 * error out if the kernel doesn't know how to fix.
 */
767 if (is_unoptimized(&oldm
) ||
768 debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
772 /* Kernel doesn't know how to repair this? */
774 _("Don't know how to fix; offline repair required."));
777 /* Read-only filesystem, can't fix. */
778 if (verbose
|| debug
|| needs_repair(&oldm
))
780 _("Read-only filesystem; cannot make changes."));
783 /* Metadata not present, just skip it. */
787 /* Don't care if preen fails due to low resources. */
788 if (is_unoptimized(&oldm
) && !needs_repair(&oldm
))
/*
 * Operational error. If the caller doesn't want us
 * to complain about repair failures, tell the caller
 * to requeue the repair for later and don't say a
 * thing. Otherwise, print error and bail out.
 */
798 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
804 if (repair_flags
& XRM_COMPLAIN_IF_UNFIXED
)
805 xfs_scrub_warn_incomplete_scrub(ctx
, buf
, &meta
);
806 if (needs_repair(&meta
)) {
/*
 * Still broken; if we've been told not to complain then we
 * just requeue this and try again later. Otherwise we
 * log the error loudly and don't try again.
 */
812 if (!(repair_flags
& XRM_COMPLAIN_IF_UNFIXED
))
815 _("Repair unsuccessful; offline repair required."));
817 /* Clean operation, no corruption detected. */
818 if (needs_repair(&oldm
))
819 record_repair(ctx
, buf
, _("Repairs successful."));
821 record_preen(ctx
, buf
, _("Optimization successful."));