]>
Commit | Line | Data |
---|---|---|
4860a05d DW |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* | |
3 | * Copyright (C) 2019 Oracle. All Rights Reserved. | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | */ | |
6 | #include "xfs.h" | |
7 | #include "xfs_fs.h" | |
8 | #include "xfs_shared.h" | |
9 | #include "xfs_format.h" | |
4860a05d | 10 | #include "xfs_btree.h" |
36029dee DC |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" | |
9bbafc71 | 13 | #include "xfs_ag.h" |
4860a05d DW |
14 | #include "xfs_health.h" |
15 | #include "scrub/scrub.h" | |
5f213ddb | 16 | #include "scrub/health.h" |
4860a05d DW |
17 | |
18 | /* | |
19 | * Scrub and In-Core Filesystem Health Assessments | |
20 | * =============================================== | |
21 | * | |
22 | * Online scrub and repair have the time and the ability to perform stronger | |
23 | * checks than we can do from the metadata verifiers, because they can | |
24 | * cross-reference records between data structures. Therefore, scrub is in a | |
25 | * good position to update the online filesystem health assessments to reflect | |
26 | * the good/bad state of the data structure. | |
27 | * | |
28 | * We therefore extend scrub in the following ways to achieve this: | |
29 | * | |
30 | * 1. Create a "sick_mask" field in the scrub context. When we're setting up a | |
31 | * scrub call, set this to the default XFS_SICK_* flag(s) for the selected | |
32 | * scrub type (call it A). Scrub and repair functions can override the default | |
33 | * sick_mask value if they choose. | |
34 | * | |
35 | * 2. If the scrubber returns a runtime error code, we exit making no changes | |
36 | * to the incore sick state. | |
37 | * | |
38 | * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore | |
39 | * sick flags before exiting. | |
40 | * | |
41 | * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore | |
42 | * sick flags. If the user didn't want to repair then we exit, leaving the | |
43 | * metadata structure unfixed and the sick flag set. | |
44 | * | |
45 | * 5. Now we know that A is corrupt and the user wants to repair, so run the | |
46 | * repairer. If the repairer returns an error code, we exit with that error | |
47 | * code, having made no further changes to the incore sick state. | |
48 | * | |
49 | * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean, | |
50 | * use sick_mask to clear the incore sick flags. This should have the effect | |
51 | * that A is no longer marked sick. | |
52 | * | |
53 | * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and | |
54 | * use sick_mask to set the incore sick flags. This should have no externally | |
55 | * visible effect since we already set them in step (4). | |
56 | * | |
57 | * There are some complications to this story, however. For certain types of | |
58 | * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild | |
59 | * both structures at the same time. The following principles apply to this | |
60 | * type of repair strategy: | |
61 | * | |
62 | * 8. Any repair function that rebuilds multiple structures should update | |
63 | * sick_mask to reflect whatever other structures are rebuilt, and | |
64 | * verify that all the rebuilt structures can pass a scrub check. The outcomes | |
65 | * of 5-7 still apply, but with a sick_mask that covers everything being | |
66 | * rebuilt. | |
67 | */ | |
68 | ||
69 | /* Map our scrub type to a sick mask and a set of health update functions. */ | |
70 | ||
71 | enum xchk_health_group { | |
72 | XHG_FS = 1, | |
73 | XHG_RT, | |
74 | XHG_AG, | |
75 | XHG_INO, | |
76 | }; | |
77 | ||
78 | struct xchk_health_map { | |
79 | enum xchk_health_group group; | |
80 | unsigned int sick_mask; | |
81 | }; | |
82 | ||
83 | static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { | |
84 | [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB }, | |
85 | [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF }, | |
86 | [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL }, | |
87 | [XFS_SCRUB_TYPE_AGI] = { XHG_AG, XFS_SICK_AG_AGI }, | |
88 | [XFS_SCRUB_TYPE_BNOBT] = { XHG_AG, XFS_SICK_AG_BNOBT }, | |
89 | [XFS_SCRUB_TYPE_CNTBT] = { XHG_AG, XFS_SICK_AG_CNTBT }, | |
90 | [XFS_SCRUB_TYPE_INOBT] = { XHG_AG, XFS_SICK_AG_INOBT }, | |
91 | [XFS_SCRUB_TYPE_FINOBT] = { XHG_AG, XFS_SICK_AG_FINOBT }, | |
92 | [XFS_SCRUB_TYPE_RMAPBT] = { XHG_AG, XFS_SICK_AG_RMAPBT }, | |
93 | [XFS_SCRUB_TYPE_REFCNTBT] = { XHG_AG, XFS_SICK_AG_REFCNTBT }, | |
94 | [XFS_SCRUB_TYPE_INODE] = { XHG_INO, XFS_SICK_INO_CORE }, | |
95 | [XFS_SCRUB_TYPE_BMBTD] = { XHG_INO, XFS_SICK_INO_BMBTD }, | |
96 | [XFS_SCRUB_TYPE_BMBTA] = { XHG_INO, XFS_SICK_INO_BMBTA }, | |
97 | [XFS_SCRUB_TYPE_BMBTC] = { XHG_INO, XFS_SICK_INO_BMBTC }, | |
98 | [XFS_SCRUB_TYPE_DIR] = { XHG_INO, XFS_SICK_INO_DIR }, | |
99 | [XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR }, | |
100 | [XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK }, | |
101 | [XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT }, | |
102 | [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP }, | |
103 | [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY }, | |
104 | [XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA }, | |
105 | [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA }, | |
106 | [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA }, | |
75efa57d | 107 | [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS }, |
4860a05d DW |
108 | }; |
109 | ||
110 | /* Return the health status mask for this scrub type. */ | |
111 | unsigned int | |
112 | xchk_health_mask_for_scrub_type( | |
113 | __u32 scrub_type) | |
114 | { | |
115 | return type_to_health_flag[scrub_type].sick_mask; | |
116 | } | |
117 | ||
118 | /* | |
119 | * Update filesystem health assessments based on what we found and did. | |
120 | * | |
121 | * If the scrubber finds errors, we mark sick whatever's mentioned in | |
122 | * sick_mask, no matter whether this is a first scan or an | |
123 | * evaluation of repair effectiveness. | |
124 | * | |
125 | * Otherwise, no direct corruption was found, so mark whatever's in | |
126 | * sick_mask as healthy. | |
127 | */ | |
128 | void | |
129 | xchk_update_health( | |
130 | struct xfs_scrub *sc) | |
131 | { | |
132 | struct xfs_perag *pag; | |
133 | bool bad; | |
134 | ||
135 | if (!sc->sick_mask) | |
136 | return; | |
137 | ||
9de4b514 DW |
138 | bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | |
139 | XFS_SCRUB_OFLAG_XCORRUPT)); | |
4860a05d DW |
140 | switch (type_to_health_flag[sc->sm->sm_type].group) { |
141 | case XHG_AG: | |
142 | pag = xfs_perag_get(sc->mp, sc->sm->sm_agno); | |
143 | if (bad) | |
144 | xfs_ag_mark_sick(pag, sc->sick_mask); | |
145 | else | |
146 | xfs_ag_mark_healthy(pag, sc->sick_mask); | |
147 | xfs_perag_put(pag); | |
148 | break; | |
149 | case XHG_INO: | |
150 | if (!sc->ip) | |
151 | return; | |
152 | if (bad) | |
153 | xfs_inode_mark_sick(sc->ip, sc->sick_mask); | |
154 | else | |
155 | xfs_inode_mark_healthy(sc->ip, sc->sick_mask); | |
156 | break; | |
157 | case XHG_FS: | |
158 | if (bad) | |
159 | xfs_fs_mark_sick(sc->mp, sc->sick_mask); | |
160 | else | |
161 | xfs_fs_mark_healthy(sc->mp, sc->sick_mask); | |
162 | break; | |
163 | case XHG_RT: | |
164 | if (bad) | |
165 | xfs_rt_mark_sick(sc->mp, sc->sick_mask); | |
166 | else | |
167 | xfs_rt_mark_healthy(sc->mp, sc->sick_mask); | |
168 | break; | |
169 | default: | |
170 | ASSERT(0); | |
171 | break; | |
172 | } | |
173 | } | |
4fb7951f DW |
174 | |
175 | /* Is the given per-AG btree healthy enough for scanning? */ | |
176 | bool | |
177 | xchk_ag_btree_healthy_enough( | |
178 | struct xfs_scrub *sc, | |
179 | struct xfs_perag *pag, | |
180 | xfs_btnum_t btnum) | |
181 | { | |
182 | unsigned int mask = 0; | |
183 | ||
184 | /* | |
185 | * We always want the cursor if it's the same type as whatever we're | |
186 | * scrubbing, even if we already know the structure is corrupt. | |
187 | * | |
188 | * Otherwise, we're only interested in the btree for cross-referencing. | |
189 | * If we know the btree is bad then don't bother, just set XFAIL. | |
190 | */ | |
191 | switch (btnum) { | |
192 | case XFS_BTNUM_BNO: | |
193 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) | |
194 | return true; | |
195 | mask = XFS_SICK_AG_BNOBT; | |
196 | break; | |
197 | case XFS_BTNUM_CNT: | |
198 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT) | |
199 | return true; | |
200 | mask = XFS_SICK_AG_CNTBT; | |
201 | break; | |
202 | case XFS_BTNUM_INO: | |
203 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT) | |
204 | return true; | |
205 | mask = XFS_SICK_AG_INOBT; | |
206 | break; | |
207 | case XFS_BTNUM_FINO: | |
208 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) | |
209 | return true; | |
210 | mask = XFS_SICK_AG_FINOBT; | |
211 | break; | |
212 | case XFS_BTNUM_RMAP: | |
213 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT) | |
214 | return true; | |
215 | mask = XFS_SICK_AG_RMAPBT; | |
216 | break; | |
217 | case XFS_BTNUM_REFC: | |
218 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT) | |
219 | return true; | |
220 | mask = XFS_SICK_AG_REFCNTBT; | |
221 | break; | |
222 | default: | |
223 | ASSERT(0); | |
224 | return true; | |
225 | } | |
226 | ||
227 | if (xfs_ag_has_sickness(pag, mask)) { | |
228 | sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; | |
229 | return false; | |
230 | } | |
231 | ||
232 | return true; | |
233 | } |