]>
Commit | Line | Data |
---|---|---|
8d318d62 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
fa16b376 | 2 | /* |
52520522 | 3 | * Copyright (C) 2018-2024 Oracle. All Rights Reserved. |
8d318d62 | 4 | * Author: Darrick J. Wong <djwong@kernel.org> |
fa16b376 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
fa16b376 | 7 | #include <stdint.h> |
fa16b376 | 8 | #include <sys/types.h> |
fa16b376 | 9 | #include <sys/statvfs.h> |
19852474 | 10 | #include "list.h" |
42b4c8e8 | 11 | #include "libfrog/paths.h" |
56598728 | 12 | #include "libfrog/workqueue.h" |
fa16b376 DW |
13 | #include "xfs_scrub.h" |
14 | #include "common.h" | |
15 | #include "counter.h" | |
16 | #include "inodes.h" | |
ed60d210 | 17 | #include "progress.h" |
fa16b376 | 18 | #include "scrub.h" |
ee310b0c | 19 | #include "repair.h" |
fa16b376 DW |
20 | |
21 | /* Phase 3: Scan all inodes. */ | |
22 | ||
/*
 * State shared by every inode scrub worker for the duration of phase 3.  A
 * single instance lives on the stack of phase3_func() and is handed to each
 * scrub_inode() call through its opaque argument.
 */
struct scrub_inode_ctx {
	/* Scrub context that owns this scan. */
	struct scrub_ctx	*ctx;

	/* Per-thread counter that is bumped once for each inode visited. */
	struct ptcounter	*icount;

	/*
	 * Array of mutexes indexed by AG number; the matching one is held
	 * while work is added to that AG's deferred repair list.
	 */
	pthread_mutex_t		*locks;

	/* Becomes true when any worker wants the whole scan to stop. */
	bool			aborted;

	/*
	 * When true, file repairs are not attempted during the scan and are
	 * left queued for phase 4 instead.
	 */
	bool			always_defer_repairs;
};
38 | ||
6c05cc5d DW |
39 | /* Report a filesystem error that the vfs fed us on close. */ |
40 | static void | |
df024103 | 41 | report_close_error( |
6c05cc5d | 42 | struct scrub_ctx *ctx, |
4cca629d | 43 | struct xfs_bulkstat *bstat) |
6c05cc5d DW |
44 | { |
45 | char descr[DESCR_BUFSZ]; | |
6c05cc5d DW |
46 | int old_errno = errno; |
47 | ||
15589f0a DW |
48 | scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino, |
49 | bstat->bs_gen, NULL); | |
6c05cc5d DW |
50 | errno = old_errno; |
51 | str_errno(ctx, descr); | |
52 | } | |
53 | ||
8f0c270f DW |
54 | /* |
55 | * Defer all the repairs until phase 4, being careful about locking since the | |
56 | * inode scrub threads are not per-AG. | |
57 | */ | |
58 | static void | |
59 | defer_inode_repair( | |
60 | struct scrub_inode_ctx *ictx, | |
61 | xfs_agnumber_t agno, | |
62 | struct action_list *alist) | |
63 | { | |
64 | if (alist->nr == 0) | |
65 | return; | |
66 | ||
67 | pthread_mutex_lock(&ictx->locks[agno]); | |
68 | action_list_defer(ictx->ctx, agno, alist); | |
69 | pthread_mutex_unlock(&ictx->locks[agno]); | |
70 | } | |
71 | ||
12ca67b3 DW |
72 | /* Run repair actions now and defer unfinished items for later. */ |
73 | static int | |
74 | try_inode_repair( | |
75 | struct scrub_inode_ctx *ictx, | |
bb9be147 | 76 | int fd, |
12ca67b3 DW |
77 | xfs_agnumber_t agno, |
78 | struct action_list *alist) | |
79 | { | |
bb9be147 DW |
80 | int ret; |
81 | ||
12ca67b3 DW |
82 | /* |
83 | * If at the start of phase 3 we already had ag/rt metadata repairs | |
84 | * queued up for phase 4, leave the action list untouched so that file | |
85 | * metadata repairs will be deferred in scan order until phase 4. | |
86 | */ | |
87 | if (ictx->always_defer_repairs) | |
88 | return 0; | |
89 | ||
bb9be147 DW |
90 | ret = action_list_process(ictx->ctx, fd, alist, |
91 | ALP_REPAIR_ONLY | ALP_NOPROGRESS); | |
92 | if (ret) | |
93 | return ret; | |
94 | ||
8f0c270f | 95 | defer_inode_repair(ictx, agno, alist); |
bb9be147 | 96 | return 0; |
12ca67b3 DW |
97 | } |
98 | ||
fa16b376 DW |
99 | /* Verify the contents, xattrs, and extent maps of an inode. */ |
100 | static int | |
df024103 | 101 | scrub_inode( |
fa16b376 DW |
102 | struct scrub_ctx *ctx, |
103 | struct xfs_handle *handle, | |
4cca629d | 104 | struct xfs_bulkstat *bstat, |
fa16b376 DW |
105 | void *arg) |
106 | { | |
83d2c80b | 107 | struct action_list alist; |
fa16b376 DW |
108 | struct scrub_inode_ctx *ictx = arg; |
109 | struct ptcounter *icount = ictx->icount; | |
ee310b0c | 110 | xfs_agnumber_t agno; |
fa16b376 | 111 | int fd = -1; |
6c05cc5d | 112 | int error; |
fa16b376 | 113 | |
83d2c80b | 114 | action_list_init(&alist); |
a749451c | 115 | agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino); |
fa16b376 DW |
116 | background_sleep(); |
117 | ||
a7ee7b68 DW |
118 | /* |
119 | * Open this regular file to pin it in memory. Avoiding the use of | |
120 | * scan-by-handle means that the in-kernel scrubber doesn't pay the | |
121 | * cost of opening the handle (looking up the inode in the inode btree, | |
122 | * grabbing the inode, checking the generation) with every scrub call. | |
123 | * | |
7ddf6e0f DW |
124 | * Ignore any runtime or corruption related errors here because we can |
125 | * fall back to scrubbing by handle. ESTALE can be ignored for the | |
126 | * following reasons: | |
127 | * | |
128 | * - If the file has been deleted since bulkstat, there's nothing to | |
129 | * check. Scrub-by-handle returns ENOENT for such inodes. | |
130 | * - If the file has been deleted and reallocated since bulkstat, | |
131 | * its ondisk metadata have been rewritten and is assumed to be ok. | |
132 | * Scrub-by-handle also returns ENOENT if the generation doesn't | |
133 | * match. | |
134 | * - The file itself is corrupt and cannot be loaded. In this case, | |
135 | * we fall back to scrub-by-handle. | |
136 | * | |
a7ee7b68 DW |
137 | * Note: We cannot use this same trick for directories because the VFS |
138 | * will try to reconnect directory file handles to the root directory | |
139 | * by walking '..' entries upwards, and loops in the dirent index | |
140 | * btree will cause livelocks. | |
a7ee7b68 | 141 | */ |
7ddf6e0f | 142 | if (S_ISREG(bstat->bs_mode)) |
59f79e0a | 143 | fd = scrub_open_handle(handle); |
fa16b376 DW |
144 | |
145 | /* Scrub the inode. */ | |
a7ee7b68 | 146 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist); |
d22f2471 | 147 | if (error) |
ee310b0c DW |
148 | goto out; |
149 | ||
bb9be147 | 150 | error = try_inode_repair(ictx, fd, agno, &alist); |
83d2c80b | 151 | if (error) |
fa16b376 DW |
152 | goto out; |
153 | ||
154 | /* Scrub all block mappings. */ | |
a7ee7b68 | 155 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist); |
d22f2471 | 156 | if (error) |
fa16b376 | 157 | goto out; |
a7ee7b68 | 158 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist); |
d22f2471 | 159 | if (error) |
fa16b376 | 160 | goto out; |
a7ee7b68 | 161 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist); |
d22f2471 | 162 | if (error) |
ee310b0c DW |
163 | goto out; |
164 | ||
bb9be147 | 165 | error = try_inode_repair(ictx, fd, agno, &alist); |
83d2c80b | 166 | if (error) |
fa16b376 DW |
167 | goto out; |
168 | ||
169 | if (S_ISLNK(bstat->bs_mode)) { | |
170 | /* Check symlink contents. */ | |
a7ee7b68 DW |
171 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK, |
172 | &alist); | |
fa16b376 DW |
173 | } else if (S_ISDIR(bstat->bs_mode)) { |
174 | /* Check the directory entries. */ | |
a7ee7b68 | 175 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist); |
fa16b376 | 176 | } |
d22f2471 | 177 | if (error) |
fa16b376 DW |
178 | goto out; |
179 | ||
180 | /* Check all the extended attributes. */ | |
a7ee7b68 | 181 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist); |
d22f2471 | 182 | if (error) |
fa16b376 DW |
183 | goto out; |
184 | ||
185 | /* Check parent pointers. */ | |
a7ee7b68 | 186 | error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist); |
d22f2471 | 187 | if (error) |
ee310b0c DW |
188 | goto out; |
189 | ||
190 | /* Try to repair the file while it's open. */ | |
bb9be147 | 191 | error = try_inode_repair(ictx, fd, agno, &alist); |
83d2c80b | 192 | if (error) |
fa16b376 DW |
193 | goto out; |
194 | ||
195 | out: | |
d22f2471 | 196 | if (error) |
df024103 DW |
197 | ictx->aborted = true; |
198 | ||
da3dd6c0 DW |
199 | error = ptcounter_add(icount, 1); |
200 | if (error) { | |
201 | str_liberror(ctx, error, | |
202 | _("incrementing scanned inode counter")); | |
df024103 | 203 | ictx->aborted = true; |
da3dd6c0 | 204 | } |
ed60d210 | 205 | progress_add(1); |
12ca67b3 DW |
206 | |
207 | if (!error && !ictx->aborted) | |
8f0c270f | 208 | defer_inode_repair(ictx, agno, &alist); |
12ca67b3 | 209 | |
6c05cc5d | 210 | if (fd >= 0) { |
df024103 DW |
211 | int err2; |
212 | ||
213 | err2 = close(fd); | |
214 | if (err2) { | |
215 | report_close_error(ctx, bstat); | |
216 | ictx->aborted = true; | |
217 | } | |
6c05cc5d | 218 | } |
df024103 DW |
219 | |
220 | if (!error && ictx->aborted) | |
221 | error = ECANCELED; | |
222 | return error; | |
fa16b376 DW |
223 | } |
224 | ||
225 | /* Verify all the inodes in a filesystem. */ | |
df024103 DW |
226 | int |
227 | phase3_func( | |
fa16b376 DW |
228 | struct scrub_ctx *ctx) |
229 | { | |
12ca67b3 | 230 | struct scrub_inode_ctx ictx = { .ctx = ctx }; |
da3dd6c0 | 231 | uint64_t val; |
12ca67b3 | 232 | xfs_agnumber_t agno; |
da3dd6c0 | 233 | int err; |
fa16b376 | 234 | |
da3dd6c0 DW |
235 | err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount); |
236 | if (err) { | |
237 | str_liberror(ctx, err, _("creating scanned inode counter")); | |
df024103 | 238 | return err; |
fa16b376 DW |
239 | } |
240 | ||
8f0c270f DW |
241 | ictx.locks = calloc(ctx->mnt.fsgeom.agcount, sizeof(pthread_mutex_t)); |
242 | if (!ictx.locks) { | |
243 | str_errno(ctx, _("creating per-AG repair list locks")); | |
244 | err = ENOMEM; | |
245 | goto out_ptcounter; | |
246 | } | |
247 | ||
12ca67b3 DW |
248 | /* |
249 | * If we already have ag/fs metadata to repair from previous phases, | |
250 | * we would rather not try to repair file metadata until we've tried | |
251 | * to repair the space metadata. | |
252 | */ | |
253 | for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) { | |
8f0c270f DW |
254 | pthread_mutex_init(&ictx.locks[agno], NULL); |
255 | ||
12ca67b3 DW |
256 | if (!action_list_empty(&ctx->action_lists[agno])) |
257 | ictx.always_defer_repairs = true; | |
258 | } | |
259 | ||
df024103 DW |
260 | err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx); |
261 | if (!err && ictx.aborted) | |
262 | err = ECANCELED; | |
59f79e0a | 263 | if (err) |
8f0c270f | 264 | goto out_locks; |
df024103 | 265 | |
273165cc | 266 | scrub_report_preen_triggers(ctx); |
da3dd6c0 DW |
267 | err = ptcounter_value(ictx.icount, &val); |
268 | if (err) { | |
269 | str_liberror(ctx, err, _("summing scanned inode counter")); | |
8f0c270f | 270 | goto out_locks; |
da3dd6c0 | 271 | } |
df024103 | 272 | |
da3dd6c0 | 273 | ctx->inodes_checked = val; |
8f0c270f DW |
274 | out_locks: |
275 | for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) | |
276 | pthread_mutex_destroy(&ictx.locks[agno]); | |
277 | free(ictx.locks); | |
278 | out_ptcounter: | |
fa16b376 | 279 | ptcounter_free(ictx.icount); |
df024103 | 280 | return err; |
fa16b376 | 281 | } |
ed60d210 | 282 | |
df024103 DW |
283 | /* Estimate how much work we're going to do. */ |
284 | int | |
285 | phase3_estimate( | |
ed60d210 DW |
286 | struct scrub_ctx *ctx, |
287 | uint64_t *items, | |
288 | unsigned int *nr_threads, | |
289 | int *rshift) | |
290 | { | |
291 | *items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree; | |
292 | *nr_threads = scrub_nproc(ctx); | |
293 | *rshift = 0; | |
df024103 DW |
294 | return 0; |
295 | } |