]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
372d4ba9 DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
372d4ba9 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
372d4ba9 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
372d4ba9 DW |
7 | #include <stdint.h> |
8 | #include <stdlib.h> | |
9 | #include <pthread.h> | |
10 | #include <sys/statvfs.h> | |
11 | #include "platform_defs.h" | |
372d4ba9 | 12 | #include "xfs_arch.h" |
372d4ba9 | 13 | #include "handle.h" |
42b4c8e8 | 14 | #include "libfrog/paths.h" |
56598728 | 15 | #include "libfrog/workqueue.h" |
372d4ba9 DW |
16 | #include "xfs_scrub.h" |
17 | #include "common.h" | |
18 | #include "inodes.h" | |
fee68490 | 19 | #include "libfrog/fsgeom.h" |
f31b5e12 | 20 | #include "libfrog/bulkstat.h" |
372d4ba9 DW |
21 | |
22 | /* | |
23 | * Iterate a range of inodes. | |
24 | * | |
25 | * This is a little more involved than repeatedly asking BULKSTAT for a | |
26 | * buffer's worth of stat data for some number of inodes. We want to scan as | |
27 | * many of the inodes that the inobt thinks there are, including the ones that | |
28 | * are broken, but if we ask for n inodes starting at x, it'll skip the bad | |
29 | * ones and fill from beyond the range (x + n). | |
30 | * | |
31 | * Therefore, we ask INUMBERS to return one inobt chunk's worth of inode | |
32 | * bitmap information. Then we try to BULKSTAT only the inodes that were | |
33 | * present in that chunk, and compare what we got against what INUMBERS said | |
34 | * was there. If there's a mismatch, we know that we have an inode that fails | |
35 | * the verifiers but we can inject the bulkstat information to force the scrub | |
36 | * code to deal with the broken inodes. | |
37 | * | |
38 | * If the iteration function returns ESTALE, that means that the inode has | |
39 | been deleted and possibly recreated since the BULKSTAT call. We will |
40 | * refresh the stat information and try again up to 30 times before reporting | |
41 | * the staleness as an error. | |
42 | */ | |
43 | ||
/*
 * Run bulkstat on an entire inode allocation group, then check that we got
 * exactly the inodes we expected.  If not, load them one at a time (or fake
 * it) into the bulkstat data.
 *
 * @ctx: scrub context (provides the mount fd and statvfs info).
 * @descr: human-readable description of this AG, for error messages.
 * @inumbers: one inobt chunk record (startino + allocation bitmap).
 * @breq: preallocated bulkstat request; on return breq->bulkstat holds one
 *        entry per allocated inode in the chunk, in bitmap order.
 */
static void
bulkstat_for_inumbers(
	struct scrub_ctx	*ctx,
	const char		*descr,
	const struct xfs_inumbers *inumbers,
	struct xfs_bulkstat_req	*breq)
{
	struct xfs_bulkstat	*bstat = breq->bulkstat;
	struct xfs_bulkstat	*bs;
	int			i;
	int			error;

	/* First we try regular bulkstat, for speed. */
	breq->hdr.ino = inumbers->xi_startino;
	breq->hdr.icount = inumbers->xi_alloccount;
	error = -xfrog_bulkstat(&ctx->mnt, breq);
	if (error) {
		char	errbuf[DESCR_BUFSZ];

		/*
		 * Log the failure but keep going; the loop below re-stats
		 * (or fakes) each inode individually.
		 */
		str_info(ctx, descr, "%s",
				strerror_r(error, errbuf, DESCR_BUFSZ));
	}

	/*
	 * Check each of the stats we got back to make sure we got the inodes
	 * we asked for.
	 */
	for (i = 0, bs = bstat; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
		/* Skip bitmap positions with no allocated inode. */
		if (!(inumbers->xi_allocmask & (1ULL << i)))
			continue;
		/* Bulkstat already gave us this inode; move on. */
		if (bs->bs_ino == inumbers->xi_startino + i) {
			bs++;
			continue;
		}

		/* Load the one inode. */
		error = -xfrog_bulkstat_single(&ctx->mnt,
				inumbers->xi_startino + i, 0, bs);
		if (error || bs->bs_ino != inumbers->xi_startino + i) {
			/*
			 * Still can't get it (e.g. the inode fails the
			 * verifiers); synthesize a minimal record so the
			 * scrub code is forced to deal with the broken
			 * inode anyway.
			 */
			memset(bs, 0, sizeof(struct xfs_bulkstat));
			bs->bs_ino = inumbers->xi_startino + i;
			bs->bs_blksize = ctx->mnt_sv.f_frsize;
		}
		bs++;
	}
}
95 | ||
59f79e0a DW |
/* BULKSTAT wrapper routines. */
struct scan_inodes {
	scrub_inode_iter_fn	fn;		/* callback for each inode */
	void			*arg;		/* opaque data passed to fn */
	bool			aborted;	/* a worker hit a fatal error */
};
102 | ||
372d4ba9 DW |
/*
 * Call into the filesystem for inode/bulkstat information and call our
 * iterator function.  We'll try to fill the bulkstat information in batches,
 * but we also can detect iget failures.
 *
 * Runs as a workqueue item, one per AG.  On any fatal error this sets
 * si->aborted so that scrub_scan_all_inodes can report overall failure.
 */
static void
scan_ag_inodes(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
	struct xfs_handle	handle = { };
	char			descr[DESCR_BUFSZ];
	struct xfs_inumbers_req	*ireq;
	struct xfs_bulkstat_req	*breq;
	struct scan_inodes	*si = arg;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
	struct xfs_bulkstat	*bs;
	struct xfs_inumbers	*inumbers;
	/* Lowest inode number we expect the next inobt record to start at. */
	uint64_t		nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
	int			i;
	int			error;
	int			stale_count = 0;

	snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u inodes"),
				major(ctx->fsinfo.fs_datadev),
				minor(ctx->fsinfo.fs_datadev),
				agno);

	/* Build a file handle template; fid_ino/fid_gen filled in per inode. */
	memcpy(&handle.ha_fsid, ctx->fshandle, sizeof(handle.ha_fsid));
	handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
			sizeof(handle.ha_fid.fid_len);
	handle.ha_fid.fid_pad = 0;

	error = -xfrog_bulkstat_alloc_req(LIBFROG_BULKSTAT_CHUNKSIZE, 0, &breq);
	if (error) {
		str_liberror(ctx, error, descr);
		si->aborted = true;
		return;
	}

	error = -xfrog_inumbers_alloc_req(1, 0, &ireq);
	if (error) {
		str_liberror(ctx, error, descr);
		free(breq);
		si->aborted = true;
		return;
	}
	inumbers = &ireq->inumbers[0];
	xfrog_inumbers_set_ag(ireq, agno);

	/* Find the inode chunk & alloc mask */
	error = -xfrog_inumbers(&ctx->mnt, ireq);
	while (!error && !si->aborted && ireq->hdr.ocount > 0) {
		/*
		 * Make sure that we always make forward progress while we
		 * scan the inode btree.
		 */
		if (nextino > inumbers->xi_startino) {
			str_corrupt(ctx, descr,
	_("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
				cvt_ino_to_agino(&ctx->mnt, nextino),
				cvt_ino_to_agino(&ctx->mnt,
						ireq->inumbers[0].xi_startino));
			si->aborted = true;
			break;
		}
		nextino = ireq->hdr.ino;

		/*
		 * We can have totally empty inode chunks on filesystems where
		 * there are more than 64 inodes per block.  Skip these.
		 */
		if (inumbers->xi_alloccount == 0)
			goto igrp_retry;

		bulkstat_for_inumbers(ctx, descr, inumbers, breq);

		/* Iterate all the inodes. */
		for (i = 0, bs = breq->bulkstat;
		     !si->aborted && i < inumbers->xi_alloccount;
		     i++, bs++) {
			handle.ha_fid.fid_ino = bs->bs_ino;
			handle.ha_fid.fid_gen = bs->bs_gen;
			error = si->fn(ctx, &handle, bs, si->arg);
			switch (error) {
			case 0:
				break;
			case ESTALE: {
				char	idescr[DESCR_BUFSZ];

				/*
				 * Inode was freed (and maybe reused) since
				 * bulkstat; re-run INUMBERS on this chunk to
				 * refresh the stat data, up to 30 times.
				 */
				stale_count++;
				if (stale_count < 30) {
					ireq->hdr.ino = inumbers->xi_startino;
					goto igrp_retry;
				}
				scrub_render_ino_descr(ctx, idescr, DESCR_BUFSZ,
						bs->bs_ino, bs->bs_gen, NULL);
				str_info(ctx, idescr,
_("Changed too many times during scan; giving up."));
				break;
			}
			case ECANCELED:
				/* Clean abort requested by the callback. */
				error = 0;
				/* fall thru */
			default:
				goto err;
			}
			if (scrub_excessive_errors(ctx)) {
				si->aborted = true;
				goto out;
			}
		}

		/* Chunk done; reset the ESTALE retry budget. */
		stale_count = 0;
igrp_retry:
		error = -xfrog_inumbers(&ctx->mnt, ireq);
	}

err:
	if (error) {
		str_liberror(ctx, error, descr);
		si->aborted = true;
	}
out:
	free(ireq);
	free(breq);
}
231 | ||
59f79e0a DW |
232 | /* |
233 | * Scan all the inodes in a filesystem. On error, this function will log | |
234 | * an error message and return -1. | |
235 | */ | |
236 | int | |
237 | scrub_scan_all_inodes( | |
372d4ba9 | 238 | struct scrub_ctx *ctx, |
59f79e0a | 239 | scrub_inode_iter_fn fn, |
372d4ba9 DW |
240 | void *arg) |
241 | { | |
59f79e0a DW |
242 | struct scan_inodes si = { |
243 | .fn = fn, | |
244 | .arg = arg, | |
245 | }; | |
372d4ba9 DW |
246 | xfs_agnumber_t agno; |
247 | struct workqueue wq; | |
248 | int ret; | |
249 | ||
baed134d | 250 | ret = -workqueue_create(&wq, (struct xfs_mount *)ctx, |
372d4ba9 DW |
251 | scrub_nproc_workqueue(ctx)); |
252 | if (ret) { | |
9d57cbfc | 253 | str_liberror(ctx, ret, _("creating bulkstat workqueue")); |
59f79e0a | 254 | return -1; |
372d4ba9 DW |
255 | } |
256 | ||
3f9efb2e | 257 | for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) { |
baed134d | 258 | ret = -workqueue_add(&wq, scan_ag_inodes, agno, &si); |
372d4ba9 | 259 | if (ret) { |
59f79e0a | 260 | si.aborted = true; |
9d57cbfc | 261 | str_liberror(ctx, ret, _("queueing bulkstat work")); |
372d4ba9 DW |
262 | break; |
263 | } | |
264 | } | |
265 | ||
baed134d | 266 | ret = -workqueue_terminate(&wq); |
71296cf8 | 267 | if (ret) { |
59f79e0a | 268 | si.aborted = true; |
71296cf8 DW |
269 | str_liberror(ctx, ret, _("finishing bulkstat work")); |
270 | } | |
372d4ba9 DW |
271 | workqueue_destroy(&wq); |
272 | ||
59f79e0a | 273 | return si.aborted ? -1 : 0; |
372d4ba9 DW |
274 | } |
275 | ||
59f79e0a | 276 | /* Open a file by handle, returning either the fd or -1 on error. */ |
372d4ba9 | 277 | int |
59f79e0a | 278 | scrub_open_handle( |
372d4ba9 DW |
279 | struct xfs_handle *handle) |
280 | { | |
281 | return open_by_fshandle(handle, sizeof(*handle), | |
282 | O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); | |
283 | } |