]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
372d4ba9 DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
372d4ba9 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
372d4ba9 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
372d4ba9 DW |
7 | #include <stdint.h> |
8 | #include <stdlib.h> | |
9 | #include <pthread.h> | |
10 | #include <sys/statvfs.h> | |
11 | #include "platform_defs.h" | |
372d4ba9 DW |
12 | #include "xfs_arch.h" |
13 | #include "xfs_format.h" | |
14 | #include "handle.h" | |
42b4c8e8 | 15 | #include "libfrog/paths.h" |
56598728 | 16 | #include "libfrog/workqueue.h" |
372d4ba9 DW |
17 | #include "xfs_scrub.h" |
18 | #include "common.h" | |
19 | #include "inodes.h" | |
fee68490 | 20 | #include "libfrog/fsgeom.h" |
f31b5e12 | 21 | #include "libfrog/bulkstat.h" |
372d4ba9 DW |
22 | |
23 | /* | |
24 | * Iterate a range of inodes. | |
25 | * | |
26 | * This is a little more involved than repeatedly asking BULKSTAT for a | |
27 | * buffer's worth of stat data for some number of inodes. We want to scan as | |
28 | * many of the inodes that the inobt thinks there are, including the ones that | |
29 | * are broken, but if we ask for n inodes starting at x, it'll skip the bad | |
30 | * ones and fill from beyond the range (x + n). | |
31 | * | |
32 | * Therefore, we ask INUMBERS to return one inobt chunk's worth of inode | |
33 | * bitmap information. Then we try to BULKSTAT only the inodes that were | |
34 | * present in that chunk, and compare what we got against what INUMBERS said | |
35 | * was there. If there's a mismatch, we know that we have an inode that fails | |
36 | * the verifiers but we can inject the bulkstat information to force the scrub | |
37 | * code to deal with the broken inodes. | |
38 | * | |
39 | * If the iteration function returns ESTALE, that means that the inode has | |
40 | * been deleted and possibly recreated since the BULKSTAT call. We will | |
41 | * refresh the stat information and try again up to 30 times before reporting | |
42 | * the staleness as an error. | |
43 | */ | |
44 | ||
45 | /* | |
e3724c8b DW |
46 | * Run bulkstat on an entire inode allocation group, then check that we got |
47 | * exactly the inodes we expected. If not, load them one at a time (or fake | |
48 | * it) into the bulkstat data. | |
372d4ba9 DW |
49 | */ |
50 | static void | |
e3724c8b | 51 | bulkstat_for_inumbers( |
372d4ba9 | 52 | struct scrub_ctx *ctx, |
e3724c8b DW |
53 | const char *descr, |
54 | const struct xfs_inumbers *inumbers, | |
55 | struct xfs_bulkstat_req *breq) | |
372d4ba9 | 56 | { |
e3724c8b | 57 | struct xfs_bulkstat *bstat = breq->bulkstat; |
4cca629d | 58 | struct xfs_bulkstat *bs; |
372d4ba9 DW |
59 | int i; |
60 | int error; | |
61 | ||
e3724c8b DW |
62 | /* First we try regular bulkstat, for speed. */ |
63 | breq->hdr.ino = inumbers->xi_startino; | |
64 | breq->hdr.icount = inumbers->xi_alloccount; | |
e6542132 | 65 | error = -xfrog_bulkstat(&ctx->mnt, breq); |
e3724c8b DW |
66 | if (error) { |
67 | char errbuf[DESCR_BUFSZ]; | |
68 | ||
69 | str_info(ctx, descr, "%s", | |
70 | strerror_r(error, errbuf, DESCR_BUFSZ)); | |
71 | } | |
72 | ||
73 | /* | |
74 | * Check each of the stats we got back to make sure we got the inodes | |
75 | * we asked for. | |
76 | */ | |
372d4ba9 | 77 | for (i = 0, bs = bstat; i < XFS_INODES_PER_CHUNK; i++) { |
b94a69ac | 78 | if (!(inumbers->xi_allocmask & (1ULL << i))) |
372d4ba9 | 79 | continue; |
b94a69ac | 80 | if (bs->bs_ino == inumbers->xi_startino + i) { |
372d4ba9 DW |
81 | bs++; |
82 | continue; | |
83 | } | |
84 | ||
85 | /* Load the one inode. */ | |
e6542132 | 86 | error = -xfrog_bulkstat_single(&ctx->mnt, |
b94a69ac DW |
87 | inumbers->xi_startino + i, 0, bs); |
88 | if (error || bs->bs_ino != inumbers->xi_startino + i) { | |
4cca629d | 89 | memset(bs, 0, sizeof(struct xfs_bulkstat)); |
b94a69ac | 90 | bs->bs_ino = inumbers->xi_startino + i; |
372d4ba9 DW |
91 | bs->bs_blksize = ctx->mnt_sv.f_frsize; |
92 | } | |
93 | bs++; | |
94 | } | |
95 | } | |
96 | ||
59f79e0a DW |
97 | /* BULKSTAT wrapper routines. */ |
98 | struct scan_inodes { | |
99 | scrub_inode_iter_fn fn; | |
100 | void *arg; | |
101 | bool aborted; | |
102 | }; | |
103 | ||
372d4ba9 DW |
104 | /* |
105 | * Call into the filesystem for inode/bulkstat information and call our | |
106 | * iterator function. We'll try to fill the bulkstat information in batches, | |
107 | * but we also can detect iget failures. | |
108 | */ | |
59f79e0a DW |
109 | static void |
110 | scan_ag_inodes( | |
111 | struct workqueue *wq, | |
112 | xfs_agnumber_t agno, | |
372d4ba9 DW |
113 | void *arg) |
114 | { | |
372d4ba9 | 115 | struct xfs_handle handle; |
59f79e0a | 116 | char descr[DESCR_BUFSZ]; |
b94a69ac | 117 | struct xfs_inumbers_req *ireq; |
4cca629d | 118 | struct xfs_bulkstat_req *breq; |
59f79e0a DW |
119 | struct scan_inodes *si = arg; |
120 | struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx; | |
4cca629d | 121 | struct xfs_bulkstat *bs; |
b94a69ac | 122 | struct xfs_inumbers *inumbers; |
372d4ba9 DW |
123 | int i; |
124 | int error; | |
125 | int stale_count = 0; | |
126 | ||
59f79e0a DW |
127 | snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u inodes"), |
128 | major(ctx->fsinfo.fs_datadev), | |
129 | minor(ctx->fsinfo.fs_datadev), | |
130 | agno); | |
131 | ||
132 | memcpy(&handle.ha_fsid, ctx->fshandle, sizeof(handle.ha_fsid)); | |
372d4ba9 DW |
133 | handle.ha_fid.fid_len = sizeof(xfs_fid_t) - |
134 | sizeof(handle.ha_fid.fid_len); | |
135 | handle.ha_fid.fid_pad = 0; | |
136 | ||
e6542132 DW |
137 | error = -xfrog_bulkstat_alloc_req(XFS_INODES_PER_CHUNK, 0, &breq); |
138 | if (error) { | |
139 | str_liberror(ctx, error, descr); | |
59f79e0a DW |
140 | si->aborted = true; |
141 | return; | |
4cca629d DW |
142 | } |
143 | ||
e6542132 DW |
144 | error = -xfrog_inumbers_alloc_req(1, 0, &ireq); |
145 | if (error) { | |
146 | str_liberror(ctx, error, descr); | |
b94a69ac | 147 | free(breq); |
59f79e0a DW |
148 | si->aborted = true; |
149 | return; | |
b94a69ac DW |
150 | } |
151 | inumbers = &ireq->inumbers[0]; | |
23ea9841 | 152 | xfrog_inumbers_set_ag(ireq, agno); |
b94a69ac | 153 | |
372d4ba9 | 154 | /* Find the inode chunk & alloc mask */ |
e6542132 | 155 | error = -xfrog_inumbers(&ctx->mnt, ireq); |
59f79e0a | 156 | while (!error && !si->aborted && ireq->hdr.ocount > 0) { |
300661d3 DW |
157 | /* |
158 | * We can have totally empty inode chunks on filesystems where | |
159 | * there are more than 64 inodes per block. Skip these. | |
160 | */ | |
b94a69ac | 161 | if (inumbers->xi_alloccount == 0) |
300661d3 | 162 | goto igrp_retry; |
4cca629d | 163 | |
e3724c8b | 164 | bulkstat_for_inumbers(ctx, descr, inumbers, breq); |
372d4ba9 DW |
165 | |
166 | /* Iterate all the inodes. */ | |
4cca629d | 167 | for (i = 0, bs = breq->bulkstat; |
59f79e0a | 168 | !si->aborted && i < inumbers->xi_alloccount; |
4cca629d | 169 | i++, bs++) { |
372d4ba9 DW |
170 | handle.ha_fid.fid_ino = bs->bs_ino; |
171 | handle.ha_fid.fid_gen = bs->bs_gen; | |
59f79e0a | 172 | error = si->fn(ctx, &handle, bs, si->arg); |
372d4ba9 DW |
173 | switch (error) { |
174 | case 0: | |
175 | break; | |
59f79e0a DW |
176 | case ESTALE: { |
177 | char idescr[DESCR_BUFSZ]; | |
178 | ||
372d4ba9 DW |
179 | stale_count++; |
180 | if (stale_count < 30) { | |
b94a69ac | 181 | ireq->hdr.ino = inumbers->xi_startino; |
372d4ba9 DW |
182 | goto igrp_retry; |
183 | } | |
15589f0a DW |
184 | scrub_render_ino_descr(ctx, idescr, DESCR_BUFSZ, |
185 | bs->bs_ino, bs->bs_gen, NULL); | |
bb5dbd06 DW |
186 | str_info(ctx, idescr, |
187 | _("Changed too many times during scan; giving up.")); | |
372d4ba9 | 188 | break; |
59f79e0a | 189 | } |
b8e62724 | 190 | case ECANCELED: |
372d4ba9 DW |
191 | error = 0; |
192 | /* fall thru */ | |
193 | default: | |
372d4ba9 DW |
194 | goto err; |
195 | } | |
196 | if (xfs_scrub_excessive_errors(ctx)) { | |
59f79e0a | 197 | si->aborted = true; |
372d4ba9 DW |
198 | goto out; |
199 | } | |
200 | } | |
201 | ||
202 | stale_count = 0; | |
203 | igrp_retry: | |
e6542132 | 204 | error = -xfrog_inumbers(&ctx->mnt, ireq); |
372d4ba9 DW |
205 | } |
206 | ||
207 | err: | |
208 | if (error) { | |
621f3374 | 209 | str_liberror(ctx, error, descr); |
59f79e0a | 210 | si->aborted = true; |
372d4ba9 DW |
211 | } |
212 | out: | |
b94a69ac | 213 | free(ireq); |
4cca629d | 214 | free(breq); |
372d4ba9 DW |
215 | } |
216 | ||
59f79e0a DW |
217 | /* |
218 | * Scan all the inodes in a filesystem. On error, this function will log | |
219 | * an error message and return -1. | |
220 | */ | |
221 | int | |
222 | scrub_scan_all_inodes( | |
372d4ba9 | 223 | struct scrub_ctx *ctx, |
59f79e0a | 224 | scrub_inode_iter_fn fn, |
372d4ba9 DW |
225 | void *arg) |
226 | { | |
59f79e0a DW |
227 | struct scan_inodes si = { |
228 | .fn = fn, | |
229 | .arg = arg, | |
230 | }; | |
372d4ba9 DW |
231 | xfs_agnumber_t agno; |
232 | struct workqueue wq; | |
233 | int ret; | |
234 | ||
baed134d | 235 | ret = -workqueue_create(&wq, (struct xfs_mount *)ctx, |
372d4ba9 DW |
236 | scrub_nproc_workqueue(ctx)); |
237 | if (ret) { | |
9d57cbfc | 238 | str_liberror(ctx, ret, _("creating bulkstat workqueue")); |
59f79e0a | 239 | return -1; |
372d4ba9 DW |
240 | } |
241 | ||
3f9efb2e | 242 | for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) { |
baed134d | 243 | ret = -workqueue_add(&wq, scan_ag_inodes, agno, &si); |
372d4ba9 | 244 | if (ret) { |
59f79e0a | 245 | si.aborted = true; |
9d57cbfc | 246 | str_liberror(ctx, ret, _("queueing bulkstat work")); |
372d4ba9 DW |
247 | break; |
248 | } | |
249 | } | |
250 | ||
baed134d | 251 | ret = -workqueue_terminate(&wq); |
71296cf8 | 252 | if (ret) { |
59f79e0a | 253 | si.aborted = true; |
71296cf8 DW |
254 | str_liberror(ctx, ret, _("finishing bulkstat work")); |
255 | } | |
372d4ba9 DW |
256 | workqueue_destroy(&wq); |
257 | ||
59f79e0a | 258 | return si.aborted ? -1 : 0; |
372d4ba9 DW |
259 | } |
260 | ||
59f79e0a | 261 | /* Open a file by handle, returning either the fd or -1 on error. */ |
372d4ba9 | 262 | int |
59f79e0a | 263 | scrub_open_handle( |
372d4ba9 DW |
264 | struct xfs_handle *handle) |
265 | { | |
266 | return open_by_fshandle(handle, sizeof(*handle), | |
267 | O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); | |
268 | } |