]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/inodes.c
xfs_scrub: handle concurrent directory updates during name scan
[thirdparty/xfsprogs-dev.git] / scrub / inodes.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
372d4ba9
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
372d4ba9 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
372d4ba9 5 */
a440f877 6#include "xfs.h"
372d4ba9
DW
7#include <stdint.h>
8#include <stdlib.h>
9#include <pthread.h>
10#include <sys/statvfs.h>
11#include "platform_defs.h"
372d4ba9 12#include "xfs_arch.h"
372d4ba9 13#include "handle.h"
42b4c8e8 14#include "libfrog/paths.h"
56598728 15#include "libfrog/workqueue.h"
372d4ba9
DW
16#include "xfs_scrub.h"
17#include "common.h"
18#include "inodes.h"
fee68490 19#include "libfrog/fsgeom.h"
f31b5e12 20#include "libfrog/bulkstat.h"
372d4ba9
DW
21
22/*
23 * Iterate a range of inodes.
24 *
25 * This is a little more involved than repeatedly asking BULKSTAT for a
26 * buffer's worth of stat data for some number of inodes. We want to scan as
27 * many of the inodes that the inobt thinks there are, including the ones that
28 * are broken, but if we ask for n inodes starting at x, it'll skip the bad
29 * ones and fill from beyond the range (x + n).
30 *
31 * Therefore, we ask INUMBERS to return one inobt chunk's worth of inode
32 * bitmap information. Then we try to BULKSTAT only the inodes that were
33 * present in that chunk, and compare what we got against what INUMBERS said
34 * was there. If there's a mismatch, we know that we have an inode that fails
35 * the verifiers but we can inject the bulkstat information to force the scrub
36 * code to deal with the broken inodes.
37 *
38 * If the iteration function returns ESTALE, that means that the inode has
 * been deleted and possibly recreated since the BULKSTAT call.  We will
40 * refresh the stat information and try again up to 30 times before reporting
41 * the staleness as an error.
42 */
43
44/*
e3724c8b
DW
45 * Run bulkstat on an entire inode allocation group, then check that we got
46 * exactly the inodes we expected. If not, load them one at a time (or fake
47 * it) into the bulkstat data.
372d4ba9
DW
48 */
49static void
e3724c8b 50bulkstat_for_inumbers(
372d4ba9 51 struct scrub_ctx *ctx,
e3724c8b
DW
52 const char *descr,
53 const struct xfs_inumbers *inumbers,
54 struct xfs_bulkstat_req *breq)
372d4ba9 55{
e3724c8b 56 struct xfs_bulkstat *bstat = breq->bulkstat;
4cca629d 57 struct xfs_bulkstat *bs;
372d4ba9
DW
58 int i;
59 int error;
60
e3724c8b
DW
61 /* First we try regular bulkstat, for speed. */
62 breq->hdr.ino = inumbers->xi_startino;
63 breq->hdr.icount = inumbers->xi_alloccount;
e6542132 64 error = -xfrog_bulkstat(&ctx->mnt, breq);
e3724c8b
DW
65 if (error) {
66 char errbuf[DESCR_BUFSZ];
67
68 str_info(ctx, descr, "%s",
69 strerror_r(error, errbuf, DESCR_BUFSZ));
70 }
71
72 /*
73 * Check each of the stats we got back to make sure we got the inodes
74 * we asked for.
75 */
e749bfaf 76 for (i = 0, bs = bstat; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
b94a69ac 77 if (!(inumbers->xi_allocmask & (1ULL << i)))
372d4ba9 78 continue;
b94a69ac 79 if (bs->bs_ino == inumbers->xi_startino + i) {
372d4ba9
DW
80 bs++;
81 continue;
82 }
83
84 /* Load the one inode. */
e6542132 85 error = -xfrog_bulkstat_single(&ctx->mnt,
b94a69ac
DW
86 inumbers->xi_startino + i, 0, bs);
87 if (error || bs->bs_ino != inumbers->xi_startino + i) {
4cca629d 88 memset(bs, 0, sizeof(struct xfs_bulkstat));
b94a69ac 89 bs->bs_ino = inumbers->xi_startino + i;
372d4ba9
DW
90 bs->bs_blksize = ctx->mnt_sv.f_frsize;
91 }
92 bs++;
93 }
94}
95
59f79e0a
DW
/* BULKSTAT wrapper routines. */
struct scan_inodes {
	scrub_inode_iter_fn	fn;	/* caller's per-inode callback */
	void			*arg;	/* opaque context passed through to fn */
	bool			aborted;/* set by any worker on failure to stop the scan */
};
102
372d4ba9
DW
/*
 * Call into the filesystem for inode/bulkstat information and call our
 * iterator function.  We'll try to fill the bulkstat information in batches,
 * but we also can detect iget failures.
 */
static void
scan_ag_inodes(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
	struct xfs_handle	handle = { };
	char			descr[DESCR_BUFSZ];
	struct xfs_inumbers_req	*ireq;
	struct xfs_bulkstat_req	*breq;
	struct scan_inodes	*si = arg;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
	struct xfs_bulkstat	*bs;
	struct xfs_inumbers	*inumbers;
	/* Lowest inode number we expect the next INUMBERS chunk to cover. */
	uint64_t		nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
	int			i;
	int			error;
	int			stale_count = 0;

	snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u inodes"),
				major(ctx->fsinfo.fs_datadev),
				minor(ctx->fsinfo.fs_datadev),
				agno);

	/* Build a file handle template; fid_ino/fid_gen filled per inode. */
	memcpy(&handle.ha_fsid, ctx->fshandle, sizeof(handle.ha_fsid));
	handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
			sizeof(handle.ha_fid.fid_len);
	handle.ha_fid.fid_pad = 0;

	error = -xfrog_bulkstat_alloc_req(LIBFROG_BULKSTAT_CHUNKSIZE, 0, &breq);
	if (error) {
		str_liberror(ctx, error, descr);
		si->aborted = true;
		return;
	}

	/* One inumbers record at a time, restricted to this AG. */
	error = -xfrog_inumbers_alloc_req(1, 0, &ireq);
	if (error) {
		str_liberror(ctx, error, descr);
		free(breq);
		si->aborted = true;
		return;
	}
	inumbers = &ireq->inumbers[0];
	xfrog_inumbers_set_ag(ireq, agno);

	/* Find the inode chunk & alloc mask */
	error = -xfrog_inumbers(&ctx->mnt, ireq);
	while (!error && !si->aborted && ireq->hdr.ocount > 0) {
		/*
		 * Make sure that we always make forward progress while we
		 * scan the inode btree.
		 */
		if (nextino > inumbers->xi_startino) {
			str_corrupt(ctx, descr,
	_("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
					cvt_ino_to_agino(&ctx->mnt, nextino),
					cvt_ino_to_agino(&ctx->mnt,
							ireq->inumbers[0].xi_startino));
			si->aborted = true;
			break;
		}
		nextino = ireq->hdr.ino;

		/*
		 * We can have totally empty inode chunks on filesystems where
		 * there are more than 64 inodes per block.  Skip these.
		 */
		if (inumbers->xi_alloccount == 0)
			goto igrp_retry;

		bulkstat_for_inumbers(ctx, descr, inumbers, breq);

		/* Iterate all the inodes. */
		for (i = 0, bs = breq->bulkstat;
		     !si->aborted && i < inumbers->xi_alloccount;
		     i++, bs++) {
			handle.ha_fid.fid_ino = bs->bs_ino;
			handle.ha_fid.fid_gen = bs->bs_gen;
			error = si->fn(ctx, &handle, bs, si->arg);
			switch (error) {
			case 0:
				break;
			case ESTALE: {
				char	idescr[DESCR_BUFSZ];

				/*
				 * Inode changed under us; rewind the request
				 * to the start of this chunk and re-stat the
				 * whole group.  Give up after 30 retries.
				 */
				stale_count++;
				if (stale_count < 30) {
					ireq->hdr.ino = inumbers->xi_startino;
					goto igrp_retry;
				}
				scrub_render_ino_descr(ctx, idescr, DESCR_BUFSZ,
						bs->bs_ino, bs->bs_gen, NULL);
				str_info(ctx, idescr,
_("Changed too many times during scan; giving up."));
				break;
			}
			case ECANCELED:
				/* Clean abort requested by the callback. */
				error = 0;
				/* fall thru */
			default:
				goto err;
			}
			if (scrub_excessive_errors(ctx)) {
				si->aborted = true;
				goto out;
			}
		}

		/* Chunk finished cleanly; reset the retry budget. */
		stale_count = 0;
igrp_retry:
		error = -xfrog_inumbers(&ctx->mnt, ireq);
	}

err:
	if (error) {
		str_liberror(ctx, error, descr);
		si->aborted = true;
	}
out:
	free(ireq);
	free(breq);
}
231
59f79e0a
DW
232/*
233 * Scan all the inodes in a filesystem. On error, this function will log
234 * an error message and return -1.
235 */
236int
237scrub_scan_all_inodes(
372d4ba9 238 struct scrub_ctx *ctx,
59f79e0a 239 scrub_inode_iter_fn fn,
372d4ba9
DW
240 void *arg)
241{
59f79e0a
DW
242 struct scan_inodes si = {
243 .fn = fn,
244 .arg = arg,
245 };
372d4ba9
DW
246 xfs_agnumber_t agno;
247 struct workqueue wq;
248 int ret;
249
baed134d 250 ret = -workqueue_create(&wq, (struct xfs_mount *)ctx,
372d4ba9
DW
251 scrub_nproc_workqueue(ctx));
252 if (ret) {
9d57cbfc 253 str_liberror(ctx, ret, _("creating bulkstat workqueue"));
59f79e0a 254 return -1;
372d4ba9
DW
255 }
256
3f9efb2e 257 for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
baed134d 258 ret = -workqueue_add(&wq, scan_ag_inodes, agno, &si);
372d4ba9 259 if (ret) {
59f79e0a 260 si.aborted = true;
9d57cbfc 261 str_liberror(ctx, ret, _("queueing bulkstat work"));
372d4ba9
DW
262 break;
263 }
264 }
265
baed134d 266 ret = -workqueue_terminate(&wq);
71296cf8 267 if (ret) {
59f79e0a 268 si.aborted = true;
71296cf8
DW
269 str_liberror(ctx, ret, _("finishing bulkstat work"));
270 }
372d4ba9
DW
271 workqueue_destroy(&wq);
272
59f79e0a 273 return si.aborted ? -1 : 0;
372d4ba9
DW
274}
275
59f79e0a 276/* Open a file by handle, returning either the fd or -1 on error. */
372d4ba9 277int
59f79e0a 278scrub_open_handle(
372d4ba9
DW
279 struct xfs_handle *handle)
280{
281 return open_by_fshandle(handle, sizeof(*handle),
282 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
283}