// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/statvfs.h>
#include "platform_defs.h"
#include "xfs_arch.h"
#include "handle.h"
#include "libfrog/paths.h"
#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "inodes.h"
#include "descr.h"
#include "libfrog/fsgeom.h"
#include "libfrog/bulkstat.h"
#include "libfrog/handle_priv.h"
#include "bitops.h"
#include "libfrog/bitmask.h"

/*
 * Iterate a range of inodes.
 *
 * This is a little more involved than repeatedly asking BULKSTAT for a
 * buffer's worth of stat data for some number of inodes.  We want to scan as
 * many of the inodes as the inobt thinks there are, so we use the INUMBERS
 * ioctl to walk all the inobt records in the filesystem and spawn a worker to
 * bulkstat and iterate.  The worker starts with an inumbers record that can
 * look like this:
 *
 * {startino = S, allocmask = 0b11011}
 *
 * Given a starting inumber S and count C=64, bulkstat will return a sorted
 * array of stat information.  The bs_ino of those array elements can look
 * like any of the following:
 *
 * 0. [S, S+1, S+3, S+4]
 * 1. [S+e, S+e+1, S+e+3, S+e+4, S+e+C+1...], where e >= 0
 * 2. [S+e+n], where n >= 0
 * 3. []
 * 4. [], errno == EFSCORRUPTED
 *
 * We know that bulkstat scanned the entire inode range between S and bs_ino
 * of the last array element, even though it only fills out an array element
 * for allocated inodes.  Therefore, we can say in cases 0-2 that S was
 * filled, even if there is no bstat[] record for S.  In turn, we can create
 * a bitmask of inodes that we have seen, and set bits 0 through
 * (bstat[-1].bs_ino - S), being careful not to set any bits past S+C.
 *
 * In case (0) we find that the seen mask matches the inumber record exactly,
 * so the caller can walk the stat records and move on.  In case (1) this is
 * also true, but we must be careful to reduce the array length to avoid
 * scanning inodes that are not in the inumber chunk.  In case (3) we
 * conclude that there were no inodes left to scan and terminate.
 *
 * In cases (2) and (4) we don't know why bulkstat returned fewer than C
 * elements.  We might have found the end of the filesystem, or the kernel
 * might have found a corrupt inode and stopped.  This we must investigate by
 * trying to fill out the rest of the bstat array starting with the next
 * inumber after the last bstat array element filled, and continuing until S'
 * is beyond S0 + C, or the array is full.  Each time we succeed in loading
 * new records, the kernel increases S' for us; if instead we encounter case
 * (4), we can increment S' ourselves.
 *
 * Inodes that are set in the allocmask but not set in the seen mask are the
 * corrupt inodes.  For each of these cases, we try to populate the bulkstat
 * array one inode at a time.  If the kernel returns a matching record we can
 * use it; if instead we receive an error, we synthesize enough of a record
 * to be able to run online scrub by handle.
 *
 * If the iteration function returns ESTALE, that means that the inode has
 * been deleted and possibly recreated since the BULKSTAT call.  We will
 * refresh the stat information and try again up to 30 times before reporting
 * the staleness as an error.
 */

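/*
 * Worked example (restating the record above): suppose S = 128 and
 * allocmask = 0b11011, i.e. inodes 128, 129, 131, and 132 are allocated and
 * 130 is free.  If bulkstat returns [128, 129, 131, 132], the last bs_ino is
 * S+4, so the seen mask covers bits 0-4 (0b11111); every allocmask bit falls
 * inside the seen mask and nothing needs to be single-stepped.  If bulkstat
 * instead stopped early at [128, 129], allocmask bits 3 and 4 would remain
 * unseen, and inodes 131 and 132 would be loaded (or synthesized) one at a
 * time.
 */
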
/*
 * Return the inumber of the highest inode in the bulkstat data, assuming the
 * records are sorted in inumber order.
 */
static inline uint64_t last_bstat_ino(const struct xfs_bulkstat_req *b)
{
	return b->hdr.ocount ? b->bulkstat[b->hdr.ocount - 1].bs_ino : 0;
}

/*
 * Deduce the bitmask of the inodes in inums that were seen by bulkstat.  If
 * the inode is present in the bstat array this is trivially true; or if it is
 * not in the array but higher inumbers are present, then it was freed.
 */
static __u64
seen_mask_from_bulkstat(
	const struct xfs_inumbers	*inums,
	__u64				breq_startino,
	const struct xfs_bulkstat_req	*breq)
{
	const __u64		limit_ino =
		inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
	const __u64		last = last_bstat_ino(breq);
	__u64			ret = 0;
	int			i, maxi;

	/* Ignore the bulkstat results if they don't cover inumbers */
	if (breq_startino > limit_ino || last < inums->xi_startino)
		return 0;

	maxi = min(LIBFROG_BULKSTAT_CHUNKSIZE, last - inums->xi_startino + 1);
	for (i = breq_startino - inums->xi_startino; i < maxi; i++)
		ret |= 1ULL << i;

	return ret;
}

/*
 * Try to fill the rest of orig_breq with bulkstat data by re-running bulkstat
 * with increasing start_ino until we either hit the end of the inumbers info
 * or fill up the bstat array with something.  Returns a bitmask of the inodes
 * within inums that were filled by the bulkstat requests.
 */
static __u64
bulkstat_the_rest(
	struct scrub_ctx		*ctx,
	const struct xfs_inumbers	*inums,
	struct xfs_bulkstat_req		*orig_breq,
	int				orig_error)
{
	struct xfs_bulkstat_req		*new_breq;
	struct xfs_bulkstat		*old_bstat =
		&orig_breq->bulkstat[orig_breq->hdr.ocount];
	const __u64			limit_ino =
		inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
	__u64				start_ino = orig_breq->hdr.ino;
	__u64				seen_mask = 0;
	int				error;

	assert(orig_breq->hdr.ocount < orig_breq->hdr.icount);

	/*
	 * If the first bulkstat returned a corruption error, that means
	 * start_ino is corrupt.  Restart instead at the next inumber.
	 */
	if (orig_error == EFSCORRUPTED)
		start_ino++;
	if (start_ino >= limit_ino)
		return 0;

	error = -xfrog_bulkstat_alloc_req(
			orig_breq->hdr.icount - orig_breq->hdr.ocount,
			start_ino, &new_breq);
	if (error)
		return error;
	new_breq->hdr.flags = orig_breq->hdr.flags;

	do {
		/*
		 * Fill the new bulkstat request with stat data starting at
		 * start_ino.
		 */
		error = -xfrog_bulkstat(&ctx->mnt, new_breq);
		if (error == EFSCORRUPTED) {
			/*
			 * start_ino is corrupt, increment and try the next
			 * inode.
			 */
			start_ino++;
			new_breq->hdr.ino = start_ino;
			continue;
		}
		if (error) {
			/*
			 * Any other error means the caller falls back to
			 * single stepping.
			 */
			break;
		}
		if (new_breq->hdr.ocount == 0)
			break;

		/* Copy new results to the original bstat buffer */
		memcpy(old_bstat, new_breq->bulkstat,
				new_breq->hdr.ocount * sizeof(struct xfs_bulkstat));
		orig_breq->hdr.ocount += new_breq->hdr.ocount;
		old_bstat += new_breq->hdr.ocount;
		seen_mask |= seen_mask_from_bulkstat(inums, start_ino,
				new_breq);

		new_breq->hdr.icount -= new_breq->hdr.ocount;
		start_ino = new_breq->hdr.ino;
	} while (new_breq->hdr.icount > 0 && new_breq->hdr.ino < limit_ino);

	free(new_breq);
	return seen_mask;
}

/* Compare two bulkstat records by inumber. */
static int
compare_bstat(
	const void		*a,
	const void		*b)
{
	const struct xfs_bulkstat	*ba = a;
	const struct xfs_bulkstat	*bb = b;

	return cmp_int(ba->bs_ino, bb->bs_ino);
}

/*
 * Walk the xi_allocmask looking for set bits that aren't present in
 * the fill mask.  For each such inode, fill the entries at the end of
 * the array with stat information one at a time, synthesizing them if
 * necessary.  At this point, (xi_allocmask & ~seen_mask) should be the
 * corrupt inodes.
 */
static void
bulkstat_single_step(
	struct scrub_ctx		*ctx,
	const struct xfs_inumbers	*inumbers,
	uint64_t			seen_mask,
	struct xfs_bulkstat_req		*breq)
{
	struct xfs_bulkstat		*bs = NULL;
	int				i;
	int				error;

	for (i = 0; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
		/*
		 * Don't single-step if inumbers said it wasn't allocated or
		 * bulkstat actually filled it.
		 */
		if (!(inumbers->xi_allocmask & (1ULL << i)))
			continue;
		if (seen_mask & (1ULL << i))
			continue;

		assert(breq->hdr.ocount < LIBFROG_BULKSTAT_CHUNKSIZE);

		if (!bs)
			bs = &breq->bulkstat[breq->hdr.ocount];

		/*
		 * Didn't get the desired stat data and we've hit the end of
		 * the returned data.  We can't distinguish between the inode
		 * being freed vs. the inode being too corrupt to load, so try
		 * a bulkstat single to see if we can load the inode.
		 */
		error = -xfrog_bulkstat_single(&ctx->mnt,
				inumbers->xi_startino + i, breq->hdr.flags, bs);
		switch (error) {
		case ENOENT:
			/*
			 * This inode wasn't found, and no results were
			 * returned.  We've likely hit the end of the
			 * filesystem, but we'll move on to the next inode in
			 * the mask for the sake of caution.
			 */
			continue;
		case 0:
			/*
			 * If a result was returned but it wasn't the inode
			 * we were looking for, then the missing inode was
			 * freed.  Move on to the next inode in the mask.
			 */
			if (bs->bs_ino != inumbers->xi_startino + i)
				continue;
			break;
		default:
			/*
			 * Some error happened.  Synthesize a bulkstat record
			 * so that phase3 can try to see if there's a corrupt
			 * inode that needs repairing.
			 */
			memset(bs, 0, sizeof(struct xfs_bulkstat));
			bs->bs_ino = inumbers->xi_startino + i;
			bs->bs_blksize = ctx->mnt_sv.f_frsize;
			break;
		}

		breq->hdr.ocount++;
		bs++;
	}

	/* If we added any entries, re-sort the array. */
	if (bs)
		qsort(breq->bulkstat, breq->hdr.ocount,
				sizeof(struct xfs_bulkstat), compare_bstat);
}

/* Return the inumber of the highest allocated inode in the inumbers data. */
static inline uint64_t last_allocmask_ino(const struct xfs_inumbers *i)
{
	return i->xi_startino + xfrog_highbit64(i->xi_allocmask);
}

/*
 * Run bulkstat on an entire inode allocation group, then check that we got
 * exactly the inodes we expected.  If not, load them one at a time (or fake
 * it) into the bulkstat data.
 */
static void
bulkstat_for_inumbers(
	struct scrub_ctx		*ctx,
	const struct xfs_inumbers	*inumbers,
	struct xfs_bulkstat_req		*breq)
{
	const uint64_t		limit_ino =
		inumbers->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
	uint64_t		seen_mask = 0;
	int			i;
	int			error;

	assert(inumbers->xi_allocmask != 0);

	/* First we try regular bulkstat, for speed. */
	breq->hdr.ino = inumbers->xi_startino;
	error = -xfrog_bulkstat(&ctx->mnt, breq);
	if (!error) {
		if (!breq->hdr.ocount)
			return;
		seen_mask |= seen_mask_from_bulkstat(inumbers,
				inumbers->xi_startino, breq);
	}

	/*
	 * If the last allocated inode as reported by inumbers is higher than
	 * the last inode reported by bulkstat, two things could have
	 * happened.  Either all the inodes at the high end of the cluster
	 * were freed since the inumbers call; or bulkstat encountered a
	 * corrupt inode and returned early.  Try to bulkstat the rest of the
	 * array.
	 */
	if (last_allocmask_ino(inumbers) > last_bstat_ino(breq))
		seen_mask |= bulkstat_the_rest(ctx, inumbers, breq, error);

	/*
	 * Bulkstat might return inodes beyond xi_startino + CHUNKSIZE.
	 * Reduce ocount to ignore inodes not described by the inumbers
	 * record.
	 */
	for (i = breq->hdr.ocount - 1; i >= 0; i--) {
		if (breq->bulkstat[i].bs_ino < limit_ino)
			break;
		breq->hdr.ocount--;
	}

	/*
	 * Fill in any missing inodes that are mentioned in the alloc mask but
	 * weren't previously seen by bulkstat.  These are the corrupt inodes.
	 */
	bulkstat_single_step(ctx, inumbers, seen_mask, breq);
}

/* BULKSTAT wrapper routines. */
struct scan_inodes {
	struct workqueue	wq_bulkstat;
	scrub_inode_iter_fn	fn;
	void			*arg;
	unsigned int		nr_threads;
	bool			aborted;
};

/*
 * A single unit of inode scan work.  This contains a pointer to the parent
 * information, followed by an INUMBERS request structure, followed by a
 * BULKSTAT request structure.  The last two are VLAs, so we can't represent
 * them here.
 */
struct scan_ichunk {
	struct scan_inodes	*si;
};

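/*
 * Memory layout of a scan_ichunk allocation (see alloc_ichunk below):
 *
 *	struct scan_ichunk
 *	struct xfs_inumbers_req		(XFS_INUMBERS_REQ_SIZE(1) bytes)
 *	struct xfs_bulkstat_req		(XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE) bytes)
 *
 * ichunk_to_inumbers() and ichunk_to_bulkstat() compute pointers into this
 * single allocation.
 */
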
static inline struct xfs_inumbers_req *
ichunk_to_inumbers(
	struct scan_ichunk	*ichunk)
{
	char			*p = (char *)ichunk;

	return (struct xfs_inumbers_req *)(p + sizeof(struct scan_ichunk));
}

static inline struct xfs_bulkstat_req *
ichunk_to_bulkstat(
	struct scan_ichunk	*ichunk)
{
	char			*p = (char *)ichunk_to_inumbers(ichunk);

	return (struct xfs_bulkstat_req *)(p + XFS_INUMBERS_REQ_SIZE(1));
}

static inline int
alloc_ichunk(
	struct scrub_ctx	*ctx,
	struct scan_inodes	*si,
	uint32_t		agno,
	uint64_t		startino,
	struct scan_ichunk	**ichunkp)
{
	struct scan_ichunk	*ichunk;
	struct xfs_inumbers_req	*ireq;
	struct xfs_bulkstat_req	*breq;

	ichunk = calloc(1, sizeof(struct scan_ichunk) +
			   XFS_INUMBERS_REQ_SIZE(1) +
			   XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE));
	if (!ichunk)
		return -errno;

	ichunk->si = si;

	ireq = ichunk_to_inumbers(ichunk);
	ireq->hdr.icount = 1;
	ireq->hdr.ino = startino;
	ireq->hdr.agno = agno;
	ireq->hdr.flags |= XFS_BULK_IREQ_AGNO;

	breq = ichunk_to_bulkstat(ichunk);
	breq->hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE;

	/* Scan the metadata directory tree too. */
	if (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_METADIR)
		breq->hdr.flags |= XFS_BULK_IREQ_METADIR;

	*ichunkp = ichunk;
	return 0;
}

static int
render_ino_from_bulkstat(
	struct scrub_ctx	*ctx,
	char			*buf,
	size_t			buflen,
	void			*data)
{
	struct xfs_bulkstat	*bstat = data;

	return scrub_render_ino_descr(ctx, buf, buflen, bstat->bs_ino,
			bstat->bs_gen, NULL);
}

static int
render_inumbers_from_agno(
	struct scrub_ctx	*ctx,
	char			*buf,
	size_t			buflen,
	void			*data)
{
	xfs_agnumber_t		*agno = data;

	return snprintf(buf, buflen, _("dev %d:%d AG %u inodes"),
			major(ctx->fsinfo.fs_datadev),
			minor(ctx->fsinfo.fs_datadev),
			*agno);
}

/*
 * Call BULKSTAT for information on a single chunk's worth of inodes and call
 * our iterator function.  We'll try to fill the bulkstat information in
 * batches, but we can also detect iget failures.
 */
static void
scan_ag_bulkstat(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
	struct xfs_handle	handle;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
	struct scan_ichunk	*ichunk = arg;
	struct xfs_inumbers_req	*ireq = ichunk_to_inumbers(ichunk);
	struct xfs_bulkstat_req	*breq = ichunk_to_bulkstat(ichunk);
	struct scan_inodes	*si = ichunk->si;
	struct xfs_bulkstat	*bs = &breq->bulkstat[0];
	struct xfs_inumbers	*inumbers = &ireq->inumbers[0];
	uint64_t		last_ino = 0;
	int			i;
	int			error;
	int			stale_count = 0;
	DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat);
	DEFINE_DESCR(dsc_inumbers, ctx, render_inumbers_from_agno);

	descr_set(&dsc_inumbers, &agno);
	handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len);
retry:
	bulkstat_for_inumbers(ctx, inumbers, breq);

	/* Iterate all the inodes. */
	for (i = 0; !si->aborted && i < breq->hdr.ocount; i++, bs++) {
		uint64_t	scan_ino = bs->bs_ino;

		/* ensure forward progress if we retried */
		if (scan_ino < last_ino)
			continue;

		descr_set(&dsc_bulkstat, bs);
		handle_from_bulkstat(&handle, bs);
		error = si->fn(ctx, &handle, bs, si->arg);
		switch (error) {
		case 0:
			break;
		case ESTALE: {
			stale_count++;
			if (stale_count < 30) {
				uint64_t	old_startino;

				ireq->hdr.ino = old_startino =
					inumbers->xi_startino;
				error = -xfrog_inumbers(&ctx->mnt, ireq);
				if (error)
					goto err;
				/*
				 * Retry only if inumbers returns the same
				 * inobt record as the previous record and
				 * there are allocated inodes in it.
				 */
				if (!si->aborted &&
				    ireq->hdr.ocount > 0 &&
				    inumbers->xi_alloccount > 0 &&
				    inumbers->xi_startino == old_startino)
					goto retry;
				goto out;
			}
			str_info(ctx, descr_render(&dsc_bulkstat),
_("Changed too many times during scan; giving up."));
			si->aborted = true;
			goto out;
		}
		case ECANCELED:
			error = 0;
			fallthrough;
		default:
			goto err;
		}
		if (scrub_excessive_errors(ctx)) {
			si->aborted = true;
			goto out;
		}
		last_ino = scan_ino;
	}

err:
	if (error) {
		str_liberror(ctx, error, descr_render(&dsc_bulkstat));
		si->aborted = true;
	}
out:
	free(ichunk);
}

/*
 * Call INUMBERS for information about inode chunks, then queue the inumbers
 * responses in the bulkstat workqueue.  This helps us maximize CPU
 * parallelism if the filesystem AGs are not evenly loaded.
 */
static void
scan_ag_inumbers(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
	struct scan_ichunk	*ichunk = NULL;
	struct scan_inodes	*si = arg;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
	struct xfs_inumbers_req	*ireq;
	uint64_t		nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
	int			error;
	DEFINE_DESCR(dsc, ctx, render_inumbers_from_agno);

	descr_set(&dsc, &agno);

	error = alloc_ichunk(ctx, si, agno, 0, &ichunk);
	if (error)
		goto err;
	ireq = ichunk_to_inumbers(ichunk);

	/* Find the inode chunk & alloc mask */
	error = -xfrog_inumbers(&ctx->mnt, ireq);
	while (!error && !si->aborted && ireq->hdr.ocount > 0) {
		/*
		 * Make sure that we always make forward progress while we
		 * scan the inode btree.
		 */
		if (nextino > ireq->inumbers[0].xi_startino) {
			str_corrupt(ctx, descr_render(&dsc),
	_("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
				cvt_ino_to_agino(&ctx->mnt, nextino),
				cvt_ino_to_agino(&ctx->mnt,
						ireq->inumbers[0].xi_startino));
			si->aborted = true;
			break;
		}
		nextino = ireq->hdr.ino;

		if (ireq->inumbers[0].xi_alloccount == 0) {
			/*
			 * We can have totally empty inode chunks on
			 * filesystems where there are more than 64 inodes per
			 * block.  Skip these.
			 */
			;
		} else if (si->nr_threads > 0) {
			/* Queue this inode chunk on the bulkstat workqueue. */
			error = -workqueue_add(&si->wq_bulkstat,
					scan_ag_bulkstat, agno, ichunk);
			if (error) {
				si->aborted = true;
				str_liberror(ctx, error,
						_("queueing bulkstat work"));
				goto out;
			}
			ichunk = NULL;
		} else {
			/*
			 * Only one thread, call bulkstat directly.  Remember,
			 * ichunk is freed by the worker before returning.
			 */
			scan_ag_bulkstat(wq, agno, ichunk);
			ichunk = NULL;
			if (si->aborted)
				break;
		}

		if (!ichunk) {
			error = alloc_ichunk(ctx, si, agno, nextino, &ichunk);
			if (error)
				goto err;
		}
		ireq = ichunk_to_inumbers(ichunk);

		error = -xfrog_inumbers(&ctx->mnt, ireq);
	}

err:
	if (error) {
		str_liberror(ctx, error, descr_render(&dsc));
		si->aborted = true;
	}
out:
	if (ichunk)
		free(ichunk);
}

/*
 * Scan all the inodes in a filesystem, including metadata directory files and
 * broken files.  On error, this function will log an error message and return
 * -1.
 */
int
scrub_scan_all_inodes(
	struct scrub_ctx	*ctx,
	scrub_inode_iter_fn	fn,
	void			*arg)
{
	struct scan_inodes	si = {
		.fn		= fn,
		.arg		= arg,
		.nr_threads	= scrub_nproc_workqueue(ctx),
	};
	xfs_agnumber_t		agno;
	struct workqueue	wq_inumbers;
	unsigned int		max_bulkstat;
	int			ret;

	/*
	 * The bulkstat workqueue should queue at most one inobt block's worth
	 * of inode chunk records per worker thread.  If we're running in
	 * single thread mode (nr_threads==0) then we skip the workqueues.
	 */
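	/* An inobt record is 16 bytes on disk, hence the blocksize / 16. */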
	max_bulkstat = si.nr_threads * (ctx->mnt.fsgeom.blocksize / 16);

	ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx,
			si.nr_threads, max_bulkstat);
	if (ret) {
		str_liberror(ctx, ret, _("creating bulkstat workqueue"));
		return -1;
	}

	ret = -workqueue_create(&wq_inumbers, (struct xfs_mount *)ctx,
			si.nr_threads);
	if (ret) {
		str_liberror(ctx, ret, _("creating inumbers workqueue"));
		si.aborted = true;
		goto kill_bulkstat;
	}

	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
		ret = -workqueue_add(&wq_inumbers, scan_ag_inumbers, agno, &si);
		if (ret) {
			si.aborted = true;
			str_liberror(ctx, ret, _("queueing inumbers work"));
			break;
		}
	}

	ret = -workqueue_terminate(&wq_inumbers);
	if (ret) {
		si.aborted = true;
		str_liberror(ctx, ret, _("finishing inumbers work"));
	}
	workqueue_destroy(&wq_inumbers);

kill_bulkstat:
	ret = -workqueue_terminate(&si.wq_bulkstat);
	if (ret) {
		si.aborted = true;
		str_liberror(ctx, ret, _("finishing bulkstat work"));
	}
	workqueue_destroy(&si.wq_bulkstat);

	return si.aborted ? -1 : 0;
}

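/*
 * Illustrative usage sketch (not part of this file): a caller counts every
 * inode by handing a minimal scrub_inode_iter_fn callback to
 * scrub_scan_all_inodes().  The callback name and counter are hypothetical;
 * only the signature and return conventions come from the call sites above.
 *
 *	static int
 *	count_one_inode(
 *		struct scrub_ctx	*ctx,
 *		struct xfs_handle	*handle,
 *		struct xfs_bulkstat	*bstat,
 *		void			*arg)
 *	{
 *		uint64_t		*counter = arg;
 *
 *		(*counter)++;
 *		return 0;
 *	}
 *
 *	uint64_t	nr = 0;
 *
 *	if (scrub_scan_all_inodes(ctx, count_one_inode, &nr) < 0)
 *		...handle the error...
 *
 * Per scan_ag_bulkstat above, returning ECANCELED ends the current batch
 * without logging an error, and returning ESTALE makes the scanner re-fetch
 * the inode chunk and retry (up to 30 times).
 */
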
struct user_bulkstat {
	struct scan_inodes	*si;

	/* vla, must be last */
	struct xfs_bulkstat_req	breq;
};

/* Iterate all the user files returned by a bulkstat. */
static void
scan_user_files(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
	struct xfs_handle	handle;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
	struct user_bulkstat	*ureq = arg;
	struct xfs_bulkstat	*bs = &ureq->breq.bulkstat[0];
	struct scan_inodes	*si = ureq->si;
	int			i;
	int			error = 0;
	DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat);

	handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len);

	for (i = 0; !si->aborted && i < ureq->breq.hdr.ocount; i++, bs++) {
		descr_set(&dsc_bulkstat, bs);
		handle_from_bulkstat(&handle, bs);
		error = si->fn(ctx, &handle, bs, si->arg);
		switch (error) {
		case 0:
			break;
		case ESTALE:
		case ECANCELED:
			error = 0;
			fallthrough;
		default:
			goto err;
		}
		if (scrub_excessive_errors(ctx)) {
			si->aborted = true;
			goto out;
		}
	}

err:
	if (error) {
		str_liberror(ctx, error, descr_render(&dsc_bulkstat));
		si->aborted = true;
	}
out:
	free(ureq);
}

/*
 * Run one step of the user files bulkstat scan and schedule background
 * processing of the stat data returned.  Returns 1 to keep going, or 0 to
 * stop.
 */
static int
scan_user_bulkstat(
	struct scrub_ctx	*ctx,
	struct scan_inodes	*si,
	uint64_t		*cursor)
{
	struct user_bulkstat	*ureq;
	const char		*what = NULL;
	int			ret;

	ureq = calloc(1, sizeof(struct user_bulkstat) +
			XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE));
	if (!ureq) {
		ret = ENOMEM;
		what = _("creating bulkstat work item");
		goto err;
	}
	ureq->si = si;
	ureq->breq.hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE;
	ureq->breq.hdr.ino = *cursor;

	ret = -xfrog_bulkstat(&ctx->mnt, &ureq->breq);
	if (ret) {
		what = _("user files bulkstat");
		goto err_ureq;
	}
	if (ureq->breq.hdr.ocount == 0) {
		*cursor = NULLFSINO;
		free(ureq);
		return 0;
	}

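	/*
	 * The kernel advances hdr.ino past the last inode it examined, so
	 * save it as the resume cursor for the next bulkstat step.
	 */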
	*cursor = ureq->breq.hdr.ino;

	/* scan_user_files frees ureq; do not access it */
	ret = -workqueue_add(&si->wq_bulkstat, scan_user_files, 0, ureq);
	if (ret) {
		what = _("queueing bulkstat work");
		goto err_ureq;
	}
	ureq = NULL;

	return 1;

err_ureq:
	free(ureq);
err:
	si->aborted = true;
	str_liberror(ctx, ret, what);
	return 0;
}

/*
 * Scan all the user files in a filesystem in inumber order.  On error, this
 * function will log an error message and return -1.
 */
int
scrub_scan_user_files(
	struct scrub_ctx	*ctx,
	scrub_inode_iter_fn	fn,
	void			*arg)
{
	struct scan_inodes	si = {
		.fn		= fn,
		.arg		= arg,
		.nr_threads	= scrub_nproc_workqueue(ctx),
	};
	uint64_t		ino = 0;
	int			ret;

	/* Queue up to four bulkstat result sets per thread. */
	ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx,
			si.nr_threads, si.nr_threads * 4);
	if (ret) {
		str_liberror(ctx, ret, _("creating bulkstat workqueue"));
		return -1;
	}

	while ((ret = scan_user_bulkstat(ctx, &si, &ino)) == 1) {
		/* empty */
	}

	ret = -workqueue_terminate(&si.wq_bulkstat);
	if (ret) {
		si.aborted = true;
		str_liberror(ctx, ret, _("finishing bulkstat work"));
	}
	workqueue_destroy(&si.wq_bulkstat);

	return si.aborted ? -1 : 0;
}

/* Open a file by handle, returning either the fd or -1 on error. */
int
scrub_open_handle(
	struct xfs_handle	*handle)
{
	return open_by_fshandle(handle, sizeof(*handle),
			O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
}