1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <pthread.h>
10 #include <sys/statvfs.h>
11 #include "platform_defs.h"
12 #include "xfs_arch.h"
13 #include "handle.h"
14 #include "libfrog/paths.h"
15 #include "libfrog/workqueue.h"
16 #include "xfs_scrub.h"
17 #include "common.h"
18 #include "inodes.h"
19 #include "descr.h"
20 #include "libfrog/fsgeom.h"
21 #include "libfrog/bulkstat.h"
22 #include "libfrog/handle_priv.h"
23 #include "bitops.h"
24 #include "libfrog/bitmask.h"
25
26 /*
27 * Iterate a range of inodes.
28 *
29 * This is a little more involved than repeatedly asking BULKSTAT for a
30 * buffer's worth of stat data for some number of inodes. We want to scan as
31 * many of the inodes as the inobt thinks there are, so we use the INUMBERS
32 * ioctl to walk all the inobt records in the filesystem and spawn a worker to
33 * bulkstat and iterate. The worker starts with an inumbers record that can
34 * look like this:
35 *
36 * {startino = S, allocmask = 0b11011}
37 *
38 * Given a starting inumber S and count C=64, bulkstat will return a sorted
39 * array of stat information. The bs_ino of those array elements can look like
40 * any of the following:
41 *
42 * 0. [S, S+1, S+3, S+4]
43 * 1. [S+e, S+e+1, S+e+3, S+e+4, S+e+C+1...], where e >= 0
44 * 2. [S+e+n], where n >= 0
45 * 3. []
46 * 4. [], errno == EFSCORRUPTED
47 *
48 * We know that bulkstat scanned the entire inode range between S and bs_ino of
49 * the last array element, even though it only fills out an array element for
50 * allocated inodes. Therefore, we can say in cases 0-2 that S was filled,
51 * even if there is no bstat[] record for S. In turn, we can create a bitmask
52 * of inodes that we have seen, and set bits 0 through (bstat[-1].bs_ino - S),
53 * being careful not to set any bits past S+C.
54 *
55 * In case (0) we find that the seen mask matches the inumber record
56 * exactly, so the caller can walk the stat records and move on. In case (1)
57 * this is also true, but we must be careful to reduce the array length to
58 * avoid scanning inodes that are not in the inumber chunk. In case (3) we
59 * conclude that there were no inodes left to scan and terminate.
60 *
61 * In case (2) and (4) we don't know why bulkstat returned fewer than C
62 * elements. We might have found the end of the filesystem, or the kernel
63 * might have found a corrupt inode and stopped. We must investigate this by
64 * trying to fill out the rest of the bstat array, starting with the next
65 * inumber after the last filled bstat array element and continuing until the
66 * bulkstat cursor S' goes beyond S + C or the array is full. Each time we
67 * succeed in loading new records, the kernel increases S' for us; if instead
68 * we encounter case (4), we can increment S' ourselves.
69 *
70 * Inodes that are set in the allocmask but not set in the seen mask are the
71 * corrupt inodes. For each such inode, we try to populate the bulkstat
72 * array one inode at a time. If the kernel returns a matching record we can
73 * use it; if instead we receive an error, we synthesize enough of a record
74 * to be able to run online scrub by handle.
75 *
76 * If the iteration function returns ESTALE, that means that the inode has
77 * been deleted and possibly recreated since the BULKSTAT call. We will
78 * refresh the stat information and try again up to 30 times before reporting
79 * the staleness as an error.
80 */
81
82 /*
83 * Return the inumber of the highest inode in the bulkstat data, assuming the
84 * records are sorted in inumber order.
85 */
86 static inline uint64_t last_bstat_ino(const struct xfs_bulkstat_req *b)
87 {
88 return b->hdr.ocount ? b->bulkstat[b->hdr.ocount - 1].bs_ino : 0;
89 }
90
91 /*
92 * Deduce the bitmask of the inodes in inums that were seen by bulkstat. If
93 * the inode is present in the bstat array, this is trivially true; if it is
94 * not in the array but higher inumbers are present, then it was freed.
95 */
96 static __u64
97 seen_mask_from_bulkstat(
98 const struct xfs_inumbers *inums,
99 __u64 breq_startino,
100 const struct xfs_bulkstat_req *breq)
101 {
102 const __u64 limit_ino =
103 inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
104 const __u64 last = last_bstat_ino(breq);
105 __u64 ret = 0;
106 int i, maxi;
107
108 /* Ignore the bulkstat results if they don't cover inumbers */
109 if (breq_startino > limit_ino || last < inums->xi_startino)
110 return 0;
111
112 maxi = min(LIBFROG_BULKSTAT_CHUNKSIZE, last - inums->xi_startino + 1);
113 for (i = breq_startino - inums->xi_startino; i < maxi; i++)
114 ret |= 1ULL << i;
115
116 return ret;
117 }
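
/*
 * Worked example (illustration only): for the inumbers record
 * {startino = S, allocmask = 0b11011} from the comment at the top of this
 * file, a bulkstat that starts at S and returns [S, S+1, S+3, S+4] gives
 * last == S+4, so maxi == 5 and the seen mask is 0b11111 -- every inumber
 * up to S+4 was covered, including the freed one at S+2. In that case
 * (xi_allocmask & ~seen_mask) == 0 and nothing needs single-stepping.
 */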
118
119 /*
120 * Try to fill the rest of orig_breq with bulkstat data by re-running bulkstat
121 * with increasing start_ino until we either hit the end of the inumbers info
122 * or fill up the bstat array with something. Returns a bitmask of the inodes
123 * within inums that were filled by the bulkstat requests.
124 */
125 static __u64
126 bulkstat_the_rest(
127 struct scrub_ctx *ctx,
128 const struct xfs_inumbers *inums,
129 struct xfs_bulkstat_req *orig_breq,
130 int orig_error)
131 {
132 struct xfs_bulkstat_req *new_breq;
133 struct xfs_bulkstat *old_bstat =
134 &orig_breq->bulkstat[orig_breq->hdr.ocount];
135 const __u64 limit_ino =
136 inums->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
137 __u64 start_ino = orig_breq->hdr.ino;
138 __u64 seen_mask = 0;
139 int error;
140
141 assert(orig_breq->hdr.ocount < orig_breq->hdr.icount);
142
143 /*
144 * If the first bulkstat returned a corruption error, that means
145 * start_ino is corrupt. Restart instead at the next inumber.
146 */
147 if (orig_error == EFSCORRUPTED)
148 start_ino++;
149 if (start_ino >= limit_ino)
150 return 0;
151
152 error = -xfrog_bulkstat_alloc_req(
153 orig_breq->hdr.icount - orig_breq->hdr.ocount,
154 start_ino, &new_breq);
155 if (error)
156 return 0; /* fall back to single-stepping the unseen inodes */
157 new_breq->hdr.flags = orig_breq->hdr.flags;
158
159 do {
160 /*
161 * Fill the new bulkstat request with stat data starting at
162 * start_ino.
163 */
164 error = -xfrog_bulkstat(&ctx->mnt, new_breq);
165 if (error == EFSCORRUPTED) {
166 /*
167 * start_ino is corrupt, increment and try the next
168 * inode.
169 */
170 start_ino++;
171 new_breq->hdr.ino = start_ino;
172 continue;
173 }
174 if (error) {
175 /*
176 * Any other error means the caller falls back to
177 * single stepping.
178 */
179 break;
180 }
181 if (new_breq->hdr.ocount == 0)
182 break;
183
184 /* Copy new results to the original bstat buffer */
185 memcpy(old_bstat, new_breq->bulkstat,
186 new_breq->hdr.ocount * sizeof(struct xfs_bulkstat));
187 orig_breq->hdr.ocount += new_breq->hdr.ocount;
188 old_bstat += new_breq->hdr.ocount;
189 seen_mask |= seen_mask_from_bulkstat(inums, start_ino,
190 new_breq);
191
192 new_breq->hdr.icount -= new_breq->hdr.ocount;
193 start_ino = new_breq->hdr.ino;
194 } while (new_breq->hdr.icount > 0 && new_breq->hdr.ino < limit_ino);
195
196 free(new_breq);
197 return seen_mask;
198 }
199
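/* Branchless three-way comparison: yields -1, 0, or 1 without overflow. */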
200 #define cmp_int(l, r) ((l > r) - (l < r))
201
202 /* Compare two bulkstat records by inumber. */
203 static int
204 compare_bstat(
205 const void *a,
206 const void *b)
207 {
208 const struct xfs_bulkstat *ba = a;
209 const struct xfs_bulkstat *bb = b;
210
211 return cmp_int(ba->bs_ino, bb->bs_ino);
212 }
213
214 /*
215 * Walk the xi_allocmask looking for set bits that aren't present in
216 * the seen mask. For each such inode, fill the entries at the end of
217 * the array with stat information one at a time, synthesizing them if
218 * necessary. At this point, (xi_allocmask & ~seen_mask) should be the
219 * corrupt inodes.
220 */
221 static void
222 bulkstat_single_step(
223 struct scrub_ctx *ctx,
224 const struct xfs_inumbers *inumbers,
225 uint64_t seen_mask,
226 struct xfs_bulkstat_req *breq)
227 {
228 struct xfs_bulkstat *bs = NULL;
229 int i;
230 int error;
231
232 for (i = 0; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
233 /*
234 * Don't single-step if inumbers said it wasn't allocated or
235 * bulkstat actually filled it.
236 */
237 if (!(inumbers->xi_allocmask & (1ULL << i)))
238 continue;
239 if (seen_mask & (1ULL << i))
240 continue;
241
242 assert(breq->hdr.ocount < LIBFROG_BULKSTAT_CHUNKSIZE);
243
244 if (!bs)
245 bs = &breq->bulkstat[breq->hdr.ocount];
246
247 /*
248 * We didn't get the desired stat data and we've hit the end of the
249 * returned data. We can't distinguish between the inode being
250 * freed vs. the inode being too corrupt to load, so try a
251 * bulkstat single to see if we can load the inode.
252 */
253 error = -xfrog_bulkstat_single(&ctx->mnt,
254 inumbers->xi_startino + i, breq->hdr.flags, bs);
255 switch (error) {
256 case ENOENT:
257 /*
258 * This inode wasn't found, and no results were
259 * returned. We've likely hit the end of the
260 * filesystem, but we'll move on to the next inode in
261 * the mask for the sake of caution.
262 */
263 continue;
264 case 0:
265 /*
266 * If a result was returned but it wasn't the inode
267 * we were looking for, then the missing inode was
268 * freed. Move on to the next inode in the mask.
269 */
270 if (bs->bs_ino != inumbers->xi_startino + i)
271 continue;
272 break;
273 default:
274 /*
275 * Some error happened. Synthesize a bulkstat record
276 * so that phase3 can try to see if there's a corrupt
277 * inode that needs repairing.
278 */
279 memset(bs, 0, sizeof(struct xfs_bulkstat));
280 bs->bs_ino = inumbers->xi_startino + i;
281 bs->bs_blksize = ctx->mnt_sv.f_frsize;
282 break;
283 }
284
285 breq->hdr.ocount++;
286 bs++;
287 }
288
289 /* If we added any entries, re-sort the array. */
290 if (bs)
291 qsort(breq->bulkstat, breq->hdr.ocount,
292 sizeof(struct xfs_bulkstat), compare_bstat);
293 }
294
295 /* Return the inumber of the highest allocated inode in the inumbers data. */
296 static inline uint64_t last_allocmask_ino(const struct xfs_inumbers *i)
297 {
298 return i->xi_startino + xfrog_highbit64(i->xi_allocmask);
299 }
300
301 /*
302 * Run bulkstat on an entire inode allocation group, then check that we got
303 * exactly the inodes we expected. If not, load them one at a time (or fake
304 * it) into the bulkstat data.
305 */
306 static void
307 bulkstat_for_inumbers(
308 struct scrub_ctx *ctx,
309 const struct xfs_inumbers *inumbers,
310 struct xfs_bulkstat_req *breq)
311 {
312 const uint64_t limit_ino =
313 inumbers->xi_startino + LIBFROG_BULKSTAT_CHUNKSIZE;
314 uint64_t seen_mask = 0;
315 int i;
316 int error;
317
318 assert(inumbers->xi_allocmask != 0);
319
320 /* First we try regular bulkstat, for speed. */
321 breq->hdr.ino = inumbers->xi_startino;
322 error = -xfrog_bulkstat(&ctx->mnt, breq);
323 if (!error) {
324 if (!breq->hdr.ocount)
325 return;
326 seen_mask |= seen_mask_from_bulkstat(inumbers,
327 inumbers->xi_startino, breq);
328 }
329
330 /*
331 * If the last allocated inode as reported by inumbers is higher than
332 * the last inode reported by bulkstat, two things could have happened.
333 * Either all the inodes at the high end of the cluster were freed
334 * since the inumbers call; or bulkstat encountered a corrupt inode and
335 * returned early. Try to bulkstat the rest of the array.
336 */
337 if (last_allocmask_ino(inumbers) > last_bstat_ino(breq))
338 seen_mask |= bulkstat_the_rest(ctx, inumbers, breq, error);
339
340 /*
341 * Bulkstat might return inodes beyond xi_startino + CHUNKSIZE. Reduce
342 * ocount to ignore inodes not described by the inumbers record.
343 */
344 for (i = breq->hdr.ocount - 1; i >= 0; i--) {
345 if (breq->bulkstat[i].bs_ino < limit_ino)
346 break;
347 breq->hdr.ocount--;
348 }
349
350 /*
351 * Fill in any missing inodes that are mentioned in the alloc mask but
352 * weren't previously seen by bulkstat. These are the corrupt inodes.
353 */
354 bulkstat_single_step(ctx, inumbers, seen_mask, breq);
355 }
356
357 /* BULKSTAT wrapper routines. */
358 struct scan_inodes {
359 struct workqueue wq_bulkstat;
360 scrub_inode_iter_fn fn;
361 void *arg;
362 unsigned int nr_threads;
363 bool aborted;
364 };
365
366 /*
367 * A single unit of inode scan work. This contains a pointer to the parent
368 * information, followed by an INUMBERS request structure, followed by a
369 * BULKSTAT request structure. The last two are VLAs, so we can't represent
370 * them here.
371 */
372 struct scan_ichunk {
373 struct scan_inodes *si;
374 };
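
/*
 * Layout of the buffer that alloc_ichunk() below allocates:
 *
 *   [struct scan_ichunk][XFS_INUMBERS_REQ_SIZE(1)][XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE)]
 *    ^ ichunk            ^ ichunk_to_inumbers()    ^ ichunk_to_bulkstat()
 */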
375
376 static inline struct xfs_inumbers_req *
377 ichunk_to_inumbers(
378 struct scan_ichunk *ichunk)
379 {
380 char *p = (char *)ichunk;
381
382 return (struct xfs_inumbers_req *)(p + sizeof(struct scan_ichunk));
383 }
384
385 static inline struct xfs_bulkstat_req *
386 ichunk_to_bulkstat(
387 struct scan_ichunk *ichunk)
388 {
389 char *p = (char *)ichunk_to_inumbers(ichunk);
390
391 return (struct xfs_bulkstat_req *)(p + XFS_INUMBERS_REQ_SIZE(1));
392 }
393
394 static inline int
395 alloc_ichunk(
396 struct scrub_ctx *ctx,
397 struct scan_inodes *si,
398 uint32_t agno,
399 uint64_t startino,
400 struct scan_ichunk **ichunkp)
401 {
402 struct scan_ichunk *ichunk;
403 struct xfs_inumbers_req *ireq;
404 struct xfs_bulkstat_req *breq;
405
406 ichunk = calloc(1, sizeof(struct scan_ichunk) +
407 XFS_INUMBERS_REQ_SIZE(1) +
408 XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE));
409 if (!ichunk)
410 return -errno;
411
412 ichunk->si = si;
413
414 ireq = ichunk_to_inumbers(ichunk);
415 ireq->hdr.icount = 1;
416 ireq->hdr.ino = startino;
417 ireq->hdr.agno = agno;
418 ireq->hdr.flags |= XFS_BULK_IREQ_AGNO;
419
420 breq = ichunk_to_bulkstat(ichunk);
421 breq->hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE;
422
423 /* Scan the metadata directory tree too. */
424 if (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_METADIR)
425 breq->hdr.flags |= XFS_BULK_IREQ_METADIR;
426
427 *ichunkp = ichunk;
428 return 0;
429 }
430
431 static int
432 render_ino_from_bulkstat(
433 struct scrub_ctx *ctx,
434 char *buf,
435 size_t buflen,
436 void *data)
437 {
438 struct xfs_bulkstat *bstat = data;
439
440 return scrub_render_ino_descr(ctx, buf, buflen, bstat->bs_ino,
441 bstat->bs_gen, NULL);
442 }
443
444 static int
445 render_inumbers_from_agno(
446 struct scrub_ctx *ctx,
447 char *buf,
448 size_t buflen,
449 void *data)
450 {
451 xfs_agnumber_t *agno = data;
452
453 return snprintf(buf, buflen, _("dev %d:%d AG %u inodes"),
454 major(ctx->fsinfo.fs_datadev),
455 minor(ctx->fsinfo.fs_datadev),
456 *agno);
457 }
458
459 /*
460 * Call BULKSTAT for information on a single chunk's worth of inodes and call
461 * our iterator function. We'll try to fill the bulkstat information in
462 * batches, but we can also detect iget failures.
463 */
464 static void
465 scan_ag_bulkstat(
466 struct workqueue *wq,
467 xfs_agnumber_t agno,
468 void *arg)
469 {
470 struct xfs_handle handle;
471 struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
472 struct scan_ichunk *ichunk = arg;
473 struct xfs_inumbers_req *ireq = ichunk_to_inumbers(ichunk);
474 struct xfs_bulkstat_req *breq = ichunk_to_bulkstat(ichunk);
475 struct scan_inodes *si = ichunk->si;
476 struct xfs_bulkstat *bs = &breq->bulkstat[0];
477 struct xfs_inumbers *inumbers = &ireq->inumbers[0];
478 uint64_t last_ino = 0;
479 int i;
480 int error;
481 int stale_count = 0;
482 DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat);
483 DEFINE_DESCR(dsc_inumbers, ctx, render_inumbers_from_agno);
484
485 descr_set(&dsc_inumbers, &agno);
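/*
 * Build a handle template from the filesystem handle; the per-inode part
 * is filled in from each bulkstat record in the loop below.
 */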
486 handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len);
487 retry:
488 bulkstat_for_inumbers(ctx, inumbers, breq);
489
490 /* Iterate all the inodes. */
491 for (i = 0; !si->aborted && i < breq->hdr.ocount; i++, bs++) {
492 uint64_t scan_ino = bs->bs_ino;
493
494 /* ensure forward progress if we retried */
495 if (scan_ino < last_ino)
496 continue;
497
498 descr_set(&dsc_bulkstat, bs);
499 handle_from_bulkstat(&handle, bs);
500 error = si->fn(ctx, &handle, bs, si->arg);
501 switch (error) {
502 case 0:
503 break;
504 case ESTALE: {
505 stale_count++;
506 if (stale_count < 30) {
507 uint64_t old_startino;
508
509 ireq->hdr.ino = old_startino =
510 inumbers->xi_startino;
511 error = -xfrog_inumbers(&ctx->mnt, ireq);
512 if (error)
513 goto err;
514 /*
515 * Retry only if inumbers returns the same
516 * inobt record as the previous record and
517 * there are allocated inodes in it.
518 */
519 if (!si->aborted &&
520 ireq->hdr.ocount > 0 &&
521 inumbers->xi_alloccount > 0 &&
522 inumbers->xi_startino == old_startino)
523 goto retry;
524 goto out;
525 }
526 str_info(ctx, descr_render(&dsc_bulkstat),
527 _("Changed too many times during scan; giving up."));
528 si->aborted = true;
529 goto out;
530 }
531 case ECANCELED:
532 error = 0;
533 fallthrough;
534 default:
535 goto err;
536 }
537 if (scrub_excessive_errors(ctx)) {
538 si->aborted = true;
539 goto out;
540 }
541 last_ino = scan_ino;
542 }
543
544 err:
545 if (error) {
546 str_liberror(ctx, error, descr_render(&dsc_bulkstat));
547 si->aborted = true;
548 }
549 out:
550 free(ichunk);
551 }
552
553 /*
554 * Call INUMBERS for information about inode chunks, then queue the inumbers
555 * responses in the bulkstat workqueue. This helps us maximize CPU parallelism
556 * if the filesystem AGs are not evenly loaded.
557 */
558 static void
559 scan_ag_inumbers(
560 struct workqueue *wq,
561 xfs_agnumber_t agno,
562 void *arg)
563 {
564 struct scan_ichunk *ichunk = NULL;
565 struct scan_inodes *si = arg;
566 struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
567 struct xfs_inumbers_req *ireq;
568 uint64_t nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
569 int error;
570 DEFINE_DESCR(dsc, ctx, render_inumbers_from_agno);
571
572 descr_set(&dsc, &agno);
573
574 error = alloc_ichunk(ctx, si, agno, 0, &ichunk);
575 if (error)
576 goto err;
577 ireq = ichunk_to_inumbers(ichunk);
578
579 /* Find the inode chunk & alloc mask */
580 error = -xfrog_inumbers(&ctx->mnt, ireq);
581 while (!error && !si->aborted && ireq->hdr.ocount > 0) {
582 /*
583 * Make sure that we always make forward progress while we
584 * scan the inode btree.
585 */
586 if (nextino > ireq->inumbers[0].xi_startino) {
587 str_corrupt(ctx, descr_render(&dsc),
588 _("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
589 cvt_ino_to_agino(&ctx->mnt, nextino),
590 cvt_ino_to_agino(&ctx->mnt,
591 ireq->inumbers[0].xi_startino));
592 si->aborted = true;
593 break;
594 }
595 nextino = ireq->hdr.ino;
596
597 if (ireq->inumbers[0].xi_alloccount == 0) {
598 /*
599 * We can have totally empty inode chunks on
600 * filesystems where there are more than 64 inodes per
601 * block. Skip these.
602 */
603 ;
604 } else if (si->nr_threads > 0) {
605 /* Queue this inode chunk on the bulkstat workqueue. */
606 error = -workqueue_add(&si->wq_bulkstat,
607 scan_ag_bulkstat, agno, ichunk);
608 if (error) {
609 si->aborted = true;
610 str_liberror(ctx, error,
611 _("queueing bulkstat work"));
612 goto out;
613 }
614 ichunk = NULL;
615 } else {
616 /*
617 * Only one thread, call bulkstat directly. Remember,
618 * ichunk is freed by the worker before returning.
619 */
620 scan_ag_bulkstat(wq, agno, ichunk);
621 ichunk = NULL;
622 if (si->aborted)
623 break;
624 }
625
626 if (!ichunk) {
627 error = alloc_ichunk(ctx, si, agno, nextino, &ichunk);
628 if (error)
629 goto err;
630 }
631 ireq = ichunk_to_inumbers(ichunk);
632
633 error = -xfrog_inumbers(&ctx->mnt, ireq);
634 }
635
636 err:
637 if (error) {
638 str_liberror(ctx, error, descr_render(&dsc));
639 si->aborted = true;
640 }
641 out:
642 if (ichunk)
643 free(ichunk);
644 }
645
646 /*
647 * Scan all the inodes in a filesystem, including metadata directory files and
648 * broken files. On error, this function will log an error message and return
649 * -1.
650 */
651 int
652 scrub_scan_all_inodes(
653 struct scrub_ctx *ctx,
654 scrub_inode_iter_fn fn,
655 void *arg)
656 {
657 struct scan_inodes si = {
658 .fn = fn,
659 .arg = arg,
660 .nr_threads = scrub_nproc_workqueue(ctx),
661 };
662 xfs_agnumber_t agno;
663 struct workqueue wq_inumbers;
664 unsigned int max_bulkstat;
665 int ret;
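
/*
 * This is a two-stage scan: wq_inumbers runs one INUMBERS walker per AG,
 * and each inode chunk found is handed to wq_bulkstat, whose workers
 * bulkstat the chunk and call fn on every inode in it.
 */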
666
667 /*
668 * The bulkstat workqueue should queue at most one inobt block's worth
669 * of inode chunk records per worker thread. If we're running in
670 * single thread mode (nr_threads==0) then we skip the workqueues.
671 */
672 max_bulkstat = si.nr_threads * (ctx->mnt.fsgeom.blocksize / 16);
673
674 ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx,
675 si.nr_threads, max_bulkstat);
676 if (ret) {
677 str_liberror(ctx, ret, _("creating bulkstat workqueue"));
678 return -1;
679 }
680
681 ret = -workqueue_create(&wq_inumbers, (struct xfs_mount *)ctx,
682 si.nr_threads);
683 if (ret) {
684 str_liberror(ctx, ret, _("creating inumbers workqueue"));
685 si.aborted = true;
686 goto kill_bulkstat;
687 }
688
689 for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
690 ret = -workqueue_add(&wq_inumbers, scan_ag_inumbers, agno, &si);
691 if (ret) {
692 si.aborted = true;
693 str_liberror(ctx, ret, _("queueing inumbers work"));
694 break;
695 }
696 }
697
698 ret = -workqueue_terminate(&wq_inumbers);
699 if (ret) {
700 si.aborted = true;
701 str_liberror(ctx, ret, _("finishing inumbers work"));
702 }
703 workqueue_destroy(&wq_inumbers);
704
705 kill_bulkstat:
706 ret = -workqueue_terminate(&si.wq_bulkstat);
707 if (ret) {
708 si.aborted = true;
709 str_liberror(ctx, ret, _("finishing bulkstat work"));
710 }
711 workqueue_destroy(&si.wq_bulkstat);
712
713 return si.aborted ? -1 : 0;
714 }
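
/*
 * Example caller (a sketch only; example_iter_fn and its body are
 * hypothetical and not part of this file). A scrub phase passes a
 * scrub_inode_iter_fn that receives the per-inode file handle and bulkstat
 * record, and returns 0 to keep scanning, ECANCELED to stop iterating
 * without reporting an error, or ESTALE to ask for a refresh of the
 * inode chunk and a retry.
 */
#if 0
static int
example_iter_fn(
	struct scrub_ctx	*ctx,
	struct xfs_handle	*handle,
	struct xfs_bulkstat	*bstat,
	void			*arg)
{
	int			fd;

	fd = scrub_open_handle(handle);
	if (fd < 0)
		return 0;	/* or ESTALE to force a refresh and retry */
	/* ...examine the file described by bstat... */
	close(fd);
	return 0;
}

	/* in a phase driver: */
	if (scrub_scan_all_inodes(ctx, example_iter_fn, NULL) < 0)
		return -1;
#endif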
715
716 struct user_bulkstat {
717 struct scan_inodes *si;
718
719 /* vla, must be last */
720 struct xfs_bulkstat_req breq;
721 };
722
723 /* Iterate all the user files returned by a bulkstat. */
724 static void
725 scan_user_files(
726 struct workqueue *wq,
727 xfs_agnumber_t agno,
728 void *arg)
729 {
730 struct xfs_handle handle;
731 struct scrub_ctx *ctx = (struct scrub_ctx *)wq->wq_ctx;
732 struct user_bulkstat *ureq = arg;
733 struct xfs_bulkstat *bs = &ureq->breq.bulkstat[0];
734 struct scan_inodes *si = ureq->si;
735 int i;
736 int error = 0;
737 DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat);
738
739 handle_from_fshandle(&handle, ctx->fshandle, ctx->fshandle_len);
740
741 for (i = 0; !si->aborted && i < ureq->breq.hdr.ocount; i++, bs++) {
742 descr_set(&dsc_bulkstat, bs);
743 handle_from_bulkstat(&handle, bs);
744 error = si->fn(ctx, &handle, bs, si->arg);
745 switch (error) {
746 case 0:
747 break;
748 case ESTALE:
749 case ECANCELED:
750 error = 0;
751 fallthrough;
752 default:
753 goto err;
754 }
755 if (scrub_excessive_errors(ctx)) {
756 si->aborted = true;
757 goto out;
758 }
759 }
760
761 err:
762 if (error) {
763 str_liberror(ctx, error, descr_render(&dsc_bulkstat));
764 si->aborted = true;
765 }
766 out:
767 free(ureq);
768 }
769
770 /*
771 * Run one step of the user files bulkstat scan and schedule background
772 * processing of the stat data returned. Returns 1 to keep going, or 0 to
773 * stop.
774 */
775 static int
776 scan_user_bulkstat(
777 struct scrub_ctx *ctx,
778 struct scan_inodes *si,
779 uint64_t *cursor)
780 {
781 struct user_bulkstat *ureq;
782 const char *what = NULL;
783 int ret;
784
785 ureq = calloc(1, sizeof(struct user_bulkstat) +
786 XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE));
787 if (!ureq) {
788 ret = ENOMEM;
789 what = _("creating bulkstat work item");
790 goto err;
791 }
792 ureq->si = si;
793 ureq->breq.hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE;
794 ureq->breq.hdr.ino = *cursor;
795
796 ret = -xfrog_bulkstat(&ctx->mnt, &ureq->breq);
797 if (ret) {
798 what = _("user files bulkstat");
799 goto err_ureq;
800 }
801 if (ureq->breq.hdr.ocount == 0) {
802 *cursor = NULLFSINO;
803 free(ureq);
804 return 0;
805 }
806
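/*
 * The kernel advanced hdr.ino past the last inode it returned, so it can be
 * used directly as the cursor for the next call.
 */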
807 *cursor = ureq->breq.hdr.ino;
808
809 /* scan_user_files frees ureq; do not access it */
810 ret = -workqueue_add(&si->wq_bulkstat, scan_user_files, 0, ureq);
811 if (ret) {
812 what = _("queueing bulkstat work");
813 goto err_ureq;
814 }
815 ureq = NULL;
816
817 return 1;
818
819 err_ureq:
820 free(ureq);
821 err:
822 si->aborted = true;
823 str_liberror(ctx, ret, what);
824 return 0;
825 }
826
827 /*
828 * Scan all the user files in a filesystem in inumber order. On error, this
829 * function will log an error message and return -1.
830 */
831 int
832 scrub_scan_user_files(
833 struct scrub_ctx *ctx,
834 scrub_inode_iter_fn fn,
835 void *arg)
836 {
837 struct scan_inodes si = {
838 .fn = fn,
839 .arg = arg,
840 .nr_threads = scrub_nproc_workqueue(ctx),
841 };
842 uint64_t ino = 0;
843 int ret;
844
845 /* Queue up to four bulkstat result sets per thread. */
846 ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx,
847 si.nr_threads, si.nr_threads * 4);
848 if (ret) {
849 str_liberror(ctx, ret, _("creating bulkstat workqueue"));
850 return -1;
851 }
852
853 while ((ret = scan_user_bulkstat(ctx, &si, &ino)) == 1) {
854 /* empty */
855 }
856
857 ret = -workqueue_terminate(&si.wq_bulkstat);
858 if (ret) {
859 si.aborted = true;
860 str_liberror(ctx, ret, _("finishing bulkstat work"));
861 }
862 workqueue_destroy(&si.wq_bulkstat);
863
864 return si.aborted ? -1 : 0;
865 }
866
867 /* Open a file by handle, returning either the fd or -1 on error. */
868 int
869 scrub_open_handle(
870 struct xfs_handle *handle)
871 {
872 return open_by_fshandle(handle, sizeof(*handle),
873 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
874 }