From b95546f18e6bfbe477e0bd3c726c9ae8a6c9f2ff Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 24 Feb 2025 10:21:42 -0800
Subject: [PATCH] xfs_scrub: selectively re-run bulkstat after re-running
 inumbers

In the phase 3 inode scan, don't bother retrying the inumbers ->
bulkstat conversion unless inumbers returns the same startino and that
record still has allocated inodes.  If inumbers returns data for a
totally different inobt record, that means the whole inode chunk was
freed, so there is nothing to retry.

Cc: <linux-xfs@vger.kernel.org> # v5.18.0
Fixes: 245c72a6eeb720 ("xfs_scrub: balance inode chunk scan across CPUs")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 scrub/inodes.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/scrub/inodes.c b/scrub/inodes.c
index c32dfb62..8bdfa0b3 100644
--- a/scrub/inodes.c
+++ b/scrub/inodes.c
@@ -60,6 +60,8 @@ bulkstat_for_inumbers(
 	int			i;
 	int			error;
 
+	assert(inumbers->xi_allocmask != 0);
+
 	/* First we try regular bulkstat, for speed. */
 	breq->hdr.ino = inumbers->xi_startino;
 	breq->hdr.icount = inumbers->xi_alloccount;
@@ -246,11 +248,24 @@ retry:
 		case ESTALE: {
 			stale_count++;
 			if (stale_count < 30) {
-				ireq->hdr.ino = inumbers->xi_startino;
+				uint64_t	old_startino;
+
+				ireq->hdr.ino = old_startino =
+					inumbers->xi_startino;
 				error = -xfrog_inumbers(&ctx->mnt, ireq);
 				if (error)
 					goto err;
-				goto retry;
+				/*
+				 * Retry only if inumbers returns the same
+				 * inobt record as the previous record and
+				 * there are allocated inodes in it.
+				 */
+				if (!si->aborted &&
+				    ireq->hdr.ocount > 0 &&
+				    inumbers->xi_alloccount > 0 &&
+				    inumbers->xi_startino == old_startino)
+					goto retry;
+				goto out;
 			}
 			str_info(ctx, descr_render(&dsc_bulkstat),
 _("Changed too many times during scan; giving up."));
-- 
2.47.3