1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
10 #include <sys/statvfs.h>
11 #include "platform_defs.h"
13 #include "xfs_format.h"
16 #include "workqueue.h"
17 #include "xfs_scrub.h"
22 * Iterate a range of inodes.
24 * This is a little more involved than repeatedly asking BULKSTAT for a
25 * buffer's worth of stat data for some number of inodes. We want to scan as
26 * many of the inodes that the inobt thinks there are, including the ones that
27 * are broken, but if we ask for n inodes starting at x, it'll skip the bad
28 * ones and fill from beyond the range (x + n).
30 * Therefore, we ask INUMBERS to return one inobt chunk's worth of inode
31 * bitmap information. Then we try to BULKSTAT only the inodes that were
32 * present in that chunk, and compare what we got against what INUMBERS said
33 * was there. If there's a mismatch, we know that we have an inode that fails
34 * the verifiers but we can inject the bulkstat information to force the scrub
35 * code to deal with the broken inodes.
37 * If the iteration function returns ESTALE, that means that the inode has
38 * been deleted and possibly recreated since the BULKSTAT call. We will
39 * refresh the stat information and try again up to 30 times before reporting
40 * the staleness as an error.
44 * Did we get exactly the inodes we expected? If not, load them one at a
45 * time (or fake it) into the bulkstat data.
/*
 * NOTE(review): this extract is fragmented -- the embedded original line
 * numbers jump (51 -> 53, 53 -> 60, 65 -> 67, 75 -> 77, 80 -> 87), so the
 * local declarations (oneino, onelen, i, bs, error), the loop's continue
 * statements, the BULKSTAT_SINGLE ioctl's request argument, and the closing
 * braces are not visible here.  Code lines below are preserved byte-for-byte;
 * only comments have been added.
 */
48 xfs_iterate_inodes_range_check(
49 	struct scrub_ctx
*ctx
,
50 	struct xfs_inogrp
*inogrp
,
51 	struct xfs_bstat
*bstat
)
/* Single-inode bulkstat request; wired up to oneino/onelen just below. */
53 	struct xfs_fsop_bulkreq onereq
= {NULL
};
60 	onereq
.lastip
= &oneino
;
62 	onereq
.ocount
= &onelen
;
/* Walk every slot of this inode chunk. */
64 	for (i
= 0, bs
= bstat
; i
< XFS_INODES_PER_CHUNK
; i
++) {
/*
 * Slot not allocated according to the inobt bitmap; presumably skipped
 * (the continue statement is on a line missing from this extract).
 */
65 		if (!(inogrp
->xi_allocmask
& (1ULL << i
)))
/* BULKSTAT already returned this inode; nothing to fix up for this slot. */
67 		if (bs
->bs_ino
== inogrp
->xi_startino
+ i
) {
72 			/* Load the one inode. */
73 			oneino
= inogrp
->xi_startino
+ i
;
75 			error
= ioctl(ctx
->mnt_fd
, XFS_IOC_FSBULKSTAT_SINGLE
,
/*
 * BULKSTAT_SINGLE failed or handed back the wrong inode: synthesize
 * minimal stat data (zeroed record, correct inode number, fs block size)
 * so the scrub code still visits the broken inode -- see the comment at
 * the top of this file.
 */
77 			if (error
|| bs
->bs_ino
!= inogrp
->xi_startino
+ i
) {
78 				memset(bs
, 0, sizeof(struct xfs_bstat
));
79 				bs
->bs_ino
= inogrp
->xi_startino
+ i
;
80 				bs
->bs_blksize
= ctx
->mnt_sv
.f_frsize
;
87 * Call into the filesystem for inode/bulkstat information and call our
88 * iterator function. We'll try to fill the bulkstat information in batches,
89 * but we also can detect iget failures.
/*
 * NOTE(review): fragmented extract -- the embedded original numbering jumps
 * (93 -> 101, 108 -> 119, 123 -> 125, 141 -> 143, 148 -> 150, 162 -> 168,
 * 177 -> 185, 193 -> 198), so the remaining parameters (descr, fshandle,
 * first_ino, last_ino, fn, arg -- all referenced below), several local
 * declarations (ino, igrp_ino, bulklen, igrplen, stale_count, moveon, i,
 * error), the switch statement wrapping the ESTALE retry, and most closing
 * braces are not visible.  Code lines are kept byte-for-byte; comments only.
 */
92 xfs_iterate_inodes_range(
93 	struct scrub_ctx
*ctx
,
/* igrpreq drives INUMBERS; bulkreq drives BULKSTAT for one chunk. */
101 	struct xfs_fsop_bulkreq igrpreq
= {NULL
};
102 	struct xfs_fsop_bulkreq bulkreq
= {NULL
};
103 	struct xfs_handle handle
;
104 	struct xfs_inogrp inogrp
;
/* One inobt chunk's worth of bulkstat records. */
105 	struct xfs_bstat bstat
[XFS_INODES_PER_CHUNK
];
106 	char idescr
[DESCR_BUFSZ
];
107 	char buf
[DESCR_BUFSZ
];
108 	struct xfs_bstat
*bs
;
/* Point the BULKSTAT request at our chunk-sized buffer. */
119 	memset(bstat
, 0, XFS_INODES_PER_CHUNK
* sizeof(struct xfs_bstat
));
120 	bulkreq
.lastip
= &ino
;
121 	bulkreq
.icount
= XFS_INODES_PER_CHUNK
;
122 	bulkreq
.ubuffer
= &bstat
;
123 	bulkreq
.ocount
= &bulklen
;
/* Point the INUMBERS request at a single inogrp record. */
125 	igrpreq
.lastip
= &igrp_ino
;
127 	igrpreq
.ubuffer
= &inogrp
;
128 	igrpreq
.ocount
= &igrplen
;
/*
 * Build a handle template from the fs handle; fid_ino/fid_gen get filled
 * in per inode inside the loop below.
 */
130 	memcpy(&handle
.ha_fsid
, fshandle
, sizeof(handle
.ha_fsid
));
131 	handle
.ha_fid
.fid_len
= sizeof(xfs_fid_t
) -
132 			sizeof(handle
.ha_fid
.fid_len
);
133 	handle
.ha_fid
.fid_pad
= 0;
135 	/* Find the inode chunk & alloc mask */
136 	igrp_ino
= first_ino
;
137 	error
= ioctl(ctx
->mnt_fd
, XFS_IOC_FSINUMBERS
, &igrpreq
);
/* One iteration per inobt chunk reported by INUMBERS. */
138 	while (!error
&& igrplen
) {
139 		/* Load the inodes. */
/* Start bulkstat just before the chunk's first inode number. */
140 		ino
= inogrp
.xi_startino
- 1;
141 		bulkreq
.icount
= inogrp
.xi_alloccount
;
143 		 * We can have totally empty inode chunks on filesystems where
144 		 * there are more than 64 inodes per block. Skip these.
146 		if (inogrp
.xi_alloccount
== 0)
148 		error
= ioctl(ctx
->mnt_fd
, XFS_IOC_FSBULKSTAT
, &bulkreq
);
150 			str_info(ctx
, descr
, "%s", strerror_r(errno
,
/* Reconcile what BULKSTAT returned against what INUMBERS promised. */
153 		xfs_iterate_inodes_range_check(ctx
, &inogrp
, bstat
);
155 		/* Iterate all the inodes. */
156 		for (i
= 0, bs
= bstat
; i
< inogrp
.xi_alloccount
; i
++, bs
++) {
157 			if (bs
->bs_ino
> last_ino
)
/* Fill in this inode's identity and hand it to the callback. */
160 			handle
.ha_fid
.fid_ino
= bs
->bs_ino
;
161 			handle
.ha_fid
.fid_gen
= bs
->bs_gen
;
162 			error
= fn(ctx
, &handle
, bs
, arg
);
/*
 * ESTALE retry path (per the comment at the top of this file): re-run
 * INUMBERS/BULKSTAT for this chunk up to 30 times before giving up.
 * NOTE(review): the switch statement and retry restart lines are missing
 * from this extract.
 */
168 			if (stale_count
< 30) {
169 				igrp_ino
= inogrp
.xi_startino
;
172 			snprintf(idescr
, DESCR_BUFSZ
, "inode %"PRIu64
,
173 					(uint64_t)bs
->bs_ino
);
174 			str_info(ctx
, idescr
,
175 _("Changed too many times during scan; giving up."));
177 			case XFS_ITERATE_INODES_ABORT
:
185 			if (xfs_scrub_excessive_errors(ctx
)) {
/* Advance to the next inode chunk. */
193 		error
= ioctl(ctx
->mnt_fd
, XFS_IOC_FSINUMBERS
, &igrpreq
);
198 		str_errno(ctx
, descr
);
205 /* BULKSTAT wrapper routines. */
/*
 * Context handed to each per-AG bulkstat worker.  fn is the per-inode
 * callback.  NOTE(review): the remaining members (at least "arg", which
 * xfs_scan_ag_inodes dereferences as si->arg) and the closing brace are on
 * lines missing from this extract.
 */
206 struct xfs_scan_inodes
{
207 	xfs_inode_iter_fn fn
;
212 /* Scan all the inodes in an AG. */
/*
 * NOTE(review): fragmented extract -- the function's name/return-type line,
 * its remaining parameters (agno is referenced below), local declarations
 * (ag_ino, moveon), the snprintf's agno argument, and the closing lines are
 * missing (embedded numbering jumps 212 -> 215, 228 -> 231, 235 -> 240).
 */
215 	struct workqueue
*wq
,
219 	struct xfs_scan_inodes
*si
= arg
;
220 	struct scrub_ctx
*ctx
= (struct scrub_ctx
*)wq
->wq_ctx
;
221 	char descr
[DESCR_BUFSZ
];
223 	uint64_t next_ag_ino
;
/* Label this AG's work for error reporting. */
226 	snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d AG %u inodes"),
227 				major(ctx
->fsinfo
.fs_datadev
),
228 				minor(ctx
->fsinfo
.fs_datadev
),
/*
 * First inode number of this AG and of the next AG, computed from the
 * inodes-per-block and AG-block log2 sizes; scan everything in between.
 */
231 	ag_ino
= (__u64
)agno
<< (ctx
->inopblog
+ ctx
->agblklog
);
232 	next_ag_ino
= (__u64
)(agno
+ 1) << (ctx
->inopblog
+ ctx
->agblklog
);
234 	moveon
= xfs_iterate_inodes_range(ctx
, descr
, ctx
->fshandle
, ag_ino
,
235 			next_ag_ino
- 1, si
->fn
, si
->arg
);
240 /* Scan all the inodes in a filesystem. */
/*
 * NOTE(review): fragmented extract -- the function's name/return-type line,
 * remaining parameters, local declarations (wq, ret, agno), the error-check
 * branches around workqueue_create/workqueue_add, and the return are missing
 * (embedded numbering jumps 240 -> 243, 247 -> 256, 259 -> 263, 268 -> 273).
 */
243 	struct scrub_ctx
*ctx
,
244 	xfs_inode_iter_fn fn
,
247 	struct xfs_scan_inodes si
;
/* One workqueue, sized by the scrub thread-count policy. */
256 	ret
= workqueue_create(&wq
, (struct xfs_mount
*)ctx
,
257 			scrub_nproc_workqueue(ctx
));
259 		str_info(ctx
, ctx
->mntpoint
, _("Could not create workqueue."));
/* Queue one inode-scan work item per allocation group. */
263 	for (agno
= 0; agno
< ctx
->geo
.agcount
; agno
++) {
264 		ret
= workqueue_add(&wq
, xfs_scan_ag_inodes
, agno
, &si
);
267 			str_info(ctx
, ctx
->mntpoint
,
268 _("Could not queue AG %u bulkstat work."), agno
);
/* Presumably waits for the queued workers -- confirm workqueue_destroy. */
273 	workqueue_destroy(&wq
);
279  * Open a file by handle, or return a negative error code.
/*
 * NOTE(review): the function's name/return-type line and opening brace are
 * missing from this extract (embedded numbering jumps 279 -> 283).
 */
283 	struct xfs_handle
*handle
)
/*
 * Read-only, and deliberately avoid perturbing the target: no atime
 * update, no symlink following, no controlling-terminal acquisition.
 */
285 	return open_by_fshandle(handle
, sizeof(*handle
),
286 			O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);