]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/phase3.c
xfsprogs: Release v6.7.0
[thirdparty/xfsprogs-dev.git] / scrub / phase3.c
CommitLineData
8d318d62 1// SPDX-License-Identifier: GPL-2.0-or-later
fa16b376 2/*
52520522 3 * Copyright (C) 2018-2024 Oracle. All Rights Reserved.
8d318d62 4 * Author: Darrick J. Wong <djwong@kernel.org>
fa16b376 5 */
a440f877 6#include "xfs.h"
fa16b376 7#include <stdint.h>
fa16b376 8#include <sys/types.h>
fa16b376 9#include <sys/statvfs.h>
19852474 10#include "list.h"
42b4c8e8 11#include "libfrog/paths.h"
56598728 12#include "libfrog/workqueue.h"
fa16b376
DW
13#include "xfs_scrub.h"
14#include "common.h"
15#include "counter.h"
16#include "inodes.h"
ed60d210 17#include "progress.h"
fa16b376 18#include "scrub.h"
ee310b0c 19#include "repair.h"
fa16b376
DW
20
21/* Phase 3: Scan all inodes. */
22
struct scrub_inode_ctx {
	/* Scrub context that phase3_func was called with. */
	struct scrub_ctx	*ctx;

	/* Running count of the inodes that have been scanned. */
	struct ptcounter	*icount;

	/* Array of mutexes, indexed by AG number, guarding the repair lists. */
	pthread_mutex_t		*locks;

	/* Becomes true when all scan threads should stop. */
	bool			aborted;

	/* True when file repairs must be postponed until phase 4. */
	bool			always_defer_repairs;
};
38
6c05cc5d
DW
39/* Report a filesystem error that the vfs fed us on close. */
40static void
df024103 41report_close_error(
6c05cc5d 42 struct scrub_ctx *ctx,
4cca629d 43 struct xfs_bulkstat *bstat)
6c05cc5d
DW
44{
45 char descr[DESCR_BUFSZ];
6c05cc5d
DW
46 int old_errno = errno;
47
15589f0a
DW
48 scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, bstat->bs_ino,
49 bstat->bs_gen, NULL);
6c05cc5d
DW
50 errno = old_errno;
51 str_errno(ctx, descr);
52}
53
8f0c270f
DW
54/*
55 * Defer all the repairs until phase 4, being careful about locking since the
56 * inode scrub threads are not per-AG.
57 */
58static void
59defer_inode_repair(
60 struct scrub_inode_ctx *ictx,
61 xfs_agnumber_t agno,
62 struct action_list *alist)
63{
64 if (alist->nr == 0)
65 return;
66
67 pthread_mutex_lock(&ictx->locks[agno]);
68 action_list_defer(ictx->ctx, agno, alist);
69 pthread_mutex_unlock(&ictx->locks[agno]);
70}
71
12ca67b3
DW
72/* Run repair actions now and defer unfinished items for later. */
73static int
74try_inode_repair(
75 struct scrub_inode_ctx *ictx,
bb9be147 76 int fd,
12ca67b3
DW
77 xfs_agnumber_t agno,
78 struct action_list *alist)
79{
bb9be147
DW
80 int ret;
81
12ca67b3
DW
82 /*
83 * If at the start of phase 3 we already had ag/rt metadata repairs
84 * queued up for phase 4, leave the action list untouched so that file
85 * metadata repairs will be deferred in scan order until phase 4.
86 */
87 if (ictx->always_defer_repairs)
88 return 0;
89
bb9be147
DW
90 ret = action_list_process(ictx->ctx, fd, alist,
91 ALP_REPAIR_ONLY | ALP_NOPROGRESS);
92 if (ret)
93 return ret;
94
8f0c270f 95 defer_inode_repair(ictx, agno, alist);
bb9be147 96 return 0;
12ca67b3
DW
97}
98
fa16b376
DW
99/* Verify the contents, xattrs, and extent maps of an inode. */
100static int
df024103 101scrub_inode(
fa16b376
DW
102 struct scrub_ctx *ctx,
103 struct xfs_handle *handle,
4cca629d 104 struct xfs_bulkstat *bstat,
fa16b376
DW
105 void *arg)
106{
83d2c80b 107 struct action_list alist;
fa16b376
DW
108 struct scrub_inode_ctx *ictx = arg;
109 struct ptcounter *icount = ictx->icount;
ee310b0c 110 xfs_agnumber_t agno;
fa16b376 111 int fd = -1;
6c05cc5d 112 int error;
fa16b376 113
83d2c80b 114 action_list_init(&alist);
a749451c 115 agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino);
fa16b376
DW
116 background_sleep();
117
a7ee7b68
DW
118 /*
119 * Open this regular file to pin it in memory. Avoiding the use of
120 * scan-by-handle means that the in-kernel scrubber doesn't pay the
121 * cost of opening the handle (looking up the inode in the inode btree,
122 * grabbing the inode, checking the generation) with every scrub call.
123 *
7ddf6e0f
DW
124 * Ignore any runtime or corruption related errors here because we can
125 * fall back to scrubbing by handle. ESTALE can be ignored for the
126 * following reasons:
127 *
128 * - If the file has been deleted since bulkstat, there's nothing to
129 * check. Scrub-by-handle returns ENOENT for such inodes.
130 * - If the file has been deleted and reallocated since bulkstat,
131 * its ondisk metadata have been rewritten and is assumed to be ok.
132 * Scrub-by-handle also returns ENOENT if the generation doesn't
133 * match.
134 * - The file itself is corrupt and cannot be loaded. In this case,
135 * we fall back to scrub-by-handle.
136 *
a7ee7b68
DW
137 * Note: We cannot use this same trick for directories because the VFS
138 * will try to reconnect directory file handles to the root directory
139 * by walking '..' entries upwards, and loops in the dirent index
140 * btree will cause livelocks.
a7ee7b68 141 */
7ddf6e0f 142 if (S_ISREG(bstat->bs_mode))
59f79e0a 143 fd = scrub_open_handle(handle);
fa16b376
DW
144
145 /* Scrub the inode. */
a7ee7b68 146 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist);
d22f2471 147 if (error)
ee310b0c
DW
148 goto out;
149
bb9be147 150 error = try_inode_repair(ictx, fd, agno, &alist);
83d2c80b 151 if (error)
fa16b376
DW
152 goto out;
153
154 /* Scrub all block mappings. */
a7ee7b68 155 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist);
d22f2471 156 if (error)
fa16b376 157 goto out;
a7ee7b68 158 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist);
d22f2471 159 if (error)
fa16b376 160 goto out;
a7ee7b68 161 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist);
d22f2471 162 if (error)
ee310b0c
DW
163 goto out;
164
bb9be147 165 error = try_inode_repair(ictx, fd, agno, &alist);
83d2c80b 166 if (error)
fa16b376
DW
167 goto out;
168
169 if (S_ISLNK(bstat->bs_mode)) {
170 /* Check symlink contents. */
a7ee7b68
DW
171 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK,
172 &alist);
fa16b376
DW
173 } else if (S_ISDIR(bstat->bs_mode)) {
174 /* Check the directory entries. */
a7ee7b68 175 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist);
fa16b376 176 }
d22f2471 177 if (error)
fa16b376
DW
178 goto out;
179
180 /* Check all the extended attributes. */
a7ee7b68 181 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist);
d22f2471 182 if (error)
fa16b376
DW
183 goto out;
184
185 /* Check parent pointers. */
a7ee7b68 186 error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist);
d22f2471 187 if (error)
ee310b0c
DW
188 goto out;
189
190 /* Try to repair the file while it's open. */
bb9be147 191 error = try_inode_repair(ictx, fd, agno, &alist);
83d2c80b 192 if (error)
fa16b376
DW
193 goto out;
194
195out:
d22f2471 196 if (error)
df024103
DW
197 ictx->aborted = true;
198
da3dd6c0
DW
199 error = ptcounter_add(icount, 1);
200 if (error) {
201 str_liberror(ctx, error,
202 _("incrementing scanned inode counter"));
df024103 203 ictx->aborted = true;
da3dd6c0 204 }
ed60d210 205 progress_add(1);
12ca67b3
DW
206
207 if (!error && !ictx->aborted)
8f0c270f 208 defer_inode_repair(ictx, agno, &alist);
12ca67b3 209
6c05cc5d 210 if (fd >= 0) {
df024103
DW
211 int err2;
212
213 err2 = close(fd);
214 if (err2) {
215 report_close_error(ctx, bstat);
216 ictx->aborted = true;
217 }
6c05cc5d 218 }
df024103
DW
219
220 if (!error && ictx->aborted)
221 error = ECANCELED;
222 return error;
fa16b376
DW
223}
224
225/* Verify all the inodes in a filesystem. */
df024103
DW
226int
227phase3_func(
fa16b376
DW
228 struct scrub_ctx *ctx)
229{
12ca67b3 230 struct scrub_inode_ctx ictx = { .ctx = ctx };
da3dd6c0 231 uint64_t val;
12ca67b3 232 xfs_agnumber_t agno;
da3dd6c0 233 int err;
fa16b376 234
da3dd6c0
DW
235 err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
236 if (err) {
237 str_liberror(ctx, err, _("creating scanned inode counter"));
df024103 238 return err;
fa16b376
DW
239 }
240
8f0c270f
DW
241 ictx.locks = calloc(ctx->mnt.fsgeom.agcount, sizeof(pthread_mutex_t));
242 if (!ictx.locks) {
243 str_errno(ctx, _("creating per-AG repair list locks"));
244 err = ENOMEM;
245 goto out_ptcounter;
246 }
247
12ca67b3
DW
248 /*
249 * If we already have ag/fs metadata to repair from previous phases,
250 * we would rather not try to repair file metadata until we've tried
251 * to repair the space metadata.
252 */
253 for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
8f0c270f
DW
254 pthread_mutex_init(&ictx.locks[agno], NULL);
255
12ca67b3
DW
256 if (!action_list_empty(&ctx->action_lists[agno]))
257 ictx.always_defer_repairs = true;
258 }
259
df024103
DW
260 err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
261 if (!err && ictx.aborted)
262 err = ECANCELED;
59f79e0a 263 if (err)
8f0c270f 264 goto out_locks;
df024103 265
273165cc 266 scrub_report_preen_triggers(ctx);
da3dd6c0
DW
267 err = ptcounter_value(ictx.icount, &val);
268 if (err) {
269 str_liberror(ctx, err, _("summing scanned inode counter"));
8f0c270f 270 goto out_locks;
da3dd6c0 271 }
df024103 272
da3dd6c0 273 ctx->inodes_checked = val;
8f0c270f
DW
274out_locks:
275 for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
276 pthread_mutex_destroy(&ictx.locks[agno]);
277 free(ictx.locks);
278out_ptcounter:
fa16b376 279 ptcounter_free(ictx.icount);
df024103 280 return err;
fa16b376 281}
ed60d210 282
df024103
DW
283/* Estimate how much work we're going to do. */
284int
285phase3_estimate(
ed60d210
DW
286 struct scrub_ctx *ctx,
287 uint64_t *items,
288 unsigned int *nr_threads,
289 int *rshift)
290{
291 *items = ctx->mnt_sv.f_files - ctx->mnt_sv.f_ffree;
292 *nr_threads = scrub_nproc(ctx);
293 *rshift = 0;
df024103
DW
294 return 0;
295}