]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
xfs_scrub: redistribute read verify pool flush and destroy responsibilities
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include <stdint.h>
8 #include <dirent.h>
9 #include <sys/statvfs.h>
10 #include "handle.h"
11 #include "libfrog/paths.h"
12 #include "libfrog/workqueue.h"
13 #include "xfs_scrub.h"
14 #include "common.h"
15 #include "libfrog/bitmap.h"
16 #include "disk.h"
17 #include "filemap.h"
18 #include "fscounters.h"
19 #include "inodes.h"
20 #include "read_verify.h"
21 #include "spacemap.h"
22 #include "vfs.h"
23
24 /*
25 * Phase 6: Verify data file integrity.
26 *
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
34 */
35
36 /* Verify disk blocks with GETFSMAP */
37
/*
 * State shared by the phase 6 media verification callbacks: one read-verify
 * pool per device, plus bitmaps recording the ranges that failed verification
 * on the data and realtime devices.
 */
struct media_verify_state {
	/* Read-verify pools for the data, log, and realtime devices. */
	struct read_verify_pool	*rvp_data;
	struct read_verify_pool	*rvp_log;
	struct read_verify_pool	*rvp_realtime;

	/* Bad ranges found on the data device, in bytes. */
	struct bitmap		*d_bad;
	/* Bad ranges found on the realtime device, in bytes. */
	struct bitmap		*r_bad;
};
45
46 /* Find the fd for a given device identifier. */
47 static struct read_verify_pool *
48 xfs_dev_to_pool(
49 struct scrub_ctx *ctx,
50 struct media_verify_state *vs,
51 dev_t dev)
52 {
53 if (dev == ctx->fsinfo.fs_datadev)
54 return vs->rvp_data;
55 else if (dev == ctx->fsinfo.fs_logdev)
56 return vs->rvp_log;
57 else if (dev == ctx->fsinfo.fs_rtdev)
58 return vs->rvp_realtime;
59 abort();
60 }
61
62 /* Find the device major/minor for a given file descriptor. */
63 static dev_t
64 xfs_disk_to_dev(
65 struct scrub_ctx *ctx,
66 struct disk *disk)
67 {
68 if (disk == ctx->datadev)
69 return ctx->fsinfo.fs_datadev;
70 else if (disk == ctx->logdev)
71 return ctx->fsinfo.fs_logdev;
72 else if (disk == ctx->rtdev)
73 return ctx->fsinfo.fs_rtdev;
74 abort();
75 }
76
/* Pairs a special owner code with a human-readable description of it. */
struct owner_decode {
	uint64_t	owner;	/* owner code to match */
	const char	*descr;	/* description; NULL ends a table */
};
81
82 static const struct owner_decode special_owners[] = {
83 {XFS_FMR_OWN_FREE, "free space"},
84 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
85 {XFS_FMR_OWN_FS, "static FS metadata"},
86 {XFS_FMR_OWN_LOG, "journalling log"},
87 {XFS_FMR_OWN_AG, "per-AG metadata"},
88 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
89 {XFS_FMR_OWN_INODES, "inodes"},
90 {XFS_FMR_OWN_REFC, "refcount btree"},
91 {XFS_FMR_OWN_COW, "CoW staging"},
92 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
93 {0, NULL},
94 };
95
96 /* Decode a special owner. */
97 static const char *
98 xfs_decode_special_owner(
99 uint64_t owner)
100 {
101 const struct owner_decode *od = special_owners;
102
103 while (od->descr) {
104 if (od->owner == owner)
105 return od->descr;
106 od++;
107 }
108
109 return NULL;
110 }
111
112 /* Routines to translate bad physical extents into file paths and offsets. */
113
114 /* Report if this extent overlaps a bad region. */
115 static bool
116 xfs_report_verify_inode_bmap(
117 struct scrub_ctx *ctx,
118 const char *descr,
119 int fd,
120 int whichfork,
121 struct fsxattr *fsx,
122 struct xfs_bmap *bmap,
123 void *arg)
124 {
125 struct media_verify_state *vs = arg;
126 struct bitmap *bmp;
127
128 /* Only report errors for real extents. */
129 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
130 return true;
131
132 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
133 bmp = vs->r_bad;
134 else
135 bmp = vs->d_bad;
136
137 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
138 return true;
139
140 str_error(ctx, descr,
141 _("offset %llu failed read verification."), bmap->bm_offset);
142 return true;
143 }
144
145 /* Iterate the extent mappings of a file to report errors. */
146 static bool
147 xfs_report_verify_fd(
148 struct scrub_ctx *ctx,
149 const char *descr,
150 int fd,
151 void *arg)
152 {
153 struct xfs_bmap key = {0};
154 bool moveon;
155
156 /* data fork */
157 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
158 xfs_report_verify_inode_bmap, arg);
159 if (!moveon)
160 return false;
161
162 /* attr fork */
163 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
164 xfs_report_verify_inode_bmap, arg);
165 if (!moveon)
166 return false;
167 return true;
168 }
169
170 /* Report read verify errors in unlinked (but still open) files. */
171 static int
172 xfs_report_verify_inode(
173 struct scrub_ctx *ctx,
174 struct xfs_handle *handle,
175 struct xfs_bulkstat *bstat,
176 void *arg)
177 {
178 char descr[DESCR_BUFSZ];
179 bool moveon;
180 int fd;
181 int error;
182
183 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
184 (uint64_t)bstat->bs_ino);
185
186 /* Ignore linked files and things we can't open. */
187 if (bstat->bs_nlink != 0)
188 return 0;
189 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
190 return 0;
191
192 /* Try to open the inode. */
193 fd = xfs_open_handle(handle);
194 if (fd < 0) {
195 error = errno;
196 if (error == ESTALE)
197 return error;
198
199 str_info(ctx, descr,
200 _("Disappeared during read error reporting."));
201 return error;
202 }
203
204 /* Go find the badness. */
205 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
206 error = close(fd);
207 if (error)
208 str_errno(ctx, descr);
209
210 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
211 }
212
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list, using the directory's path
 * as the description.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			keep_going;

	keep_going = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return keep_going;
}
223
224 /*
225 * Scan the inode associated with a directory entry for matches with
226 * the read verify error list.
227 */
228 static bool
229 xfs_report_verify_dirent(
230 struct scrub_ctx *ctx,
231 const char *path,
232 int dir_fd,
233 struct dirent *dirent,
234 struct stat *sb,
235 void *arg)
236 {
237 bool moveon;
238 int fd;
239 int error;
240
241 /* Ignore things we can't open. */
242 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
243 return true;
244
245 /* Ignore . and .. */
246 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
247 return true;
248
249 /*
250 * If we were given a dirent, open the associated file under
251 * dir_fd for badblocks scanning. If dirent is NULL, then it's
252 * the directory itself we want to scan.
253 */
254 fd = openat(dir_fd, dirent->d_name,
255 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
256 if (fd < 0)
257 return true;
258
259 /* Go find the badness. */
260 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
261 if (moveon)
262 goto out;
263
264 out:
265 error = close(fd);
266 if (error)
267 str_errno(ctx, path);
268 return moveon;
269 }
270
271 /* Given bad extent lists for the data & rtdev, find bad files. */
272 static bool
273 xfs_report_verify_errors(
274 struct scrub_ctx *ctx,
275 struct media_verify_state *vs)
276 {
277 bool moveon;
278
279 /* Scan the directory tree to get file paths. */
280 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
281 xfs_report_verify_dirent, vs);
282 if (!moveon)
283 return false;
284
285 /* Scan for unlinked files. */
286 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, vs);
287 }
288
289 /* Report an IO error resulting from read-verify based off getfsmap. */
290 static bool
291 xfs_check_rmap_error_report(
292 struct scrub_ctx *ctx,
293 const char *descr,
294 struct fsmap *map,
295 void *arg)
296 {
297 const char *type;
298 char buf[32];
299 uint64_t err_physical = *(uint64_t *)arg;
300 uint64_t err_off;
301
302 if (err_physical > map->fmr_physical)
303 err_off = err_physical - map->fmr_physical;
304 else
305 err_off = 0;
306
307 snprintf(buf, 32, _("disk offset %"PRIu64),
308 (uint64_t)BTOBB(map->fmr_physical + err_off));
309
310 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
311 type = xfs_decode_special_owner(map->fmr_owner);
312 str_error(ctx, buf,
313 _("%s failed read verification."),
314 type);
315 }
316
317 /*
318 * XXX: If we had a getparent() call we could report IO errors
319 * efficiently. Until then, we'll have to scan the dir tree
320 * to find the bad file's pathname.
321 */
322
323 return true;
324 }
325
326 /*
327 * Remember a read error for later, and see if rmap will tell us about the
328 * owner ahead of time.
329 */
330 static void
331 xfs_check_rmap_ioerr(
332 struct scrub_ctx *ctx,
333 struct disk *disk,
334 uint64_t start,
335 uint64_t length,
336 int error,
337 void *arg)
338 {
339 struct fsmap keys[2];
340 char descr[DESCR_BUFSZ];
341 struct media_verify_state *vs = arg;
342 struct bitmap *tree;
343 dev_t dev;
344
345 dev = xfs_disk_to_dev(ctx, disk);
346
347 /*
348 * If we don't have parent pointers, save the bad extent for
349 * later rescanning.
350 */
351 if (dev == ctx->fsinfo.fs_datadev)
352 tree = vs->d_bad;
353 else if (dev == ctx->fsinfo.fs_rtdev)
354 tree = vs->r_bad;
355 else
356 tree = NULL;
357 if (tree) {
358 errno = -bitmap_set(tree, start, length);
359 if (errno)
360 str_errno(ctx, ctx->mntpoint);
361 }
362
363 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
364 major(dev), minor(dev), start, length);
365
366 /* Go figure out which blocks are bad from the fsmap. */
367 memset(keys, 0, sizeof(struct fsmap) * 2);
368 keys->fmr_device = dev;
369 keys->fmr_physical = start;
370 (keys + 1)->fmr_device = dev;
371 (keys + 1)->fmr_physical = start + length - 1;
372 (keys + 1)->fmr_owner = ULLONG_MAX;
373 (keys + 1)->fmr_offset = ULLONG_MAX;
374 (keys + 1)->fmr_flags = UINT_MAX;
375 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
376 &start);
377 }
378
379 /* Schedule a read-verify of a (data block) extent. */
380 static bool
381 xfs_check_rmap(
382 struct scrub_ctx *ctx,
383 const char *descr,
384 struct fsmap *map,
385 void *arg)
386 {
387 struct media_verify_state *vs = arg;
388 struct read_verify_pool *rvp;
389
390 rvp = xfs_dev_to_pool(ctx, vs, map->fmr_device);
391
392 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
393 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
394 major(map->fmr_device), minor(map->fmr_device),
395 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
396 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
397 map->fmr_flags);
398
399 /* "Unknown" extents should be verified; they could be data. */
400 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
401 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
402 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
403
404 /*
405 * We only care about read-verifying data extents that have been
406 * written to disk. This means we can skip "special" owners
407 * (metadata), xattr blocks, unwritten extents, and extent maps.
408 * These should all get checked elsewhere in the scrubber.
409 */
410 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
411 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
412 goto out;
413
414 /* XXX: Filter out directory data blocks. */
415
416 /* Schedule the read verify command for (eventual) running. */
417 read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, vs);
418
419 out:
420 /* Is this the last extent? Fire off the read. */
421 if (map->fmr_flags & FMR_OF_LAST)
422 read_verify_force_io(rvp);
423
424 return true;
425 }
426
/*
 * Flush a read-verify pool, collect its byte count, and destroy it.
 *
 * Returns the value reported by read_verify_bytes(), or 0 if @rvp is NULL.
 * The pool must not be used again after this call.
 */
static uint64_t
clean_pool(
	struct read_verify_pool	*rvp)
{
	uint64_t		bytes_checked = 0;

	if (rvp != NULL) {
		read_verify_pool_flush(rvp);
		/* Read the counter before the pool goes away. */
		bytes_checked = read_verify_bytes(rvp);
		read_verify_pool_destroy(rvp);
	}

	return bytes_checked;
}
442
443 /*
444 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
445 * do data checksums, we trust that the underlying storage will pass back
446 * an IO error if it can't retrieve whatever we previously stored there.
447 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
448 * scan the extent maps of the entire fs tree to figure (and the unlinked
449 * inodes) out which files are now broken.
450 */
451 bool
452 xfs_scan_blocks(
453 struct scrub_ctx *ctx)
454 {
455 struct media_verify_state vs = { NULL };
456 bool moveon = false;
457
458 errno = -bitmap_init(&vs.d_bad);
459 if (errno) {
460 str_errno(ctx, ctx->mntpoint);
461 goto out;
462 }
463
464 errno = -bitmap_init(&vs.r_bad);
465 if (errno) {
466 str_errno(ctx, ctx->mntpoint);
467 goto out_dbad;
468 }
469
470 vs.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
471 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
472 scrub_nproc(ctx));
473 if (!vs.rvp_data) {
474 str_info(ctx, ctx->mntpoint,
475 _("Could not create data device media verifier."));
476 goto out_rbad;
477 }
478 if (ctx->logdev) {
479 vs.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
480 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
481 scrub_nproc(ctx));
482 if (!vs.rvp_log) {
483 str_info(ctx, ctx->mntpoint,
484 _("Could not create log device media verifier."));
485 goto out_datapool;
486 }
487 }
488 if (ctx->rtdev) {
489 vs.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
490 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
491 scrub_nproc(ctx));
492 if (!vs.rvp_realtime) {
493 str_info(ctx, ctx->mntpoint,
494 _("Could not create realtime device media verifier."));
495 goto out_logpool;
496 }
497 }
498 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
499 if (!moveon)
500 goto out_rtpool;
501 ctx->bytes_checked += clean_pool(vs.rvp_data);
502 ctx->bytes_checked += clean_pool(vs.rvp_log);
503 ctx->bytes_checked += clean_pool(vs.rvp_realtime);
504
505 /* Scan the whole dir tree to see what matches the bad extents. */
506 if (!bitmap_empty(vs.d_bad) || !bitmap_empty(vs.r_bad))
507 moveon = xfs_report_verify_errors(ctx, &vs);
508
509 bitmap_free(&vs.r_bad);
510 bitmap_free(&vs.d_bad);
511 return moveon;
512
513 out_rtpool:
514 if (vs.rvp_realtime) {
515 read_verify_pool_flush(vs.rvp_realtime);
516 read_verify_pool_destroy(vs.rvp_realtime);
517 }
518 out_logpool:
519 if (vs.rvp_log) {
520 read_verify_pool_flush(vs.rvp_log);
521 read_verify_pool_destroy(vs.rvp_log);
522 }
523 out_datapool:
524 read_verify_pool_flush(vs.rvp_data);
525 read_verify_pool_destroy(vs.rvp_data);
526 out_rbad:
527 bitmap_free(&vs.r_bad);
528 out_dbad:
529 bitmap_free(&vs.d_bad);
530 out:
531 return moveon;
532 }
533
534 /* Estimate how much work we're going to do. */
535 bool
536 xfs_estimate_verify_work(
537 struct scrub_ctx *ctx,
538 uint64_t *items,
539 unsigned int *nr_threads,
540 int *rshift)
541 {
542 unsigned long long d_blocks;
543 unsigned long long d_bfree;
544 unsigned long long r_blocks;
545 unsigned long long r_bfree;
546 unsigned long long f_files;
547 unsigned long long f_free;
548 bool moveon;
549
550 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
551 &r_blocks, &r_bfree, &f_files, &f_free);
552 if (!moveon)
553 return moveon;
554
555 *items = cvt_off_fsb_to_b(&ctx->mnt,
556 (d_blocks - d_bfree) + (r_blocks - r_bfree));
557 *nr_threads = disk_heads(ctx->datadev);
558 *rshift = 20;
559 return moveon;
560 }