]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/phase6.c
xfsprogs: Release v6.10.1
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
CommitLineData
8d318d62 1// SPDX-License-Identifier: GPL-2.0-or-later
b364a9c0 2/*
52520522 3 * Copyright (C) 2018-2024 Oracle. All Rights Reserved.
8d318d62 4 * Author: Darrick J. Wong <djwong@kernel.org>
b364a9c0 5 */
a440f877 6#include "xfs.h"
b364a9c0 7#include <stdint.h>
b364a9c0
DW
8#include <dirent.h>
9#include <sys/statvfs.h>
9b72515a 10#include <linux/fsmap.h>
b364a9c0 11#include "handle.h"
42b4c8e8 12#include "libfrog/paths.h"
56598728 13#include "libfrog/workqueue.h"
b364a9c0
DW
14#include "xfs_scrub.h"
15#include "common.h"
a58400ed 16#include "libfrog/bitmap.h"
b364a9c0
DW
17#include "disk.h"
18#include "filemap.h"
ed60d210 19#include "fscounters.h"
b364a9c0
DW
20#include "inodes.h"
21#include "read_verify.h"
22#include "spacemap.h"
23#include "vfs.h"
4d0ce76d 24#include "common.h"
9b5d1349 25#include "libfrog/bulkstat.h"
b364a9c0
DW
26
27/*
28 * Phase 6: Verify data file integrity.
29 *
30 * Identify potential data block extents with GETFSMAP, then feed those
31 * extents to the read-verify pool to get the verify commands batched,
32 * issued, and (if there are problems) reported back to us. If there
33 * are errors, we'll record the bad regions and (if available) use rmap
34 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
35 * whole directory tree looking for files that overlap the bad regions
36 * and report the paths of the now corrupt files.
37 */
38
f1bb1696
DW
39/* Verify disk blocks with GETFSMAP */
40
/*
 * State shared by all stages of the media scan: one read-verify pool per
 * device being verified, plus the bitmaps that accumulate the bad ranges
 * found on the data and realtime devices.
 */
struct media_verify_state {
	struct read_verify_pool	*rvp_data;	/* data device verifier */
	struct read_verify_pool	*rvp_log;	/* log device verifier, if any */
	struct read_verify_pool	*rvp_realtime;	/* rt device verifier, if any */
	struct bitmap		*d_bad;		/* bad data dev ranges, bytes */
	struct bitmap		*r_bad;		/* bad rt dev ranges, bytes */
};
48
b364a9c0 49/* Find the fd for a given device identifier. */
f1bb1696 50static struct read_verify_pool *
af9eb208 51dev_to_pool(
f1bb1696 52 struct scrub_ctx *ctx,
557f98d7 53 struct media_verify_state *vs,
f1bb1696 54 dev_t dev)
b364a9c0
DW
55{
56 if (dev == ctx->fsinfo.fs_datadev)
557f98d7 57 return vs->rvp_data;
b364a9c0 58 else if (dev == ctx->fsinfo.fs_logdev)
557f98d7 59 return vs->rvp_log;
b364a9c0 60 else if (dev == ctx->fsinfo.fs_rtdev)
557f98d7 61 return vs->rvp_realtime;
b364a9c0
DW
62 abort();
63}
64
65/* Find the device major/minor for a given file descriptor. */
66static dev_t
af9eb208 67disk_to_dev(
b364a9c0
DW
68 struct scrub_ctx *ctx,
69 struct disk *disk)
70{
71 if (disk == ctx->datadev)
72 return ctx->fsinfo.fs_datadev;
73 else if (disk == ctx->logdev)
74 return ctx->fsinfo.fs_logdev;
75 else if (disk == ctx->rtdev)
76 return ctx->fsinfo.fs_rtdev;
77 abort();
78}
79
c9b349bd
DW
80/* Find the incore bad blocks bitmap for a given disk. */
81static struct bitmap *
82bitmap_for_disk(
83 struct scrub_ctx *ctx,
84 struct disk *disk,
85 struct media_verify_state *vs)
86{
af9eb208 87 dev_t dev = disk_to_dev(ctx, disk);
c9b349bd
DW
88
89 if (dev == ctx->fsinfo.fs_datadev)
90 return vs->d_bad;
91 else if (dev == ctx->fsinfo.fs_rtdev)
92 return vs->r_bad;
93 return NULL;
94}
95
/* Context handed to report_ioerr() while walking one disk's bad ranges. */
struct disk_ioerr_report {
	struct scrub_ctx	*ctx;	/* scrub context */
	struct disk		*disk;	/* disk whose errors are being reported */
};
100
b364a9c0
DW
/* One row of the special-owner lookup table. */
struct owner_decode {
	uint64_t	owner;	/* XFS_FMR_OWN_* code */
	const char	*descr;	/* human-readable name; NULL ends the table */
};
105
106static const struct owner_decode special_owners[] = {
107 {XFS_FMR_OWN_FREE, "free space"},
108 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
109 {XFS_FMR_OWN_FS, "static FS metadata"},
110 {XFS_FMR_OWN_LOG, "journalling log"},
111 {XFS_FMR_OWN_AG, "per-AG metadata"},
112 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
113 {XFS_FMR_OWN_INODES, "inodes"},
114 {XFS_FMR_OWN_REFC, "refcount btree"},
115 {XFS_FMR_OWN_COW, "CoW staging"},
116 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
117 {0, NULL},
118};
119
120/* Decode a special owner. */
121static const char *
af9eb208 122decode_special_owner(
b364a9c0
DW
123 uint64_t owner)
124{
125 const struct owner_decode *od = special_owners;
126
127 while (od->descr) {
128 if (od->owner == owner)
129 return od->descr;
130 od++;
131 }
132
133 return NULL;
134}
135
136/* Routines to translate bad physical extents into file paths and offsets. */
137
/* Context passed through the file mapping walk down to report_badfile(). */
struct badfile_report {
	struct scrub_ctx		*ctx;	/* scrub context */
	const char			*descr;	/* description of the file */
	struct media_verify_state	*vs;	/* bad block bitmaps */
	struct file_bmap		*bmap;	/* mapping being examined */
};
144
145/* Report on bad extents found during a media scan. */
146static int
147report_badfile(
148 uint64_t start,
149 uint64_t length,
150 void *arg)
151{
152 struct badfile_report *br = arg;
153 unsigned long long bad_offset;
154 unsigned long long bad_length;
155
156 /* Clamp the bad region to the file mapping. */
157 if (start < br->bmap->bm_physical) {
158 length -= br->bmap->bm_physical - start;
159 start = br->bmap->bm_physical;
160 }
161 length = min(length, br->bmap->bm_length);
162
163 /* Figure out how far into the bmap is the bad mapping and report it. */
164 bad_offset = start - br->bmap->bm_physical;
165 bad_length = min(start + length,
166 br->bmap->bm_physical + br->bmap->bm_length) - start;
167
49e05cb0 168 str_unfixable_error(br->ctx, br->descr,
ed953d26
DW
169_("media error at data offset %llu length %llu."),
170 br->bmap->bm_offset + bad_offset, bad_length);
171 return 0;
172}
173
b364a9c0 174/* Report if this extent overlaps a bad region. */
73ce9669 175static int
663e02a0 176report_data_loss(
b364a9c0 177 struct scrub_ctx *ctx,
b364a9c0
DW
178 int fd,
179 int whichfork,
180 struct fsxattr *fsx,
73ce9669 181 struct file_bmap *bmap,
b364a9c0
DW
182 void *arg)
183{
73ce9669
DW
184 struct badfile_report *br = arg;
185 struct media_verify_state *vs = br->vs;
b364a9c0 186 struct bitmap *bmp;
73ce9669
DW
187
188 br->bmap = bmap;
b364a9c0
DW
189
190 /* Only report errors for real extents. */
191 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
73ce9669 192 return 0;
b364a9c0
DW
193
194 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
ed5f9cc7 195 bmp = vs->r_bad;
b364a9c0 196 else
ed5f9cc7 197 bmp = vs->d_bad;
b364a9c0 198
93d69bc7 199 return -bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length,
73ce9669 200 report_badfile, br);
b364a9c0
DW
201}
202
663e02a0 203/* Report if the extended attribute data overlaps a bad region. */
73ce9669 204static int
663e02a0
DW
205report_attr_loss(
206 struct scrub_ctx *ctx,
663e02a0
DW
207 int fd,
208 int whichfork,
209 struct fsxattr *fsx,
73ce9669 210 struct file_bmap *bmap,
663e02a0
DW
211 void *arg)
212{
73ce9669
DW
213 struct badfile_report *br = arg;
214 struct media_verify_state *vs = br->vs;
663e02a0
DW
215 struct bitmap *bmp = vs->d_bad;
216
217 /* Complain about attr fork extents that don't look right. */
218 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
73ce9669 219 str_info(ctx, br->descr,
663e02a0 220_("found unexpected unwritten/delalloc attr fork extent."));
73ce9669 221 return 0;
663e02a0
DW
222 }
223
224 if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
73ce9669 225 str_info(ctx, br->descr,
663e02a0 226_("found unexpected realtime attr fork extent."));
73ce9669 227 return 0;
663e02a0
DW
228 }
229
230 if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
73ce9669 231 str_corrupt(ctx, br->descr,
663e02a0
DW
232_("media error in extended attribute data."));
233
73ce9669 234 return 0;
663e02a0
DW
235}
236
b364a9c0 237/* Iterate the extent mappings of a file to report errors. */
af9eb208
DW
238static int
239report_fd_loss(
b364a9c0
DW
240 struct scrub_ctx *ctx,
241 const char *descr,
242 int fd,
243 void *arg)
244{
73ce9669
DW
245 struct badfile_report br = {
246 .ctx = ctx,
247 .vs = arg,
248 .descr = descr,
249 };
250 struct file_bmap key = {0};
251 int ret;
b364a9c0
DW
252
253 /* data fork */
73ce9669
DW
254 ret = scrub_iterate_filemaps(ctx, fd, XFS_DATA_FORK, &key,
255 report_data_loss, &br);
256 if (ret) {
257 str_liberror(ctx, ret, descr);
af9eb208 258 return ret;
73ce9669 259 }
b364a9c0
DW
260
261 /* attr fork */
73ce9669
DW
262 ret = scrub_iterate_filemaps(ctx, fd, XFS_ATTR_FORK, &key,
263 report_attr_loss, &br);
264 if (ret) {
265 str_liberror(ctx, ret, descr);
af9eb208 266 return ret;
73ce9669 267 }
af9eb208
DW
268
269 return 0;
b364a9c0
DW
270}
271
272/* Report read verify errors in unlinked (but still open) files. */
273static int
af9eb208 274report_inode_loss(
b364a9c0
DW
275 struct scrub_ctx *ctx,
276 struct xfs_handle *handle,
4cca629d 277 struct xfs_bulkstat *bstat,
b364a9c0
DW
278 void *arg)
279{
280 char descr[DESCR_BUFSZ];
b364a9c0 281 int fd;
af9eb208 282 int error, err2;
b364a9c0 283
b364a9c0
DW
284 /* Ignore linked files and things we can't open. */
285 if (bstat->bs_nlink != 0)
286 return 0;
287 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
288 return 0;
289
15589f0a
DW
290 scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
291 bstat->bs_ino, bstat->bs_gen, _("(unlinked)"));
292
b364a9c0 293 /* Try to open the inode. */
59f79e0a 294 fd = scrub_open_handle(handle);
b364a9c0 295 if (fd < 0) {
4d0ce76d
DW
296 /* Handle is stale, try again. */
297 if (errno == ESTALE)
298 return ESTALE;
b364a9c0 299
4d0ce76d
DW
300 str_error(ctx, descr,
301 _("Could not open to report read errors: %s."),
302 strerror(errno));
303 return 0;
b364a9c0
DW
304 }
305
306 /* Go find the badness. */
af9eb208
DW
307 error = report_fd_loss(ctx, descr, fd, arg);
308
309 err2 = close(fd);
310 if (err2)
6c05cc5d 311 str_errno(ctx, descr);
b364a9c0 312
af9eb208 313 return error;
b364a9c0
DW
314}
315
/*
 * Directory callback for the tree walk: check the directory's own mappings
 * against the recorded bad ranges, using @path as its description.
 */
static int
report_dir_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	int			error;

	error = report_fd_loss(ctx, path, dir_fd, arg);
	return error;
}
326
327/*
328 * Scan the inode associated with a directory entry for matches with
329 * the read verify error list.
330 */
f544ec31 331static int
af9eb208 332report_dirent_loss(
b364a9c0
DW
333 struct scrub_ctx *ctx,
334 const char *path,
335 int dir_fd,
336 struct dirent *dirent,
337 struct stat *sb,
338 void *arg)
339{
b364a9c0 340 int fd;
af9eb208 341 int error, err2;
b364a9c0
DW
342
343 /* Ignore things we can't open. */
344 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
f544ec31 345 return 0;
b364a9c0
DW
346
347 /* Ignore . and .. */
348 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
f544ec31 349 return 0;
b364a9c0
DW
350
351 /*
352 * If we were given a dirent, open the associated file under
353 * dir_fd for badblocks scanning. If dirent is NULL, then it's
354 * the directory itself we want to scan.
355 */
356 fd = openat(dir_fd, dirent->d_name,
357 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
f544ec31 358 if (fd < 0) {
4d0ce76d
DW
359 char descr[PATH_MAX + 1];
360
f544ec31
DW
361 if (errno == ENOENT)
362 return 0;
4d0ce76d
DW
363
364 snprintf(descr, PATH_MAX, "%s/%s", path, dirent->d_name);
365 descr[PATH_MAX] = 0;
366
367 str_error(ctx, descr,
368 _("Could not open to report read errors: %s."),
369 strerror(errno));
370 return 0;
f544ec31 371 }
b364a9c0
DW
372
373 /* Go find the badness. */
af9eb208 374 error = report_fd_loss(ctx, path, fd, arg);
b364a9c0 375
af9eb208
DW
376 err2 = close(fd);
377 if (err2)
6c05cc5d 378 str_errno(ctx, path);
af9eb208
DW
379 if (!error && err2)
380 error = err2;
381
382 return error;
b364a9c0
DW
383}
384
9b5d1349
DW
/* The bad range being reported, as handed to report_ioerr_fsmap(). */
struct ioerr_filerange {
	uint64_t	physical;	/* start of the bad range */
	uint64_t	length;		/* length of the bad range */
};
389
390/*
391 * If reverse mapping and parent pointers are enabled, we can map media errors
392 * directly back to a filename and a file position without needing to walk the
393 * directory tree.
394 */
395static inline bool
396can_use_pptrs(
397 const struct scrub_ctx *ctx)
398{
399 return (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT) &&
400 (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT);
401}
402
c9b349bd 403/* Use a fsmap to report metadata lost to a media error. */
7a2eef2b 404static int
c9b349bd 405report_ioerr_fsmap(
b364a9c0 406 struct scrub_ctx *ctx,
b364a9c0
DW
407 struct fsmap *map,
408 void *arg)
409{
410 const char *type;
9b5d1349 411 struct xfs_bulkstat bs = { };
f1f5fd3a 412 char buf[DESCR_BUFSZ];
9b5d1349 413 struct ioerr_filerange *fr = arg;
b364a9c0 414 uint64_t err_off;
9b5d1349 415 int ret;
b364a9c0 416
909c6a54
DW
417 /* Don't care about unwritten extents. */
418 if (map->fmr_flags & FMR_OF_PREALLOC)
7a2eef2b 419 return 0;
909c6a54 420
9b5d1349
DW
421 if (fr->physical > map->fmr_physical)
422 err_off = fr->physical - map->fmr_physical;
b364a9c0
DW
423 else
424 err_off = 0;
425
f1f5fd3a 426 /* Report special owners */
b364a9c0 427 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
f1f5fd3a
DW
428 snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64),
429 (uint64_t)map->fmr_physical + err_off);
af9eb208 430 type = decode_special_owner(map->fmr_owner);
96ac83c8
DW
431 /*
432 * On filesystems that don't store reverse mappings, the
433 * GETFSMAP call returns OWNER_UNKNOWN for allocated space.
434 * We'll have to let the directory tree walker find the file
435 * that lost data.
436 */
437 if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) &&
438 map->fmr_owner == XFS_FMR_OWN_UNKNOWN) {
439 str_info(ctx, buf, _("media error detected."));
440 } else {
441 str_corrupt(ctx, buf, _("media error in %s."), type);
442 }
b364a9c0
DW
443 }
444
9b5d1349
DW
445 if (can_use_pptrs(ctx)) {
446 ret = -xfrog_bulkstat_single(&ctx->mnt, map->fmr_owner, 0, &bs);
447 if (ret)
448 str_liberror(ctx, ret,
449 _("bulkstat for media error report"));
450 }
451
02d0069e
DW
452 /* Report extent maps */
453 if (map->fmr_flags & FMR_OF_EXTENT_MAP) {
454 bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK);
455
456 scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ,
9b5d1349 457 map->fmr_owner, bs.bs_gen, " %s",
02d0069e
DW
458 attr ? _("extended attribute") :
459 _("file data"));
abc2e70d 460 str_corrupt(ctx, buf, _("media error in extent map"));
02d0069e
DW
461 }
462
b364a9c0 463 /*
9b5d1349
DW
464 * If directory parent pointers are available, use that to find the
465 * pathname to a file, and report that path as having lost its
466 * extended attributes, or the precise offset of the lost file data.
b364a9c0 467 */
9b5d1349
DW
468 if (!can_use_pptrs(ctx))
469 return 0;
b364a9c0 470
9b5d1349
DW
471 scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, map->fmr_owner,
472 bs.bs_gen, NULL);
473
474 if (map->fmr_flags & FMR_OF_ATTR_FORK) {
475 str_corrupt(ctx, buf, _("media error in extended attributes"));
476 return 0;
477 }
478
479 str_unfixable_error(ctx, buf,
480 _("media error at data offset %llu length %llu."),
481 err_off, fr->length);
7a2eef2b 482 return 0;
b364a9c0
DW
483}
484
485/*
c9b349bd
DW
486 * For a range of bad blocks, visit each space mapping that overlaps the bad
487 * range so that we can report lost metadata.
b364a9c0 488 */
c9b349bd
DW
489static int
490report_ioerr(
b364a9c0
DW
491 uint64_t start,
492 uint64_t length,
b364a9c0
DW
493 void *arg)
494{
495 struct fsmap keys[2];
9b5d1349
DW
496 struct ioerr_filerange fr = {
497 .physical = start,
498 .length = length,
499 };
c9b349bd 500 struct disk_ioerr_report *dioerr = arg;
b364a9c0 501 dev_t dev;
b364a9c0 502
af9eb208 503 dev = disk_to_dev(dioerr->ctx, dioerr->disk);
b364a9c0 504
b364a9c0
DW
505 /* Go figure out which blocks are bad from the fsmap. */
506 memset(keys, 0, sizeof(struct fsmap) * 2);
507 keys->fmr_device = dev;
508 keys->fmr_physical = start;
509 (keys + 1)->fmr_device = dev;
510 (keys + 1)->fmr_physical = start + length - 1;
511 (keys + 1)->fmr_owner = ULLONG_MAX;
512 (keys + 1)->fmr_offset = ULLONG_MAX;
513 (keys + 1)->fmr_flags = UINT_MAX;
93d69bc7 514 return -scrub_iterate_fsmap(dioerr->ctx, keys, report_ioerr_fsmap,
9b5d1349 515 &fr);
c9b349bd
DW
516}
517
518/* Report all the media errors found on a disk. */
519static int
520report_disk_ioerrs(
521 struct scrub_ctx *ctx,
522 struct disk *disk,
523 struct media_verify_state *vs)
524{
525 struct disk_ioerr_report dioerr = {
526 .ctx = ctx,
527 .disk = disk,
528 };
529 struct bitmap *tree;
530
531 if (!disk)
532 return 0;
533 tree = bitmap_for_disk(ctx, disk, vs);
534 if (!tree)
535 return 0;
93d69bc7 536 return -bitmap_iterate(tree, report_ioerr, &dioerr);
c9b349bd
DW
537}
538
539/* Given bad extent lists for the data & rtdev, find bad files. */
af9eb208 540static int
c9b349bd
DW
541report_all_media_errors(
542 struct scrub_ctx *ctx,
543 struct media_verify_state *vs)
544{
c9b349bd
DW
545 int ret;
546
547 ret = report_disk_ioerrs(ctx, ctx->datadev, vs);
548 if (ret) {
549 str_liberror(ctx, ret, _("walking datadev io errors"));
af9eb208 550 return ret;
c9b349bd
DW
551 }
552
553 ret = report_disk_ioerrs(ctx, ctx->rtdev, vs);
554 if (ret) {
555 str_liberror(ctx, ret, _("walking rtdev io errors"));
af9eb208 556 return ret;
c9b349bd
DW
557 }
558
9b5d1349
DW
559 /*
560 * Scan the directory tree to get file paths if we didn't already use
561 * directory parent pointers to report the loss.
562 */
563 if (!can_use_pptrs(ctx)) {
564 ret = scan_fs_tree(ctx, report_dir_loss, report_dirent_loss,
565 vs);
566 if (ret)
567 return ret;
568 }
c9b349bd
DW
569
570 /* Scan for unlinked files. */
af9eb208 571 return scrub_scan_all_inodes(ctx, report_inode_loss, vs);
b364a9c0
DW
572}
573
574/* Schedule a read-verify of a (data block) extent. */
7a2eef2b
DW
575static int
576check_rmap(
b364a9c0 577 struct scrub_ctx *ctx,
b364a9c0
DW
578 struct fsmap *map,
579 void *arg)
580{
557f98d7 581 struct media_verify_state *vs = arg;
f1bb1696 582 struct read_verify_pool *rvp;
8cab77d3 583 int ret;
f1bb1696 584
af9eb208 585 rvp = dev_to_pool(ctx, vs, map->fmr_device);
b364a9c0
DW
586
587 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
588 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
589 major(map->fmr_device), minor(map->fmr_device),
590 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
591 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
592 map->fmr_flags);
593
594 /* "Unknown" extents should be verified; they could be data. */
595 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
596 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
597 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
598
599 /*
600 * We only care about read-verifying data extents that have been
601 * written to disk. This means we can skip "special" owners
602 * (metadata), xattr blocks, unwritten extents, and extent maps.
603 * These should all get checked elsewhere in the scrubber.
604 */
605 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
606 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
7a2eef2b 607 return 0;
b364a9c0
DW
608
609 /* XXX: Filter out directory data blocks. */
610
611 /* Schedule the read verify command for (eventual) running. */
8cab77d3
DW
612 ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length,
613 vs);
614 if (ret) {
7a2eef2b
DW
615 str_liberror(ctx, ret, _("scheduling media verify command"));
616 return ret;
8cab77d3 617 }
b364a9c0 618
7a2eef2b 619 return 0;
b364a9c0
DW
620}
621
/*
 * Wait for a pool's read/verify actions to finish, add the number of bytes
 * it checked to *bytes_checked, and tear the pool down.
 *
 * A NULL pool is ignored.  The pool is always destroyed before returning,
 * including on every error path, so the caller must not touch it again.
 * Returns 0 on success or the error from forcing, flushing, or querying the
 * pool; *bytes_checked is only updated on success.
 */
static int
clean_pool(
	struct read_verify_pool	*rvp,
	unsigned long long	*bytes_checked)
{
	uint64_t		pool_checked;
	int			ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * Abort before destroying, as the error paths in
		 * phase6_func() do, rather than returning early and leaking
		 * the pool.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
651
c9b349bd
DW
/*
 * Read-verify pool callback: record a media error in the bad block bitmap
 * of the disk it occurred on, so that it can be reported later.
 *
 * A disk that has no bitmap, or a failure to update the bitmap, is reported
 * as a library error.  The @error argument is not examined.
 */
static void
remember_ioerr(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	uint64_t			start,
	uint64_t			length,
	int				error,
	void				*arg)
{
	struct media_verify_state	*vs = arg;
	struct bitmap			*bad = bitmap_for_disk(ctx, disk, vs);
	int				err;

	if (bad == NULL) {
		str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
		return;
	}

	err = -bitmap_set(bad, start, length);
	if (err != 0)
		str_liberror(ctx, err, _("setting bad block bitmap"));
}
676
b364a9c0
DW
677/*
678 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
679 * do data checksums, we trust that the underlying storage will pass back
680 * an IO error if it can't retrieve whatever we previously stored there.
681 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
682 * scan the extent maps of the entire fs tree to figure (and the unlinked
683 * inodes) out which files are now broken.
684 */
af9eb208
DW
685int
686phase6_func(
b364a9c0
DW
687 struct scrub_ctx *ctx)
688{
557f98d7 689 struct media_verify_state vs = { NULL };
af9eb208 690 int ret, ret2, ret3;
b364a9c0 691
93d69bc7 692 ret = -bitmap_alloc(&vs.d_bad);
233fabee
DW
693 if (ret) {
694 str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
af9eb208 695 return ret;
b364a9c0
DW
696 }
697
93d69bc7 698 ret = -bitmap_alloc(&vs.r_bad);
233fabee
DW
699 if (ret) {
700 str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
b364a9c0
DW
701 goto out_dbad;
702 }
703
8cab77d3 704 ret = read_verify_pool_alloc(ctx, ctx->datadev,
c9b349bd 705 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
706 scrub_nproc(ctx), &vs.rvp_data);
707 if (ret) {
708 str_liberror(ctx, ret, _("creating datadev media verifier"));
b364a9c0
DW
709 goto out_rbad;
710 }
f1bb1696 711 if (ctx->logdev) {
8cab77d3 712 ret = read_verify_pool_alloc(ctx, ctx->logdev,
c9b349bd 713 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
714 scrub_nproc(ctx), &vs.rvp_log);
715 if (ret) {
716 str_liberror(ctx, ret,
717 _("creating logdev media verifier"));
f1bb1696
DW
718 goto out_datapool;
719 }
720 }
721 if (ctx->rtdev) {
8cab77d3 722 ret = read_verify_pool_alloc(ctx, ctx->rtdev,
c9b349bd 723 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
724 scrub_nproc(ctx), &vs.rvp_realtime);
725 if (ret) {
726 str_liberror(ctx, ret,
727 _("creating rtdev media verifier"));
f1bb1696
DW
728 goto out_logpool;
729 }
730 }
7a2eef2b 731 ret = scrub_scan_all_spacemaps(ctx, check_rmap, &vs);
af9eb208 732 if (ret)
f1bb1696 733 goto out_rtpool;
8cab77d3
DW
734
735 ret = clean_pool(vs.rvp_data, &ctx->bytes_checked);
af9eb208 736 if (ret)
8cab77d3 737 str_liberror(ctx, ret, _("flushing datadev verify pool"));
8cab77d3 738
af9eb208
DW
739 ret2 = clean_pool(vs.rvp_log, &ctx->bytes_checked);
740 if (ret2)
741 str_liberror(ctx, ret2, _("flushing logdev verify pool"));
8cab77d3 742
af9eb208
DW
743 ret3 = clean_pool(vs.rvp_realtime, &ctx->bytes_checked);
744 if (ret3)
745 str_liberror(ctx, ret3, _("flushing rtdev verify pool"));
746
747 /*
748 * If the verify flush didn't work or we found no bad blocks, we're
749 * done! No errors detected.
750 */
751 if (ret || ret2 || ret3)
752 goto out_rbad;
753 if (bitmap_empty(vs.d_bad) && bitmap_empty(vs.r_bad))
754 goto out_rbad;
b364a9c0
DW
755
756 /* Scan the whole dir tree to see what matches the bad extents. */
af9eb208 757 ret = report_all_media_errors(ctx, &vs);
b364a9c0 758
557f98d7
DW
759 bitmap_free(&vs.r_bad);
760 bitmap_free(&vs.d_bad);
af9eb208 761 return ret;
b364a9c0 762
f1bb1696 763out_rtpool:
7668d01d 764 if (vs.rvp_realtime) {
4cd869e5 765 read_verify_pool_abort(vs.rvp_realtime);
557f98d7 766 read_verify_pool_destroy(vs.rvp_realtime);
7668d01d 767 }
f1bb1696 768out_logpool:
7668d01d 769 if (vs.rvp_log) {
4cd869e5 770 read_verify_pool_abort(vs.rvp_log);
557f98d7 771 read_verify_pool_destroy(vs.rvp_log);
7668d01d 772 }
f1bb1696 773out_datapool:
4cd869e5 774 read_verify_pool_abort(vs.rvp_data);
557f98d7 775 read_verify_pool_destroy(vs.rvp_data);
b364a9c0 776out_rbad:
557f98d7 777 bitmap_free(&vs.r_bad);
b364a9c0 778out_dbad:
557f98d7 779 bitmap_free(&vs.d_bad);
af9eb208 780 return ret;
b364a9c0 781}
ed60d210 782
af9eb208
DW
783/* Estimate how much work we're going to do. */
784int
785phase6_estimate(
ed60d210
DW
786 struct scrub_ctx *ctx,
787 uint64_t *items,
788 unsigned int *nr_threads,
789 int *rshift)
790{
791 unsigned long long d_blocks;
792 unsigned long long d_bfree;
793 unsigned long long r_blocks;
794 unsigned long long r_bfree;
0b78ac05 795 unsigned long long dontcare;
934d8d3a 796 int ret;
ed60d210 797
0b78ac05
DW
798 ret = scrub_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, &r_blocks,
799 &r_bfree, &dontcare);
934d8d3a
DW
800 if (ret) {
801 str_liberror(ctx, ret, _("estimating verify work"));
af9eb208 802 return ret;
934d8d3a 803 }
ed60d210 804
a749451c
DW
805 *items = cvt_off_fsb_to_b(&ctx->mnt,
806 (d_blocks - d_bfree) + (r_blocks - r_bfree));
13eedd45
DW
807
808 /*
809 * Each read-verify pool starts a thread pool, and each worker thread
810 * can contribute to the progress counter. Hence we need to set
811 * nr_threads appropriately to handle that many threads.
812 */
ed60d210 813 *nr_threads = disk_heads(ctx->datadev);
13eedd45
DW
814 if (ctx->rtdev)
815 *nr_threads += disk_heads(ctx->rtdev);
816 if (ctx->logdev)
817 *nr_threads += disk_heads(ctx->logdev);
ed60d210 818 *rshift = 20;
af9eb208
DW
819 return 0;
820}