]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/phase6.c
xfs_scrub: remove xfs_ prefixes from structure names
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
b364a9c0
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
b364a9c0 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
b364a9c0 5 */
a440f877 6#include "xfs.h"
b364a9c0 7#include <stdint.h>
b364a9c0
DW
8#include <dirent.h>
9#include <sys/statvfs.h>
b364a9c0
DW
10#include "handle.h"
11#include "path.h"
b364a9c0
DW
12#include "workqueue.h"
13#include "xfs_scrub.h"
14#include "common.h"
15#include "bitmap.h"
16#include "disk.h"
17#include "filemap.h"
ed60d210 18#include "fscounters.h"
b364a9c0
DW
19#include "inodes.h"
20#include "read_verify.h"
21#include "spacemap.h"
22#include "vfs.h"
23
24/*
25 * Phase 6: Verify data file integrity.
26 *
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
34 */
35
f1bb1696
DW
36/* Verify disk blocks with GETFSMAP */
37
/*
 * State shared by the media scan: one read-verify pool per device, plus
 * the bitmaps that collect the regions that failed verification.
 */
struct xfs_verify_extent {
	/* Read-verify pools for the data, log, and realtime devices. */
	struct read_verify_pool	*rvp_data;
	struct read_verify_pool	*rvp_log;
	struct read_verify_pool	*rvp_realtime;

	/* Failed regions recorded for the data and realtime devices. */
	struct bitmap		*d_bad;		/* bytes */
	struct bitmap		*r_bad;		/* bytes */
};
45
b364a9c0 46/* Find the fd for a given device identifier. */
f1bb1696
DW
47static struct read_verify_pool *
48xfs_dev_to_pool(
49 struct scrub_ctx *ctx,
50 struct xfs_verify_extent *ve,
51 dev_t dev)
b364a9c0
DW
52{
53 if (dev == ctx->fsinfo.fs_datadev)
f1bb1696 54 return ve->rvp_data;
b364a9c0 55 else if (dev == ctx->fsinfo.fs_logdev)
f1bb1696 56 return ve->rvp_log;
b364a9c0 57 else if (dev == ctx->fsinfo.fs_rtdev)
f1bb1696 58 return ve->rvp_realtime;
b364a9c0
DW
59 abort();
60}
61
62/* Find the device major/minor for a given file descriptor. */
63static dev_t
64xfs_disk_to_dev(
65 struct scrub_ctx *ctx,
66 struct disk *disk)
67{
68 if (disk == ctx->datadev)
69 return ctx->fsinfo.fs_datadev;
70 else if (disk == ctx->logdev)
71 return ctx->fsinfo.fs_logdev;
72 else if (disk == ctx->rtdev)
73 return ctx->fsinfo.fs_rtdev;
74 abort();
75}
76
/* One row of the special-owner lookup table. */
struct owner_decode {
	uint64_t	owner;		/* special owner code to match */
	const char	*descr;		/* description; NULL ends the table */
};
81
82static const struct owner_decode special_owners[] = {
83 {XFS_FMR_OWN_FREE, "free space"},
84 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
85 {XFS_FMR_OWN_FS, "static FS metadata"},
86 {XFS_FMR_OWN_LOG, "journalling log"},
87 {XFS_FMR_OWN_AG, "per-AG metadata"},
88 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
89 {XFS_FMR_OWN_INODES, "inodes"},
90 {XFS_FMR_OWN_REFC, "refcount btree"},
91 {XFS_FMR_OWN_COW, "CoW staging"},
92 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
93 {0, NULL},
94};
95
96/* Decode a special owner. */
97static const char *
98xfs_decode_special_owner(
99 uint64_t owner)
100{
101 const struct owner_decode *od = special_owners;
102
103 while (od->descr) {
104 if (od->owner == owner)
105 return od->descr;
106 od++;
107 }
108
109 return NULL;
110}
111
112/* Routines to translate bad physical extents into file paths and offsets. */
113
/* Bad-extent lists handed to the file-mapping walkers. */
struct xfs_verify_error_info {
	struct bitmap	*d_bad;		/* data device failures, in bytes */
	struct bitmap	*r_bad;		/* realtime device failures, in bytes */
};
118
119/* Report if this extent overlaps a bad region. */
120static bool
121xfs_report_verify_inode_bmap(
122 struct scrub_ctx *ctx,
123 const char *descr,
124 int fd,
125 int whichfork,
126 struct fsxattr *fsx,
127 struct xfs_bmap *bmap,
128 void *arg)
129{
130 struct xfs_verify_error_info *vei = arg;
131 struct bitmap *bmp;
132
133 /* Only report errors for real extents. */
134 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
135 return true;
136
137 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
138 bmp = vei->r_bad;
139 else
140 bmp = vei->d_bad;
141
142 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
143 return true;
144
145 str_error(ctx, descr,
146_("offset %llu failed read verification."), bmap->bm_offset);
147 return true;
148}
149
150/* Iterate the extent mappings of a file to report errors. */
151static bool
152xfs_report_verify_fd(
153 struct scrub_ctx *ctx,
154 const char *descr,
155 int fd,
156 void *arg)
157{
158 struct xfs_bmap key = {0};
159 bool moveon;
160
161 /* data fork */
162 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
163 xfs_report_verify_inode_bmap, arg);
164 if (!moveon)
165 return false;
166
167 /* attr fork */
168 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
169 xfs_report_verify_inode_bmap, arg);
170 if (!moveon)
171 return false;
172 return true;
173}
174
175/* Report read verify errors in unlinked (but still open) files. */
176static int
177xfs_report_verify_inode(
178 struct scrub_ctx *ctx,
179 struct xfs_handle *handle,
180 struct xfs_bstat *bstat,
181 void *arg)
182{
183 char descr[DESCR_BUFSZ];
b364a9c0
DW
184 bool moveon;
185 int fd;
186 int error;
187
188 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
189 (uint64_t)bstat->bs_ino);
190
191 /* Ignore linked files and things we can't open. */
192 if (bstat->bs_nlink != 0)
193 return 0;
194 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
195 return 0;
196
197 /* Try to open the inode. */
198 fd = xfs_open_handle(handle);
199 if (fd < 0) {
200 error = errno;
201 if (error == ESTALE)
202 return error;
203
bb5dbd06
DW
204 str_info(ctx, descr,
205_("Disappeared during read error reporting."));
b364a9c0
DW
206 return error;
207 }
208
209 /* Go find the badness. */
210 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
6c05cc5d
DW
211 error = close(fd);
212 if (error)
213 str_errno(ctx, descr);
b364a9c0
DW
214
215 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
216}
217
/*
 * Directory callback for the tree walk: check the mappings of the
 * directory itself against the read verify error list.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			keep_going;

	keep_going = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return keep_going;
}
228
229/*
230 * Scan the inode associated with a directory entry for matches with
231 * the read verify error list.
232 */
233static bool
234xfs_report_verify_dirent(
235 struct scrub_ctx *ctx,
236 const char *path,
237 int dir_fd,
238 struct dirent *dirent,
239 struct stat *sb,
240 void *arg)
241{
242 bool moveon;
243 int fd;
6c05cc5d 244 int error;
b364a9c0
DW
245
246 /* Ignore things we can't open. */
247 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
248 return true;
249
250 /* Ignore . and .. */
251 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
252 return true;
253
254 /*
255 * If we were given a dirent, open the associated file under
256 * dir_fd for badblocks scanning. If dirent is NULL, then it's
257 * the directory itself we want to scan.
258 */
259 fd = openat(dir_fd, dirent->d_name,
260 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
261 if (fd < 0)
262 return true;
263
264 /* Go find the badness. */
265 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
266 if (moveon)
267 goto out;
268
269out:
6c05cc5d
DW
270 error = close(fd);
271 if (error)
272 str_errno(ctx, path);
b364a9c0
DW
273 return moveon;
274}
275
276/* Given bad extent lists for the data & rtdev, find bad files. */
277static bool
278xfs_report_verify_errors(
279 struct scrub_ctx *ctx,
280 struct bitmap *d_bad,
281 struct bitmap *r_bad)
282{
283 struct xfs_verify_error_info vei;
284 bool moveon;
285
286 vei.d_bad = d_bad;
287 vei.r_bad = r_bad;
288
289 /* Scan the directory tree to get file paths. */
290 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
291 xfs_report_verify_dirent, &vei);
292 if (!moveon)
293 return false;
294
295 /* Scan for unlinked files. */
296 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
297}
298
b364a9c0
DW
299/* Report an IO error resulting from read-verify based off getfsmap. */
300static bool
301xfs_check_rmap_error_report(
302 struct scrub_ctx *ctx,
303 const char *descr,
304 struct fsmap *map,
305 void *arg)
306{
307 const char *type;
308 char buf[32];
309 uint64_t err_physical = *(uint64_t *)arg;
310 uint64_t err_off;
311
312 if (err_physical > map->fmr_physical)
313 err_off = err_physical - map->fmr_physical;
314 else
315 err_off = 0;
316
317 snprintf(buf, 32, _("disk offset %"PRIu64),
318 (uint64_t)BTOBB(map->fmr_physical + err_off));
319
320 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
321 type = xfs_decode_special_owner(map->fmr_owner);
322 str_error(ctx, buf,
323_("%s failed read verification."),
324 type);
325 }
326
327 /*
328 * XXX: If we had a getparent() call we could report IO errors
329 * efficiently. Until then, we'll have to scan the dir tree
330 * to find the bad file's pathname.
331 */
332
333 return true;
334}
335
336/*
337 * Remember a read error for later, and see if rmap will tell us about the
338 * owner ahead of time.
339 */
340static void
341xfs_check_rmap_ioerr(
342 struct scrub_ctx *ctx,
343 struct disk *disk,
344 uint64_t start,
345 uint64_t length,
346 int error,
347 void *arg)
348{
349 struct fsmap keys[2];
350 char descr[DESCR_BUFSZ];
351 struct xfs_verify_extent *ve = arg;
352 struct bitmap *tree;
353 dev_t dev;
354 bool moveon;
355
356 dev = xfs_disk_to_dev(ctx, disk);
357
358 /*
359 * If we don't have parent pointers, save the bad extent for
360 * later rescanning.
361 */
362 if (dev == ctx->fsinfo.fs_datadev)
363 tree = ve->d_bad;
364 else if (dev == ctx->fsinfo.fs_rtdev)
365 tree = ve->r_bad;
366 else
367 tree = NULL;
368 if (tree) {
369 moveon = bitmap_set(tree, start, length);
370 if (!moveon)
371 str_errno(ctx, ctx->mntpoint);
372 }
373
374 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
375 major(dev), minor(dev), start, length);
376
377 /* Go figure out which blocks are bad from the fsmap. */
378 memset(keys, 0, sizeof(struct fsmap) * 2);
379 keys->fmr_device = dev;
380 keys->fmr_physical = start;
381 (keys + 1)->fmr_device = dev;
382 (keys + 1)->fmr_physical = start + length - 1;
383 (keys + 1)->fmr_owner = ULLONG_MAX;
384 (keys + 1)->fmr_offset = ULLONG_MAX;
385 (keys + 1)->fmr_flags = UINT_MAX;
386 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
387 &start);
388}
389
390/* Schedule a read-verify of a (data block) extent. */
391static bool
392xfs_check_rmap(
393 struct scrub_ctx *ctx,
394 const char *descr,
395 struct fsmap *map,
396 void *arg)
397{
398 struct xfs_verify_extent *ve = arg;
f1bb1696
DW
399 struct read_verify_pool *rvp;
400
401 rvp = xfs_dev_to_pool(ctx, ve, map->fmr_device);
b364a9c0
DW
402
403 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
404 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
405 major(map->fmr_device), minor(map->fmr_device),
406 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
407 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
408 map->fmr_flags);
409
410 /* "Unknown" extents should be verified; they could be data. */
411 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
412 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
413 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
414
415 /*
416 * We only care about read-verifying data extents that have been
417 * written to disk. This means we can skip "special" owners
418 * (metadata), xattr blocks, unwritten extents, and extent maps.
419 * These should all get checked elsewhere in the scrubber.
420 */
421 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
422 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
423 goto out;
424
425 /* XXX: Filter out directory data blocks. */
426
427 /* Schedule the read verify command for (eventual) running. */
f1bb1696 428 read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, ve);
b364a9c0
DW
429
430out:
431 /* Is this the last extent? Fire off the read. */
432 if (map->fmr_flags & FMR_OF_LAST)
f1bb1696 433 read_verify_force_io(rvp);
b364a9c0
DW
434
435 return true;
436}
437
f1bb1696
DW
/*
 * Flush a read-verify pool, collect its byte count, and destroy it.
 * Returns the byte count, or 0 if there is no pool.
 */
static uint64_t
clean_pool(
	struct read_verify_pool	*rvp)
{
	uint64_t		nr_bytes = 0;

	if (rvp) {
		read_verify_pool_flush(rvp);
		nr_bytes = read_verify_bytes(rvp);
		read_verify_pool_destroy(rvp);
	}

	return nr_bytes;
}
453
b364a9c0
DW
454/*
455 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
456 * do data checksums, we trust that the underlying storage will pass back
457 * an IO error if it can't retrieve whatever we previously stored there.
458 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
459 * scan the extent maps of the entire fs tree to figure (and the unlinked
460 * inodes) out which files are now broken.
461 */
462bool
463xfs_scan_blocks(
464 struct scrub_ctx *ctx)
465{
f1bb1696 466 struct xfs_verify_extent ve = { NULL };
b364a9c0
DW
467 bool moveon;
468
b364a9c0
DW
469 moveon = bitmap_init(&ve.d_bad);
470 if (!moveon) {
471 str_errno(ctx, ctx->mntpoint);
41c08606 472 goto out;
b364a9c0
DW
473 }
474
475 moveon = bitmap_init(&ve.r_bad);
476 if (!moveon) {
477 str_errno(ctx, ctx->mntpoint);
478 goto out_dbad;
479 }
480
f1bb1696
DW
481 ve.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
482 ctx->geo.blocksize, xfs_check_rmap_ioerr,
41c08606 483 scrub_nproc(ctx));
f1bb1696 484 if (!ve.rvp_data) {
b364a9c0 485 moveon = false;
82377bde 486 str_info(ctx, ctx->mntpoint,
f1bb1696 487_("Could not create data device media verifier."));
b364a9c0
DW
488 goto out_rbad;
489 }
f1bb1696
DW
490 if (ctx->logdev) {
491 ve.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
492 ctx->geo.blocksize, xfs_check_rmap_ioerr,
493 scrub_nproc(ctx));
494 if (!ve.rvp_log) {
495 moveon = false;
496 str_info(ctx, ctx->mntpoint,
497 _("Could not create log device media verifier."));
498 goto out_datapool;
499 }
500 }
501 if (ctx->rtdev) {
502 ve.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
503 ctx->geo.blocksize, xfs_check_rmap_ioerr,
504 scrub_nproc(ctx));
505 if (!ve.rvp_realtime) {
506 moveon = false;
507 str_info(ctx, ctx->mntpoint,
508 _("Could not create realtime device media verifier."));
509 goto out_logpool;
510 }
511 }
b364a9c0
DW
512 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
513 if (!moveon)
f1bb1696
DW
514 goto out_rtpool;
515 ctx->bytes_checked += clean_pool(ve.rvp_data);
516 ctx->bytes_checked += clean_pool(ve.rvp_log);
517 ctx->bytes_checked += clean_pool(ve.rvp_realtime);
b364a9c0
DW
518
519 /* Scan the whole dir tree to see what matches the bad extents. */
520 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
521 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
522
523 bitmap_free(&ve.r_bad);
524 bitmap_free(&ve.d_bad);
b364a9c0
DW
525 return moveon;
526
f1bb1696
DW
527out_rtpool:
528 if (ve.rvp_realtime)
529 read_verify_pool_destroy(ve.rvp_realtime);
530out_logpool:
531 if (ve.rvp_log)
532 read_verify_pool_destroy(ve.rvp_log);
533out_datapool:
534 read_verify_pool_destroy(ve.rvp_data);
b364a9c0
DW
535out_rbad:
536 bitmap_free(&ve.r_bad);
537out_dbad:
538 bitmap_free(&ve.d_bad);
41c08606 539out:
b364a9c0
DW
540 return moveon;
541}
ed60d210
DW
542
543/* Estimate how much work we're going to do. */
544bool
545xfs_estimate_verify_work(
546 struct scrub_ctx *ctx,
547 uint64_t *items,
548 unsigned int *nr_threads,
549 int *rshift)
550{
551 unsigned long long d_blocks;
552 unsigned long long d_bfree;
553 unsigned long long r_blocks;
554 unsigned long long r_bfree;
555 unsigned long long f_files;
556 unsigned long long f_free;
557 bool moveon;
558
559 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
560 &r_blocks, &r_bfree, &f_files, &f_free);
561 if (!moveon)
562 return moveon;
563
564 *items = ((d_blocks - d_bfree) + (r_blocks - r_bfree)) << ctx->blocklog;
565 *nr_threads = disk_heads(ctx->datadev);
566 *rshift = 20;
567 return moveon;
568}