]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/phase6.c
xfs_scrub: fstrim the free areas if there are no errors on the filesystem
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
CommitLineData
b364a9c0
DW
1/*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include <stdio.h>
21#include <stdint.h>
22#include <stdbool.h>
23#include <dirent.h>
24#include <sys/statvfs.h>
25#include "xfs.h"
26#include "xfs_fs.h"
27#include "handle.h"
28#include "path.h"
29#include "ptvar.h"
30#include "workqueue.h"
31#include "xfs_scrub.h"
32#include "common.h"
33#include "bitmap.h"
34#include "disk.h"
35#include "filemap.h"
36#include "inodes.h"
37#include "read_verify.h"
38#include "spacemap.h"
39#include "vfs.h"
40
41/*
42 * Phase 6: Verify data file integrity.
43 *
44 * Identify potential data block extents with GETFSMAP, then feed those
45 * extents to the read-verify pool to get the verify commands batched,
46 * issued, and (if there are problems) reported back to us. If there
47 * are errors, we'll record the bad regions and (if available) use rmap
48 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
49 * whole directory tree looking for files that overlap the bad regions
50 * and report the paths of the now corrupt files.
51 */
52
53/* Find the fd for a given device identifier. */
54static struct disk *
55xfs_dev_to_disk(
56 struct scrub_ctx *ctx,
57 dev_t dev)
58{
59 if (dev == ctx->fsinfo.fs_datadev)
60 return ctx->datadev;
61 else if (dev == ctx->fsinfo.fs_logdev)
62 return ctx->logdev;
63 else if (dev == ctx->fsinfo.fs_rtdev)
64 return ctx->rtdev;
65 abort();
66}
67
68/* Find the device major/minor for a given file descriptor. */
69static dev_t
70xfs_disk_to_dev(
71 struct scrub_ctx *ctx,
72 struct disk *disk)
73{
74 if (disk == ctx->datadev)
75 return ctx->fsinfo.fs_datadev;
76 else if (disk == ctx->logdev)
77 return ctx->fsinfo.fs_logdev;
78 else if (disk == ctx->rtdev)
79 return ctx->fsinfo.fs_rtdev;
80 abort();
81}
82
/* One row of the special-owner lookup table: owner code and its label. */
struct owner_decode {
	uint64_t	owner;	/* owner code compared against the lookup key */
	const char	*descr;	/* human-readable label; NULL ends the table */
};
87
88static const struct owner_decode special_owners[] = {
89 {XFS_FMR_OWN_FREE, "free space"},
90 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
91 {XFS_FMR_OWN_FS, "static FS metadata"},
92 {XFS_FMR_OWN_LOG, "journalling log"},
93 {XFS_FMR_OWN_AG, "per-AG metadata"},
94 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
95 {XFS_FMR_OWN_INODES, "inodes"},
96 {XFS_FMR_OWN_REFC, "refcount btree"},
97 {XFS_FMR_OWN_COW, "CoW staging"},
98 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
99 {0, NULL},
100};
101
102/* Decode a special owner. */
103static const char *
104xfs_decode_special_owner(
105 uint64_t owner)
106{
107 const struct owner_decode *od = special_owners;
108
109 while (od->descr) {
110 if (od->owner == owner)
111 return od->descr;
112 od++;
113 }
114
115 return NULL;
116}
117
118/* Routines to translate bad physical extents into file paths and offsets. */
119
/* The pair of bad-extent bitmaps handed to the file-reporting callbacks. */
struct xfs_verify_error_info {
	struct bitmap	*d_bad;		/* bad ranges for non-realtime files, in bytes */
	struct bitmap	*r_bad;		/* bad ranges for realtime files, in bytes */
};
124
125/* Report if this extent overlaps a bad region. */
126static bool
127xfs_report_verify_inode_bmap(
128 struct scrub_ctx *ctx,
129 const char *descr,
130 int fd,
131 int whichfork,
132 struct fsxattr *fsx,
133 struct xfs_bmap *bmap,
134 void *arg)
135{
136 struct xfs_verify_error_info *vei = arg;
137 struct bitmap *bmp;
138
139 /* Only report errors for real extents. */
140 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
141 return true;
142
143 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
144 bmp = vei->r_bad;
145 else
146 bmp = vei->d_bad;
147
148 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
149 return true;
150
151 str_error(ctx, descr,
152_("offset %llu failed read verification."), bmap->bm_offset);
153 return true;
154}
155
156/* Iterate the extent mappings of a file to report errors. */
157static bool
158xfs_report_verify_fd(
159 struct scrub_ctx *ctx,
160 const char *descr,
161 int fd,
162 void *arg)
163{
164 struct xfs_bmap key = {0};
165 bool moveon;
166
167 /* data fork */
168 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
169 xfs_report_verify_inode_bmap, arg);
170 if (!moveon)
171 return false;
172
173 /* attr fork */
174 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
175 xfs_report_verify_inode_bmap, arg);
176 if (!moveon)
177 return false;
178 return true;
179}
180
181/* Report read verify errors in unlinked (but still open) files. */
182static int
183xfs_report_verify_inode(
184 struct scrub_ctx *ctx,
185 struct xfs_handle *handle,
186 struct xfs_bstat *bstat,
187 void *arg)
188{
189 char descr[DESCR_BUFSZ];
190 char buf[DESCR_BUFSZ];
191 bool moveon;
192 int fd;
193 int error;
194
195 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
196 (uint64_t)bstat->bs_ino);
197
198 /* Ignore linked files and things we can't open. */
199 if (bstat->bs_nlink != 0)
200 return 0;
201 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
202 return 0;
203
204 /* Try to open the inode. */
205 fd = xfs_open_handle(handle);
206 if (fd < 0) {
207 error = errno;
208 if (error == ESTALE)
209 return error;
210
211 str_warn(ctx, descr, "%s", strerror_r(error, buf, DESCR_BUFSZ));
212 return error;
213 }
214
215 /* Go find the badness. */
216 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
217 close(fd);
218
219 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
220}
221
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			moveon;

	moveon = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return moveon;
}
232
233/*
234 * Scan the inode associated with a directory entry for matches with
235 * the read verify error list.
236 */
237static bool
238xfs_report_verify_dirent(
239 struct scrub_ctx *ctx,
240 const char *path,
241 int dir_fd,
242 struct dirent *dirent,
243 struct stat *sb,
244 void *arg)
245{
246 bool moveon;
247 int fd;
248
249 /* Ignore things we can't open. */
250 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
251 return true;
252
253 /* Ignore . and .. */
254 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
255 return true;
256
257 /*
258 * If we were given a dirent, open the associated file under
259 * dir_fd for badblocks scanning. If dirent is NULL, then it's
260 * the directory itself we want to scan.
261 */
262 fd = openat(dir_fd, dirent->d_name,
263 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
264 if (fd < 0)
265 return true;
266
267 /* Go find the badness. */
268 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
269 if (moveon)
270 goto out;
271
272out:
273 close(fd);
274
275 return moveon;
276}
277
278/* Given bad extent lists for the data & rtdev, find bad files. */
279static bool
280xfs_report_verify_errors(
281 struct scrub_ctx *ctx,
282 struct bitmap *d_bad,
283 struct bitmap *r_bad)
284{
285 struct xfs_verify_error_info vei;
286 bool moveon;
287
288 vei.d_bad = d_bad;
289 vei.r_bad = r_bad;
290
291 /* Scan the directory tree to get file paths. */
292 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
293 xfs_report_verify_dirent, &vei);
294 if (!moveon)
295 return false;
296
297 /* Scan for unlinked files. */
298 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
299}
300
301/* Verify disk blocks with GETFSMAP */
302
/* State shared by the space map walk and the read-verify error callback. */
struct xfs_verify_extent {
	struct read_verify_pool	*readverify;	/* pool that runs the verify IO */
	struct ptvar		*rvstate;	/* holds one struct read_verify per slot */
	struct bitmap		*d_bad;		/* bad data device ranges, in bytes */
	struct bitmap		*r_bad;		/* bad realtime device ranges, in bytes */
};
309
310/* Report an IO error resulting from read-verify based off getfsmap. */
311static bool
312xfs_check_rmap_error_report(
313 struct scrub_ctx *ctx,
314 const char *descr,
315 struct fsmap *map,
316 void *arg)
317{
318 const char *type;
319 char buf[32];
320 uint64_t err_physical = *(uint64_t *)arg;
321 uint64_t err_off;
322
323 if (err_physical > map->fmr_physical)
324 err_off = err_physical - map->fmr_physical;
325 else
326 err_off = 0;
327
328 snprintf(buf, 32, _("disk offset %"PRIu64),
329 (uint64_t)BTOBB(map->fmr_physical + err_off));
330
331 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
332 type = xfs_decode_special_owner(map->fmr_owner);
333 str_error(ctx, buf,
334_("%s failed read verification."),
335 type);
336 }
337
338 /*
339 * XXX: If we had a getparent() call we could report IO errors
340 * efficiently. Until then, we'll have to scan the dir tree
341 * to find the bad file's pathname.
342 */
343
344 return true;
345}
346
347/*
348 * Remember a read error for later, and see if rmap will tell us about the
349 * owner ahead of time.
350 */
351static void
352xfs_check_rmap_ioerr(
353 struct scrub_ctx *ctx,
354 struct disk *disk,
355 uint64_t start,
356 uint64_t length,
357 int error,
358 void *arg)
359{
360 struct fsmap keys[2];
361 char descr[DESCR_BUFSZ];
362 struct xfs_verify_extent *ve = arg;
363 struct bitmap *tree;
364 dev_t dev;
365 bool moveon;
366
367 dev = xfs_disk_to_dev(ctx, disk);
368
369 /*
370 * If we don't have parent pointers, save the bad extent for
371 * later rescanning.
372 */
373 if (dev == ctx->fsinfo.fs_datadev)
374 tree = ve->d_bad;
375 else if (dev == ctx->fsinfo.fs_rtdev)
376 tree = ve->r_bad;
377 else
378 tree = NULL;
379 if (tree) {
380 moveon = bitmap_set(tree, start, length);
381 if (!moveon)
382 str_errno(ctx, ctx->mntpoint);
383 }
384
385 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
386 major(dev), minor(dev), start, length);
387
388 /* Go figure out which blocks are bad from the fsmap. */
389 memset(keys, 0, sizeof(struct fsmap) * 2);
390 keys->fmr_device = dev;
391 keys->fmr_physical = start;
392 (keys + 1)->fmr_device = dev;
393 (keys + 1)->fmr_physical = start + length - 1;
394 (keys + 1)->fmr_owner = ULLONG_MAX;
395 (keys + 1)->fmr_offset = ULLONG_MAX;
396 (keys + 1)->fmr_flags = UINT_MAX;
397 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
398 &start);
399}
400
401/* Schedule a read-verify of a (data block) extent. */
402static bool
403xfs_check_rmap(
404 struct scrub_ctx *ctx,
405 const char *descr,
406 struct fsmap *map,
407 void *arg)
408{
409 struct xfs_verify_extent *ve = arg;
410 struct disk *disk;
411
412 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
413 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
414 major(map->fmr_device), minor(map->fmr_device),
415 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
416 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
417 map->fmr_flags);
418
419 /* "Unknown" extents should be verified; they could be data. */
420 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
421 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
422 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
423
424 /*
425 * We only care about read-verifying data extents that have been
426 * written to disk. This means we can skip "special" owners
427 * (metadata), xattr blocks, unwritten extents, and extent maps.
428 * These should all get checked elsewhere in the scrubber.
429 */
430 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
431 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
432 goto out;
433
434 /* XXX: Filter out directory data blocks. */
435
436 /* Schedule the read verify command for (eventual) running. */
437 disk = xfs_dev_to_disk(ctx, map->fmr_device);
438
439 read_verify_schedule_io(ve->readverify, ptvar_get(ve->rvstate), disk,
440 map->fmr_physical, map->fmr_length, ve);
441
442out:
443 /* Is this the last extent? Fire off the read. */
444 if (map->fmr_flags & FMR_OF_LAST)
445 read_verify_force_io(ve->readverify, ptvar_get(ve->rvstate));
446
447 return true;
448}
449
450/*
451 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
452 * do data checksums, we trust that the underlying storage will pass back
453 * an IO error if it can't retrieve whatever we previously stored there.
454 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
455 * scan the extent maps of the entire fs tree to figure (and the unlinked
456 * inodes) out which files are now broken.
457 */
458bool
459xfs_scan_blocks(
460 struct scrub_ctx *ctx)
461{
462 struct xfs_verify_extent ve;
463 bool moveon;
464
465 ve.rvstate = ptvar_init(scrub_nproc(ctx), sizeof(struct read_verify));
466 if (!ve.rvstate) {
467 str_errno(ctx, ctx->mntpoint);
468 return false;
469 }
470
471 moveon = bitmap_init(&ve.d_bad);
472 if (!moveon) {
473 str_errno(ctx, ctx->mntpoint);
474 goto out_ve;
475 }
476
477 moveon = bitmap_init(&ve.r_bad);
478 if (!moveon) {
479 str_errno(ctx, ctx->mntpoint);
480 goto out_dbad;
481 }
482
483 ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
484 xfs_check_rmap_ioerr, disk_heads(ctx->datadev));
485 if (!ve.readverify) {
486 moveon = false;
487 str_error(ctx, ctx->mntpoint,
488_("Could not create media verifier."));
489 goto out_rbad;
490 }
491 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
492 if (!moveon)
493 goto out_pool;
494 read_verify_pool_flush(ve.readverify);
495 ctx->bytes_checked += read_verify_bytes(ve.readverify);
496 read_verify_pool_destroy(ve.readverify);
497
498 /* Scan the whole dir tree to see what matches the bad extents. */
499 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
500 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
501
502 bitmap_free(&ve.r_bad);
503 bitmap_free(&ve.d_bad);
504 ptvar_free(ve.rvstate);
505 return moveon;
506
507out_pool:
508 read_verify_pool_destroy(ve.readverify);
509out_rbad:
510 bitmap_free(&ve.r_bad);
511out_dbad:
512 bitmap_free(&ve.d_bad);
513out_ve:
514 ptvar_free(ve.rvstate);
515 return moveon;
516}