// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
#include "libfrog/bitmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
	struct xfs_slab	*ar_rmaps;		/* rmap observations, p4 */
	struct xfs_slab	*ar_raw_rmaps;		/* unmerged rmaps */
	int		ar_flcount;		/* agfl entries from leftover */
						/* agbt allocations */
	struct xfs_rmap_irec	ar_last_rmap;	/* last rmap seen */
	struct xfs_slab	*ar_refcount_items;	/* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;

static inline int rmap_compare(const void *a, const void *b)
{
	return libxfs_rmap_compare(a, b);
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
	struct xfs_mount	*mp)
{
	return xfs_sb_version_hasreflink(&mp->m_sb) ||
	       xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;
	int			error;

	if (!rmap_needs_work(mp))
		return;

	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
	if (!ag_rmaps)
		do_error(_("couldn't allocate per-AG reverse map roots\n"));

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		error = init_slab(&ag_rmaps[i].ar_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
		error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
				  sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
		ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		error = init_slab(&ag_rmaps[i].ar_refcount_items,
				  sizeof(struct xfs_refcount_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating refcount item slabs."));
	}
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;

	if (!rmap_needs_work(mp))
		return;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		free_slab(&ag_rmaps[i].ar_rmaps);
		free_slab(&ag_rmaps[i].ar_raw_rmaps);
		free_slab(&ag_rmaps[i].ar_refcount_items);
	}
	free(ag_rmaps);
	ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
	struct xfs_rmap_irec	*r1,
	struct xfs_rmap_irec	*r2)
{
	if (r1->rm_owner != r2->rm_owner)
		return false;
	if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
		return false;
	if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
	    XFS_RMAP_LEN_MAX)
		return false;
	if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
		return true;
	/* must be an inode owner below here */
	if (r1->rm_flags != r2->rm_flags)
		return false;
	if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return true;
	return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}

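#ifdef RMAP_DEBUG
/*
 * Debug-only sketch of the merge rule above (not wired into the repair
 * flow; all values invented for illustration): two records for the same
 * inode merge only when they are contiguous in both physical and logical
 * space.
 */
static void
rmaps_merge_example(void)
{
	struct xfs_rmap_irec	r1 = {
		.rm_startblock = 10,
		.rm_blockcount = 5,
		.rm_owner = 128,
		.rm_offset = 0,
	};
	struct xfs_rmap_irec	r2 = {
		.rm_startblock = 15,	/* = r1 start + len */
		.rm_blockcount = 3,
		.rm_owner = 128,	/* same owner */
		.rm_offset = 5,		/* = r1 offset + len */
	};

	/* Contiguous in both spaces: mergeable. */
	ASSERT(rmaps_are_mergeable(&r1, &r2));

	/* A hole in the logical mapping defeats the merge. */
	r2.rm_offset = 7;
	ASSERT(!rmaps_are_mergeable(&r1, &r2));
}
#endif
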
/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_rmap_irec	rmap;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	struct xfs_rmap_irec	*last_rmap;
	int			error = 0;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
	ASSERT(ino != NULLFSINO);
	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	rmap.rm_owner = ino;
	rmap.rm_offset = irec->br_startoff;
	rmap.rm_flags = 0;
	if (whichfork == XFS_ATTR_FORK)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = irec->br_blockcount;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
	last_rmap = &ag_rmaps[agno].ar_last_rmap;
	if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
		*last_rmap = rmap;
	else if (rmaps_are_mergeable(last_rmap, &rmap))
		last_rmap->rm_blockcount += rmap.rm_blockcount;
	else {
		error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
		if (error)
			return error;
		*last_rmap = rmap;
	}

	return error;
}

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	if (!rmap_needs_work(mp) ||
	    ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
		return 0;
	return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

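#ifdef RMAP_DEBUG
/*
 * Minimal sketch of the intended calling sequence (hypothetical caller;
 * the real ones are the phase 4 inode scanners): feed each block mapping
 * to rmap_add_rec(), which accumulates into ar_last_rmap, then flush the
 * pending record once the fork walk for this AG is complete.
 */
static int
rmap_collect_example(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_bmbt_irec	*irec,
	xfs_agnumber_t		agno)
{
	int			error;

	error = rmap_add_rec(mp, ino, XFS_DATA_FORK, irec);
	if (error)
		return error;
	return rmap_finish_collecting_fork_recs(mp, agno);
}
#endif
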
/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner,
	bool			is_attr,
	bool			is_bmbt)
{
	struct xfs_rmap_irec	rmap;

	ASSERT(len != 0);
	rmap.rm_owner = owner;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;
	if (is_attr)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	if (is_bmbt)
		rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = len;
	return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	xfs_fsblock_t		fsbno)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
			whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner)
{
	if (!rmap_needs_work(mp))
		return 0;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*cur = NULL;
	struct xfs_rmap_irec	*prev, *rec;
	size_t			old_sz;
	int			error = 0;

	old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
	if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
		goto no_raw;
	qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
	error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
			&cur);
	if (error)
		goto err;

	prev = pop_slab_cursor(cur);
	rec = pop_slab_cursor(cur);
	while (prev && rec) {
		if (rmaps_are_mergeable(prev, rec)) {
			prev->rm_blockcount += rec->rm_blockcount;
			rec = pop_slab_cursor(cur);
			continue;
		}
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
		prev = rec;
		rec = pop_slab_cursor(cur);
	}
	if (prev) {
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
	}
	free_slab(&ag_rmaps[agno].ar_raw_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
	if (old_sz)
		qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
	free_slab_cursor(&cur);
	return error;
}

/*
 * Return the index of the first zero bit in mask, i.e. the length of the
 * run of one-bits starting at the least significant bit.
 */
static int
find_first_zero_bit(
	uint64_t	mask)
{
	int		n;
	int		b = 0;

	for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
		b++;

	return b;
}

/* Count the set bits in mask. */
static int
popcnt(
	uint64_t	mask)
{
	int		n;
	int		b = 0;

	if (mask == 0)
		return 0;

	for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
		if (mask & 1)
			b++;

	return b;
}

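#ifdef RMAP_DEBUG
/*
 * Sketch of how the two helpers above describe a sparse inode chunk (mask
 * value invented for illustration): a set bit in ir_sparse marks a sparse
 * (missing) inode, so a chunk whose low 32 inodes are sparse starts 32
 * inodes in and holds 64 - 32 = 32 allocated inodes.
 */
static void
sparse_mask_example(void)
{
	uint64_t	ir_sparse = 0xffffffffULL;

	ASSERT(find_first_zero_bit(ir_sparse) == 32);
	ASSERT(XFS_INODES_PER_CHUNK - popcnt(ir_sparse) == 32);
}
#endif
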
/*
 * Add an allocation group's fixed metadata to the rmap list. This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	ino_tree_node_t		*ino_rec;
	xfs_agino_t		agino;
	int			error;
	int			startidx;
	int			nr;

	if (!rmap_needs_work(mp))
		return 0;

	/* sb/agi/agf/agfl headers */
	error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
			XFS_RMAP_OWN_FS);
	if (error)
		goto out;

	/* inodes */
	ino_rec = findfirst_inode_rec(agno);
	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
			startidx = find_first_zero_bit(ino_rec->ir_sparse);
			nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
		} else {
			startidx = 0;
			nr = XFS_INODES_PER_CHUNK;
		}
		nr /= mp->m_sb.sb_inopblock;
		if (nr == 0)
			nr = 1;
		agino = ino_rec->ino_startnum + startidx;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
			error = rmap_add_ag_rec(mp, agno, agbno, nr,
					XFS_RMAP_OWN_INODES);
			if (error)
				goto out;
		}
	}

	/* log */
	fsbno = mp->m_sb.sb_logstart;
	if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
		agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
		error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
				XFS_RMAP_OWN_LOG);
		if (error)
			goto out;
	}
out:
	return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks. While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and the
 * fact that allocating blocks for the bnobt requires adding rmapbt entries.
 * Therefore we record in-core the rmaps for each btree and here use the
 * libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory. The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt. After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries. These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_rmap_irec	*rm_rec = NULL;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_trans	*tp;
	__be32			*agfl_bno, *b;
	struct xfs_ag_rmap	*ag_rmap = &ag_rmaps[agno];
	struct bitmap		*own_ag_bitmap = NULL;
	int			error = 0;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;

	/* Release the ar_rmaps; they were put into the rmapbt during p5. */
	free_slab(&ag_rmap->ar_rmaps);
	error = init_slab(&ag_rmap->ar_rmaps, sizeof(struct xfs_rmap_irec));
	if (error)
		goto err;

	/* Add the AGFL blocks to the rmap list */
	error = -libxfs_trans_read_buf(
			mp, NULL, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
	if (error)
		goto err;

	/*
	 * Sometimes, the blocks at the beginning of the AGFL are there
	 * because we overestimated how many blocks we needed to rebuild
	 * the freespace btrees. ar_flcount records the number of
	 * blocks in this situation. Since those blocks already have an
	 * rmap, we only need to add rmap records for AGFL blocks past
	 * that point in the AGFL because those blocks are a result of a
	 * no-rmap no-shrink freelist fixup that we did earlier.
	 *
	 * However, some blocks end up on the AGFL because the free space
	 * btrees shed blocks as a result of allocating space to fix the
	 * freelist. We already created in-core rmap records for the free
	 * space btree blocks, so we must be careful not to create those
	 * records again. Create a bitmap of already-recorded OWN_AG rmaps.
	 */
	error = init_slab_cursor(ag_rmap->ar_raw_rmaps, rmap_compare, &rm_cur);
	if (error)
		goto err;
	error = -bitmap_alloc(&own_ag_bitmap);
	if (error)
		goto err_slab;
	while ((rm_rec = pop_slab_cursor(rm_cur)) != NULL) {
		if (rm_rec->rm_owner != XFS_RMAP_OWN_AG)
			continue;
		error = -bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
					rm_rec->rm_blockcount);
		if (error) {
			/*
			 * If this range is already set, then the incore rmap
			 * records for the AG free space btrees overlap and
			 * we're toast because that is not allowed.
			 */
			if (error == EEXIST)
				error = EFSCORRUPTED;
			goto err_slab;
		}
	}
	free_slab_cursor(&rm_cur);

	/* Create rmaps for any AGFL blocks that aren't already rmapped. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	b = agfl_bno + ag_rmap->ar_flcount;
	while (*b != cpu_to_be32(NULLAGBLOCK) &&
	       b - agfl_bno < libxfs_agfl_size(mp)) {
		xfs_agblock_t	agbno;

		agbno = be32_to_cpu(*b);
		if (!bitmap_test(own_ag_bitmap, agbno, 1)) {
			error = rmap_add_ag_rec(mp, agno, agbno, 1,
					XFS_RMAP_OWN_AG);
			if (error)
				goto err;
		}
		b++;
	}
	libxfs_putbuf(agflbp);
	agflbp = NULL;
	bitmap_free(&own_ag_bitmap);

	/* Merge all the raw rmaps into the main list */
	error = rmap_fold_raw_recs(mp, agno);
	if (error)
		goto err;

	/* Create cursors to refcount structures */
	error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur);
	if (error)
		goto err;

	/* Insert rmaps into the btree one at a time */
	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		struct xfs_owner_info	oinfo = {};

		error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
		if (error)
			goto err_slab;

		error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
		if (error)
			goto err_trans;

		ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
		oinfo.oi_owner = rm_rec->rm_owner;
		error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
				rm_rec->rm_blockcount, &oinfo);
		if (error)
			goto err_trans;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto err_slab;

		fix_freelist(mp, agno, false);

		rm_rec = pop_slab_cursor(rm_cur);
	}

	free_slab_cursor(&rm_cur);
	return 0;

err_trans:
	libxfs_trans_cancel(tp);
err_slab:
	free_slab_cursor(&rm_cur);
err:
	if (agflbp)
		libxfs_putbuf(agflbp);
	if (own_ag_bitmap)
		bitmap_free(&own_ag_bitmap);
	return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
	const char		*msg,
	xfs_agnumber_t		agno,
	struct xfs_rmap_irec	*rmap)
{
	printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
		msg, rmap,
		(unsigned int)agno,
		(unsigned long long)rmap->rm_startblock,
		(unsigned long long)rmap->rm_owner,
		(unsigned long long)rmap->rm_offset,
		(unsigned int)rmap->rm_blockcount,
		(unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees. Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4. There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries. Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3  43 234  2123  1 01 2 3     0
 *
 * For our purposes, an rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time. Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change. This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree. The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */

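/*
 * Worked example of the algorithm above (invented numbers): given three
 * sorted rmaps covering agbnos [10,20), [10,15), and [12,20), we emit
 * (startblock 10, len 2, refcount 2), then (12, 3, 3), then (15, 5, 2).
 * Every block in [10,20) is covered at least twice here, so the
 * refcount < 2 elision never kicks in.
 */
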
/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
	struct xfs_mount	*mp,
	struct xfs_bag		*rmaps)
{
	xfs_agnumber_t		iagno;
	struct xfs_rmap_irec	*rmap;
	struct ino_tree_node	*irec;
	int			off;
	size_t			idx;
	xfs_agino_t		ino;

	if (bag_count(rmaps) < 2)
		return;

	/* Reflink flag accounting */
	foreach_bag_ptr(rmaps, idx, rmap) {
		ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
		iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
		ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
		pthread_mutex_lock(&ag_locks[iagno].lock);
		irec = find_inode_rec(mp, iagno, ino);
		off = get_inode_offset(mp, rmap->rm_owner, irec);
		/* lock here because we might go outside this ag */
		set_inode_is_rl(irec, off);
		pthread_mutex_unlock(&ag_locks[iagno].lock);
	}
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr)	((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	size_t			nr_rmaps)
{
	struct xfs_refcount_irec	rlrec;
	int			error;
	struct xfs_slab		*rlslab;

	rlslab = ag_rmaps[agno].ar_refcount_items;
	ASSERT(nr_rmaps > 0);

	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
		agno, agbno, len, nr_rmaps);
	rlrec.rc_startblock = agbno;
	rlrec.rc_blockcount = len;
	rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
	error = slab_add(rlslab, &rlrec);
	if (error)
		do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r)	((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_bag		*stack_top = NULL;
	struct xfs_slab		*rmaps;
	struct xfs_slab_cursor	*rmaps_cur;
	struct xfs_rmap_irec	*array_cur;
	struct xfs_rmap_irec	*rmap;
	xfs_agblock_t		sbno;	/* first bno of this rmap set */
	xfs_agblock_t		cbno;	/* first bno of this refcount set */
	xfs_agblock_t		nbno;	/* next bno where rmap set changes */
	size_t			n, idx;
	size_t			old_stack_nr;
	int			error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	rmaps = ag_rmaps[agno].ar_rmaps;

	error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
	if (error)
		return error;

	error = init_bag(&stack_top);
	if (error)
		goto err;

	/* While there are rmaps to be processed... */
	n = 0;
	while (n < slab_count(rmaps)) {
		array_cur = peek_slab_cursor(rmaps_cur);
		sbno = cbno = array_cur->rm_startblock;
		/* Push all rmaps with pblk == sbno onto the stack */
		for (;
		     array_cur && array_cur->rm_startblock == sbno;
		     array_cur = peek_slab_cursor(rmaps_cur)) {
			advance_slab_cursor(rmaps_cur); n++;
			rmap_dump("push0", agno, array_cur);
			error = bag_add(stack_top, array_cur);
			if (error)
				goto err;
		}
		mark_inode_rl(mp, stack_top);

		/* Set nbno to the bno of the next refcount change */
		if (n < slab_count(rmaps) && array_cur)
			nbno = array_cur->rm_startblock;
		else
			nbno = NULLAGBLOCK;
		foreach_bag_ptr(stack_top, idx, rmap) {
			nbno = min(nbno, RMAP_END(rmap));
		}

		/* Emit reverse mappings, if needed */
		ASSERT(nbno > sbno);
		old_stack_nr = bag_count(stack_top);

		/* While stack isn't empty... */
		while (bag_count(stack_top)) {
			/* Pop all rmaps that end at nbno */
			foreach_bag_ptr_reverse(stack_top, idx, rmap) {
				if (RMAP_END(rmap) != nbno)
					continue;
				rmap_dump("pop", agno, rmap);
				error = bag_remove(stack_top, idx);
				if (error)
					goto err;
			}

			/* Push array items that start at nbno */
			for (;
			     array_cur && array_cur->rm_startblock == nbno;
			     array_cur = peek_slab_cursor(rmaps_cur)) {
				advance_slab_cursor(rmaps_cur); n++;
				rmap_dump("push1", agno, array_cur);
				error = bag_add(stack_top, array_cur);
				if (error)
					goto err;
			}
			mark_inode_rl(mp, stack_top);

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (bag_count(stack_top) != old_stack_nr) {
				if (old_stack_nr > 1) {
					refcount_emit(mp, agno, cbno,
							nbno - cbno,
							old_stack_nr);
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (bag_count(stack_top) == 0)
				break;
			old_stack_nr = bag_count(stack_top);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			if (n < slab_count(rmaps))
				nbno = array_cur->rm_startblock;
			else
				nbno = NULLAGBLOCK;
			foreach_bag_ptr(stack_top, idx, rmap) {
				nbno = min(nbno, RMAP_END(rmap));
			}

			/* Emit reverse mappings, if needed */
			ASSERT(nbno > sbno);
		}
	}
err:
	free_bag(&stack_top);
	free_slab_cursor(&rmaps_cur);

	return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
	rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	int			error;

	/* Use the regular btree retrieval routine. */
	error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_blockcount,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, have);
	if (error)
		return error;
	if (*have == 0)
		return error;
	return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Look for an rmap in the rmapbt that overlaps a given rmap. */
static int
rmap_lookup_overlapped(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	/* Have to use our fancy version for overlapped */
	return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x)	((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x)	((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
	struct xfs_rmap_irec	*observed,
	struct xfs_rmap_irec	*btree)
{
	/* Can't have mismatches in the flags or the owner. */
	if (btree->rm_flags != observed->rm_flags ||
	    btree->rm_owner != observed->rm_owner)
		return false;

	/*
	 * Btree record can't physically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_startblock > observed->rm_startblock ||
	    NEXTP(btree) < NEXTP(observed))
		return false;

	/* If this is metadata or bmbt, we're done. */
	if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
	    (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return true;
	/*
	 * Btree record can't logically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_offset > observed->rm_offset ||
	    NEXTL(btree) < NEXTL(observed))
		return false;

	return true;
}
#undef NEXTP
#undef NEXTL

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	struct xfs_buf		*agbp = NULL;
	struct xfs_rmap_irec	*rm_rec;
	struct xfs_rmap_irec	tmp;
	struct xfs_perag	*pag;		/* per allocation group data */

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;
	if (rmapbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt rmap btrees.\n"));
		return 0;
	}

	/* Create cursors to refcount structures */
	error = rmap_init_cursor(agno, &rm_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
		if (error)
			goto err;
		/*
		 * Using the range query is expensive, so only do it if
		 * the regular lookup doesn't find anything or if it doesn't
		 * match the observed rmap.
		 */
		if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
				(!have || !rmap_is_good(rm_rec, &tmp))) {
			error = rmap_lookup_overlapped(bt_cur, rm_rec,
					&tmp, &have);
			if (error)
				goto err;
		}
		if (!have) {
			do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}

		/* Compare each rmap observation against the btree's */
		if (!rmap_is_good(rm_rec, &tmp)) {
			do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
				agno, tmp.rm_startblock,
				(tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				tmp.rm_blockcount,
				tmp.rm_owner,
				(tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				tmp.rm_offset,
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}
next_loop:
		rm_rec = pop_slab_cursor(rm_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rm_cur);
	return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
int64_t
rmap_diffkeys(
	struct xfs_rmap_irec	*kp1,
	struct xfs_rmap_irec	*kp2)
{
	__u64			oa;
	__u64			ob;
	int64_t			d;
	struct xfs_rmap_irec	tmp;

	tmp = *kp1;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	oa = libxfs_rmap_irec_offset_pack(&tmp);
	tmp = *kp2;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	ob = libxfs_rmap_irec_offset_pack(&tmp);

	d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
	if (d)
		return d;

	if (kp1->rm_owner > kp2->rm_owner)
		return 1;
	else if (kp2->rm_owner > kp1->rm_owner)
		return -1;

	if (oa > ob)
		return 1;
	else if (ob > oa)
		return -1;
	return 0;
}

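#ifdef RMAP_DEBUG
/*
 * Comparison sketch (invented values): the physical startblock dominates
 * the sort order, then the owner, then the packed offset with the non-key
 * record flags masked away.
 */
static void
rmap_diffkeys_example(void)
{
	struct xfs_rmap_irec	k1 = { .rm_startblock = 10, .rm_owner = 128 };
	struct xfs_rmap_irec	k2 = { .rm_startblock = 12, .rm_owner = 127 };

	/* k1 sorts first despite its larger owner. */
	ASSERT(rmap_diffkeys(&k1, &k2) < 0);
}
#endif
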
/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
	struct xfs_rmap_irec	*rec,
	struct xfs_rmap_irec	*key)
{
	int			adj;

	adj = rec->rm_blockcount - 1;

	key->rm_startblock = rec->rm_startblock + adj;
	key->rm_owner = rec->rm_owner;
	key->rm_offset = rec->rm_offset;
	key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
	    (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return;
	key->rm_offset += adj;
}
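
#ifdef RMAP_DEBUG
/*
 * High-key sketch (invented values): a 5-block extent for inode 128 at
 * agbno 10, file offset 100, has high key (agbno 14, offset 104), i.e.
 * the last block it covers in both the physical and logical spaces.
 */
static void
rmap_high_key_example(void)
{
	struct xfs_rmap_irec	rec = {
		.rm_startblock = 10,
		.rm_blockcount = 5,
		.rm_owner = 128,
		.rm_offset = 100,
	};
	struct xfs_rmap_irec	key;

	rmap_high_key_from_rec(&rec, &key);
	ASSERT(key.rm_startblock == 14);
	ASSERT(key.rm_offset == 104);
}
#endif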

/*
 * Record that an inode had the reflink flag set when repair started. The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dino,
	xfs_agnumber_t		agno,
	xfs_agino_t		ino,
	xfs_ino_t		lino)
{
	struct ino_tree_node	*irec;
	int			off;

	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
	if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
		return;
	irec = find_inode_rec(mp, agno, ino);
	off = get_inode_offset(mp, lino, irec);
	ASSERT(!inode_was_rl(irec, off));
	set_inode_was_rl(irec, off);
	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Inform the user that we're clearing the reflink flag on an inode that
 * doesn't actually share any blocks. This is an optimization (the kernel
 * skips refcount checks for non-reflink files) and not a corruption repair,
 * so we don't need to log every time we clear a flag unless verbose mode is
 * enabled.
 */
static void
warn_clearing_reflink(
	xfs_ino_t		ino)
{
	static bool		warned = false;
	static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;

	if (verbose) {
		do_warn(_("clearing reflink flag on inode %"PRIu64"\n"), ino);
		return;
	}

	if (warned)
		return;

	pthread_mutex_lock(&lock);
	if (!warned) {
		do_warn(_("clearing reflink flag on inodes when possible\n"));
		warned = true;
	}
	pthread_mutex_unlock(&lock);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	bool			set)
{
	struct xfs_dinode	*dino;
	struct xfs_buf		*buf;

	if (set)
		do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	else if (!no_modify) /* && !set */
		warn_clearing_reflink(XFS_AGINO_TO_INO(mp, agno, agino));
	if (no_modify)
		return 0;

	buf = get_agino_buf(mp, agno, agino, &dino);
	if (!buf)
		return 1;
	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
	if (set)
		dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
	else
		dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
	libxfs_dinode_calc_crc(mp, dino);
	libxfs_writebuf(buf, 0);

	return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct ino_tree_node	*irec;
	int			bit;
	uint64_t		was;
	uint64_t		is;
	uint64_t		diff;
	uint64_t		mask;
	int			error = 0;
	xfs_agino_t		agino;

	/*
	 * Update the reflink flag for any inode where there's a discrepancy
	 * between the inode flag and whether or not we found any reflinked
	 * extents.
	 */
	for (irec = findfirst_inode_rec(agno);
	     irec != NULL;
	     irec = next_ino_rec(irec)) {
		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
		was = irec->ino_was_rl;
		is = irec->ino_is_rl;
		if (was == is)
			continue;
		diff = was ^ is;
		dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
						irec->ino_startnum),
			was, is, diff);

		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
			agino = bit + irec->ino_startnum;
			if (!(diff & mask))
				continue;
			else if (was & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						false);
			else if (is & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						true);
			else
				ASSERT(0);
			if (error)
				do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
					XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}

	return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
	refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_slab_cursor		*rl_cur;
	struct xfs_btree_cur		*bt_cur = NULL;
	int				error;
	int				have;
	int				i;
	struct xfs_buf			*agbp = NULL;
	struct xfs_refcount_irec	*rl_rec;
	struct xfs_refcount_irec	tmp;
	struct xfs_perag		*pag;	/* per allocation group data */

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
	if (refcbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt refcount btrees.\n"));
		return 0;
	}

	/* Create cursors to refcount structures */
	error = init_refcount_cursor(agno, &rl_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rl_rec = pop_slab_cursor(rl_cur);
	while (rl_rec) {
		/* Look for a refcount record in the btree */
		error = -libxfs_refcount_lookup_le(bt_cur,
				rl_rec->rc_startblock, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
		if (error)
			goto err;
		if (!i) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		/* Compare each refcount observation against the btree's */
		if (tmp.rc_startblock != rl_rec->rc_startblock ||
		    tmp.rc_blockcount < rl_rec->rc_blockcount ||
		    tmp.rc_refcount < rl_rec->rc_refcount)
			do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
				agno, tmp.rc_startblock, tmp.rc_blockcount,
				tmp.rc_refcount, agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
		rl_rec = pop_slab_cursor(rl_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, error ? XFS_BTREE_ERROR :
							XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rl_cur);
	return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree. If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	bool			skip_rmapbt)
{
	xfs_alloc_arg_t		args;
	xfs_trans_t		*tp;
	int			flags;
	int			error;

	memset(&args, 0, sizeof(args));
	args.mp = mp;
	args.agno = agno;
	args.alignment = 1;
	args.pag = libxfs_perag_get(mp, agno);
	error = -libxfs_trans_alloc_rollable(mp, 0, &tp);
	if (error)
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	args.tp = tp;

	/*
	 * Prior to rmapbt, all we had to do to fix the freelist was "expand"
	 * the fresh AGFL header from empty to full. That hasn't changed. For
	 * rmapbt, however, things change a bit.
	 *
	 * When we're stuffing the rmapbt with the AG btree rmaps the tree can
	 * expand, so we need to keep the AGFL well-stocked for the expansion.
	 * However, this expansion can cause the bnobt/cntbt to shrink, which
	 * can make the AGFL eligible for shrinking. Shrinking involves
	 * freeing rmapbt entries, but since we haven't finished loading the
	 * rmapbt with the btree rmaps it's possible for the remove operation
	 * to fail. The AGFL is large enough at this point to absorb any
	 * blocks freed from the bnobt/cntbt, so we can disable shrinking.
	 *
	 * During the initial AGFL regeneration during AGF generation in
	 * phase 5 we must also disable rmapbt modifications because the AGF
	 * that libxfs reads does not yet point to the new rmapbt. These
	 * initial AGFL entries are added just prior to adding the AG btree
	 * block rmaps to the rmapbt. It's ok to pass NOSHRINK here too, since
	 * the AGFL is empty and cannot shrink.
	 */
	flags = XFS_ALLOC_FLAG_NOSHRINK;
	if (skip_rmapbt)
		flags |= XFS_ALLOC_FLAG_NORMAP;
	error = -libxfs_alloc_fix_freelist(&args, flags);
	libxfs_perag_put(args.pag);
	if (error) {
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	}
	error = -libxfs_trans_commit(tp);
	if (error)
		do_error(_("%s: commit failed, error %d\n"), __func__, error);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	int			count)
{
	if (!rmap_needs_work(mp))
		return;

	ag_rmaps[agno].ar_flcount = count;
}