// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
#include "bitmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...)	do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
        struct xfs_slab *ar_rmaps;              /* rmap observations, p4 */
        struct xfs_slab *ar_raw_rmaps;          /* unmerged rmaps */
        int ar_flcount;                         /* agfl entries from leftover */
                                                /* agbt allocations */
        struct xfs_rmap_irec ar_last_rmap;      /* last rmap seen */
        struct xfs_slab *ar_refcount_items;     /* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;

static inline int rmap_compare(const void *a, const void *b)
{
        return libxfs_rmap_compare(a, b);
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
        struct xfs_mount *mp)
{
        return xfs_sb_version_hasreflink(&mp->m_sb) ||
               xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
        struct xfs_mount *mp)
{
        xfs_agnumber_t i;
        int error;

        if (!rmap_needs_work(mp))
                return;

        ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
        if (!ag_rmaps)
                do_error(_("couldn't allocate per-AG reverse map roots\n"));

        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                error = init_slab(&ag_rmaps[i].ar_rmaps,
                                sizeof(struct xfs_rmap_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
                error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
                                sizeof(struct xfs_rmap_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
                ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
                error = init_slab(&ag_rmaps[i].ar_refcount_items,
                                sizeof(struct xfs_refcount_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating refcount item slabs."));
        }
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
        struct xfs_mount *mp)
{
        xfs_agnumber_t i;

        if (!rmap_needs_work(mp))
                return;

        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                free_slab(&ag_rmaps[i].ar_rmaps);
                free_slab(&ag_rmaps[i].ar_raw_rmaps);
                free_slab(&ag_rmaps[i].ar_refcount_items);
        }
        free(ag_rmaps);
        ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
        struct xfs_rmap_irec *r1,
        struct xfs_rmap_irec *r2)
{
        if (r1->rm_owner != r2->rm_owner)
                return false;
        if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
                return false;
        if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
            XFS_RMAP_LEN_MAX)
                return false;
        if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
                return true;
        /* must be an inode owner below here */
        if (r1->rm_flags != r2->rm_flags)
                return false;
        if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
                return true;
        return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}
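
/*
 * Example (added for illustration, not from the original source): for
 * inode owner 128, the records (startblock 50, blockcount 4, offset 0)
 * and (startblock 54, blockcount 2, offset 4) merge into (startblock 50,
 * blockcount 6, offset 0): they are physically and logically contiguous,
 * the owner and flags match, and the combined length stays within
 * XFS_RMAP_LEN_MAX.
 */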

/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
        struct xfs_mount *mp,
        xfs_ino_t ino,
        int whichfork,
        struct xfs_bmbt_irec *irec)
{
        struct xfs_rmap_irec rmap;
        xfs_agnumber_t agno;
        xfs_agblock_t agbno;
        struct xfs_rmap_irec *last_rmap;
        int error = 0;

        if (!rmap_needs_work(mp))
                return 0;

        agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
        agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
        ASSERT(ino != NULLFSINO);
        ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

        rmap.rm_owner = ino;
        rmap.rm_offset = irec->br_startoff;
        rmap.rm_flags = 0;
        if (whichfork == XFS_ATTR_FORK)
                rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = irec->br_blockcount;
        if (irec->br_state == XFS_EXT_UNWRITTEN)
                rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
        last_rmap = &ag_rmaps[agno].ar_last_rmap;
        if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
                *last_rmap = rmap;
        else if (rmaps_are_mergeable(last_rmap, &rmap))
                last_rmap->rm_blockcount += rmap.rm_blockcount;
        else {
                error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
                if (error)
                        return error;
                *last_rmap = rmap;
        }

        return error;
}

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        if (!rmap_needs_work(mp) ||
            ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
                return 0;
        return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        uint64_t owner,
        bool is_attr,
        bool is_bmbt)
{
        struct xfs_rmap_irec rmap;

        ASSERT(len != 0);
        rmap.rm_owner = owner;
        rmap.rm_offset = 0;
        rmap.rm_flags = 0;
        if (is_attr)
                rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
        if (is_bmbt)
                rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = len;
        return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
        struct xfs_mount *mp,
        xfs_ino_t ino,
        int whichfork,
        xfs_fsblock_t fsbno)
{
        xfs_agnumber_t agno;
        xfs_agblock_t agbno;

        if (!rmap_needs_work(mp))
                return 0;

        agno = XFS_FSB_TO_AGNO(mp, fsbno);
        agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

        return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
                        whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        uint64_t owner)
{
        if (!rmap_needs_work(mp))
                return 0;

        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

        return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *cur = NULL;
        struct xfs_rmap_irec *prev, *rec;
        size_t old_sz;
        int error = 0;

        old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
        if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
                goto no_raw;
        qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
        error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
                        &cur);
        if (error)
                goto err;

        prev = pop_slab_cursor(cur);
        rec = pop_slab_cursor(cur);
        while (prev && rec) {
                if (rmaps_are_mergeable(prev, rec)) {
                        prev->rm_blockcount += rec->rm_blockcount;
                        rec = pop_slab_cursor(cur);
                        continue;
                }
                error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
                if (error)
                        goto err;
                prev = rec;
                rec = pop_slab_cursor(cur);
        }
        if (prev) {
                error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
                if (error)
                        goto err;
        }
        free_slab(&ag_rmaps[agno].ar_raw_rmaps);
        error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
                        sizeof(struct xfs_rmap_irec));
        if (error)
                do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
        if (old_sz)
                qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
        free_slab_cursor(&cur);
        return error;
}

static int
find_first_zero_bit(
        uint64_t mask)
{
        int n;
        int b = 0;

        for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
                b++;

        return b;
}

static int
popcnt(
        uint64_t mask)
{
        int n;
        int b = 0;

        if (mask == 0)
                return 0;

        for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
                if (mask & 1)
                        b++;

        return b;
}
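
/*
 * Behavior notes (added for clarity): find_first_zero_bit() returns the
 * index of the lowest zero bit by counting consecutive one bits up from
 * bit 0 (e.g. mask 0b0111 -> 3); popcnt() returns the number of set bits
 * in the mask.
 */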

/*
 * Add an allocation group's fixed metadata to the rmap list. This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        xfs_fsblock_t fsbno;
        xfs_agblock_t agbno;
        ino_tree_node_t *ino_rec;
        xfs_agino_t agino;
        int error;
        int startidx;
        int nr;

        if (!rmap_needs_work(mp))
                return 0;

        /* sb/agi/agf/agfl headers */
        error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
                        XFS_RMAP_OWN_FS);
        if (error)
                goto out;

        /* inodes */
        ino_rec = findfirst_inode_rec(agno);
        for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
                if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
                        startidx = find_first_zero_bit(ino_rec->ir_sparse);
                        nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
                } else {
                        startidx = 0;
                        nr = XFS_INODES_PER_CHUNK;
                }
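                /*
                 * Convert the inode count to whole blocks, at least one.
                 * Illustrative example (not from the original source):
                 * with 64 inodes per chunk, 16 inodes per block, and a
                 * sparse mask covering the first 16 inodes, startidx is
                 * 16 and nr becomes 48 / 16 = 3 blocks.
                 */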
                nr /= mp->m_sb.sb_inopblock;
                if (nr == 0)
                        nr = 1;
                agino = ino_rec->ino_startnum + startidx;
                agbno = XFS_AGINO_TO_AGBNO(mp, agino);
                if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
                        error = rmap_add_ag_rec(mp, agno, agbno, nr,
                                        XFS_RMAP_OWN_INODES);
                        if (error)
                                goto out;
                }
        }

        /* log */
        fsbno = mp->m_sb.sb_logstart;
        if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
                agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
                error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
                                XFS_RMAP_OWN_LOG);
                if (error)
                        goto out;
        }
out:
        return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks. While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and the
 * fact that allocating blocks for the bnobt requires adding rmapbt entries.
 * Therefore we record in-core the rmaps for each btree and here use the
 * libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory. The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt. After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries. These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rm_cur;
        struct xfs_rmap_irec *rm_rec = NULL;
        struct xfs_buf *agbp = NULL;
        struct xfs_buf *agflbp = NULL;
        struct xfs_trans *tp;
        __be32 *agfl_bno, *b;
        struct xfs_ag_rmap *ag_rmap = &ag_rmaps[agno];
        struct bitmap *own_ag_bitmap = NULL;
        int error = 0;

        if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
                return 0;

        /* Release the ar_rmaps; they were put into the rmapbt during p5. */
        free_slab(&ag_rmap->ar_rmaps);
        error = init_slab(&ag_rmap->ar_rmaps, sizeof(struct xfs_rmap_irec));
        if (error)
                goto err;

        /* Add the AGFL blocks to the rmap list */
        error = -libxfs_trans_read_buf(
                        mp, NULL, mp->m_ddev_targp,
                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
                        XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
        if (error)
                goto err;

        /*
         * Sometimes, the blocks at the beginning of the AGFL are there
         * because we overestimated how many blocks we needed to rebuild
         * the freespace btrees. ar_flcount records the number of
         * blocks in this situation. Since those blocks already have an
         * rmap, we only need to add rmap records for AGFL blocks past
         * that point in the AGFL because those blocks are a result of a
         * no-rmap no-shrink freelist fixup that we did earlier.
         *
         * However, some blocks end up on the AGFL because the free space
         * btrees shed blocks as a result of allocating space to fix the
         * freelist. We already created in-core rmap records for the free
         * space btree blocks, so we must be careful not to create those
         * records again. Create a bitmap of already-recorded OWN_AG rmaps.
         */
        error = init_slab_cursor(ag_rmap->ar_raw_rmaps, rmap_compare, &rm_cur);
        if (error)
                goto err;
        if (!bitmap_init(&own_ag_bitmap)) {
                error = -ENOMEM;
                goto err_slab;
        }
        while ((rm_rec = pop_slab_cursor(rm_cur)) != NULL) {
                if (rm_rec->rm_owner != XFS_RMAP_OWN_AG)
                        continue;
                if (!bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
                                rm_rec->rm_blockcount)) {
                        error = EFSCORRUPTED;
                        goto err_slab;
                }
        }
        free_slab_cursor(&rm_cur);

        /* Create rmaps for any AGFL blocks that aren't already rmapped. */
        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
        b = agfl_bno + ag_rmap->ar_flcount;
        while (*b != cpu_to_be32(NULLAGBLOCK) &&
               b - agfl_bno < libxfs_agfl_size(mp)) {
                xfs_agblock_t agbno;

                agbno = be32_to_cpu(*b);
                if (!bitmap_test(own_ag_bitmap, agbno, 1)) {
                        error = rmap_add_ag_rec(mp, agno, agbno, 1,
                                        XFS_RMAP_OWN_AG);
                        if (error)
                                goto err;
                }
                b++;
        }
        libxfs_putbuf(agflbp);
        agflbp = NULL;
        bitmap_free(&own_ag_bitmap);

        /* Merge all the raw rmaps into the main list */
        error = rmap_fold_raw_recs(mp, agno);
        if (error)
                goto err;
        /* Create a cursor to the in-core rmap records */
        error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur);
        if (error)
                goto err;

        /* Insert rmaps into the btree one at a time */
        rm_rec = pop_slab_cursor(rm_cur);
        while (rm_rec) {
                struct xfs_owner_info oinfo = {};

                error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
                if (error)
                        goto err_slab;

                error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
                if (error)
                        goto err_trans;

                ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
                oinfo.oi_owner = rm_rec->rm_owner;
                error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
                                rm_rec->rm_blockcount, &oinfo);
                if (error)
                        goto err_trans;

                error = -libxfs_trans_commit(tp);
                if (error)
                        goto err_slab;

                fix_freelist(mp, agno, false);

                rm_rec = pop_slab_cursor(rm_cur);
        }

        free_slab_cursor(&rm_cur);
        return 0;

err_trans:
        libxfs_trans_cancel(tp);
err_slab:
        free_slab_cursor(&rm_cur);
err:
        if (agflbp)
                libxfs_putbuf(agflbp);
        if (own_ag_bitmap)
                bitmap_free(&own_ag_bitmap);
        return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
        const char *msg,
        xfs_agnumber_t agno,
        struct xfs_rmap_irec *rmap)
{
        printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
                msg, rmap,
                (unsigned int)agno,
                (unsigned long long)rmap->rm_startblock,
                (unsigned long long)rmap->rm_owner,
                (unsigned long long)rmap->rm_offset,
                (unsigned int)rmap->rm_blockcount,
                (unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees. Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4. There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries. Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3  43 234  2123  1 01 2 3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time. Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 * - Set sp to the physical block (pblk) of the next unprocessed rmap.
 * - Add to the bag all rmaps in the array where startblock == sp.
 * - Set np to the physical block where the bag size will change. This
 *   is the minimum of (the pblk of the next unprocessed rmap) and
 *   (startblock + len of each rmap in the bag).
 * - Record the bag size as old_bag_size.
 *
 * - While the bag isn't empty,
 *    - Remove from the bag all rmaps where startblock + len == np.
 *    - Add to the bag all rmaps in the array where startblock == np.
 *    - If the bag size isn't old_bag_size, store the refcount entry
 *      (sp, np - sp, bag_size) in the refcnt btree.
 *    - If the bag is empty, break out of the inner loop.
 *    - Set old_bag_size to the bag size.
 *    - Set sp = np.
 *    - Set np to the physical block where the bag size will change.
 *      This is the minimum of (the pblk of the next unprocessed rmap)
 *      and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree. The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */
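
/*
 * Worked example (added for illustration; not part of the original
 * algorithm description): suppose the sorted rmaps are (10, 4), (12, 4),
 * and (20, 2), written as (startblock, len). Start with sp = 10 and a
 * bag of {(10,4)}; np = 12 because a new rmap starts there. At np = 12
 * the bag grows to {(10,4), (12,4)}, so the candidate entry (10, 2, 1)
 * is skipped (refcount 1 isn't stored) and sp becomes 12. The next
 * change is at np = 14, where (10,4) ends; the bag size drops from 2 to
 * 1, so we emit the refcount record (12, 2, 2). At np = 16 the bag
 * empties and we move on to sp = 20, which never overlaps anything, so
 * no further records are emitted.
 */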

/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
        struct xfs_mount *mp,
        struct xfs_bag *rmaps)
{
        xfs_agnumber_t iagno;
        struct xfs_rmap_irec *rmap;
        struct ino_tree_node *irec;
        int off;
        size_t idx;
        xfs_agino_t ino;

        if (bag_count(rmaps) < 2)
                return;

        /* Reflink flag accounting */
        foreach_bag_ptr(rmaps, idx, rmap) {
                ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
                iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
                ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
                pthread_mutex_lock(&ag_locks[iagno].lock);
                irec = find_inode_rec(mp, iagno, ino);
                off = get_inode_offset(mp, rmap->rm_owner, irec);
                /* lock here because we might go outside this ag */
                set_inode_is_rl(irec, off);
                pthread_mutex_unlock(&ag_locks[iagno].lock);
        }
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr)      ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        size_t nr_rmaps)
{
        struct xfs_refcount_irec rlrec;
        int error;
        struct xfs_slab *rlslab;

        rlslab = ag_rmaps[agno].ar_refcount_items;
        ASSERT(nr_rmaps > 0);

        dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
                agno, agbno, len, nr_rmaps);
        rlrec.rc_startblock = agbno;
        rlrec.rc_blockcount = len;
        rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
        error = slab_add(rlslab, &rlrec);
        if (error)
                do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r)     ((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_bag *stack_top = NULL;
        struct xfs_slab *rmaps;
        struct xfs_slab_cursor *rmaps_cur;
        struct xfs_rmap_irec *array_cur;
        struct xfs_rmap_irec *rmap;
        xfs_agblock_t sbno;     /* first bno of this rmap set */
        xfs_agblock_t cbno;     /* first bno of this refcount set */
        xfs_agblock_t nbno;     /* next bno where rmap set changes */
        size_t n, idx;
        size_t old_stack_nr;
        int error;

        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;

        rmaps = ag_rmaps[agno].ar_rmaps;

        error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
        if (error)
                return error;

        error = init_bag(&stack_top);
        if (error)
                goto err;

        /* While there are rmaps to be processed... */
        n = 0;
        while (n < slab_count(rmaps)) {
                array_cur = peek_slab_cursor(rmaps_cur);
                sbno = cbno = array_cur->rm_startblock;
                /* Push all rmaps with pblk == sbno onto the stack */
                for (;
                     array_cur && array_cur->rm_startblock == sbno;
                     array_cur = peek_slab_cursor(rmaps_cur)) {
                        advance_slab_cursor(rmaps_cur); n++;
                        rmap_dump("push0", agno, array_cur);
                        error = bag_add(stack_top, array_cur);
                        if (error)
                                goto err;
                }
                mark_inode_rl(mp, stack_top);

                /* Set nbno to the bno of the next refcount change */
                if (n < slab_count(rmaps) && array_cur)
                        nbno = array_cur->rm_startblock;
                else
                        nbno = NULLAGBLOCK;
                foreach_bag_ptr(stack_top, idx, rmap) {
                        nbno = min(nbno, RMAP_END(rmap));
                }

                /* Emit refcounts, if needed */
                ASSERT(nbno > sbno);
                old_stack_nr = bag_count(stack_top);

                /* While stack isn't empty... */
                while (bag_count(stack_top)) {
                        /* Pop all rmaps that end at nbno */
                        foreach_bag_ptr_reverse(stack_top, idx, rmap) {
                                if (RMAP_END(rmap) != nbno)
                                        continue;
                                rmap_dump("pop", agno, rmap);
                                error = bag_remove(stack_top, idx);
                                if (error)
                                        goto err;
                        }

                        /* Push array items that start at nbno */
                        for (;
                             array_cur && array_cur->rm_startblock == nbno;
                             array_cur = peek_slab_cursor(rmaps_cur)) {
                                advance_slab_cursor(rmaps_cur); n++;
                                rmap_dump("push1", agno, array_cur);
                                error = bag_add(stack_top, array_cur);
                                if (error)
                                        goto err;
                        }
                        mark_inode_rl(mp, stack_top);

                        /* Emit refcount if necessary */
                        ASSERT(nbno > cbno);
                        if (bag_count(stack_top) != old_stack_nr) {
                                if (old_stack_nr > 1) {
                                        refcount_emit(mp, agno, cbno,
                                                        nbno - cbno,
                                                        old_stack_nr);
                                }
                                cbno = nbno;
                        }

                        /* Stack empty, go find the next rmap */
                        if (bag_count(stack_top) == 0)
                                break;
                        old_stack_nr = bag_count(stack_top);
                        sbno = nbno;

                        /* Set nbno to the bno of the next refcount change */
                        if (n < slab_count(rmaps))
                                nbno = array_cur->rm_startblock;
                        else
                                nbno = NULLAGBLOCK;
                        foreach_bag_ptr(stack_top, idx, rmap) {
                                nbno = min(nbno, RMAP_END(rmap));
                        }

                        /* Emit refcounts, if needed */
                        ASSERT(nbno > sbno);
                }
        }
err:
        free_bag(&stack_top);
        free_slab_cursor(&rmaps_cur);

        return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
        xfs_agnumber_t agno,
        struct xfs_slab_cursor **cur)
{
        return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
        rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
        struct xfs_btree_cur *bt_cur,
        struct xfs_rmap_irec *rm_rec,
        struct xfs_rmap_irec *tmp,
        int *have)
{
        int error;

        /* Use the regular btree retrieval routine. */
        error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
                        rm_rec->rm_blockcount,
                        rm_rec->rm_owner, rm_rec->rm_offset,
                        rm_rec->rm_flags, have);
        if (error)
                return error;
        if (*have == 0)
                return error;
        return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Like rmap_lookup(), but using the overlapped-interval lookup. */
static int
rmap_lookup_overlapped(
        struct xfs_btree_cur *bt_cur,
        struct xfs_rmap_irec *rm_rec,
        struct xfs_rmap_irec *tmp,
        int *have)
{
        /* Have to use our fancy version for overlapped */
        return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
                        rm_rec->rm_owner, rm_rec->rm_offset,
                        rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x)        ((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x)        ((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
        struct xfs_rmap_irec *observed,
        struct xfs_rmap_irec *btree)
{
        /* Can't have mismatches in the flags or the owner. */
        if (btree->rm_flags != observed->rm_flags ||
            btree->rm_owner != observed->rm_owner)
                return false;

        /*
         * Btree record can't physically start after the observed
         * record, nor can it end before the observed record.
         */
        if (btree->rm_startblock > observed->rm_startblock ||
            NEXTP(btree) < NEXTP(observed))
                return false;

        /* If this is metadata or bmbt, we're done. */
        if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
            (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
                return true;
        /*
         * Btree record can't logically start after the observed
         * record, nor can it end before the observed record.
         */
        if (btree->rm_offset > observed->rm_offset ||
            NEXTL(btree) < NEXTL(observed))
                return false;

        return true;
}
#undef NEXTP
#undef NEXTL

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rm_cur;
        struct xfs_btree_cur *bt_cur = NULL;
        int error;
        int have;
        struct xfs_buf *agbp = NULL;
        struct xfs_rmap_irec *rm_rec;
        struct xfs_rmap_irec tmp;
        struct xfs_perag *pag;          /* per allocation group data */

        if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
                return 0;
        if (rmapbt_suspect) {
                if (no_modify && agno == 0)
                        do_warn(_("would rebuild corrupt rmap btrees.\n"));
                return 0;
        }

        /* Create a cursor to the in-core rmap records */
        error = rmap_init_cursor(agno, &rm_cur);
        if (error)
                return error;

        error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
        if (error)
                goto err;

        /* Leave the per-ag data "uninitialized" since we rewrite it later */
        pag = libxfs_perag_get(mp, agno);
        pag->pagf_init = 0;
        libxfs_perag_put(pag);

        bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
        if (!bt_cur) {
                error = -ENOMEM;
                goto err;
        }

        rm_rec = pop_slab_cursor(rm_cur);
        while (rm_rec) {
                error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
                if (error)
                        goto err;
                /*
                 * Using the range query is expensive, so only do it if
                 * the regular lookup doesn't find anything or if it doesn't
                 * match the observed rmap.
                 */
                if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
                    (!have || !rmap_is_good(rm_rec, &tmp))) {
                        error = rmap_lookup_overlapped(bt_cur, rm_rec,
                                        &tmp, &have);
                        if (error)
                                goto err;
                }
                if (!have) {
                        do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
                                agno, rm_rec->rm_startblock,
                                (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                rm_rec->rm_blockcount,
                                rm_rec->rm_owner,
                                (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                rm_rec->rm_offset);
                        goto next_loop;
                }

                /* Compare each rmap observation against the btree's record */
                if (!rmap_is_good(rm_rec, &tmp)) {
                        do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
                                agno, tmp.rm_startblock,
                                (tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                tmp.rm_blockcount,
                                tmp.rm_owner,
                                (tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                tmp.rm_offset,
                                agno, rm_rec->rm_startblock,
                                (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                rm_rec->rm_blockcount,
                                rm_rec->rm_owner,
                                (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                rm_rec->rm_offset);
                        goto next_loop;
                }
next_loop:
                rm_rec = pop_slab_cursor(rm_cur);
        }

err:
        if (bt_cur)
                libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
        if (agbp)
                libxfs_putbuf(agbp);
        free_slab_cursor(&rm_cur);
        return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
int64_t
rmap_diffkeys(
        struct xfs_rmap_irec *kp1,
        struct xfs_rmap_irec *kp2)
{
        __u64 oa;
        __u64 ob;
        int64_t d;
        struct xfs_rmap_irec tmp;

        tmp = *kp1;
        tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
        oa = libxfs_rmap_irec_offset_pack(&tmp);
        tmp = *kp2;
        tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
        ob = libxfs_rmap_irec_offset_pack(&tmp);

        d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
        if (d)
                return d;

        if (kp1->rm_owner > kp2->rm_owner)
                return 1;
        else if (kp2->rm_owner > kp1->rm_owner)
                return -1;

        if (oa > ob)
                return 1;
        else if (ob > oa)
                return -1;
        return 0;
}
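
/*
 * Ordering note (added for clarity): keys compare by startblock, then
 * owner, then the packed offset; masking out XFS_RMAP_REC_FLAGS keeps
 * record-only state (the unwritten flag) out of the comparison, while
 * the attr-fork and bmbt flags remain encoded in the packed offset.
 */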

/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
        struct xfs_rmap_irec *rec,
        struct xfs_rmap_irec *key)
{
        int adj;

        adj = rec->rm_blockcount - 1;

        key->rm_startblock = rec->rm_startblock + adj;
        key->rm_owner = rec->rm_owner;
        key->rm_offset = rec->rm_offset;
        key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
        if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
            (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
                return;
        key->rm_offset += adj;
}
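
/*
 * Example (added for illustration): a record with rm_startblock 100,
 * rm_blockcount 8, and rm_offset 40 has the high key (rm_startblock 107,
 * rm_offset 47) -- the last block and last logical offset it covers --
 * unless the owner is a non-inode (metadata) owner or a bmbt block, in
 * which case the offset is not meaningful and is left unadjusted.
 */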

/*
 * Record that an inode had the reflink flag set when repair started. The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
        struct xfs_mount *mp,
        struct xfs_dinode *dino,
        xfs_agnumber_t agno,
        xfs_agino_t ino,
        xfs_ino_t lino)
{
        struct ino_tree_node *irec;
        int off;

        ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
        if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
                return;
        irec = find_inode_rec(mp, agno, ino);
        off = get_inode_offset(mp, lino, irec);
        ASSERT(!inode_was_rl(irec, off));
        set_inode_was_rl(irec, off);
        dbg_printf("set was_rl lino=%llu was=0x%llx\n",
                (unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agino_t agino,
        bool set)
{
        struct xfs_dinode *dino;
        struct xfs_buf *buf;

        if (set)
                do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
                        XFS_AGINO_TO_INO(mp, agno, agino));
        else if (!no_modify) /* && !set */
                do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
                        XFS_AGINO_TO_INO(mp, agno, agino));
        if (no_modify)
                return 0;

        buf = get_agino_buf(mp, agno, agino, &dino);
        if (!buf)
                return 1;
        ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
        if (set)
                dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
        else
                dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
        libxfs_dinode_calc_crc(mp, dino);
        libxfs_writebuf(buf, 0);

        return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct ino_tree_node *irec;
        int bit;
        uint64_t was;
        uint64_t is;
        uint64_t diff;
        uint64_t mask;
        int error = 0;
        xfs_agino_t agino;

        /*
         * Update the reflink flag for any inode where there's a discrepancy
         * between the inode flag and whether or not we found any reflinked
         * extents.
         */
        for (irec = findfirst_inode_rec(agno);
             irec != NULL;
             irec = next_ino_rec(irec)) {
                ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
                ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
                was = irec->ino_was_rl;
                is = irec->ino_is_rl;
                if (was == is)
                        continue;
                diff = was ^ is;
                dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
                        (unsigned long long)XFS_AGINO_TO_INO(mp, agno,
                                irec->ino_startnum),
                        was, is, diff);

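                /*
                 * Walk the 64 inodes of the chunk (illustrative example,
                 * added for clarity: was = 0b0110 and is = 0b0101 give
                 * diff = 0b0011, so bit 0 gets the flag set and bit 1
                 * gets it cleared; bit 2 is set in both masks and is
                 * left alone).
                 */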
                for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
                        agino = bit + irec->ino_startnum;
                        if (!(diff & mask))
                                continue;
                        else if (was & mask)
                                error = fix_inode_reflink_flag(mp, agno, agino,
                                                false);
                        else if (is & mask)
                                error = fix_inode_reflink_flag(mp, agno, agino,
                                                true);
                        else
                                ASSERT(0);
                        if (error)
                                do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
                                        XFS_AGINO_TO_INO(mp, agno, agino));
                }
        }

        return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
        xfs_agnumber_t agno,
        struct xfs_slab_cursor **cur)
{
        return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
        refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rl_cur;
        struct xfs_btree_cur *bt_cur = NULL;
        int error;
        int have;
        int i;
        struct xfs_buf *agbp = NULL;
        struct xfs_refcount_irec *rl_rec;
        struct xfs_refcount_irec tmp;
        struct xfs_perag *pag;          /* per allocation group data */

        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
        if (refcbt_suspect) {
                if (no_modify && agno == 0)
                        do_warn(_("would rebuild corrupt refcount btrees.\n"));
                return 0;
        }

        /* Create cursors to refcount structures */
        error = init_refcount_cursor(agno, &rl_cur);
        if (error)
                return error;

        error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
        if (error)
                goto err;

        /* Leave the per-ag data "uninitialized" since we rewrite it later */
        pag = libxfs_perag_get(mp, agno);
        pag->pagf_init = 0;
        libxfs_perag_put(pag);

        bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno);
        if (!bt_cur) {
                error = -ENOMEM;
                goto err;
        }

        rl_rec = pop_slab_cursor(rl_cur);
        while (rl_rec) {
                /* Look for a refcount record in the btree */
                error = -libxfs_refcount_lookup_le(bt_cur,
                                rl_rec->rc_startblock, &have);
                if (error)
                        goto err;
                if (!have) {
                        do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
                                agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
                        goto next_loop;
                }

                error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
                if (error)
                        goto err;
                if (!i) {
                        do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
                                agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
                        goto next_loop;
                }

                /* Compare each refcount observation against the btree's */
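                /*
                 * (Added note) The match need not be exact: a btree record
                 * that starts at the same block and covers at least the
                 * observed length and refcount is accepted; anything less
                 * draws a warning.
                 */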
                if (tmp.rc_startblock != rl_rec->rc_startblock ||
                    tmp.rc_blockcount < rl_rec->rc_blockcount ||
                    tmp.rc_refcount < rl_rec->rc_refcount)
                        do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
                                agno, tmp.rc_startblock, tmp.rc_blockcount,
                                tmp.rc_refcount, agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
                rl_rec = pop_slab_cursor(rl_cur);
        }

err:
        if (bt_cur)
                libxfs_btree_del_cursor(bt_cur, error ? XFS_BTREE_ERROR :
                                                        XFS_BTREE_NOERROR);
        if (agbp)
                libxfs_putbuf(agbp);
        free_slab_cursor(&rl_cur);
        return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree. If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        bool skip_rmapbt)
{
        xfs_alloc_arg_t args;
        xfs_trans_t *tp;
        int flags;
        int error;

        memset(&args, 0, sizeof(args));
        args.mp = mp;
        args.agno = agno;
        args.alignment = 1;
        args.pag = libxfs_perag_get(mp, agno);
        error = -libxfs_trans_alloc_rollable(mp, 0, &tp);
        if (error)
                do_error(_("failed to fix AGFL on AG %d, error %d\n"),
                                agno, error);
        args.tp = tp;

        /*
         * Prior to rmapbt, all we had to do to fix the freelist is "expand"
         * the fresh AGFL header from empty to full. That hasn't changed. For
         * rmapbt, however, things change a bit.
         *
         * When we're stuffing the rmapbt with the AG btree rmaps the tree can
         * expand, so we need to keep the AGFL well-stocked for the expansion.
         * However, this expansion can cause the bnobt/cntbt to shrink, which
         * can make the AGFL eligible for shrinking. Shrinking involves
         * freeing rmapbt entries, but since we haven't finished loading the
         * rmapbt with the btree rmaps it's possible for the remove operation
         * to fail. The AGFL block is large enough at this point to absorb any
         * blocks freed from the bnobt/cntbt, so we can disable shrinking.
         *
1433 * we must also disable rmapbt modifications because the AGF that
1434 * libxfs reads does not yet point to the new rmapbt. These initial
1435 * AGFL entries are added just prior to adding the AG btree block rmaps
1436 * to the rmapbt. It's ok to pass NOSHRINK here too, since the AGFL is
1437 * empty and cannot shrink.
1438 */
1439 flags = XFS_ALLOC_FLAG_NOSHRINK;
1440 if (skip_rmapbt)
1441 flags |= XFS_ALLOC_FLAG_NORMAP;
e2f60652
DW
1442 error = -libxfs_alloc_fix_freelist(&args, flags);
1443 libxfs_perag_put(args.pag);
62cf990a
DW
1444 if (error) {
1445 do_error(_("failed to fix AGFL on AG %d, error %d\n"),
1446 agno, error);
1447 }
f2279d8d
DW
1448 error = -libxfs_trans_commit(tp);
1449 if (error)
1450 do_error(_("%s: commit failed, error %d\n"), __func__, error);
62cf990a
DW
1451}
1452
1453/*
1454 * Remember how many AGFL entries came from excess AG btree allocations and
1455 * therefore already have rmap entries.
1456 */
1457void
1458rmap_store_agflcount(
1459 struct xfs_mount *mp,
1460 xfs_agnumber_t agno,
1461 int count)
1462{
2d273771 1463 if (!rmap_needs_work(mp))
62cf990a
DW
1464 return;
1465
1466 ag_rmaps[agno].ar_flcount = count;
1467}