// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...)	do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
	struct xfs_slab		*ar_rmaps;	/* rmap observations, p4 */
	struct xfs_slab		*ar_raw_rmaps;	/* unmerged rmaps */
	int			ar_flcount;	/* agfl entries from leftover */
						/* agbt allocations */
	struct xfs_rmap_irec	ar_last_rmap;	/* last rmap seen */
	struct xfs_slab		*ar_refcount_items; /* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;
static inline int rmap_compare(const void *a, const void *b)
{
	return libxfs_rmap_compare(a, b);
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
	struct xfs_mount	*mp)
{
	return xfs_sb_version_hasreflink(&mp->m_sb) ||
	       xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;
	int			error;

	if (!rmap_needs_work(mp))
		return;

	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
	if (!ag_rmaps)
		do_error(_("couldn't allocate per-AG reverse map roots\n"));

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		error = init_slab(&ag_rmaps[i].ar_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
		error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
		ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		error = init_slab(&ag_rmaps[i].ar_refcount_items,
				sizeof(struct xfs_refcount_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating refcount item slabs."));
	}
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;

	if (!rmap_needs_work(mp))
		return;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		free_slab(&ag_rmaps[i].ar_rmaps);
		free_slab(&ag_rmaps[i].ar_raw_rmaps);
		free_slab(&ag_rmaps[i].ar_refcount_items);
	}
	free(ag_rmaps);
	ag_rmaps = NULL;
}

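/*
 * Reverse-mapping observations arrive in pieces, and adjacent pieces with
 * the same owner can be merged.  For example (made-up numbers), data-fork
 * mappings (agbno 10, len 5, offset 0) and (agbno 15, len 3, offset 5) of
 * one inode are physically and logically contiguous, so the checks below
 * let them fold into (agbno 10, len 8, offset 0).
 */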
/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
	struct xfs_rmap_irec	*r1,
	struct xfs_rmap_irec	*r2)
{
	if (r1->rm_owner != r2->rm_owner)
		return false;
	if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
		return false;
	if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
	    XFS_RMAP_LEN_MAX)
		return false;
	if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
		return true;
	/* must be an inode owner below here */
	if (r1->rm_flags != r2->rm_flags)
		return false;
	if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return true;
	return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}

/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_rmap_irec	rmap;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	struct xfs_rmap_irec	*last_rmap;
	int			error = 0;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
	ASSERT(ino != NULLFSINO);
	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	rmap.rm_owner = ino;
	rmap.rm_offset = irec->br_startoff;
	rmap.rm_flags = 0;
	if (whichfork == XFS_ATTR_FORK)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = irec->br_blockcount;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
	last_rmap = &ag_rmaps[agno].ar_last_rmap;
	if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
		*last_rmap = rmap;
	else if (rmaps_are_mergeable(last_rmap, &rmap))
		last_rmap->rm_blockcount += rmap.rm_blockcount;
	else {
		error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
		if (error)
			return error;
		*last_rmap = rmap;
	}

	return error;
}

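/*
 * Note that rmap_add_rec above buffers the most recent rmap in ar_last_rmap
 * so that adjacent extents can be merged as they stream in; the buffered
 * record only reaches the slab when a non-mergeable record displaces it, or
 * when the flush below runs at the end of fork processing.
 */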
/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	if (!rmap_needs_work(mp) ||
	    ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
		return 0;
	return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner,
	bool			is_attr,
	bool			is_bmbt)
{
	struct xfs_rmap_irec	rmap;

	ASSERT(len != 0);
	rmap.rm_owner = owner;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;
	if (is_attr)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	if (is_bmbt)
		rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = len;
	return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	xfs_fsblock_t		fsbno)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
			whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner)
{
	if (!rmap_needs_work(mp))
		return 0;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*cur = NULL;
	struct xfs_rmap_irec	*prev, *rec;
	size_t			old_sz;
	int			error = 0;

	old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
	if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
		goto no_raw;
	qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
	error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
			&cur);
	if (error)
		goto err;

	prev = pop_slab_cursor(cur);
	rec = pop_slab_cursor(cur);
	while (prev && rec) {
		if (rmaps_are_mergeable(prev, rec)) {
			prev->rm_blockcount += rec->rm_blockcount;
			rec = pop_slab_cursor(cur);
			continue;
		}
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
		prev = rec;
		rec = pop_slab_cursor(cur);
	}
	if (prev) {
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
	}
	free_slab(&ag_rmaps[agno].ar_raw_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
	if (old_sz)
		qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
	free_slab_cursor(&cur);
	return error;
}

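/*
 * Index of the lowest zero bit in @mask, or 64 (the bit width) when every
 * bit is set.  Used below with ir_sparse, whose set bits mark the missing
 * pieces of a sparse inode chunk, to find the chunk's first physically
 * present inode.
 */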
static int
find_first_zero_bit(
	uint64_t	mask)
{
	int		n;
	int		b = 0;

	for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
		b++;

	return b;
}

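/* Count the set bits in @mask; a portable, if unoptimized, popcount. */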
static int
popcnt(
	uint64_t	mask)
{
	int		n;
	int		b = 0;

	if (mask == 0)
		return 0;

	for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
		if (mask & 1)
			b++;

	return b;
}

/*
 * Add an allocation group's fixed metadata to the rmap list.  This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	ino_tree_node_t		*ino_rec;
	xfs_agino_t		agino;
	int			error;
	int			startidx;
	int			nr;

	if (!rmap_needs_work(mp))
		return 0;

	/* sb/agi/agf/agfl headers */
	error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
			XFS_RMAP_OWN_FS);
	if (error)
		goto out;

	/* inodes */
	ino_rec = findfirst_inode_rec(agno);
	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
			startidx = find_first_zero_bit(ino_rec->ir_sparse);
			nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
		} else {
			startidx = 0;
			nr = XFS_INODES_PER_CHUNK;
		}
		nr /= mp->m_sb.sb_inopblock;
		if (nr == 0)
			nr = 1;
		agino = ino_rec->ino_startnum + startidx;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
			error = rmap_add_ag_rec(mp, agno, agbno, nr,
					XFS_RMAP_OWN_INODES);
			if (error)
				goto out;
		}
	}

	/* log */
	fsbno = mp->m_sb.sb_logstart;
	if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
		agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
		error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
				XFS_RMAP_OWN_LOG);
		if (error)
			goto out;
	}
out:
	return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks.  While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and
 * the fact that allocating blocks for the bnobt requires adding rmapbt
 * entries.  Therefore we record in-core the rmaps for each btree and here
 * use the libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory.  The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt.  After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries.  These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_rmap_irec	*rm_rec = NULL;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_trans	*tp;
	__be32			*agfl_bno, *b;
	int			error = 0;
	struct xfs_owner_info	oinfo;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;

	/* Release the ar_rmaps; they were put into the rmapbt during p5. */
	free_slab(&ag_rmaps[agno].ar_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		goto err;

	/* Add the AGFL blocks to the rmap list */
	error = -libxfs_trans_read_buf(
			mp, NULL, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
	if (error)
		goto err;

	/*
	 * Sometimes, the blocks at the beginning of the AGFL are there
	 * because we overestimated how many blocks we needed to rebuild
	 * the freespace btrees.  ar_flcount records the number of
	 * blocks in this situation.  Since those blocks already have an
	 * rmap, we only need to add rmap records for AGFL blocks past
	 * that point in the AGFL because those blocks are a result of a
	 * no-rmap no-shrink freelist fixup that we did earlier.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	b = agfl_bno + ag_rmaps[agno].ar_flcount;
	while (*b != cpu_to_be32(NULLAGBLOCK) &&
	       b - agfl_bno < libxfs_agfl_size(mp)) {
		error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
				XFS_RMAP_OWN_AG);
		if (error)
			goto err;
		b++;
	}
	libxfs_putbuf(agflbp);
	agflbp = NULL;

	/* Merge all the raw rmaps into the main list */
	error = rmap_fold_raw_recs(mp, agno);
	if (error)
		goto err;

	/* Create a cursor to the rmap structures */
	error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
			&rm_cur);
	if (error)
		goto err;

	/* Insert rmaps into the btree one at a time */
	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
		if (error)
			goto err_slab;

		error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
		if (error)
			goto err_trans;

		ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
		libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
		error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
				rm_rec->rm_blockcount, &oinfo);
		if (error)
			goto err_trans;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto err_slab;

		fix_freelist(mp, agno, false);

		rm_rec = pop_slab_cursor(rm_cur);
	}

	free_slab_cursor(&rm_cur);
	return 0;

err_trans:
	libxfs_trans_cancel(tp);
err_slab:
	free_slab_cursor(&rm_cur);
err:
	if (agflbp)
		libxfs_putbuf(agflbp);
	return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
	const char		*msg,
	xfs_agnumber_t		agno,
	struct xfs_rmap_irec	*rmap)
{
	printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
		msg, rmap,
		(unsigned int)agno,
		(unsigned long long)rmap->rm_startblock,
		(unsigned long long)rmap->rm_owner,
		(unsigned long long)rmap->rm_offset,
		(unsigned int)rmap->rm_blockcount,
		(unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees.  Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4.  There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries.  Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----    ----------- ----  ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time.  Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, old_bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree.  The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */

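/*
 * A short worked example with made-up numbers: given rmaps for blocks
 * 10-19 and 15-24, the bag holds one rmap at block 10 and two from block
 * 15 onward, so a single refcount record (startblock 15, len 5,
 * refcount 2) is emitted when the first rmap ends at block 20.  The
 * singly-referenced stretches 10-14 and 20-24 are never recorded, per the
 * note above.
 */
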
/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
	struct xfs_mount	*mp,
	struct xfs_bag		*rmaps)
{
	xfs_agnumber_t		iagno;
	struct xfs_rmap_irec	*rmap;
	struct ino_tree_node	*irec;
	int			off;
	size_t			idx;
	xfs_agino_t		ino;

	if (bag_count(rmaps) < 2)
		return;

	/* Reflink flag accounting */
	foreach_bag_ptr(rmaps, idx, rmap) {
		ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
		iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
		ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
		pthread_mutex_lock(&ag_locks[iagno].lock);
		irec = find_inode_rec(mp, iagno, ino);
		off = get_inode_offset(mp, rmap->rm_owner, irec);
		/* lock here because we might go outside this ag */
		set_inode_is_rl(irec, off);
		pthread_mutex_unlock(&ag_locks[iagno].lock);
	}
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr)	((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	size_t			nr_rmaps)
{
	struct xfs_refcount_irec	rlrec;
	int			error;
	struct xfs_slab		*rlslab;

	rlslab = ag_rmaps[agno].ar_refcount_items;
	ASSERT(nr_rmaps > 0);

	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
		agno, agbno, len, nr_rmaps);
	rlrec.rc_startblock = agbno;
	rlrec.rc_blockcount = len;
	rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
	error = slab_add(rlslab, &rlrec);
	if (error)
		do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r)	((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_bag		*stack_top = NULL;
	struct xfs_slab		*rmaps;
	struct xfs_slab_cursor	*rmaps_cur;
	struct xfs_rmap_irec	*array_cur;
	struct xfs_rmap_irec	*rmap;
	xfs_agblock_t		sbno;	/* first bno of this rmap set */
	xfs_agblock_t		cbno;	/* first bno of this refcount set */
	xfs_agblock_t		nbno;	/* next bno where rmap set changes */
	size_t			n, idx;
	size_t			old_stack_nr;
	int			error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	rmaps = ag_rmaps[agno].ar_rmaps;

	error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
	if (error)
		return error;

	error = init_bag(&stack_top);
	if (error)
		goto err;

	/* While there are rmaps to be processed... */
	n = 0;
	while (n < slab_count(rmaps)) {
		array_cur = peek_slab_cursor(rmaps_cur);
		sbno = cbno = array_cur->rm_startblock;
		/* Push all rmaps with pblk == sbno onto the stack */
		for (;
		     array_cur && array_cur->rm_startblock == sbno;
		     array_cur = peek_slab_cursor(rmaps_cur)) {
			advance_slab_cursor(rmaps_cur); n++;
			rmap_dump("push0", agno, array_cur);
			error = bag_add(stack_top, array_cur);
			if (error)
				goto err;
		}
		mark_inode_rl(mp, stack_top);

		/* Set nbno to the bno of the next refcount change */
		if (n < slab_count(rmaps) && array_cur)
			nbno = array_cur->rm_startblock;
		else
			nbno = NULLAGBLOCK;
		foreach_bag_ptr(stack_top, idx, rmap) {
			nbno = min(nbno, RMAP_END(rmap));
		}

		/* The next refcount change must lie ahead of where we are */
		ASSERT(nbno > sbno);
		old_stack_nr = bag_count(stack_top);

		/* While stack isn't empty... */
		while (bag_count(stack_top)) {
			/* Pop all rmaps that end at nbno */
			foreach_bag_ptr_reverse(stack_top, idx, rmap) {
				if (RMAP_END(rmap) != nbno)
					continue;
				rmap_dump("pop", agno, rmap);
				error = bag_remove(stack_top, idx);
				if (error)
					goto err;
			}

			/* Push array items that start at nbno */
			for (;
			     array_cur && array_cur->rm_startblock == nbno;
			     array_cur = peek_slab_cursor(rmaps_cur)) {
				advance_slab_cursor(rmaps_cur); n++;
				rmap_dump("push1", agno, array_cur);
				error = bag_add(stack_top, array_cur);
				if (error)
					goto err;
			}
			mark_inode_rl(mp, stack_top);

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (bag_count(stack_top) != old_stack_nr) {
				if (old_stack_nr > 1) {
					refcount_emit(mp, agno, cbno,
							nbno - cbno,
							old_stack_nr);
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (bag_count(stack_top) == 0)
				break;
			old_stack_nr = bag_count(stack_top);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			if (n < slab_count(rmaps))
				nbno = array_cur->rm_startblock;
			else
				nbno = NULLAGBLOCK;
			foreach_bag_ptr(stack_top, idx, rmap) {
				nbno = min(nbno, RMAP_END(rmap));
			}

			/* The next refcount change must lie ahead of where we are */
			ASSERT(nbno > sbno);
		}
	}
err:
	free_bag(&stack_top);
	free_slab_cursor(&rmaps_cur);

	return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
	rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	int			error;

	/* Use the regular btree retrieval routine. */
	error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_blockcount,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, have);
	if (error)
		return error;
	if (*have == 0)
		return error;
	return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Look for an rmap in the rmapbt that overlaps a given rmap. */
static int
rmap_lookup_overlapped(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	/* Have to use our fancy version for overlapped */
	return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x)	((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x)	((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
	struct xfs_rmap_irec	*observed,
	struct xfs_rmap_irec	*btree)
{
	/* Can't have mismatches in the flags or the owner. */
	if (btree->rm_flags != observed->rm_flags ||
	    btree->rm_owner != observed->rm_owner)
		return false;

	/*
	 * Btree record can't physically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_startblock > observed->rm_startblock ||
	    NEXTP(btree) < NEXTP(observed))
		return false;

	/* If this is metadata or bmbt, we're done. */
	if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
	    (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return true;
	/*
	 * Btree record can't logically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_offset > observed->rm_offset ||
	    NEXTL(btree) < NEXTL(observed))
		return false;

	return true;
}
#undef NEXTP
#undef NEXTL

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	struct xfs_buf		*agbp = NULL;
	struct xfs_rmap_irec	*rm_rec;
	struct xfs_rmap_irec	tmp;
	struct xfs_perag	*pag;	/* per allocation group data */

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;
	if (rmapbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt rmap btrees.\n"));
		return 0;
	}

	/* Create a cursor to the rmap records */
	error = rmap_init_cursor(agno, &rm_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
		if (error)
			goto err;
		/*
		 * Using the range query is expensive, so only do it if
		 * the regular lookup doesn't find anything or if it doesn't
		 * match the observed rmap.
		 */
		if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
				(!have || !rmap_is_good(rm_rec, &tmp))) {
			error = rmap_lookup_overlapped(bt_cur, rm_rec,
					&tmp, &have);
			if (error)
				goto err;
		}
		if (!have) {
			do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}

		/* Compare each rmap observation against the btree's */
		if (!rmap_is_good(rm_rec, &tmp)) {
			do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
				agno, tmp.rm_startblock,
				(tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				tmp.rm_blockcount,
				tmp.rm_owner,
				(tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				tmp.rm_offset,
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}
next_loop:
		rm_rec = pop_slab_cursor(rm_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rm_cur);
	return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
int64_t
rmap_diffkeys(
	struct xfs_rmap_irec	*kp1,
	struct xfs_rmap_irec	*kp2)
{
	__u64			oa;
	__u64			ob;
	int64_t			d;
	struct xfs_rmap_irec	tmp;

	tmp = *kp1;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	oa = libxfs_rmap_irec_offset_pack(&tmp);
	tmp = *kp2;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	ob = libxfs_rmap_irec_offset_pack(&tmp);

	d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
	if (d)
		return d;

	if (kp1->rm_owner > kp2->rm_owner)
		return 1;
	else if (kp2->rm_owner > kp1->rm_owner)
		return -1;

	if (oa > ob)
		return 1;
	else if (ob > oa)
		return -1;
	return 0;
}

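/*
 * As a worked example with made-up values, an inode-owned record
 * (startblock 10, len 5, offset 100) has the high key (startblock 14,
 * offset 104): the high key names the record's last block, not one past it.
 */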
/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
	struct xfs_rmap_irec	*rec,
	struct xfs_rmap_irec	*key)
{
	int			adj;

	adj = rec->rm_blockcount - 1;

	key->rm_startblock = rec->rm_startblock + adj;
	key->rm_owner = rec->rm_owner;
	key->rm_offset = rec->rm_offset;
	key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
	    (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return;
	key->rm_offset += adj;
}

/*
 * Record that an inode had the reflink flag set when repair started.  The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dino,
	xfs_agnumber_t		agno,
	xfs_agino_t		ino,
	xfs_ino_t		lino)
{
	struct ino_tree_node	*irec;
	int			off;

	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
	if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
		return;
	irec = find_inode_rec(mp, agno, ino);
	off = get_inode_offset(mp, lino, irec);
	ASSERT(!inode_was_rl(irec, off));
	set_inode_was_rl(irec, off);
	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	bool			set)
{
	struct xfs_dinode	*dino;
	struct xfs_buf		*buf;

	if (set)
		do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	else if (!no_modify) /* && !set */
		do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	if (no_modify)
		return 0;

	buf = get_agino_buf(mp, agno, agino, &dino);
	if (!buf)
		return 1;
	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
	if (set)
		dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
	else
		dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
	libxfs_dinode_calc_crc(mp, dino);
	libxfs_writebuf(buf, 0);

	return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct ino_tree_node	*irec;
	int			bit;
	uint64_t		was;
	uint64_t		is;
	uint64_t		diff;
	uint64_t		mask;
	int			error = 0;
	xfs_agino_t		agino;

	/*
	 * Update the reflink flag for any inode where there's a discrepancy
	 * between the inode flag and whether or not we found any reflinked
	 * extents.
	 */
	for (irec = findfirst_inode_rec(agno);
	     irec != NULL;
	     irec = next_ino_rec(irec)) {
		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
		was = irec->ino_was_rl;
		is = irec->ino_is_rl;
		if (was == is)
			continue;
		diff = was ^ is;
		dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
				irec->ino_startnum),
			was, is, diff);

		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
			agino = bit + irec->ino_startnum;
			if (!(diff & mask))
				continue;
			else if (was & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						false);
			else if (is & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						true);
			else
				ASSERT(0);
			if (error)
				do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
					XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}

	return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
	refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rl_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	int			i;
	struct xfs_buf		*agbp = NULL;
	struct xfs_refcount_irec	*rl_rec;
	struct xfs_refcount_irec	tmp;
	struct xfs_perag	*pag;	/* per allocation group data */

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
	if (refcbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt refcount btrees.\n"));
		return 0;
	}

	/* Create a cursor to the refcount structures */
	error = init_refcount_cursor(agno, &rl_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rl_rec = pop_slab_cursor(rl_cur);
	while (rl_rec) {
		/* Look for a refcount record in the btree */
		error = -libxfs_refcount_lookup_le(bt_cur,
				rl_rec->rc_startblock, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
		if (error)
			goto err;
		if (!i) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		/* Compare each refcount observation against the btree's */
		if (tmp.rc_startblock != rl_rec->rc_startblock ||
		    tmp.rc_blockcount < rl_rec->rc_blockcount ||
		    tmp.rc_refcount < rl_rec->rc_refcount)
			do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
				agno, tmp.rc_startblock, tmp.rc_blockcount,
				tmp.rc_refcount, agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
		rl_rec = pop_slab_cursor(rl_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, error ? XFS_BTREE_ERROR :
							XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rl_cur);
	return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree.  If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	bool			skip_rmapbt)
{
	xfs_alloc_arg_t		args;
	xfs_trans_t		*tp;
	int			flags;
	int			error;

	memset(&args, 0, sizeof(args));
	args.mp = mp;
	args.agno = agno;
	args.alignment = 1;
	args.pag = libxfs_perag_get(mp, agno);
	error = -libxfs_trans_alloc_rollable(mp, 0, &tp);
	if (error)
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	args.tp = tp;

	/*
	 * Prior to rmapbt, all we had to do to fix the freelist was "expand"
	 * the fresh AGFL header from empty to full.  That hasn't changed.  For
	 * rmapbt, however, things change a bit.
	 *
	 * When we're stuffing the rmapbt with the AG btree rmaps the tree can
	 * expand, so we need to keep the AGFL well-stocked for the expansion.
	 * However, this expansion can cause the bnobt/cntbt to shrink, which
	 * can make the AGFL eligible for shrinking.  Shrinking involves
	 * freeing rmapbt entries, but since we haven't finished loading the
	 * rmapbt with the btree rmaps it's possible for the remove operation
	 * to fail.  The AGFL block is large enough at this point to absorb any
	 * blocks freed from the bnobt/cntbt, so we can disable shrinking.
	 *
	 * During the initial AGFL regeneration during AGF generation in phase5
	 * we must also disable rmapbt modifications because the AGF that
	 * libxfs reads does not yet point to the new rmapbt.  These initial
	 * AGFL entries are added just prior to adding the AG btree block rmaps
	 * to the rmapbt.  It's ok to pass NOSHRINK here too, since the AGFL is
	 * empty and cannot shrink.
	 */
	flags = XFS_ALLOC_FLAG_NOSHRINK;
	if (skip_rmapbt)
		flags |= XFS_ALLOC_FLAG_NORMAP;
	error = -libxfs_alloc_fix_freelist(&args, flags);
	libxfs_perag_put(args.pag);
	if (error) {
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	}
	error = -libxfs_trans_commit(tp);
	if (error)
		do_error(_("%s: commit failed, error %d\n"), __func__, error);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	int			count)
{
	if (!rmap_needs_work(mp))
		return;

	ag_rmaps[agno].ar_flcount = count;
}