/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
        struct xfs_slab *ar_rmaps;              /* rmap observations, p4 */
        struct xfs_slab *ar_raw_rmaps;          /* unmerged rmaps */
        int ar_flcount;                         /* agfl entries from leftover */
                                                /* agbt allocations */
        struct xfs_rmap_irec ar_last_rmap;      /* last rmap seen */
        struct xfs_slab *ar_refcount_items;     /* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;

static inline int rmap_compare(const void *a, const void *b)
{
        return libxfs_rmap_compare(a, b);
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
        struct xfs_mount *mp)
{
        return xfs_sb_version_hasreflink(&mp->m_sb) ||
                xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
        struct xfs_mount *mp)
{
        xfs_agnumber_t i;
        int error;

        if (!rmap_needs_work(mp))
                return;

        ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
        if (!ag_rmaps)
                do_error(_("couldn't allocate per-AG reverse map roots\n"));

        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                error = init_slab(&ag_rmaps[i].ar_rmaps,
                                sizeof(struct xfs_rmap_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
                error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
                                sizeof(struct xfs_rmap_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
                ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
                error = init_slab(&ag_rmaps[i].ar_refcount_items,
                                sizeof(struct xfs_refcount_irec));
                if (error)
                        do_error(
_("Insufficient memory while allocating refcount item slabs."));
        }
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
        struct xfs_mount *mp)
{
        xfs_agnumber_t i;

        if (!rmap_needs_work(mp))
                return;

        for (i = 0; i < mp->m_sb.sb_agcount; i++) {
                free_slab(&ag_rmaps[i].ar_rmaps);
                free_slab(&ag_rmaps[i].ar_raw_rmaps);
                free_slab(&ag_rmaps[i].ar_refcount_items);
        }
        free(ag_rmaps);
        ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
        struct xfs_rmap_irec *r1,
        struct xfs_rmap_irec *r2)
{
        if (r1->rm_owner != r2->rm_owner)
                return false;
        if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
                return false;
        if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
            XFS_RMAP_LEN_MAX)
                return false;
        if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
                return true;
        /* must be an inode owner below here */
        if (r1->rm_flags != r2->rm_flags)
                return false;
        if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
                return true;
        return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}
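
/*
 * Editorial example (not part of the original source; the values are
 * invented for illustration): two observations that rmaps_are_mergeable()
 * would coalesce, assuming inode 128 owns both extents:
 *
 *      r1 = { .rm_owner = 128, .rm_startblock = 10, .rm_blockcount = 5,
 *             .rm_offset = 0, .rm_flags = 0 };
 *      r2 = { .rm_owner = 128, .rm_startblock = 15, .rm_blockcount = 3,
 *             .rm_offset = 5, .rm_flags = 0 };
 *
 * Same owner, physically contiguous (10 + 5 == 15), logically contiguous
 * (0 + 5 == 5), identical flags, and the combined length stays within
 * XFS_RMAP_LEN_MAX, so the pair merges into a single rmap of length 8.
 * Changing r2.rm_offset to 6 would leave a logical hole and block the
 * merge.
 */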

/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
        struct xfs_mount *mp,
        xfs_ino_t ino,
        int whichfork,
        struct xfs_bmbt_irec *irec)
{
        struct xfs_rmap_irec rmap;
        xfs_agnumber_t agno;
        xfs_agblock_t agbno;
        struct xfs_rmap_irec *last_rmap;
        int error = 0;

        if (!rmap_needs_work(mp))
                return 0;

        agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
        agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
        ASSERT(ino != NULLFSINO);
        ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

        rmap.rm_owner = ino;
        rmap.rm_offset = irec->br_startoff;
        rmap.rm_flags = 0;
        if (whichfork == XFS_ATTR_FORK)
                rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = irec->br_blockcount;
        if (irec->br_state == XFS_EXT_UNWRITTEN)
                rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
        last_rmap = &ag_rmaps[agno].ar_last_rmap;
        if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
                *last_rmap = rmap;
        else if (rmaps_are_mergeable(last_rmap, &rmap))
                last_rmap->rm_blockcount += rmap.rm_blockcount;
        else {
                error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
                if (error)
                        return error;
                *last_rmap = rmap;
        }

        return error;
}

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        if (!rmap_needs_work(mp) ||
            ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
                return 0;
        return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        uint64_t owner,
        bool is_attr,
        bool is_bmbt)
{
        struct xfs_rmap_irec rmap;

        ASSERT(len != 0);
        rmap.rm_owner = owner;
        rmap.rm_offset = 0;
        rmap.rm_flags = 0;
        if (is_attr)
                rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
        if (is_bmbt)
                rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = len;
        return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
        struct xfs_mount *mp,
        xfs_ino_t ino,
        int whichfork,
        xfs_fsblock_t fsbno)
{
        xfs_agnumber_t agno;
        xfs_agblock_t agbno;

        if (!rmap_needs_work(mp))
                return 0;

        agno = XFS_FSB_TO_AGNO(mp, fsbno);
        agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

        return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
                        whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        uint64_t owner)
{
        if (!rmap_needs_work(mp))
                return 0;

        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agno < mp->m_sb.sb_agcount);
        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

        return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *cur = NULL;
        struct xfs_rmap_irec *prev, *rec;
        size_t old_sz;
        int error = 0;

        old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
        if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
                goto no_raw;
        qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
        error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
                        &cur);
        if (error)
                goto err;

        prev = pop_slab_cursor(cur);
        rec = pop_slab_cursor(cur);
        while (prev && rec) {
                if (rmaps_are_mergeable(prev, rec)) {
                        prev->rm_blockcount += rec->rm_blockcount;
                        rec = pop_slab_cursor(cur);
                        continue;
                }
                error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
                if (error)
                        goto err;
                prev = rec;
                rec = pop_slab_cursor(cur);
        }
        if (prev) {
                error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
                if (error)
                        goto err;
        }
        free_slab(&ag_rmaps[agno].ar_raw_rmaps);
        error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
                        sizeof(struct xfs_rmap_irec));
        if (error)
                do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
        if (old_sz)
                qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
        free_slab_cursor(&cur);
        return error;
}

static int
find_first_zero_bit(
        uint64_t mask)
{
        int n;
        int b = 0;

        for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
                b++;

        return b;
}

static int
popcnt(
        uint64_t mask)
{
        int n;
        int b = 0;

        if (mask == 0)
                return 0;

        for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
                if (mask & 1)
                        b++;

        return b;
}
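
/*
 * Editorial worked example (not part of the original source): with
 * mask = 0x7 (binary ...0111), find_first_zero_bit() skips the three low
 * set bits and returns 3, while popcnt() counts them and also returns 3.
 * For an all-ones mask, find_first_zero_bit() returns 64, one past the
 * highest bit. Below, these run against ir_sparse, whose set bits appear
 * to mark the sparse (missing) inode slots of a chunk.
 */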

/*
 * Add an allocation group's fixed metadata to the rmap list. This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        xfs_fsblock_t fsbno;
        xfs_agblock_t agbno;
        ino_tree_node_t *ino_rec;
        xfs_agino_t agino;
        int error;
        int startidx;
        int nr;

        if (!rmap_needs_work(mp))
                return 0;

        /* sb/agi/agf/agfl headers */
        error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
                        XFS_RMAP_OWN_FS);
        if (error)
                goto out;

        /* inodes */
        ino_rec = findfirst_inode_rec(agno);
        for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
                if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
                        startidx = find_first_zero_bit(ino_rec->ir_sparse);
                        nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
                } else {
                        startidx = 0;
                        nr = XFS_INODES_PER_CHUNK;
                }
                nr /= mp->m_sb.sb_inopblock;
                if (nr == 0)
                        nr = 1;
                agino = ino_rec->ino_startnum + startidx;
                agbno = XFS_AGINO_TO_AGBNO(mp, agino);
                if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
                        error = rmap_add_ag_rec(mp, agno, agbno, nr,
                                        XFS_RMAP_OWN_INODES);
                        if (error)
                                goto out;
                }
        }

        /* log */
        fsbno = mp->m_sb.sb_logstart;
        if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
                agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
                error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
                                XFS_RMAP_OWN_LOG);
                if (error)
                        goto out;
        }
out:
        return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks. While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and the
 * fact that allocating blocks for the bnobt requires adding rmapbt entries.
 * Therefore we record in-core the rmaps for each btree and here use the
 * libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory. The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt. After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries. These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rm_cur;
        struct xfs_rmap_irec *rm_rec = NULL;
        struct xfs_buf *agbp = NULL;
        struct xfs_buf *agflbp = NULL;
        struct xfs_trans *tp;
        struct xfs_trans_res tres = {0};
        __be32 *agfl_bno, *b;
        int error = 0;
        struct xfs_owner_info oinfo;

        if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
                return 0;

        /* Release the ar_rmaps; they were put into the rmapbt during p5. */
        free_slab(&ag_rmaps[agno].ar_rmaps);
        error = init_slab(&ag_rmaps[agno].ar_rmaps,
                        sizeof(struct xfs_rmap_irec));
        if (error)
                goto err;

        /* Add the AGFL blocks to the rmap list */
        error = -libxfs_trans_read_buf(
                        mp, NULL, mp->m_ddev_targp,
                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
                        XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
        if (error)
                goto err;

        /*
         * Sometimes, the blocks at the beginning of the AGFL are there
         * because we overestimated how many blocks we needed to rebuild
         * the freespace btrees. ar_flcount records the number of
         * blocks in this situation. Since those blocks already have an
         * rmap, we only need to add rmap records for AGFL blocks past
         * that point in the AGFL because those blocks are a result of a
         * no-rmap no-shrink freelist fixup that we did earlier.
         */
        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
        b = agfl_bno + ag_rmaps[agno].ar_flcount;
        while (*b != NULLAGBLOCK && b - agfl_bno < XFS_AGFL_SIZE(mp)) {
                error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
                                XFS_RMAP_OWN_AG);
                if (error)
                        goto err;
                b++;
        }
        libxfs_putbuf(agflbp);
        agflbp = NULL;

        /* Merge all the raw rmaps into the main list */
        error = rmap_fold_raw_recs(mp, agno);
        if (error)
                goto err;

        /* Create a cursor to the rmap structures */
        error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
                        &rm_cur);
        if (error)
                goto err;

        /* Insert rmaps into the btree one at a time */
        rm_rec = pop_slab_cursor(rm_cur);
        while (rm_rec) {
                error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
                if (error)
                        goto err_slab;

                error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
                if (error)
                        goto err_trans;

                ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
                libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
                error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
                                rm_rec->rm_blockcount, &oinfo);
                if (error)
                        goto err_trans;

                error = -libxfs_trans_commit(tp);
                if (error)
                        goto err_slab;

                fix_freelist(mp, agno, false);

                rm_rec = pop_slab_cursor(rm_cur);
        }

        free_slab_cursor(&rm_cur);
        return 0;

err_trans:
        libxfs_trans_cancel(tp);
err_slab:
        free_slab_cursor(&rm_cur);
err:
        if (agflbp)
                libxfs_putbuf(agflbp);
        return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
        const char *msg,
        xfs_agnumber_t agno,
        struct xfs_rmap_irec *rmap)
{
        printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
                msg, rmap,
                (unsigned int)agno,
                (unsigned long long)rmap->rm_startblock,
                (unsigned long long)rmap->rm_owner,
                (unsigned long long)rmap->rm_offset,
                (unsigned int)rmap->rm_blockcount,
                (unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees. Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4. There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries. Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----   ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time. Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change. This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree. The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */
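
/*
 * Editorial worked example (not part of the original source): suppose the
 * sorted rmap array holds just (startblock, len) = (0, 3) and (1, 4). The
 * walk starts at sp = 0 with the bag holding (0,3); the level next changes
 * at np = 1, where (1,4) joins the bag. The old bag size was 1, so nothing
 * is emitted there. At np = 3, (0,3) leaves the bag and the size drops
 * from 2 to 1, so the entry (startblock 1, len 2, refcount 2) is recorded.
 * At np = 5 the bag empties with an old size of 1, so nothing further is
 * emitted: blocks 1-2 are the only ones with two owners.
 */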

/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
        struct xfs_mount *mp,
        struct xfs_bag *rmaps)
{
        xfs_agnumber_t iagno;
        struct xfs_rmap_irec *rmap;
        struct ino_tree_node *irec;
        int off;
        size_t idx;
        xfs_agino_t ino;

        if (bag_count(rmaps) < 2)
                return;

        /* Reflink flag accounting */
        foreach_bag_ptr(rmaps, idx, rmap) {
                ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
                iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
                ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
                pthread_mutex_lock(&ag_locks[iagno].lock);
                irec = find_inode_rec(mp, iagno, ino);
                off = get_inode_offset(mp, rmap->rm_owner, irec);
                /* lock here because we might go outside this ag */
                set_inode_is_rl(irec, off);
                pthread_mutex_unlock(&ag_locks[iagno].lock);
        }
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agblock_t agbno,
        xfs_extlen_t len,
        size_t nr_rmaps)
{
        struct xfs_refcount_irec rlrec;
        int error;
        struct xfs_slab *rlslab;

        rlslab = ag_rmaps[agno].ar_refcount_items;
        ASSERT(nr_rmaps > 0);

        dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
                agno, agbno, len, nr_rmaps);
        rlrec.rc_startblock = agbno;
        rlrec.rc_blockcount = len;
        rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
        error = slab_add(rlslab, &rlrec);
        if (error)
                do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r) ((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_bag *stack_top = NULL;
        struct xfs_slab *rmaps;
        struct xfs_slab_cursor *rmaps_cur;
        struct xfs_rmap_irec *array_cur;
        struct xfs_rmap_irec *rmap;
        xfs_agblock_t sbno;     /* first bno of this rmap set */
        xfs_agblock_t cbno;     /* first bno of this refcount set */
        xfs_agblock_t nbno;     /* next bno where rmap set changes */
        size_t n, idx;
        size_t old_stack_nr;
        int error;

        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;

        rmaps = ag_rmaps[agno].ar_rmaps;

        error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
        if (error)
                return error;

        error = init_bag(&stack_top);
        if (error)
                goto err;

        /* While there are rmaps to be processed... */
        n = 0;
        while (n < slab_count(rmaps)) {
                array_cur = peek_slab_cursor(rmaps_cur);
                sbno = cbno = array_cur->rm_startblock;
                /* Push all rmaps with pblk == sbno onto the stack */
                for (;
                     array_cur && array_cur->rm_startblock == sbno;
                     array_cur = peek_slab_cursor(rmaps_cur)) {
                        advance_slab_cursor(rmaps_cur); n++;
                        rmap_dump("push0", agno, array_cur);
                        error = bag_add(stack_top, array_cur);
                        if (error)
                                goto err;
                }
                mark_inode_rl(mp, stack_top);

                /* Set nbno to the bno of the next refcount change */
                if (n < slab_count(rmaps) && array_cur)
                        nbno = array_cur->rm_startblock;
                else
                        nbno = NULLAGBLOCK;
                foreach_bag_ptr(stack_top, idx, rmap) {
                        nbno = min(nbno, RMAP_END(rmap));
                }

                /* Emit reverse mappings, if needed */
                ASSERT(nbno > sbno);
                old_stack_nr = bag_count(stack_top);

                /* While stack isn't empty... */
                while (bag_count(stack_top)) {
                        /* Pop all rmaps that end at nbno */
                        foreach_bag_ptr_reverse(stack_top, idx, rmap) {
                                if (RMAP_END(rmap) != nbno)
                                        continue;
                                rmap_dump("pop", agno, rmap);
                                error = bag_remove(stack_top, idx);
                                if (error)
                                        goto err;
                        }

                        /* Push array items that start at nbno */
                        for (;
                             array_cur && array_cur->rm_startblock == nbno;
                             array_cur = peek_slab_cursor(rmaps_cur)) {
                                advance_slab_cursor(rmaps_cur); n++;
                                rmap_dump("push1", agno, array_cur);
                                error = bag_add(stack_top, array_cur);
                                if (error)
                                        goto err;
                        }
                        mark_inode_rl(mp, stack_top);

                        /* Emit refcount if necessary */
                        ASSERT(nbno > cbno);
                        if (bag_count(stack_top) != old_stack_nr) {
                                if (old_stack_nr > 1) {
                                        refcount_emit(mp, agno, cbno,
                                                        nbno - cbno,
                                                        old_stack_nr);
                                }
                                cbno = nbno;
                        }

                        /* Stack empty, go find the next rmap */
                        if (bag_count(stack_top) == 0)
                                break;
                        old_stack_nr = bag_count(stack_top);
                        sbno = nbno;

                        /* Set nbno to the bno of the next refcount change */
                        if (n < slab_count(rmaps))
                                nbno = array_cur->rm_startblock;
                        else
                                nbno = NULLAGBLOCK;
                        foreach_bag_ptr(stack_top, idx, rmap) {
                                nbno = min(nbno, RMAP_END(rmap));
                        }

                        /* Emit reverse mappings, if needed */
                        ASSERT(nbno > sbno);
                }
        }
err:
        free_bag(&stack_top);
        free_slab_cursor(&rmaps_cur);

        return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
        xfs_agnumber_t agno,
        struct xfs_slab_cursor **cur)
{
        return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
        rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
        struct xfs_btree_cur *bt_cur,
        struct xfs_rmap_irec *rm_rec,
        struct xfs_rmap_irec *tmp,
        int *have)
{
        int error;

        /* Use the regular btree retrieval routine. */
        error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
                        rm_rec->rm_blockcount,
                        rm_rec->rm_owner, rm_rec->rm_offset,
                        rm_rec->rm_flags, have);
        if (error)
                return error;
        if (*have == 0)
                return error;
        return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup_overlapped(
        struct xfs_btree_cur *bt_cur,
        struct xfs_rmap_irec *rm_rec,
        struct xfs_rmap_irec *tmp,
        int *have)
{
        /* Have to use our fancy version for overlapped */
        return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
                        rm_rec->rm_owner, rm_rec->rm_offset,
                        rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x) ((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x) ((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
        struct xfs_rmap_irec *observed,
        struct xfs_rmap_irec *btree)
{
        /* Can't have mismatches in the flags or the owner. */
        if (btree->rm_flags != observed->rm_flags ||
            btree->rm_owner != observed->rm_owner)
                return false;

        /*
         * Btree record can't physically start after the observed
         * record, nor can it end before the observed record.
         */
        if (btree->rm_startblock > observed->rm_startblock ||
            NEXTP(btree) < NEXTP(observed))
                return false;

        /* If this is metadata or bmbt, we're done. */
        if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
            (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
                return true;
        /*
         * Btree record can't logically start after the observed
         * record, nor can it end before the observed record.
         */
        if (btree->rm_offset > observed->rm_offset ||
            NEXTL(btree) < NEXTL(observed))
                return false;

        return true;
}
#undef NEXTP
#undef NEXTL
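
/*
 * Editorial example (not part of the original source): an observed rmap
 * (startblock 10, blockcount 2, offset 4) is covered by a btree record
 * (startblock 8, blockcount 8, offset 2) with the same owner and flags,
 * since 8 <= 10 and 8 + 8 >= 10 + 2 physically, and 2 <= 4 and
 * 2 + 8 >= 4 + 2 logically. A btree record ending at block 11 would fail
 * the physical containment test and send the caller below to the
 * overlapped lookup.
 */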

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rm_cur;
        struct xfs_btree_cur *bt_cur = NULL;
        int error;
        int have;
        struct xfs_buf *agbp = NULL;
        struct xfs_rmap_irec *rm_rec;
        struct xfs_rmap_irec tmp;
        struct xfs_perag *pag;          /* per allocation group data */

        if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
                return 0;
        if (rmapbt_suspect) {
                if (no_modify && agno == 0)
                        do_warn(_("would rebuild corrupt rmap btrees.\n"));
                return 0;
        }

        /* Create a cursor to the rmap structures */
        error = rmap_init_cursor(agno, &rm_cur);
        if (error)
                return error;

        error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
        if (error)
                goto err;

        /* Leave the per-ag data "uninitialized" since we rewrite it later */
        pag = libxfs_perag_get(mp, agno);
        pag->pagf_init = 0;
        libxfs_perag_put(pag);

        bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
        if (!bt_cur) {
                error = -ENOMEM;
                goto err;
        }

        rm_rec = pop_slab_cursor(rm_cur);
        while (rm_rec) {
                error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
                if (error)
                        goto err;
                /*
                 * Using the range query is expensive, so only do it if
                 * the regular lookup doesn't find anything or if it doesn't
                 * match the observed rmap.
                 */
                if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
                    (!have || !rmap_is_good(rm_rec, &tmp))) {
                        error = rmap_lookup_overlapped(bt_cur, rm_rec,
                                        &tmp, &have);
                        if (error)
                                goto err;
                }
                if (!have) {
                        do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
                                agno, rm_rec->rm_startblock,
                                (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                rm_rec->rm_blockcount,
                                rm_rec->rm_owner,
                                (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                rm_rec->rm_offset);
                        goto next_loop;
                }

                /* Compare each rmap observation against the btree's */
                if (!rmap_is_good(rm_rec, &tmp)) {
                        do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
                                agno, tmp.rm_startblock,
                                (tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                tmp.rm_blockcount,
                                tmp.rm_owner,
                                (tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                tmp.rm_offset,
                                agno, rm_rec->rm_startblock,
                                (rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
                                        _("unwritten ") : "",
                                rm_rec->rm_blockcount,
                                rm_rec->rm_owner,
                                (rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
                                        _("attr ") : "",
                                (rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
                                        _("bmbt ") : "",
                                rm_rec->rm_offset);
                        goto next_loop;
                }
next_loop:
                rm_rec = pop_slab_cursor(rm_cur);
        }

err:
        if (bt_cur)
                libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
        if (agbp)
                libxfs_putbuf(agbp);
        free_slab_cursor(&rm_cur);
        return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
int64_t
rmap_diffkeys(
        struct xfs_rmap_irec *kp1,
        struct xfs_rmap_irec *kp2)
{
        __u64 oa;
        __u64 ob;
        int64_t d;
        struct xfs_rmap_irec tmp;

        tmp = *kp1;
        tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
        oa = libxfs_rmap_irec_offset_pack(&tmp);
        tmp = *kp2;
        tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
        ob = libxfs_rmap_irec_offset_pack(&tmp);

        d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
        if (d)
                return d;

        if (kp1->rm_owner > kp2->rm_owner)
                return 1;
        else if (kp2->rm_owner > kp1->rm_owner)
                return -1;

        if (oa > ob)
                return 1;
        else if (ob > oa)
                return -1;
        return 0;
}

/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
        struct xfs_rmap_irec *rec,
        struct xfs_rmap_irec *key)
{
        int adj;

        adj = rec->rm_blockcount - 1;

        key->rm_startblock = rec->rm_startblock + adj;
        key->rm_owner = rec->rm_owner;
        key->rm_offset = rec->rm_offset;
        key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
        if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
            (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
                return;
        key->rm_offset += adj;
}
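
/*
 * Editorial example (not part of the original source): for an inode-owned
 * record (rm_startblock 10, rm_blockcount 4, rm_offset 7), adj is 3 and
 * the high key becomes (rm_startblock 13, rm_offset 10) -- the physical
 * block and file offset of the last block covered. For non-inode owners
 * and bmbt blocks the offset bump is skipped because rm_offset carries no
 * meaning there.
 */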

/*
 * Record that an inode had the reflink flag set when repair started. The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
        struct xfs_mount *mp,
        struct xfs_dinode *dino,
        xfs_agnumber_t agno,
        xfs_agino_t ino,
        xfs_ino_t lino)
{
        struct ino_tree_node *irec;
        int off;

        ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
        if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
                return;
        irec = find_inode_rec(mp, agno, ino);
        off = get_inode_offset(mp, lino, irec);
        ASSERT(!inode_was_rl(irec, off));
        set_inode_was_rl(irec, off);
        dbg_printf("set was_rl lino=%llu was=0x%llx\n",
                (unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        xfs_agino_t agino,
        bool set)
{
        struct xfs_dinode *dino;
        struct xfs_buf *buf;

        if (set)
                do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
                        XFS_AGINO_TO_INO(mp, agno, agino));
        else if (!no_modify) /* && !set */
                do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
                        XFS_AGINO_TO_INO(mp, agno, agino));
        if (no_modify)
                return 0;

        buf = get_agino_buf(mp, agno, agino, &dino);
        if (!buf)
                return 1;
        ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
        if (set)
                dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
        else
                dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
        libxfs_dinode_calc_crc(mp, dino);
        libxfs_writebuf(buf, 0);

        return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct ino_tree_node *irec;
        int bit;
        uint64_t was;
        uint64_t is;
        uint64_t diff;
        uint64_t mask;
        int error = 0;
        xfs_agino_t agino;

        /*
         * Update the reflink flag for any inode where there's a discrepancy
         * between the inode flag and whether or not we found any reflinked
         * extents.
         */
        for (irec = findfirst_inode_rec(agno);
             irec != NULL;
             irec = next_ino_rec(irec)) {
                ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
                ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
                was = irec->ino_was_rl;
                is = irec->ino_is_rl;
                if (was == is)
                        continue;
                diff = was ^ is;
                dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
                        (unsigned long long)XFS_AGINO_TO_INO(mp, agno,
                                        irec->ino_startnum),
                        was, is, diff);

                for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
                        agino = bit + irec->ino_startnum;
                        if (!(diff & mask))
                                continue;
                        else if (was & mask)
                                error = fix_inode_reflink_flag(mp, agno, agino,
                                                false);
                        else if (is & mask)
                                error = fix_inode_reflink_flag(mp, agno, agino,
                                                true);
                        else
                                ASSERT(0);
                        if (error)
                                do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
                                        XFS_AGINO_TO_INO(mp, agno, agino));
                }
        }

        return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
        xfs_agnumber_t agno,
        struct xfs_slab_cursor **cur)
{
        return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
        refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
        struct xfs_mount *mp,
        xfs_agnumber_t agno)
{
        struct xfs_slab_cursor *rl_cur;
        struct xfs_btree_cur *bt_cur = NULL;
        int error;
        int have;
        int i;
        struct xfs_buf *agbp = NULL;
        struct xfs_refcount_irec *rl_rec;
        struct xfs_refcount_irec tmp;
        struct xfs_perag *pag;          /* per allocation group data */

        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
        if (refcbt_suspect) {
                if (no_modify && agno == 0)
                        do_warn(_("would rebuild corrupt refcount btrees.\n"));
                return 0;
        }

        /* Create cursors to refcount structures */
        error = init_refcount_cursor(agno, &rl_cur);
        if (error)
                return error;

        error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
        if (error)
                goto err;

        /* Leave the per-ag data "uninitialized" since we rewrite it later */
        pag = libxfs_perag_get(mp, agno);
        pag->pagf_init = 0;
        libxfs_perag_put(pag);

        bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
        if (!bt_cur) {
                error = -ENOMEM;
                goto err;
        }

        rl_rec = pop_slab_cursor(rl_cur);
        while (rl_rec) {
                /* Look for a refcount record in the btree */
                error = -libxfs_refcount_lookup_le(bt_cur,
                                rl_rec->rc_startblock, &have);
                if (error)
                        goto err;
                if (!have) {
                        do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
                                agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
                        goto next_loop;
                }

                error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
                if (error)
                        goto err;
                if (!i) {
                        do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
                                agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
                        goto next_loop;
                }

                /* Compare each refcount observation against the btree's */
                if (tmp.rc_startblock != rl_rec->rc_startblock ||
                    tmp.rc_blockcount < rl_rec->rc_blockcount ||
                    tmp.rc_refcount < rl_rec->rc_refcount)
                        do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
                                agno, tmp.rc_startblock, tmp.rc_blockcount,
                                tmp.rc_refcount, agno, rl_rec->rc_startblock,
                                rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
                rl_rec = pop_slab_cursor(rl_cur);
        }

err:
        if (bt_cur)
                libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
        if (agbp)
                libxfs_putbuf(agbp);
        free_slab_cursor(&rl_cur);
        return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree. If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        bool skip_rmapbt)
{
        xfs_alloc_arg_t args;
        xfs_trans_t *tp;
        struct xfs_trans_res tres = {0};
        int flags;
        int error;

        memset(&args, 0, sizeof(args));
        args.mp = mp;
        args.agno = agno;
        args.alignment = 1;
        args.pag = libxfs_perag_get(mp, agno);
        error = -libxfs_trans_alloc(mp, &tres,
                        libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
        if (error)
                do_error(_("failed to fix AGFL on AG %d, error %d\n"),
                                agno, error);
        args.tp = tp;

        /*
         * Prior to rmapbt, all we had to do to fix the freelist is "expand"
         * the fresh AGFL header from empty to full. That hasn't changed. For
         * rmapbt, however, things change a bit.
         *
         * When we're stuffing the rmapbt with the AG btree rmaps the tree can
         * expand, so we need to keep the AGFL well-stocked for the expansion.
         * However, this expansion can cause the bnobt/cntbt to shrink, which
         * can make the AGFL eligible for shrinking. Shrinking involves
         * freeing rmapbt entries, but since we haven't finished loading the
         * rmapbt with the btree rmaps it's possible for the remove operation
         * to fail. The AGFL block is large enough at this point to absorb any
         * blocks freed from the bnobt/cntbt, so we can disable shrinking.
         *
         * During the initial AGFL regeneration during AGF generation in phase5
         * we must also disable rmapbt modifications because the AGF that
         * libxfs reads does not yet point to the new rmapbt. These initial
         * AGFL entries are added just prior to adding the AG btree block rmaps
         * to the rmapbt. It's ok to pass NOSHRINK here too, since the AGFL is
         * empty and cannot shrink.
         */
        flags = XFS_ALLOC_FLAG_NOSHRINK;
        if (skip_rmapbt)
                flags |= XFS_ALLOC_FLAG_NORMAP;
        error = -libxfs_alloc_fix_freelist(&args, flags);
        libxfs_perag_put(args.pag);
        if (error) {
                do_error(_("failed to fix AGFL on AG %d, error %d\n"),
                                agno, error);
        }
        libxfs_trans_commit(tp);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
        struct xfs_mount *mp,
        xfs_agnumber_t agno,
        int count)
{
        if (!rmap_needs_work(mp))
                return;

        ag_rmaps[agno].ar_flcount = count;
}