/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
	struct xfs_slab	*ar_rmaps;		/* rmap observations, p4 */
	struct xfs_slab	*ar_raw_rmaps;		/* unmerged rmaps */
	int		ar_flcount;		/* agfl entries from leftover */
						/* agbt allocations */
	struct xfs_rmap_irec	ar_last_rmap;	/* last rmap seen */
	struct xfs_slab	*ar_refcount_items;	/* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
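/*
 * Set when the corresponding on-disk btree is corrupt enough that we
 * skip cross-checking our observations against it.
 */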
static bool rmapbt_suspect;
static bool refcbt_suspect;

/*
 * Compare rmap observations for array sorting.
 */
static int
rmap_compare(
	const void		*a,
	const void		*b)
{
	const struct xfs_rmap_irec	*pa;
	const struct xfs_rmap_irec	*pb;
	__u64			oa;
	__u64			ob;

	pa = a; pb = b;
	oa = libxfs_rmap_irec_offset_pack(pa);
	ob = libxfs_rmap_irec_offset_pack(pb);

	if (pa->rm_startblock < pb->rm_startblock)
		return -1;
	else if (pa->rm_startblock > pb->rm_startblock)
		return 1;
	else if (pa->rm_owner < pb->rm_owner)
		return -1;
	else if (pa->rm_owner > pb->rm_owner)
		return 1;
	else if (oa < ob)
		return -1;
	else if (oa > ob)
		return 1;
	else
		return 0;
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
	struct xfs_mount	*mp)
{
	return xfs_sb_version_hasreflink(&mp->m_sb) ||
	       xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;
	int			error;

	if (!rmap_needs_work(mp))
		return;

	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
	if (!ag_rmaps)
		do_error(_("couldn't allocate per-AG reverse map roots\n"));

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		error = init_slab(&ag_rmaps[i].ar_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
		error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
		ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		error = init_slab(&ag_rmaps[i].ar_refcount_items,
				sizeof(struct xfs_refcount_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating refcount item slabs."));
	}
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;

	if (!rmap_needs_work(mp))
		return;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		free_slab(&ag_rmaps[i].ar_rmaps);
		free_slab(&ag_rmaps[i].ar_raw_rmaps);
		free_slab(&ag_rmaps[i].ar_refcount_items);
	}
	free(ag_rmaps);
	ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
	struct xfs_rmap_irec	*r1,
	struct xfs_rmap_irec	*r2)
{
	if (r1->rm_owner != r2->rm_owner)
		return false;
	if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
		return false;
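	/* refuse to merge if the combined length would overflow the record */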
	if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
	    XFS_RMAP_LEN_MAX)
		return false;
	if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
		return true;
	/* must be an inode owner below here */
	if (r1->rm_flags != r2->rm_flags)
		return false;
	if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return true;
	return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}

/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_rmap_irec	rmap;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	struct xfs_rmap_irec	*last_rmap;
	int			error = 0;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
	ASSERT(ino != NULLFSINO);
	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	rmap.rm_owner = ino;
	rmap.rm_offset = irec->br_startoff;
	rmap.rm_flags = 0;
	if (whichfork == XFS_ATTR_FORK)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = irec->br_blockcount;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
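	/*
	 * Cache this rmap and fold later contiguous mappings into it; the
	 * cached record is only pushed into the slab when we see an rmap
	 * that can't be merged (or at rmap_finish_collecting_fork_recs).
	 */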
	last_rmap = &ag_rmaps[agno].ar_last_rmap;
	if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
		*last_rmap = rmap;
	else if (rmaps_are_mergeable(last_rmap, &rmap))
		last_rmap->rm_blockcount += rmap.rm_blockcount;
	else {
		error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
		if (error)
			return error;
		*last_rmap = rmap;
	}

	return error;
}

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	if (!rmap_needs_work(mp) ||
	    ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
		return 0;
	return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner,
	bool			is_attr,
	bool			is_bmbt)
{
	struct xfs_rmap_irec	rmap;

	ASSERT(len != 0);
	rmap.rm_owner = owner;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;
	if (is_attr)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	if (is_bmbt)
		rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = len;
	return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	xfs_fsblock_t		fsbno)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
			whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner)
{
	if (!rmap_needs_work(mp))
		return 0;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*cur = NULL;
	struct xfs_rmap_irec	*prev, *rec;
	size_t			old_sz;
	int			error = 0;

	old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
	if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
		goto no_raw;
	qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
	error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
			&cur);
	if (error)
		goto err;

	prev = pop_slab_cursor(cur);
	rec = pop_slab_cursor(cur);
	while (prev && rec) {
		if (rmaps_are_mergeable(prev, rec)) {
			prev->rm_blockcount += rec->rm_blockcount;
			rec = pop_slab_cursor(cur);
			continue;
		}
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
		prev = rec;
		rec = pop_slab_cursor(cur);
	}
	if (prev) {
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
	}
	free_slab(&ag_rmaps[agno].ar_raw_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
	if (old_sz)
		qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
	free_slab_cursor(&cur);
	return error;
}

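/*
 * Return the index of the lowest zero bit in the mask, i.e. the number
 * of contiguous one bits starting at bit zero.  Returns 64 if every
 * bit in the mask is set.
 */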
static int
find_first_zero_bit(
	__uint64_t	mask)
{
	int		n;
	int		b = 0;

	for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
		b++;

	return b;
}

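/* Count the number of bits set in the mask. */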
static int
popcnt(
	__uint64_t	mask)
{
	int		n;
	int		b = 0;

	if (mask == 0)
		return 0;

	for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
		if (mask & 1)
			b++;

	return b;
}

/*
 * Add an allocation group's fixed metadata to the rmap list.  This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	ino_tree_node_t		*ino_rec;
	xfs_agino_t		agino;
	int			error;
	int			startidx;
	int			nr;

	if (!rmap_needs_work(mp))
		return 0;

	/* sb/agi/agf/agfl headers */
	error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
			XFS_RMAP_OWN_FS);
	if (error)
		goto out;

	/* inodes */
	ino_rec = findfirst_inode_rec(agno);
	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
			startidx = find_first_zero_bit(ino_rec->ir_sparse);
			nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
		} else {
			startidx = 0;
			nr = XFS_INODES_PER_CHUNK;
		}
		nr /= mp->m_sb.sb_inopblock;
		if (nr == 0)
			nr = 1;
		agino = ino_rec->ino_startnum + startidx;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
			error = rmap_add_ag_rec(mp, agno, agbno, nr,
					XFS_RMAP_OWN_INODES);
			if (error)
				goto out;
		}
	}

	/* log */
	fsbno = mp->m_sb.sb_logstart;
	if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
		agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
		error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
				XFS_RMAP_OWN_LOG);
		if (error)
			goto out;
	}
out:
	return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks.  While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and the
 * fact that allocating blocks for the bnobt requires adding rmapbt entries.
 * Therefore we record in-core the rmaps for each btree and here use the
 * libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory.  The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt.  After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries.  These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_rmap_irec	*rm_rec = NULL;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_trans	*tp;
	struct xfs_trans_res	tres = {0};
	__be32			*agfl_bno, *b;
	int			error = 0;
	struct xfs_owner_info	oinfo;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;

	/* Release the ar_rmaps; they were put into the rmapbt during p5. */
	free_slab(&ag_rmaps[agno].ar_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		goto err;

	/* Add the AGFL blocks to the rmap list */
	error = -libxfs_trans_read_buf(
			mp, NULL, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
	if (error)
		goto err;

	/*
	 * Sometimes, the blocks at the beginning of the AGFL are there
	 * because we overestimated how many blocks we needed to rebuild
	 * the freespace btrees.  ar_flcount records the number of
	 * blocks in this situation.  Since those blocks already have an
	 * rmap, we only need to add rmap records for AGFL blocks past
	 * that point in the AGFL because those blocks are a result of a
	 * no-rmap no-shrink freelist fixup that we did earlier.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	b = agfl_bno + ag_rmaps[agno].ar_flcount;
	while (*b != NULLAGBLOCK && b - agfl_bno < XFS_AGFL_SIZE(mp)) {
		error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
				XFS_RMAP_OWN_AG);
		if (error)
			goto err;
		b++;
	}
	libxfs_putbuf(agflbp);
	agflbp = NULL;

	/* Merge all the raw rmaps into the main list */
	error = rmap_fold_raw_recs(mp, agno);
	if (error)
		goto err;

	/* Create a cursor to the merged rmap records */
	error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
			&rm_cur);
	if (error)
		goto err;

	/* Insert rmaps into the btree one at a time */
	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
		if (error)
			goto err_slab;

		error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
		if (error)
			goto err_trans;

		ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
		libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
		error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
				rm_rec->rm_blockcount, &oinfo);
		if (error)
			goto err_trans;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto err_slab;

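		/*
		 * Restock the AGFL after each insertion in case the rmapbt
		 * grew; the new rmapbt is in place by now, so the freelist
		 * fixup can update it as blocks move on and off the AGFL.
		 */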
		fix_freelist(mp, agno, false);

		rm_rec = pop_slab_cursor(rm_cur);
	}

	free_slab_cursor(&rm_cur);
	return 0;

err_trans:
	libxfs_trans_cancel(tp);
err_slab:
	free_slab_cursor(&rm_cur);
err:
	if (agflbp)
		libxfs_putbuf(agflbp);
	return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
	const char		*msg,
	xfs_agnumber_t		agno,
	struct xfs_rmap_irec	*rmap)
{
	printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
		msg, rmap,
		(unsigned int)agno,
		(unsigned long long)rmap->rm_startblock,
		(unsigned long long)rmap->rm_owner,
		(unsigned long long)rmap->rm_offset,
		(unsigned int)rmap->rm_blockcount,
		(unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees.  Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4.  There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries.  Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3  43 234  2123  1 01 2 3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time.  Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree.  The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */
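
/*
 * A worked example of the sweep described above, using three rmaps of
 * the form (startblock, len): A = (10, 4), B = (10, 2), C = (12, 4).
 *
 * sp = 10: push A and B; np = min(12, 14, 12) = 12; bag size is 2.
 * np = 12: pop B (ends at 12), push C (starts at 12); the bag size is
 *          still 2, so nothing is emitted; np becomes 14.
 * np = 14: pop A; the bag shrinks from 2 to 1, so emit the refcount
 *          entry (10, 4, 2); np becomes 16.
 * np = 16: pop C; the old bag size was 1, and refcounts below 2 are
 *          not stored, so nothing is emitted and the bag is empty.
 *
 * The result is a single refcount record covering blocks 10-13 with a
 * reference count of 2 (A+B over 10-11 and A+C over 12-13).
 */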

/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
	struct xfs_mount	*mp,
	struct xfs_bag		*rmaps)
{
	xfs_agnumber_t		iagno;
	struct xfs_rmap_irec	*rmap;
	struct ino_tree_node	*irec;
	int			off;
	size_t			idx;
	xfs_agino_t		ino;

	if (bag_count(rmaps) < 2)
		return;

	/* Reflink flag accounting */
	foreach_bag_ptr(rmaps, idx, rmap) {
		ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
		iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
		ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
		pthread_mutex_lock(&ag_locks[iagno].lock);
		irec = find_inode_rec(mp, iagno, ino);
		off = get_inode_offset(mp, rmap->rm_owner, irec);
		/* lock here because we might go outside this ag */
		set_inode_is_rl(irec, off);
		pthread_mutex_unlock(&ag_locks[iagno].lock);
	}
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr)	((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	size_t			nr_rmaps)
{
	struct xfs_refcount_irec	rlrec;
	int			error;
	struct xfs_slab		*rlslab;

	rlslab = ag_rmaps[agno].ar_refcount_items;
	ASSERT(nr_rmaps > 0);

	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
		agno, agbno, len, nr_rmaps);
	rlrec.rc_startblock = agbno;
	rlrec.rc_blockcount = len;
	rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
	error = slab_add(rlslab, &rlrec);
	if (error)
		do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r)	((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_bag		*stack_top = NULL;
	struct xfs_slab		*rmaps;
	struct xfs_slab_cursor	*rmaps_cur;
	struct xfs_rmap_irec	*array_cur;
	struct xfs_rmap_irec	*rmap;
	xfs_agblock_t		sbno;	/* first bno of this rmap set */
	xfs_agblock_t		cbno;	/* first bno of this refcount set */
	xfs_agblock_t		nbno;	/* next bno where rmap set changes */
	size_t			n, idx;
	size_t			old_stack_nr;
	int			error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	rmaps = ag_rmaps[agno].ar_rmaps;

	error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
	if (error)
		return error;

	error = init_bag(&stack_top);
	if (error)
		goto err;

	/* While there are rmaps to be processed... */
	n = 0;
	while (n < slab_count(rmaps)) {
		array_cur = peek_slab_cursor(rmaps_cur);
		sbno = cbno = array_cur->rm_startblock;
		/* Push all rmaps with pblk == sbno onto the stack */
		for (;
		     array_cur && array_cur->rm_startblock == sbno;
		     array_cur = peek_slab_cursor(rmaps_cur)) {
			advance_slab_cursor(rmaps_cur); n++;
			rmap_dump("push0", agno, array_cur);
			error = bag_add(stack_top, array_cur);
			if (error)
				goto err;
		}
		mark_inode_rl(mp, stack_top);

		/* Set nbno to the bno of the next refcount change */
		if (n < slab_count(rmaps))
			nbno = array_cur->rm_startblock;
		else
			nbno = NULLAGBLOCK;
		foreach_bag_ptr(stack_top, idx, rmap) {
			nbno = min(nbno, RMAP_END(rmap));
		}

		/* The next change must come after the start of this set */
		ASSERT(nbno > sbno);
		old_stack_nr = bag_count(stack_top);

		/* While stack isn't empty... */
		while (bag_count(stack_top)) {
			/* Pop all rmaps that end at nbno */
			foreach_bag_ptr_reverse(stack_top, idx, rmap) {
				if (RMAP_END(rmap) != nbno)
					continue;
				rmap_dump("pop", agno, rmap);
				error = bag_remove(stack_top, idx);
				if (error)
					goto err;
			}

			/* Push array items that start at nbno */
			for (;
			     array_cur && array_cur->rm_startblock == nbno;
			     array_cur = peek_slab_cursor(rmaps_cur)) {
				advance_slab_cursor(rmaps_cur); n++;
				rmap_dump("push1", agno, array_cur);
				error = bag_add(stack_top, array_cur);
				if (error)
					goto err;
			}
			mark_inode_rl(mp, stack_top);

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (bag_count(stack_top) != old_stack_nr) {
				if (old_stack_nr > 1) {
					refcount_emit(mp, agno, cbno,
							nbno - cbno,
							old_stack_nr);
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (bag_count(stack_top) == 0)
				break;
			old_stack_nr = bag_count(stack_top);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			if (n < slab_count(rmaps))
				nbno = array_cur->rm_startblock;
			else
				nbno = NULLAGBLOCK;
			foreach_bag_ptr(stack_top, idx, rmap) {
				nbno = min(nbno, RMAP_END(rmap));
			}

			/* The sweep must advance with each pass */
			ASSERT(nbno > sbno);
		}
	}
err:
	free_bag(&stack_top);
	free_slab_cursor(&rmaps_cur);

	return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
	rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	int			error;

	/* Use the regular btree retrieval routine. */
	error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_blockcount,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, have);
	if (error)
		return error;
	if (*have == 0)
		return error;
	return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x)	((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x)	((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
	struct xfs_rmap_irec	*observed,
	struct xfs_rmap_irec	*btree)
{
	/* Can't have mismatches in the flags or the owner. */
	if (btree->rm_flags != observed->rm_flags ||
	    btree->rm_owner != observed->rm_owner)
		return false;

	/*
	 * Btree record can't physically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_startblock > observed->rm_startblock ||
	    NEXTP(btree) < NEXTP(observed))
		return false;

	/* If this is metadata or bmbt, we're done. */
	if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
	    (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return true;
	/*
	 * Btree record can't logically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_offset > observed->rm_offset ||
	    NEXTL(btree) < NEXTL(observed))
		return false;

	return true;
}
#undef NEXTP
#undef NEXTL

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	struct xfs_buf		*agbp = NULL;
	struct xfs_rmap_irec	*rm_rec;
	struct xfs_rmap_irec	tmp;
	struct xfs_perag	*pag;		/* per allocation group data */

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;
	if (rmapbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt rmap btrees.\n"));
		return 0;
	}

	/* Create a cursor to the observed rmap records */
	error = rmap_init_cursor(agno, &rm_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}

		/* Compare each rmap observation against the btree's */
		if (!rmap_is_good(rm_rec, &tmp)) {
			do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
				agno, tmp.rm_startblock,
				(tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				tmp.rm_blockcount,
				tmp.rm_owner,
				(tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				tmp.rm_offset,
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}
next_loop:
		rm_rec = pop_slab_cursor(rm_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rm_cur);
	return error;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
__int64_t
rmap_diffkeys(
	struct xfs_rmap_irec	*kp1,
	struct xfs_rmap_irec	*kp2)
{
	__u64			oa;
	__u64			ob;
	__int64_t		d;
	struct xfs_rmap_irec	tmp;

	tmp = *kp1;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	oa = libxfs_rmap_irec_offset_pack(&tmp);
	tmp = *kp2;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	ob = libxfs_rmap_irec_offset_pack(&tmp);

	d = (__int64_t)kp1->rm_startblock - kp2->rm_startblock;
	if (d)
		return d;

	if (kp1->rm_owner > kp2->rm_owner)
		return 1;
	else if (kp2->rm_owner > kp1->rm_owner)
		return -1;

	if (oa > ob)
		return 1;
	else if (ob > oa)
		return -1;
	return 0;
}

/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
	struct xfs_rmap_irec	*rec,
	struct xfs_rmap_irec	*key)
{
	int			adj;

	adj = rec->rm_blockcount - 1;

	key->rm_startblock = rec->rm_startblock + adj;
	key->rm_owner = rec->rm_owner;
	key->rm_offset = rec->rm_offset;
	key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
	    (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return;
	key->rm_offset += adj;
}

/*
 * Record that an inode had the reflink flag set when repair started.  The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dino,
	xfs_agnumber_t		agno,
	xfs_agino_t		ino,
	xfs_ino_t		lino)
{
	struct ino_tree_node	*irec;
	int			off;

	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
	if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
		return;
	irec = find_inode_rec(mp, agno, ino);
	off = get_inode_offset(mp, lino, irec);
	ASSERT(!inode_was_rl(irec, off));
	set_inode_was_rl(irec, off);
	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	bool			set)
{
	struct xfs_dinode	*dino;
	struct xfs_buf		*buf;

	if (set)
		do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	else if (!no_modify) /* && !set */
		do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	if (no_modify)
		return 0;

	buf = get_agino_buf(mp, agno, agino, &dino);
	if (!buf)
		return 1;
	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
	if (set)
		dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
	else
		dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
	libxfs_dinode_calc_crc(mp, dino);
	libxfs_writebuf(buf, 0);

	return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct ino_tree_node	*irec;
	int			bit;
	__uint64_t		was;
	__uint64_t		is;
	__uint64_t		diff;
	__uint64_t		mask;
	int			error = 0;
	xfs_agino_t		agino;

	/*
	 * Update the reflink flag for any inode where there's a discrepancy
	 * between the inode flag and whether or not we found any reflinked
	 * extents.
	 */
	for (irec = findfirst_inode_rec(agno);
	     irec != NULL;
	     irec = next_ino_rec(irec)) {
		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
		was = irec->ino_was_rl;
		is = irec->ino_is_rl;
		if (was == is)
			continue;
		diff = was ^ is;
		dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
						irec->ino_startnum),
			was, is, diff);

		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
			agino = bit + irec->ino_startnum;
			if (!(diff & mask))
				continue;
			else if (was & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						false);
			else if (is & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						true);
			else
				ASSERT(0);
			if (error)
				do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
					XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}

	return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
	refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rl_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	int			i;
	struct xfs_buf		*agbp = NULL;
	struct xfs_refcount_irec	*rl_rec;
	struct xfs_refcount_irec	tmp;
	struct xfs_perag	*pag;		/* per allocation group data */

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
	if (refcbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt refcount btrees.\n"));
		return 0;
	}

	/* Create cursors to refcount structures */
	error = init_refcount_cursor(agno, &rl_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rl_rec = pop_slab_cursor(rl_cur);
	while (rl_rec) {
		/* Look for a refcount record in the btree */
		error = -libxfs_refcount_lookup_le(bt_cur,
				rl_rec->rc_startblock, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
		if (error)
			goto err;
		if (!i) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		/* Compare each refcount observation against the btree's */
		if (tmp.rc_startblock != rl_rec->rc_startblock ||
		    tmp.rc_blockcount < rl_rec->rc_blockcount ||
		    tmp.rc_refcount < rl_rec->rc_refcount)
			do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
				agno, tmp.rc_startblock, tmp.rc_blockcount,
				tmp.rc_refcount, agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
		rl_rec = pop_slab_cursor(rl_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rl_cur);
	return error;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree.  If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	bool			skip_rmapbt)
{
	xfs_alloc_arg_t		args;
	xfs_trans_t		*tp;
	struct xfs_trans_res	tres = {0};
	int			flags;
	int			error;

	memset(&args, 0, sizeof(args));
	args.mp = mp;
	args.agno = agno;
	args.alignment = 1;
	args.pag = libxfs_perag_get(mp, agno);
	error = -libxfs_trans_alloc(mp, &tres,
			libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
	if (error)
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	args.tp = tp;

	/*
	 * Prior to rmapbt, all we had to do to fix the freelist was "expand"
	 * the fresh AGFL header from empty to full.  That hasn't changed.  For
	 * rmapbt, however, things change a bit.
	 *
	 * When we're stuffing the rmapbt with the AG btree rmaps the tree can
	 * expand, so we need to keep the AGFL well-stocked for the expansion.
	 * However, this expansion can cause the bnobt/cntbt to shrink, which
	 * can make the AGFL eligible for shrinking.  Shrinking involves
	 * freeing rmapbt entries, but since we haven't finished loading the
	 * rmapbt with the btree rmaps it's possible for the remove operation
	 * to fail.  The AGFL block is large enough at this point to absorb any
	 * blocks freed from the bnobt/cntbt, so we can disable shrinking.
	 *
	 * During the initial AGFL regeneration that accompanies AGF rebuilding
	 * in phase 5, we must also disable rmapbt modifications because the
	 * AGF that libxfs reads does not yet point to the new rmapbt.  These
	 * initial AGFL entries are added just prior to adding the AG btree
	 * block rmaps to the rmapbt.  It's ok to pass NOSHRINK here too, since
	 * the AGFL is empty and cannot shrink.
	 */
	flags = XFS_ALLOC_FLAG_NOSHRINK;
	if (skip_rmapbt)
		flags |= XFS_ALLOC_FLAG_NORMAP;
	error = -libxfs_alloc_fix_freelist(&args, flags);
	libxfs_perag_put(args.pag);
	if (error) {
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	}
	libxfs_trans_commit(tp);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	int			count)
{
	if (!rmap_needs_work(mp))
		return;

	ag_rmaps[agno].ar_flcount = count;
}