/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
	struct xfs_slab *ar_rmaps;		/* rmap observations, p4 */
	struct xfs_slab *ar_raw_rmaps;		/* unmerged rmaps */
	int ar_flcount;				/* agfl entries from leftover */
						/* agbt allocations */
	struct xfs_rmap_irec ar_last_rmap;	/* last rmap seen */
	struct xfs_slab *ar_refcount_items;	/* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;

static inline int rmap_compare(const void *a, const void *b)
{
	return libxfs_rmap_compare(a, b);
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
	struct xfs_mount *mp)
{
	return xfs_sb_version_hasreflink(&mp->m_sb) ||
			xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
	struct xfs_mount *mp)
{
	xfs_agnumber_t i;
	int error;

	if (!rmap_needs_work(mp))
		return;

	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
	if (!ag_rmaps)
		do_error(_("couldn't allocate per-AG reverse map roots\n"));

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		error = init_slab(&ag_rmaps[i].ar_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
		error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
		ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		error = init_slab(&ag_rmaps[i].ar_refcount_items,
				sizeof(struct xfs_refcount_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating refcount item slabs."));
	}
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
	struct xfs_mount *mp)
{
	xfs_agnumber_t i;

	if (!rmap_needs_work(mp))
		return;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		free_slab(&ag_rmaps[i].ar_rmaps);
		free_slab(&ag_rmaps[i].ar_raw_rmaps);
		free_slab(&ag_rmaps[i].ar_refcount_items);
	}
	free(ag_rmaps);
	ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
	struct xfs_rmap_irec *r1,
	struct xfs_rmap_irec *r2)
{
	if (r1->rm_owner != r2->rm_owner)
		return false;
	if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
		return false;
	if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
	    XFS_RMAP_LEN_MAX)
		return false;
	if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
		return true;
	/* must be an inode owner below here */
	if (r1->rm_flags != r2->rm_flags)
		return false;
	if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return true;
	return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}
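
/*
 * Worked example for the checks above (illustrative numbers only): two
 * data-fork rmaps for the same inode and flags, (agbno 100, len 8, offset 0)
 * and (agbno 108, len 4, offset 8), pass every test and merge into a single
 * (agbno 100, len 12, offset 0) record.  If the second record instead began
 * at file offset 10, the final offset check would keep them separate.
 */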

/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
	struct xfs_mount *mp,
	xfs_ino_t ino,
	int whichfork,
	struct xfs_bmbt_irec *irec)
{
	struct xfs_rmap_irec rmap;
	xfs_agnumber_t agno;
	xfs_agblock_t agbno;
	struct xfs_rmap_irec *last_rmap;
	int error = 0;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
	ASSERT(ino != NULLFSINO);
	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	rmap.rm_owner = ino;
	rmap.rm_offset = irec->br_startoff;
	rmap.rm_flags = 0;
	if (whichfork == XFS_ATTR_FORK)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = irec->br_blockcount;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
	last_rmap = &ag_rmaps[agno].ar_last_rmap;
	if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
		*last_rmap = rmap;
	else if (rmaps_are_mergeable(last_rmap, &rmap))
		last_rmap->rm_blockcount += rmap.rm_blockcount;
	else {
		error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
		if (error)
			return error;
		*last_rmap = rmap;
	}

	return error;
}
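
/*
 * Note on the helper below: rmap_add_rec() above only pushes the *previous*
 * record to the slab when a non-mergeable record arrives, so the last
 * accumulated record for each AG is still sitting in ar_last_rmap when the
 * fork walk finishes; rmap_finish_collecting_fork_recs() exists to flush it.
 */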

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	if (!rmap_needs_work(mp) ||
	    ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
		return 0;
	return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	xfs_agblock_t agbno,
	xfs_extlen_t len,
	uint64_t owner,
	bool is_attr,
	bool is_bmbt)
{
	struct xfs_rmap_irec rmap;

	ASSERT(len != 0);
	rmap.rm_owner = owner;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;
	if (is_attr)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	if (is_bmbt)
		rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = len;
	return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
	struct xfs_mount *mp,
	xfs_ino_t ino,
	int whichfork,
	xfs_fsblock_t fsbno)
{
	xfs_agnumber_t agno;
	xfs_agblock_t agbno;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
			whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	xfs_agblock_t agbno,
	xfs_extlen_t len,
	uint64_t owner)
{
	if (!rmap_needs_work(mp))
		return 0;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct xfs_slab_cursor *cur = NULL;
	struct xfs_rmap_irec *prev, *rec;
	size_t old_sz;
	int error = 0;

	old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
	if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
		goto no_raw;
	qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
	error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
			&cur);
	if (error)
		goto err;

	prev = pop_slab_cursor(cur);
	rec = pop_slab_cursor(cur);
	while (prev && rec) {
		if (rmaps_are_mergeable(prev, rec)) {
			prev->rm_blockcount += rec->rm_blockcount;
			rec = pop_slab_cursor(cur);
			continue;
		}
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
		prev = rec;
		rec = pop_slab_cursor(cur);
	}
	if (prev) {
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
	}
	free_slab(&ag_rmaps[agno].ar_raw_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
	if (old_sz)
		qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
	free_slab_cursor(&cur);
	return error;
}
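
/*
 * Illustration of the folding above: the single-block bmbt rmaps added by
 * rmap_add_bmbt_rec() for the same inode and fork at agbno 200, 201 and 202
 * sort together and fold into one three-block record before being merged
 * into ar_rmaps.
 */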

static int
find_first_zero_bit(
	uint64_t mask)
{
	int n;
	int b = 0;

	for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
		b++;

	return b;
}

static int
popcnt(
	uint64_t mask)
{
	int n;
	int b = 0;

	if (mask == 0)
		return 0;

	for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
		if (mask & 1)
			b++;

	return b;
}
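
/*
 * Example for the helpers above (assuming, as the caller below implies, that
 * set bits in ir_sparse mark sparse/missing inodes): a chunk with
 * ir_sparse == 0xf has find_first_zero_bit() == 4 and popcnt() == 4, so
 * rmap_add_fixed_ag_rec() starts the chunk's rmap at inode index 4 and covers
 * XFS_INODES_PER_CHUNK - 4 inodes, converted to blocks via sb_inopblock.
 */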

/*
 * Add an allocation group's fixed metadata to the rmap list. This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	xfs_fsblock_t fsbno;
	xfs_agblock_t agbno;
	ino_tree_node_t *ino_rec;
	xfs_agino_t agino;
	int error;
	int startidx;
	int nr;

	if (!rmap_needs_work(mp))
		return 0;

	/* sb/agi/agf/agfl headers */
	error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
			XFS_RMAP_OWN_FS);
	if (error)
		goto out;

	/* inodes */
	ino_rec = findfirst_inode_rec(agno);
	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
			startidx = find_first_zero_bit(ino_rec->ir_sparse);
			nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
		} else {
			startidx = 0;
			nr = XFS_INODES_PER_CHUNK;
		}
		nr /= mp->m_sb.sb_inopblock;
		if (nr == 0)
			nr = 1;
		agino = ino_rec->ino_startnum + startidx;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
			error = rmap_add_ag_rec(mp, agno, agbno, nr,
					XFS_RMAP_OWN_INODES);
			if (error)
				goto out;
		}
	}

	/* log */
	fsbno = mp->m_sb.sb_logstart;
	if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
		agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
		error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
				XFS_RMAP_OWN_LOG);
		if (error)
			goto out;
	}
out:
	return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks. While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and the
 * fact that allocating blocks for the bnobt requires adding rmapbt entries.
 * Therefore we record in-core the rmaps for each btree and here use the
 * libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory. The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt. After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries. These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct xfs_slab_cursor *rm_cur;
	struct xfs_rmap_irec *rm_rec = NULL;
	struct xfs_buf *agbp = NULL;
	struct xfs_buf *agflbp = NULL;
	struct xfs_trans *tp;
	struct xfs_trans_res tres = {0};
	__be32 *agfl_bno, *b;
	int error = 0;
	struct xfs_owner_info oinfo;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;

	/* Release the ar_rmaps; they were put into the rmapbt during p5. */
	free_slab(&ag_rmaps[agno].ar_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		goto err;

	/* Add the AGFL blocks to the rmap list */
	error = -libxfs_trans_read_buf(
			mp, NULL, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
	if (error)
		goto err;

	/*
	 * Sometimes, the blocks at the beginning of the AGFL are there
	 * because we overestimated how many blocks we needed to rebuild
	 * the freespace btrees.  ar_flcount records the number of
	 * blocks in this situation.  Since those blocks already have an
	 * rmap, we only need to add rmap records for AGFL blocks past
	 * that point in the AGFL because those blocks are a result of a
	 * no-rmap no-shrink freelist fixup that we did earlier.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	b = agfl_bno + ag_rmaps[agno].ar_flcount;
	while (*b != NULLAGBLOCK && b - agfl_bno < XFS_AGFL_SIZE(mp)) {
		error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
				XFS_RMAP_OWN_AG);
		if (error)
			goto err;
		b++;
	}
	libxfs_putbuf(agflbp);
	agflbp = NULL;

	/* Merge all the raw rmaps into the main list */
	error = rmap_fold_raw_recs(mp, agno);
	if (error)
		goto err;

	/* Create a cursor to the collected rmap records */
	error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
			&rm_cur);
	if (error)
		goto err;

	/* Insert rmaps into the btree one at a time */
	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
		if (error)
			goto err_slab;

		error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
		if (error)
			goto err_trans;

		ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
		libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
		error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
				rm_rec->rm_blockcount, &oinfo);
		if (error)
			goto err_trans;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto err_slab;

		fix_freelist(mp, agno, false);

		rm_rec = pop_slab_cursor(rm_cur);
	}

	free_slab_cursor(&rm_cur);
	return 0;

err_trans:
	libxfs_trans_cancel(tp);
err_slab:
	free_slab_cursor(&rm_cur);
err:
	if (agflbp)
		libxfs_putbuf(agflbp);
	return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
	const char *msg,
	xfs_agnumber_t agno,
	struct xfs_rmap_irec *rmap)
{
	printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
		msg, rmap,
		(unsigned int)agno,
		(unsigned long long)rmap->rm_startblock,
		(unsigned long long)rmap->rm_owner,
		(unsigned long long)rmap->rm_offset,
		(unsigned int)rmap->rm_blockcount,
		(unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees.  Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4.  There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries.  Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 * - ---
 * -- ----- ---- --- ------
 * -- ---- ----------- ---- ---------
 * -------------------------------- -----------
 * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
 * 2 1 23 21 3 43 234 2123 1 01 2 3 0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time.  Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree.  The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */
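
/*
 * A short trace of the algorithm above (illustrative numbers only): given two
 * rmaps (startblock 10, len 10) and (startblock 15, len 10), the bag holds one
 * record over blocks [10,15), two over [15,20) and one over [20,25).  Only the
 * doubly-owned span becomes a refcount record, (startblock 15, len 5,
 * refcount 2); the single-owner spans are left to the bnobt as described.
 */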

/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
	struct xfs_mount *mp,
	struct xfs_bag *rmaps)
{
	xfs_agnumber_t iagno;
	struct xfs_rmap_irec *rmap;
	struct ino_tree_node *irec;
	int off;
	size_t idx;
	xfs_agino_t ino;

	if (bag_count(rmaps) < 2)
		return;

	/* Reflink flag accounting */
	foreach_bag_ptr(rmaps, idx, rmap) {
		ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
		iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
		ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
		pthread_mutex_lock(&ag_locks[iagno].lock);
		irec = find_inode_rec(mp, iagno, ino);
		off = get_inode_offset(mp, rmap->rm_owner, irec);
		/* lock here because we might go outside this ag */
		set_inode_is_rl(irec, off);
		pthread_mutex_unlock(&ag_locks[iagno].lock);
	}
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	xfs_agblock_t agbno,
	xfs_extlen_t len,
	size_t nr_rmaps)
{
	struct xfs_refcount_irec rlrec;
	int error;
	struct xfs_slab *rlslab;

	rlslab = ag_rmaps[agno].ar_refcount_items;
	ASSERT(nr_rmaps > 0);

	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
		agno, agbno, len, nr_rmaps);
	rlrec.rc_startblock = agbno;
	rlrec.rc_blockcount = len;
	rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
	error = slab_add(rlslab, &rlrec);
	if (error)
		do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r) ((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct xfs_bag *stack_top = NULL;
	struct xfs_slab *rmaps;
	struct xfs_slab_cursor *rmaps_cur;
	struct xfs_rmap_irec *array_cur;
	struct xfs_rmap_irec *rmap;
	xfs_agblock_t sbno;	/* first bno of this rmap set */
	xfs_agblock_t cbno;	/* first bno of this refcount set */
	xfs_agblock_t nbno;	/* next bno where rmap set changes */
	size_t n, idx;
	size_t old_stack_nr;
	int error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	rmaps = ag_rmaps[agno].ar_rmaps;

	error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
	if (error)
		return error;

	error = init_bag(&stack_top);
	if (error)
		goto err;

	/* While there are rmaps to be processed... */
	n = 0;
	while (n < slab_count(rmaps)) {
		array_cur = peek_slab_cursor(rmaps_cur);
		sbno = cbno = array_cur->rm_startblock;
		/* Push all rmaps with pblk == sbno onto the stack */
		for (;
		     array_cur && array_cur->rm_startblock == sbno;
		     array_cur = peek_slab_cursor(rmaps_cur)) {
			advance_slab_cursor(rmaps_cur); n++;
			rmap_dump("push0", agno, array_cur);
			error = bag_add(stack_top, array_cur);
			if (error)
				goto err;
		}
		mark_inode_rl(mp, stack_top);

		/* Set nbno to the bno of the next refcount change */
		if (n < slab_count(rmaps) && array_cur)
			nbno = array_cur->rm_startblock;
		else
			nbno = NULLAGBLOCK;
		foreach_bag_ptr(stack_top, idx, rmap) {
			nbno = min(nbno, RMAP_END(rmap));
		}

		/* Emit refcounts, if needed */
		ASSERT(nbno > sbno);
		old_stack_nr = bag_count(stack_top);

		/* While stack isn't empty... */
		while (bag_count(stack_top)) {
			/* Pop all rmaps that end at nbno */
			foreach_bag_ptr_reverse(stack_top, idx, rmap) {
				if (RMAP_END(rmap) != nbno)
					continue;
				rmap_dump("pop", agno, rmap);
				error = bag_remove(stack_top, idx);
				if (error)
					goto err;
			}

			/* Push array items that start at nbno */
			for (;
			     array_cur && array_cur->rm_startblock == nbno;
			     array_cur = peek_slab_cursor(rmaps_cur)) {
				advance_slab_cursor(rmaps_cur); n++;
				rmap_dump("push1", agno, array_cur);
				error = bag_add(stack_top, array_cur);
				if (error)
					goto err;
			}
			mark_inode_rl(mp, stack_top);

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (bag_count(stack_top) != old_stack_nr) {
				if (old_stack_nr > 1) {
					refcount_emit(mp, agno, cbno,
							nbno - cbno,
							old_stack_nr);
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (bag_count(stack_top) == 0)
				break;
			old_stack_nr = bag_count(stack_top);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			if (n < slab_count(rmaps))
				nbno = array_cur->rm_startblock;
			else
				nbno = NULLAGBLOCK;
			foreach_bag_ptr(stack_top, idx, rmap) {
				nbno = min(nbno, RMAP_END(rmap));
			}

			/* Emit refcounts, if needed */
			ASSERT(nbno > sbno);
		}
	}
err:
	free_bag(&stack_top);
	free_slab_cursor(&rmaps_cur);

	return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
	xfs_agnumber_t agno,
	struct xfs_slab_cursor **cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the rmap btree check.
 */
void
rmap_avoid_check(void)
{
	rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
	struct xfs_btree_cur *bt_cur,
	struct xfs_rmap_irec *rm_rec,
	struct xfs_rmap_irec *tmp,
	int *have)
{
	int error;

	/* Use the regular btree retrieval routine. */
	error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
			rm_rec->rm_blockcount,
			rm_rec->rm_owner, rm_rec->rm_offset,
			rm_rec->rm_flags, have);
	if (error)
		return error;
	if (*have == 0)
		return error;
	return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup_overlapped(
	struct xfs_btree_cur *bt_cur,
	struct xfs_rmap_irec *rm_rec,
	struct xfs_rmap_irec *tmp,
	int *have)
{
	/* Have to use our fancy version for overlapped */
	return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
			rm_rec->rm_owner, rm_rec->rm_offset,
			rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x) ((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x) ((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
	struct xfs_rmap_irec *observed,
	struct xfs_rmap_irec *btree)
{
	/* Can't have mismatches in the flags or the owner. */
	if (btree->rm_flags != observed->rm_flags ||
	    btree->rm_owner != observed->rm_owner)
		return false;

	/*
	 * Btree record can't physically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_startblock > observed->rm_startblock ||
	    NEXTP(btree) < NEXTP(observed))
		return false;

	/* If this is metadata or bmbt, we're done. */
	if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
	    (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return true;
	/*
	 * Btree record can't logically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_offset > observed->rm_offset ||
	    NEXTL(btree) < NEXTL(observed))
		return false;

	return true;
}
#undef NEXTP
#undef NEXTL
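
/*
 * Example of "covering" (illustrative numbers only): an observed data-fork
 * rmap (agbno 10, len 4, offset 2) is satisfied by a btree record
 * (agbno 8, len 10, offset 0) with the same owner and flags, since the btree
 * record starts no later and ends no earlier, both physically and logically.
 */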

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct xfs_slab_cursor *rm_cur;
	struct xfs_btree_cur *bt_cur = NULL;
	int error;
	int have;
	struct xfs_buf *agbp = NULL;
	struct xfs_rmap_irec *rm_rec;
	struct xfs_rmap_irec tmp;
	struct xfs_perag *pag;		/* per allocation group data */

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;
	if (rmapbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt rmap btrees.\n"));
		return 0;
	}

	/* Create a cursor to the observed rmap records */
	error = rmap_init_cursor(agno, &rm_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
		if (error)
			goto err;
		/*
		 * Using the range query is expensive, so only do it if
		 * the regular lookup doesn't find anything or if it doesn't
		 * match the observed rmap.
		 */
		if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
		    (!have || !rmap_is_good(rm_rec, &tmp))) {
			error = rmap_lookup_overlapped(bt_cur, rm_rec,
					&tmp, &have);
			if (error)
				goto err;
		}
		if (!have) {
			do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}

		/* Compare each rmap observation against the btree's */
		if (!rmap_is_good(rm_rec, &tmp)) {
			do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
				agno, tmp.rm_startblock,
				(tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				tmp.rm_blockcount,
				tmp.rm_owner,
				(tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				tmp.rm_offset,
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}
next_loop:
		rm_rec = pop_slab_cursor(rm_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rm_cur);
	return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
int64_t
rmap_diffkeys(
	struct xfs_rmap_irec *kp1,
	struct xfs_rmap_irec *kp2)
{
	__u64 oa;
	__u64 ob;
	int64_t d;
	struct xfs_rmap_irec tmp;

	tmp = *kp1;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	oa = libxfs_rmap_irec_offset_pack(&tmp);
	tmp = *kp2;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	ob = libxfs_rmap_irec_offset_pack(&tmp);

	d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
	if (d)
		return d;

	if (kp1->rm_owner > kp2->rm_owner)
		return 1;
	else if (kp2->rm_owner > kp1->rm_owner)
		return -1;

	if (oa > ob)
		return 1;
	else if (ob > oa)
		return -1;
	return 0;
}

/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
	struct xfs_rmap_irec *rec,
	struct xfs_rmap_irec *key)
{
	int adj;

	adj = rec->rm_blockcount - 1;

	key->rm_startblock = rec->rm_startblock + adj;
	key->rm_owner = rec->rm_owner;
	key->rm_offset = rec->rm_offset;
	key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
	    (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return;
	key->rm_offset += adj;
}
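
/*
 * Example (illustrative numbers only): for an inode-owned record with
 * rm_startblock 50, rm_blockcount 8 and rm_offset 100, the high key above is
 * (startblock 57, same owner, offset 107); for an AG metadata owner or a bmbt
 * block only the startblock is adjusted and the offset is left alone.
 */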

/*
 * Record that an inode had the reflink flag set when repair started. The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
	struct xfs_mount *mp,
	struct xfs_dinode *dino,
	xfs_agnumber_t agno,
	xfs_agino_t ino,
	xfs_ino_t lino)
{
	struct ino_tree_node *irec;
	int off;

	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
	if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
		return;
	irec = find_inode_rec(mp, agno, ino);
	off = get_inode_offset(mp, lino, irec);
	ASSERT(!inode_was_rl(irec, off));
	set_inode_was_rl(irec, off);
	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	xfs_agino_t agino,
	bool set)
{
	struct xfs_dinode *dino;
	struct xfs_buf *buf;

	if (set)
		do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	else if (!no_modify) /* && !set */
		do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	if (no_modify)
		return 0;

	buf = get_agino_buf(mp, agno, agino, &dino);
	if (!buf)
		return 1;
	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
	if (set)
		dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
	else
		dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
	libxfs_dinode_calc_crc(mp, dino);
	libxfs_writebuf(buf, 0);

	return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct ino_tree_node *irec;
	int bit;
	uint64_t was;
	uint64_t is;
	uint64_t diff;
	uint64_t mask;
	int error = 0;
	xfs_agino_t agino;

	/*
	 * Update the reflink flag for any inode where there's a discrepancy
	 * between the inode flag and whether or not we found any reflinked
	 * extents.
	 */
	for (irec = findfirst_inode_rec(agno);
	     irec != NULL;
	     irec = next_ino_rec(irec)) {
		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
		was = irec->ino_was_rl;
		is = irec->ino_is_rl;
		if (was == is)
			continue;
		diff = was ^ is;
		dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
				irec->ino_startnum),
			was, is, diff);

		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
			agino = bit + irec->ino_startnum;
			if (!(diff & mask))
				continue;
			else if (was & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						false);
			else if (is & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						true);
			else
				ASSERT(0);
			if (error)
				do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
					XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}

	return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
	xfs_agnumber_t agno,
	struct xfs_slab_cursor **cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
	refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
	struct xfs_mount *mp,
	xfs_agnumber_t agno)
{
	struct xfs_slab_cursor *rl_cur;
	struct xfs_btree_cur *bt_cur = NULL;
	int error;
	int have;
	int i;
	struct xfs_buf *agbp = NULL;
	struct xfs_refcount_irec *rl_rec;
	struct xfs_refcount_irec tmp;
	struct xfs_perag *pag;		/* per allocation group data */

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
	if (refcbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt refcount btrees.\n"));
		return 0;
	}

	/* Create cursors to refcount structures */
	error = init_refcount_cursor(agno, &rl_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rl_rec = pop_slab_cursor(rl_cur);
	while (rl_rec) {
		/* Look for a refcount record in the btree */
		error = -libxfs_refcount_lookup_le(bt_cur,
				rl_rec->rc_startblock, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
		if (error)
			goto err;
		if (!i) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		/* Compare each refcount observation against the btree's */
		if (tmp.rc_startblock != rl_rec->rc_startblock ||
		    tmp.rc_blockcount < rl_rec->rc_blockcount ||
		    tmp.rc_refcount < rl_rec->rc_refcount)
			do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
				agno, tmp.rc_startblock, tmp.rc_blockcount,
				tmp.rc_refcount, agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
		rl_rec = pop_slab_cursor(rl_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, error ? XFS_BTREE_ERROR :
						XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rl_cur);
	return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree. If skip_rmapbt is true, don't update the rmapbt (most probably
 * because we're updating the rmapbt).
 */
void
fix_freelist(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	bool skip_rmapbt)
{
	xfs_alloc_arg_t args;
	xfs_trans_t *tp;
	struct xfs_trans_res tres = {0};
	int flags;
	int error;

	memset(&args, 0, sizeof(args));
	args.mp = mp;
	args.agno = agno;
	args.alignment = 1;
	args.pag = libxfs_perag_get(mp, agno);
	error = -libxfs_trans_alloc(mp, &tres,
			libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
	if (error)
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	args.tp = tp;

	/*
	 * Prior to rmapbt, all we had to do to fix the freelist is "expand"
	 * the fresh AGFL header from empty to full. That hasn't changed. For
	 * rmapbt, however, things change a bit.
	 *
	 * When we're stuffing the rmapbt with the AG btree rmaps the tree can
	 * expand, so we need to keep the AGFL well-stocked for the expansion.
	 * However, this expansion can cause the bnobt/cntbt to shrink, which
	 * can make the AGFL eligible for shrinking. Shrinking involves
	 * freeing rmapbt entries, but since we haven't finished loading the
	 * rmapbt with the btree rmaps it's possible for the remove operation
	 * to fail. The AGFL block is large enough at this point to absorb any
	 * blocks freed from the bnobt/cntbt, so we can disable shrinking.
	 *
	 * During the initial AGFL regeneration during AGF generation in phase5
	 * we must also disable rmapbt modifications because the AGF that
	 * libxfs reads does not yet point to the new rmapbt. These initial
	 * AGFL entries are added just prior to adding the AG btree block rmaps
	 * to the rmapbt. It's ok to pass NOSHRINK here too, since the AGFL is
	 * empty and cannot shrink.
	 */
	flags = XFS_ALLOC_FLAG_NOSHRINK;
	if (skip_rmapbt)
		flags |= XFS_ALLOC_FLAG_NORMAP;
	error = -libxfs_alloc_fix_freelist(&args, flags);
	libxfs_perag_put(args.pag);
	if (error) {
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	}
	libxfs_trans_commit(tp);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
	struct xfs_mount *mp,
	xfs_agnumber_t agno,
	int count)
{
	if (!rmap_needs_work(mp))
		return;

	ag_rmaps[agno].ar_flcount = count;
}