/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <libxfs.h>
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "incore.h"
#include "globals.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"

#undef RMAP_DEBUG

#ifdef RMAP_DEBUG
# define dbg_printf(f, a...)	do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

/* per-AG rmap object anchor */
struct xfs_ag_rmap {
	struct xfs_slab	*ar_rmaps;		/* rmap observations, p4 */
	struct xfs_slab	*ar_raw_rmaps;		/* unmerged rmaps */
	int		ar_flcount;		/* agfl entries from leftover */
						/* agbt allocations */
	struct xfs_rmap_irec	ar_last_rmap;	/* last rmap seen */
	struct xfs_slab	*ar_refcount_items;	/* refcount items, p4-5 */
};

static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
static bool refcbt_suspect;

/*
 * Compare rmap observations for array sorting.
 */
static int
rmap_compare(
	const void		*a,
	const void		*b)
{
	const struct xfs_rmap_irec	*pa;
	const struct xfs_rmap_irec	*pb;
	__u64			oa;
	__u64			ob;

	pa = a; pb = b;
	oa = libxfs_rmap_irec_offset_pack(pa);
	ob = libxfs_rmap_irec_offset_pack(pb);

	if (pa->rm_startblock < pb->rm_startblock)
		return -1;
	else if (pa->rm_startblock > pb->rm_startblock)
		return 1;
	else if (pa->rm_owner < pb->rm_owner)
		return -1;
	else if (pa->rm_owner > pb->rm_owner)
		return 1;
	else if (oa < ob)
		return -1;
	else if (oa > ob)
		return 1;
	else
		return 0;
}

/*
 * Returns true if we must reconstruct either the reference count or reverse
 * mapping trees.
 */
bool
rmap_needs_work(
	struct xfs_mount	*mp)
{
	return xfs_sb_version_hasreflink(&mp->m_sb) ||
	       xfs_sb_version_hasrmapbt(&mp->m_sb);
}

/*
 * Initialize per-AG reverse map data.
 */
void
rmaps_init(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;
	int			error;

	if (!rmap_needs_work(mp))
		return;

	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
	if (!ag_rmaps)
		do_error(_("couldn't allocate per-AG reverse map roots\n"));

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		error = init_slab(&ag_rmaps[i].ar_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
		error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
				sizeof(struct xfs_rmap_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
		ag_rmaps[i].ar_last_rmap.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		error = init_slab(&ag_rmaps[i].ar_refcount_items,
				sizeof(struct xfs_refcount_irec));
		if (error)
			do_error(
_("Insufficient memory while allocating refcount item slabs."));
	}
}

/*
 * Free the per-AG reverse-mapping data.
 */
void
rmaps_free(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		i;

	if (!rmap_needs_work(mp))
		return;

	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		free_slab(&ag_rmaps[i].ar_rmaps);
		free_slab(&ag_rmaps[i].ar_raw_rmaps);
		free_slab(&ag_rmaps[i].ar_refcount_items);
	}
	free(ag_rmaps);
	ag_rmaps = NULL;
}

/*
 * Decide if two reverse-mapping records can be merged.
 */
bool
rmaps_are_mergeable(
	struct xfs_rmap_irec	*r1,
	struct xfs_rmap_irec	*r2)
{
	if (r1->rm_owner != r2->rm_owner)
		return false;
	if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
		return false;
	if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
	    XFS_RMAP_LEN_MAX)
		return false;
	if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
		return true;
	/* must be an inode owner below here */
	if (r1->rm_flags != r2->rm_flags)
		return false;
	if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return true;
	return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
}

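#ifdef RMAP_EXAMPLE
/*
 * A minimal illustrative sketch of the merge rules above (guarded by
 * RMAP_EXAMPLE, which is never defined, so this is compiled out and is
 * not part of repair proper): two records of the same inode that are
 * both physically and logically contiguous merge, while a logical hole
 * keeps them apart.  The owner value 128 is an arbitrary stand-in for
 * an ordinary inode number.
 */
static void
rmaps_are_mergeable_example(void)
{
	struct xfs_rmap_irec	a = {
		.rm_startblock = 100,
		.rm_blockcount = 8,
		.rm_owner = 128,
		.rm_offset = 0,
		.rm_flags = 0,
	};
	struct xfs_rmap_irec	b = a;

	b.rm_startblock = 108;	/* physically contiguous with a */
	b.rm_offset = 8;	/* logically contiguous with a */
	ASSERT(rmaps_are_mergeable(&a, &b));

	b.rm_offset = 16;	/* logical hole => not mergeable */
	ASSERT(!rmaps_are_mergeable(&a, &b));
}
#endif
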
/*
 * Add an observation about a block mapping in an inode's data or attribute
 * fork for later btree reconstruction.
 */
int
rmap_add_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_rmap_irec	rmap;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	struct xfs_rmap_irec	*last_rmap;
	int			error = 0;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
	ASSERT(ino != NULLFSINO);
	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	rmap.rm_owner = ino;
	rmap.rm_offset = irec->br_startoff;
	rmap.rm_flags = 0;
	if (whichfork == XFS_ATTR_FORK)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = irec->br_blockcount;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rmap.rm_flags |= XFS_RMAP_UNWRITTEN;
	last_rmap = &ag_rmaps[agno].ar_last_rmap;
	if (last_rmap->rm_owner == XFS_RMAP_OWN_UNKNOWN)
		*last_rmap = rmap;
	else if (rmaps_are_mergeable(last_rmap, &rmap))
		last_rmap->rm_blockcount += rmap.rm_blockcount;
	else {
		error = slab_add(ag_rmaps[agno].ar_rmaps, last_rmap);
		if (error)
			return error;
		*last_rmap = rmap;
	}

	return error;
}

/* Finish collecting inode data/attr fork rmaps. */
int
rmap_finish_collecting_fork_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	if (!rmap_needs_work(mp) ||
	    ag_rmaps[agno].ar_last_rmap.rm_owner == XFS_RMAP_OWN_UNKNOWN)
		return 0;
	return slab_add(ag_rmaps[agno].ar_rmaps, &ag_rmaps[agno].ar_last_rmap);
}

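#ifdef RMAP_EXAMPLE
/*
 * A minimal illustrative sketch of the calling convention for the two
 * functions above (guarded by RMAP_EXAMPLE, which is never defined, so
 * this is compiled out and is not part of repair proper).  Mappings are
 * fed to rmap_add_rec() in extent order so contiguous extents coalesce
 * into ar_last_rmap; the final pending record only reaches the slab when
 * rmap_finish_collecting_fork_recs() is called.  The caller shown here
 * is hypothetical; in repair proper this happens during the phase 4
 * inode scan.
 */
static int
rmap_collect_example(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_ino_t		ino,
	struct xfs_bmbt_irec	*map,
	int			nmaps)
{
	int			i;
	int			error;

	for (i = 0; i < nmaps; i++) {
		error = rmap_add_rec(mp, ino, XFS_DATA_FORK, &map[i]);
		if (error)
			return error;
	}
	/* flush the record still buffered in ar_last_rmap */
	return rmap_finish_collecting_fork_recs(mp, agno);
}
#endif
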
/* add a raw rmap; these will be merged later */
static int
__rmap_add_raw_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner,
	bool			is_attr,
	bool			is_bmbt)
{
	struct xfs_rmap_irec	rmap;

	ASSERT(len != 0);
	rmap.rm_owner = owner;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;
	if (is_attr)
		rmap.rm_flags |= XFS_RMAP_ATTR_FORK;
	if (is_bmbt)
		rmap.rm_flags |= XFS_RMAP_BMBT_BLOCK;
	rmap.rm_startblock = agbno;
	rmap.rm_blockcount = len;
	return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
}

/*
 * Add a reverse mapping for an inode fork's block mapping btree block.
 */
int
rmap_add_bmbt_rec(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	int			whichfork,
	xfs_fsblock_t		fsbno)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	if (!rmap_needs_work(mp))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, 1, ino,
			whichfork == XFS_ATTR_FORK, true);
}

/*
 * Add a reverse mapping for a per-AG fixed metadata extent.
 */
int
rmap_add_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		owner)
{
	if (!rmap_needs_work(mp))
		return 0;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);

	return __rmap_add_raw_rec(mp, agno, agbno, len, owner, false, false);
}

/*
 * Merge adjacent raw rmaps and add them to the main rmap list.
 */
int
rmap_fold_raw_recs(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*cur = NULL;
	struct xfs_rmap_irec	*prev, *rec;
	size_t			old_sz;
	int			error = 0;

	old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
	if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
		goto no_raw;
	qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
	error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
			&cur);
	if (error)
		goto err;

	prev = pop_slab_cursor(cur);
	rec = pop_slab_cursor(cur);
	while (prev && rec) {
		if (rmaps_are_mergeable(prev, rec)) {
			prev->rm_blockcount += rec->rm_blockcount;
			rec = pop_slab_cursor(cur);
			continue;
		}
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
		prev = rec;
		rec = pop_slab_cursor(cur);
	}
	if (prev) {
		error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
		if (error)
			goto err;
	}
	free_slab(&ag_rmaps[agno].ar_raw_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
no_raw:
	if (old_sz)
		qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
err:
	free_slab_cursor(&cur);
	return error;
}

static int
find_first_zero_bit(
	__uint64_t	mask)
{
	int		n;
	int		b = 0;

	for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
		b++;

	return b;
}

static int
popcnt(
	__uint64_t	mask)
{
	int		n;
	int		b = 0;

	if (mask == 0)
		return 0;

	for (n = 0; n < sizeof(mask) * NBBY; n++, mask >>= 1)
		if (mask & 1)
			b++;

	return b;
}

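#ifdef RMAP_EXAMPLE
/*
 * A minimal illustrative sketch of the two helpers above on a
 * sparse-inode mask (guarded by RMAP_EXAMPLE, which is never defined,
 * so this is compiled out and is not part of repair proper).  A set bit
 * in ir_sparse marks a missing inode, so for mask 0x3 the first
 * allocated inode sits at index 2 and two inodes of the chunk are
 * absent.  On GCC and Clang, __builtin_ctzll(~mask) and
 * __builtin_popcountll(mask) would compute the same two values.
 */
static void
sparse_mask_example(void)
{
	__uint64_t	mask = 0x3;

	ASSERT(find_first_zero_bit(mask) == 2);
	ASSERT(popcnt(mask) == 2);
}
#endif
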
/*
 * Add an allocation group's fixed metadata to the rmap list.  This includes
 * sb/agi/agf/agfl headers, inode chunks, and the log.
 */
int
rmap_add_fixed_ag_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	ino_tree_node_t		*ino_rec;
	xfs_agino_t		agino;
	int			error;
	int			startidx;
	int			nr;

	if (!rmap_needs_work(mp))
		return 0;

	/* sb/agi/agf/agfl headers */
	error = rmap_add_ag_rec(mp, agno, 0, XFS_BNO_BLOCK(mp),
			XFS_RMAP_OWN_FS);
	if (error)
		goto out;

	/* inodes */
	ino_rec = findfirst_inode_rec(agno);
	for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
			startidx = find_first_zero_bit(ino_rec->ir_sparse);
			nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
		} else {
			startidx = 0;
			nr = XFS_INODES_PER_CHUNK;
		}
		nr /= mp->m_sb.sb_inopblock;
		if (nr == 0)
			nr = 1;
		agino = ino_rec->ino_startnum + startidx;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		if (XFS_AGINO_TO_OFFSET(mp, agino) == 0) {
			error = rmap_add_ag_rec(mp, agno, agbno, nr,
					XFS_RMAP_OWN_INODES);
			if (error)
				goto out;
		}
	}

	/* log */
	fsbno = mp->m_sb.sb_logstart;
	if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
		agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
		error = rmap_add_ag_rec(mp, agno, agbno, mp->m_sb.sb_logblocks,
				XFS_RMAP_OWN_LOG);
		if (error)
			goto out;
	}
out:
	return error;
}

/*
 * Copy the per-AG btree reverse-mapping data into the rmapbt.
 *
 * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
 * rmaps for file extents, inode chunks, AG headers, and bmbt blocks.  While
 * building the AG btrees we can record all the blocks allocated for each
 * btree, but we cannot resolve the conflict between the fact that one has to
 * finish allocating the space for the rmapbt before building the bnobt and
 * the fact that allocating blocks for the bnobt requires adding rmapbt
 * entries.  Therefore we record in-core the rmaps for each btree and here
 * use the libxfs rmap functions to finish building the rmap btree.
 *
 * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
 * recorded in memory.  The rmapbt has not been set up yet, so we need to be
 * able to "expand" the AGFL without updating the rmapbt.  After we've written
 * out the new AGF header the new rmapbt is available, so this function reads
 * each AGFL to generate rmap entries.  These entries are merged with the AG
 * btree rmap entries, and then we use libxfs' rmap functions to add them to
 * the rmapbt, after which it is fully regenerated.
 */
int
rmap_store_ag_btree_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_rmap_irec	*rm_rec = NULL;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_trans	*tp;
	struct xfs_trans_res	tres = {0};
	__be32			*agfl_bno, *b;
	int			error = 0;
	struct xfs_owner_info	oinfo;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;

	/* Release the ar_rmaps; they were put into the rmapbt during p5. */
	free_slab(&ag_rmaps[agno].ar_rmaps);
	error = init_slab(&ag_rmaps[agno].ar_rmaps,
			sizeof(struct xfs_rmap_irec));
	if (error)
		goto err;

	/* Add the AGFL blocks to the rmap list */
	error = -libxfs_trans_read_buf(
			mp, NULL, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
	if (error)
		goto err;

	/*
	 * Sometimes, the blocks at the beginning of the AGFL are there
	 * because we overestimated how many blocks we needed to rebuild
	 * the freespace btrees.  ar_flcount records the number of
	 * blocks in this situation.  Since those blocks already have an
	 * rmap, we only need to add rmap records for AGFL blocks past
	 * that point in the AGFL because those blocks are a result of a
	 * no-rmap no-shrink freelist fixup that we did earlier.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	b = agfl_bno + ag_rmaps[agno].ar_flcount;
	while (*b != NULLAGBLOCK && b - agfl_bno < XFS_AGFL_SIZE(mp)) {
		error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
				XFS_RMAP_OWN_AG);
		if (error)
			goto err;
		b++;
	}
	libxfs_putbuf(agflbp);
	agflbp = NULL;

	/* Merge all the raw rmaps into the main list */
	error = rmap_fold_raw_recs(mp, agno);
	if (error)
		goto err;

	/* Create a cursor to the rmap records */
	error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
			&rm_cur);
	if (error)
		goto err;

	/* Insert rmaps into the btree one at a time */
	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
		if (error)
			goto err_slab;

		error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
		if (error)
			goto err_trans;

		ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
		libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
		error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
				rm_rec->rm_blockcount, &oinfo);
		if (error)
			goto err_trans;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto err_slab;

		fix_freelist(mp, agno, false);

		rm_rec = pop_slab_cursor(rm_cur);
	}

	free_slab_cursor(&rm_cur);
	return 0;

err_trans:
	libxfs_trans_cancel(tp);
err_slab:
	free_slab_cursor(&rm_cur);
err:
	if (agflbp)
		libxfs_putbuf(agflbp);
	return error;
}

#ifdef RMAP_DEBUG
static void
rmap_dump(
	const char		*msg,
	xfs_agnumber_t		agno,
	struct xfs_rmap_irec	*rmap)
{
	printf("%s: %p agno=%u pblk=%llu own=%lld lblk=%llu len=%u flags=0x%x\n",
		msg, rmap,
		(unsigned int)agno,
		(unsigned long long)rmap->rm_startblock,
		(unsigned long long)rmap->rm_owner,
		(unsigned long long)rmap->rm_offset,
		(unsigned int)rmap->rm_blockcount,
		(unsigned int)rmap->rm_flags);
}
#else
# define rmap_dump(m, a, r)
#endif

/*
 * Rebuilding the Reference Count & Reverse Mapping Btrees
 *
 * The reference count (refcnt) and reverse mapping (rmap) btrees are
 * rebuilt during phase 5, like all other AG btrees.  Therefore, reverse
 * mappings must be processed into reference counts at the end of phase
 * 4, and the rmaps must be recorded during phase 4.  There is a need to
 * access the rmaps in physical block order, but no particular need for
 * random access, so the slab.c code provides a big logical array
 * (consisting of smaller slabs) and some inorder iterator functions.
 *
 * Once we've recorded all the reverse mappings, we're ready to
 * translate the rmaps into refcount entries.  Imagine the rmap entries
 * as rectangles representing extents of physical blocks, and that the
 * rectangles can be laid down to allow them to overlap each other; then
 * we know that we must emit a refcnt btree entry wherever the amount of
 * overlap changes, i.e. the emission stimulus is level-triggered:
 *
 *             -    ---
 *      --   ----- ----   ---            ------
 * --  ----  ----------- ----         ---------
 * -------------------------------- -----------
 * ^ ^ ^^ ^^ ^ ^^ ^^^  ^^^^  ^     ^^ ^  ^     ^
 * 2 1 23 21 3 43 234  2123  1     01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time.  Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 * - Set sp to the physical block (pblk) of the next unprocessed rmap.
 * - Add to the bag all rmaps in the array where startblock == sp.
 * - Set np to the physical block where the bag size will change.  This
 *   is the minimum of (the pblk of the next unprocessed rmap) and
 *   (startblock + len of each rmap in the bag).
 * - Record the bag size as old_bag_size.
 *
 * - While the bag isn't empty,
 *    - Remove from the bag all rmaps where startblock + len == np.
 *    - Add to the bag all rmaps in the array where startblock == np.
 *    - If the bag size isn't old_bag_size, store the refcount entry
 *      (sp, np - sp, bag_size) in the refcnt btree.
 *    - If the bag is empty, break out of the inner loop.
 *    - Set old_bag_size to the bag size.
 *    - Set sp = np.
 *    - Set np to the physical block where the bag size will change.
 *      This is the minimum of (the pblk of the next unprocessed rmap)
 *      and (startblock + len of each rmap in the bag).
 *
 * An implementation detail is that because this processing happens
 * during phase 4, the refcount entries are stored in an array so that
 * phase 5 can load them into the refcount btree.  The rmaps can be
 * loaded directly into the rmap btree during phase 5 as well.
 */

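#ifdef RMAP_EXAMPLE
/*
 * A minimal illustrative sketch of the level-triggered emission described
 * above (guarded by RMAP_EXAMPLE, which is never defined, so this is
 * compiled out and is not part of repair proper).  Plain C arrays stand
 * in for the slab and the bag: three overlapping rmaps are laid down as
 * rectangles of +1s, and a refcount record is printed wherever the
 * overlap level changes, skipping levels below 2 just as the real
 * algorithm does.  This is a per-block scan rather than the bag-based
 * sweep that compute_refcounts() implements, but it produces the same
 * records.
 */
static void
refcount_example(void)
{
	/* (startblock, blockcount) pairs, sorted by startblock */
	static const struct {
		xfs_agblock_t	bno;
		xfs_extlen_t	len;
	} rm[] = { { 10, 4 }, { 10, 2 }, { 12, 4 } };
	int		level[32] = { 0 };
	int		i, b, start;

	/* lay each rmap down as a rectangle of +1s */
	for (i = 0; i < 3; i++)
		for (b = rm[i].bno; b < (int)(rm[i].bno + rm[i].len); b++)
			level[b]++;

	/* emit wherever the level changes; prints "bno=10 len=4 refcount=2" */
	for (start = 0, b = 1; b < 32; b++) {
		if (level[b] == level[start])
			continue;
		if (level[start] > 1)
			printf("bno=%d len=%d refcount=%d\n",
					start, b - start, level[start]);
		start = b;
	}
}
#endif
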
/*
 * Mark all inodes in the reverse-mapping observation stack as requiring the
 * reflink inode flag, if the stack depth is greater than 1.
 */
static void
mark_inode_rl(
	struct xfs_mount	*mp,
	struct xfs_bag		*rmaps)
{
	xfs_agnumber_t		iagno;
	struct xfs_rmap_irec	*rmap;
	struct ino_tree_node	*irec;
	int			off;
	size_t			idx;
	xfs_agino_t		ino;

	if (bag_count(rmaps) < 2)
		return;

	/* Reflink flag accounting */
	foreach_bag_ptr(rmaps, idx, rmap) {
		ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
		iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
		ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
		pthread_mutex_lock(&ag_locks[iagno].lock);
		irec = find_inode_rec(mp, iagno, ino);
		off = get_inode_offset(mp, rmap->rm_owner, irec);
		/* lock here because we might go outside this ag */
		set_inode_is_rl(irec, off);
		pthread_mutex_unlock(&ag_locks[iagno].lock);
	}
}

/*
 * Emit a refcount object for refcntbt reconstruction during phase 5.
 */
#define REFCOUNT_CLAMP(nr)	((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	size_t			nr_rmaps)
{
	struct xfs_refcount_irec	rlrec;
	int			error;
	struct xfs_slab		*rlslab;

	rlslab = ag_rmaps[agno].ar_refcount_items;
	ASSERT(nr_rmaps > 0);

	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
		agno, agbno, len, nr_rmaps);
	rlrec.rc_startblock = agbno;
	rlrec.rc_blockcount = len;
	rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
	error = slab_add(rlslab, &rlrec);
	if (error)
		do_error(
_("Insufficient memory while recreating refcount tree."));
}
#undef REFCOUNT_CLAMP

/*
 * Transform a pile of physical block mapping observations into refcount data
 * for eventual rebuilding of the btrees.
 */
#define RMAP_END(r)	((r)->rm_startblock + (r)->rm_blockcount)
int
compute_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_bag		*stack_top = NULL;
	struct xfs_slab		*rmaps;
	struct xfs_slab_cursor	*rmaps_cur;
	struct xfs_rmap_irec	*array_cur;
	struct xfs_rmap_irec	*rmap;
	xfs_agblock_t		sbno;	/* first bno of this rmap set */
	xfs_agblock_t		cbno;	/* first bno of this refcount set */
	xfs_agblock_t		nbno;	/* next bno where rmap set changes */
	size_t			n, idx;
	size_t			old_stack_nr;
	int			error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	rmaps = ag_rmaps[agno].ar_rmaps;

	error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
	if (error)
		return error;

	error = init_bag(&stack_top);
	if (error)
		goto err;

	/* While there are rmaps to be processed... */
	n = 0;
	while (n < slab_count(rmaps)) {
		array_cur = peek_slab_cursor(rmaps_cur);
		sbno = cbno = array_cur->rm_startblock;
		/* Push all rmaps with pblk == sbno onto the stack */
		for (;
		     array_cur && array_cur->rm_startblock == sbno;
		     array_cur = peek_slab_cursor(rmaps_cur)) {
			advance_slab_cursor(rmaps_cur); n++;
			rmap_dump("push0", agno, array_cur);
			error = bag_add(stack_top, array_cur);
			if (error)
				goto err;
		}
		mark_inode_rl(mp, stack_top);

		/* Set nbno to the bno of the next refcount change */
		if (n < slab_count(rmaps))
			nbno = array_cur->rm_startblock;
		else
			nbno = NULLAGBLOCK;
		foreach_bag_ptr(stack_top, idx, rmap) {
			nbno = min(nbno, RMAP_END(rmap));
		}

		/* Make sure the sweep makes forward progress */
		ASSERT(nbno > sbno);
		old_stack_nr = bag_count(stack_top);

		/* While stack isn't empty... */
		while (bag_count(stack_top)) {
			/* Pop all rmaps that end at nbno */
			foreach_bag_ptr_reverse(stack_top, idx, rmap) {
				if (RMAP_END(rmap) != nbno)
					continue;
				rmap_dump("pop", agno, rmap);
				error = bag_remove(stack_top, idx);
				if (error)
					goto err;
			}

			/* Push array items that start at nbno */
			for (;
			     array_cur && array_cur->rm_startblock == nbno;
			     array_cur = peek_slab_cursor(rmaps_cur)) {
				advance_slab_cursor(rmaps_cur); n++;
				rmap_dump("push1", agno, array_cur);
				error = bag_add(stack_top, array_cur);
				if (error)
					goto err;
			}
			mark_inode_rl(mp, stack_top);

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (bag_count(stack_top) != old_stack_nr) {
				if (old_stack_nr > 1) {
					refcount_emit(mp, agno, cbno,
							nbno - cbno,
							old_stack_nr);
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (bag_count(stack_top) == 0)
				break;
			old_stack_nr = bag_count(stack_top);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			if (n < slab_count(rmaps))
				nbno = array_cur->rm_startblock;
			else
				nbno = NULLAGBLOCK;
			foreach_bag_ptr(stack_top, idx, rmap) {
				nbno = min(nbno, RMAP_END(rmap));
			}

			/* Make sure the sweep makes forward progress */
			ASSERT(nbno > sbno);
		}
	}
err:
	free_bag(&stack_top);
	free_slab_cursor(&rmaps_cur);

	return error;
}
#undef RMAP_END

/*
 * Return the number of rmap objects for an AG.
 */
size_t
rmap_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_rmaps);
}

/*
 * Return a slab cursor that will return rmap objects in order.
 */
int
rmap_init_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
}

/*
 * Disable the reverse-mapping btree check.
 */
void
rmap_avoid_check(void)
{
	rmapbt_suspect = true;
}

/* Look for an rmap in the rmapbt that matches a given rmap. */
static int
rmap_lookup(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	int			error;

	/* Use the regular btree retrieval routine. */
	error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_blockcount,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, have);
	if (error)
		return error;
	if (*have == 0)
		return error;
	return -libxfs_rmap_get_rec(bt_cur, tmp, have);
}

/* Look for an rmap in the rmapbt that overlaps a given rmap. */
static int
rmap_lookup_overlapped(
	struct xfs_btree_cur	*bt_cur,
	struct xfs_rmap_irec	*rm_rec,
	struct xfs_rmap_irec	*tmp,
	int			*have)
{
	/* Have to use our fancy version for overlapped */
	return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
				rm_rec->rm_owner, rm_rec->rm_offset,
				rm_rec->rm_flags, tmp, have);
}

/* Does the btree rmap cover the observed rmap? */
#define NEXTP(x)	((x)->rm_startblock + (x)->rm_blockcount)
#define NEXTL(x)	((x)->rm_offset + (x)->rm_blockcount)
static bool
rmap_is_good(
	struct xfs_rmap_irec	*observed,
	struct xfs_rmap_irec	*btree)
{
	/* Can't have mismatches in the flags or the owner. */
	if (btree->rm_flags != observed->rm_flags ||
	    btree->rm_owner != observed->rm_owner)
		return false;

	/*
	 * Btree record can't physically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_startblock > observed->rm_startblock ||
	    NEXTP(btree) < NEXTP(observed))
		return false;

	/* If this is metadata or bmbt, we're done. */
	if (XFS_RMAP_NON_INODE_OWNER(observed->rm_owner) ||
	    (observed->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return true;
	/*
	 * Btree record can't logically start after the observed
	 * record, nor can it end before the observed record.
	 */
	if (btree->rm_offset > observed->rm_offset ||
	    NEXTL(btree) < NEXTL(observed))
		return false;

	return true;
}
#undef NEXTP
#undef NEXTL

/*
 * Compare the observed reverse mappings against what's in the ag btree.
 */
int
rmaps_verify_btree(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rm_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	struct xfs_buf		*agbp = NULL;
	struct xfs_rmap_irec	*rm_rec;
	struct xfs_rmap_irec	tmp;
	struct xfs_perag	*pag;		/* per allocation group data */

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return 0;
	if (rmapbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt rmap btrees.\n"));
		return 0;
	}

	/* Create a cursor to the observed rmap records */
	error = rmap_init_cursor(agno, &rm_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rm_rec = pop_slab_cursor(rm_cur);
	while (rm_rec) {
		error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
		if (error)
			goto err;
		/*
		 * Using the range query is expensive, so only do it if
		 * the regular lookup doesn't find anything or if it doesn't
		 * match the observed rmap.
		 */
		if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
				(!have || !rmap_is_good(rm_rec, &tmp))) {
			error = rmap_lookup_overlapped(bt_cur, rm_rec,
					&tmp, &have);
			if (error)
				goto err;
		}
		if (!have) {
			do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
%s%soff %"PRIu64"\n"),
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}

		/* Compare each rmap observation against the btree's */
		if (!rmap_is_good(rm_rec, &tmp)) {
			do_warn(
_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRId64" %s%soff \
%"PRIu64"; should be (%u/%u) %slen %u owner %"PRId64" %s%soff %"PRIu64"\n"),
				agno, tmp.rm_startblock,
				(tmp.rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				tmp.rm_blockcount,
				tmp.rm_owner,
				(tmp.rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(tmp.rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				tmp.rm_offset,
				agno, rm_rec->rm_startblock,
				(rm_rec->rm_flags & XFS_RMAP_UNWRITTEN) ?
					_("unwritten ") : "",
				rm_rec->rm_blockcount,
				rm_rec->rm_owner,
				(rm_rec->rm_flags & XFS_RMAP_ATTR_FORK) ?
					_("attr ") : "",
				(rm_rec->rm_flags & XFS_RMAP_BMBT_BLOCK) ?
					_("bmbt ") : "",
				rm_rec->rm_offset);
			goto next_loop;
		}
next_loop:
		rm_rec = pop_slab_cursor(rm_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rm_cur);
	return 0;
}

/*
 * Compare the key fields of two rmap records -- positive if key1 > key2,
 * negative if key1 < key2, and zero if equal.
 */
__int64_t
rmap_diffkeys(
	struct xfs_rmap_irec	*kp1,
	struct xfs_rmap_irec	*kp2)
{
	__u64			oa;
	__u64			ob;
	__int64_t		d;
	struct xfs_rmap_irec	tmp;

	tmp = *kp1;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	oa = libxfs_rmap_irec_offset_pack(&tmp);
	tmp = *kp2;
	tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
	ob = libxfs_rmap_irec_offset_pack(&tmp);

	d = (__int64_t)kp1->rm_startblock - kp2->rm_startblock;
	if (d)
		return d;

	if (kp1->rm_owner > kp2->rm_owner)
		return 1;
	else if (kp2->rm_owner > kp1->rm_owner)
		return -1;

	if (oa > ob)
		return 1;
	else if (ob > oa)
		return -1;
	return 0;
}

/* Compute the high key of an rmap record. */
void
rmap_high_key_from_rec(
	struct xfs_rmap_irec	*rec,
	struct xfs_rmap_irec	*key)
{
	int			adj;

	adj = rec->rm_blockcount - 1;

	key->rm_startblock = rec->rm_startblock + adj;
	key->rm_owner = rec->rm_owner;
	key->rm_offset = rec->rm_offset;
	key->rm_flags = rec->rm_flags & XFS_RMAP_KEY_FLAGS;
	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
	    (rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
		return;
	key->rm_offset += adj;
}

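#ifdef RMAP_EXAMPLE
/*
 * A minimal illustrative sketch of the high-key computation above
 * (guarded by RMAP_EXAMPLE, which is never defined, so this is compiled
 * out and is not part of repair proper).  For an inode-owned record
 * covering physical blocks 100-107 at logical offset 20, the high key
 * names the last block covered: startblock 107, offset 27.  The owner
 * value 128 is an arbitrary stand-in for an ordinary inode number; for
 * AG-internal owners the offset would be left untouched.
 */
static void
rmap_high_key_example(void)
{
	struct xfs_rmap_irec	rec = {
		.rm_startblock = 100,
		.rm_blockcount = 8,
		.rm_owner = 128,
		.rm_offset = 20,
		.rm_flags = 0,
	};
	struct xfs_rmap_irec	key;

	rmap_high_key_from_rec(&rec, &key);
	ASSERT(key.rm_startblock == 107 && key.rm_offset == 27);
}
#endif
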
/*
 * Record that an inode had the reflink flag set when repair started.  The
 * inode reflink flag will be adjusted as necessary.
 */
void
record_inode_reflink_flag(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dino,
	xfs_agnumber_t		agno,
	xfs_agino_t		ino,
	xfs_ino_t		lino)
{
	struct ino_tree_node	*irec;
	int			off;

	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
	if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
		return;
	irec = find_inode_rec(mp, agno, ino);
	off = get_inode_offset(mp, lino, irec);
	ASSERT(!inode_was_rl(irec, off));
	set_inode_was_rl(irec, off);
	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}

/*
 * Fix an inode's reflink flag.
 */
static int
fix_inode_reflink_flag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	bool			set)
{
	struct xfs_dinode	*dino;
	struct xfs_buf		*buf;

	if (set)
		do_warn(
_("setting reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	else if (!no_modify) /* && !set */
		do_warn(
_("clearing reflink flag on inode %"PRIu64"\n"),
			XFS_AGINO_TO_INO(mp, agno, agino));
	if (no_modify)
		return 0;

	buf = get_agino_buf(mp, agno, agino, &dino);
	if (!buf)
		return 1;
	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
	if (set)
		dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
	else
		dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
	libxfs_dinode_calc_crc(mp, dino);
	libxfs_writebuf(buf, 0);

	return 0;
}

/*
 * Fix discrepancies between the state of the inode reflink flag and our
 * observations as to whether or not the inode really needs it.
 */
int
fix_inode_reflink_flags(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct ino_tree_node	*irec;
	int			bit;
	__uint64_t		was;
	__uint64_t		is;
	__uint64_t		diff;
	__uint64_t		mask;
	int			error = 0;
	xfs_agino_t		agino;

	/*
	 * Update the reflink flag for any inode where there's a discrepancy
	 * between the inode flag and whether or not we found any reflinked
	 * extents.
	 */
	for (irec = findfirst_inode_rec(agno);
	     irec != NULL;
	     irec = next_ino_rec(irec)) {
		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
		was = irec->ino_was_rl;
		is = irec->ino_is_rl;
		if (was == is)
			continue;
		diff = was ^ is;
		dbg_printf("mismatch ino=%llu was=0x%lx is=0x%lx dif=0x%lx\n",
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
						irec->ino_startnum),
			was, is, diff);

		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
			agino = bit + irec->ino_startnum;
			if (!(diff & mask))
				continue;
			else if (was & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						false);
			else if (is & mask)
				error = fix_inode_reflink_flag(mp, agno, agino,
						true);
			else
				ASSERT(0);
			if (error)
				do_error(
_("Unable to fix reflink flag on inode %"PRIu64".\n"),
					XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}

	return error;
}

/*
 * Return the number of refcount objects for an AG.
 */
size_t
refcount_record_count(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	return slab_count(ag_rmaps[agno].ar_refcount_items);
}

/*
 * Return a slab cursor that will return refcount objects in order.
 */
int
init_refcount_cursor(
	xfs_agnumber_t		agno,
	struct xfs_slab_cursor	**cur)
{
	return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
}

/*
 * Disable the refcount btree check.
 */
void
refcount_avoid_check(void)
{
	refcbt_suspect = true;
}

/*
 * Compare the observed reference counts against what's in the ag btree.
 */
int
check_refcounts(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_slab_cursor	*rl_cur;
	struct xfs_btree_cur	*bt_cur = NULL;
	int			error;
	int			have;
	int			i;
	struct xfs_buf		*agbp = NULL;
	struct xfs_refcount_irec	*rl_rec;
	struct xfs_refcount_irec	tmp;
	struct xfs_perag	*pag;		/* per allocation group data */

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;
	if (refcbt_suspect) {
		if (no_modify && agno == 0)
			do_warn(_("would rebuild corrupt refcount btrees.\n"));
		return 0;
	}

	/* Create cursors to refcount structures */
	error = init_refcount_cursor(agno, &rl_cur);
	if (error)
		return error;

	error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error)
		goto err;

	/* Leave the per-ag data "uninitialized" since we rewrite it later */
	pag = libxfs_perag_get(mp, agno);
	pag->pagf_init = 0;
	libxfs_perag_put(pag);

	bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
	if (!bt_cur) {
		error = -ENOMEM;
		goto err;
	}

	rl_rec = pop_slab_cursor(rl_cur);
	while (rl_rec) {
		/* Look for a refcount record in the btree */
		error = -libxfs_refcount_lookup_le(bt_cur,
				rl_rec->rc_startblock, &have);
		if (error)
			goto err;
		if (!have) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
		if (error)
			goto err;
		if (!i) {
			do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
				agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
			goto next_loop;
		}

		/* Compare each refcount observation against the btree's */
		if (tmp.rc_startblock != rl_rec->rc_startblock ||
		    tmp.rc_blockcount < rl_rec->rc_blockcount ||
		    tmp.rc_refcount < rl_rec->rc_refcount)
			do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
				agno, tmp.rc_startblock, tmp.rc_blockcount,
				tmp.rc_refcount, agno, rl_rec->rc_startblock,
				rl_rec->rc_blockcount, rl_rec->rc_refcount);
next_loop:
		rl_rec = pop_slab_cursor(rl_cur);
	}

err:
	if (bt_cur)
		libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
	if (agbp)
		libxfs_putbuf(agbp);
	free_slab_cursor(&rl_cur);
	return 0;
}

/*
 * Regenerate the AGFL so that we don't run out of it while rebuilding the
 * rmap btree.  If skip_rmapbt is true, don't update the rmapbt (most
 * probably because we're updating the rmapbt).
 */
void
fix_freelist(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	bool			skip_rmapbt)
{
	xfs_alloc_arg_t		args;
	xfs_trans_t		*tp;
	struct xfs_trans_res	tres = {0};
	int			flags;
	int			error;

	memset(&args, 0, sizeof(args));
	args.mp = mp;
	args.agno = agno;
	args.alignment = 1;
	args.pag = libxfs_perag_get(mp, agno);
	error = -libxfs_trans_alloc(mp, &tres,
			libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
	if (error)
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	args.tp = tp;

	/*
	 * Prior to rmapbt, all we had to do to fix the freelist is "expand"
	 * the fresh AGFL header from empty to full.  That hasn't changed.
	 * For rmapbt, however, things change a bit.
	 *
	 * When we're stuffing the rmapbt with the AG btree rmaps the tree can
	 * expand, so we need to keep the AGFL well-stocked for the expansion.
	 * However, this expansion can cause the bnobt/cntbt to shrink, which
	 * can make the AGFL eligible for shrinking.  Shrinking involves
	 * freeing rmapbt entries, but since we haven't finished loading the
	 * rmapbt with the btree rmaps it's possible for the remove operation
	 * to fail.  The AGFL block is large enough at this point to absorb
	 * any blocks freed from the bnobt/cntbt, so we can disable shrinking.
	 *
	 * During the initial AGFL regeneration during AGF generation in
	 * phase 5 we must also disable rmapbt modifications because the AGF
	 * that libxfs reads does not yet point to the new rmapbt.  These
	 * initial AGFL entries are added just prior to adding the AG btree
	 * block rmaps to the rmapbt.  It's ok to pass NOSHRINK here too,
	 * since the AGFL is empty and cannot shrink.
	 */
	flags = XFS_ALLOC_FLAG_NOSHRINK;
	if (skip_rmapbt)
		flags |= XFS_ALLOC_FLAG_NORMAP;
	error = -libxfs_alloc_fix_freelist(&args, flags);
	libxfs_perag_put(args.pag);
	if (error) {
		do_error(_("failed to fix AGFL on AG %d, error %d\n"),
				agno, error);
	}
	libxfs_trans_commit(tp);
}

/*
 * Remember how many AGFL entries came from excess AG btree allocations and
 * therefore already have rmap entries.
 */
void
rmap_store_agflcount(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	int			count)
{
	if (!rmap_needs_work(mp))
		return;

	ag_rmaps[agno].ar_flcount = count;
}