1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
6 #include "libxfs_priv.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_alloc.h"
17 #include "xfs_btree.h"
18 #include "xfs_bmap_btree.h"
20 #include "xfs_trace.h"
24 * Convert on-disk form of btree root to in-memory form.
29 xfs_bmdr_block_t
*dblock
,
31 struct xfs_btree_block
*rblock
,
34 struct xfs_mount
*mp
= ip
->i_mount
;
41 xfs_btree_init_block_int(mp
, rblock
, XFS_BUF_DADDR_NULL
,
42 XFS_BTNUM_BMAP
, 0, 0, ip
->i_ino
,
44 rblock
->bb_level
= dblock
->bb_level
;
45 ASSERT(be16_to_cpu(rblock
->bb_level
) > 0);
46 rblock
->bb_numrecs
= dblock
->bb_numrecs
;
47 dmxr
= xfs_bmdr_maxrecs(dblocklen
, 0);
48 fkp
= XFS_BMDR_KEY_ADDR(dblock
, 1);
49 tkp
= XFS_BMBT_KEY_ADDR(mp
, rblock
, 1);
50 fpp
= XFS_BMDR_PTR_ADDR(dblock
, 1, dmxr
);
51 tpp
= XFS_BMAP_BROOT_PTR_ADDR(mp
, rblock
, 1, rblocklen
);
52 dmxr
= be16_to_cpu(dblock
->bb_numrecs
);
53 memcpy(tkp
, fkp
, sizeof(*fkp
) * dmxr
);
54 memcpy(tpp
, fpp
, sizeof(*fpp
) * dmxr
);
58 xfs_bmbt_disk_get_all(
59 struct xfs_bmbt_rec
*rec
,
60 struct xfs_bmbt_irec
*irec
)
62 uint64_t l0
= get_unaligned_be64(&rec
->l0
);
63 uint64_t l1
= get_unaligned_be64(&rec
->l1
);
65 irec
->br_startoff
= (l0
& xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN
)) >> 9;
66 irec
->br_startblock
= ((l0
& xfs_mask64lo(9)) << 43) | (l1
>> 21);
67 irec
->br_blockcount
= l1
& xfs_mask64lo(21);
68 if (l0
>> (64 - BMBT_EXNTFLAG_BITLEN
))
69 irec
->br_state
= XFS_EXT_UNWRITTEN
;
71 irec
->br_state
= XFS_EXT_NORM
;
75 * Extract the blockcount field from an on disk bmap extent record.
78 xfs_bmbt_disk_get_blockcount(
81 return (xfs_filblks_t
)(be64_to_cpu(r
->l1
) & xfs_mask64lo(21));
85 * Extract the startoff field from a disk format bmap extent record.
88 xfs_bmbt_disk_get_startoff(
91 return ((xfs_fileoff_t
)be64_to_cpu(r
->l0
) &
92 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN
)) >> 9;
96 * Set all the fields in a bmap extent record from the uncompressed form.
99 xfs_bmbt_disk_set_all(
100 struct xfs_bmbt_rec
*r
,
101 struct xfs_bmbt_irec
*s
)
103 int extent_flag
= (s
->br_state
!= XFS_EXT_NORM
);
105 ASSERT(s
->br_state
== XFS_EXT_NORM
|| s
->br_state
== XFS_EXT_UNWRITTEN
);
106 ASSERT(!(s
->br_startoff
& xfs_mask64hi(64-BMBT_STARTOFF_BITLEN
)));
107 ASSERT(!(s
->br_blockcount
& xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN
)));
108 ASSERT(!(s
->br_startblock
& xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN
)));
111 ((xfs_bmbt_rec_base_t
)extent_flag
<< 63) |
112 ((xfs_bmbt_rec_base_t
)s
->br_startoff
<< 9) |
113 ((xfs_bmbt_rec_base_t
)s
->br_startblock
>> 43), &r
->l0
);
115 ((xfs_bmbt_rec_base_t
)s
->br_startblock
<< 21) |
116 ((xfs_bmbt_rec_base_t
)s
->br_blockcount
&
117 (xfs_bmbt_rec_base_t
)xfs_mask64lo(21)), &r
->l1
);
121 * Convert in-memory form of btree root to on-disk form.
125 struct xfs_mount
*mp
,
126 struct xfs_btree_block
*rblock
,
128 xfs_bmdr_block_t
*dblock
,
137 if (xfs_sb_version_hascrc(&mp
->m_sb
)) {
138 ASSERT(rblock
->bb_magic
== cpu_to_be32(XFS_BMAP_CRC_MAGIC
));
139 ASSERT(uuid_equal(&rblock
->bb_u
.l
.bb_uuid
,
140 &mp
->m_sb
.sb_meta_uuid
));
141 ASSERT(rblock
->bb_u
.l
.bb_blkno
==
142 cpu_to_be64(XFS_BUF_DADDR_NULL
));
144 ASSERT(rblock
->bb_magic
== cpu_to_be32(XFS_BMAP_MAGIC
));
145 ASSERT(rblock
->bb_u
.l
.bb_leftsib
== cpu_to_be64(NULLFSBLOCK
));
146 ASSERT(rblock
->bb_u
.l
.bb_rightsib
== cpu_to_be64(NULLFSBLOCK
));
147 ASSERT(rblock
->bb_level
!= 0);
148 dblock
->bb_level
= rblock
->bb_level
;
149 dblock
->bb_numrecs
= rblock
->bb_numrecs
;
150 dmxr
= xfs_bmdr_maxrecs(dblocklen
, 0);
151 fkp
= XFS_BMBT_KEY_ADDR(mp
, rblock
, 1);
152 tkp
= XFS_BMDR_KEY_ADDR(dblock
, 1);
153 fpp
= XFS_BMAP_BROOT_PTR_ADDR(mp
, rblock
, 1, rblocklen
);
154 tpp
= XFS_BMDR_PTR_ADDR(dblock
, 1, dmxr
);
155 dmxr
= be16_to_cpu(dblock
->bb_numrecs
);
156 memcpy(tkp
, fkp
, sizeof(*fkp
) * dmxr
);
157 memcpy(tpp
, fpp
, sizeof(*fpp
) * dmxr
);
160 STATIC
struct xfs_btree_cur
*
162 struct xfs_btree_cur
*cur
)
164 struct xfs_btree_cur
*new;
166 new = xfs_bmbt_init_cursor(cur
->bc_mp
, cur
->bc_tp
,
167 cur
->bc_ino
.ip
, cur
->bc_ino
.whichfork
);
170 * Copy the firstblock, dfops, and flags values,
171 * since init cursor doesn't get them.
173 new->bc_ino
.flags
= cur
->bc_ino
.flags
;
179 xfs_bmbt_update_cursor(
180 struct xfs_btree_cur
*src
,
181 struct xfs_btree_cur
*dst
)
183 ASSERT((dst
->bc_tp
->t_firstblock
!= NULLFSBLOCK
) ||
184 (dst
->bc_ino
.ip
->i_d
.di_flags
& XFS_DIFLAG_REALTIME
));
186 dst
->bc_ino
.allocated
+= src
->bc_ino
.allocated
;
187 dst
->bc_tp
->t_firstblock
= src
->bc_tp
->t_firstblock
;
189 src
->bc_ino
.allocated
= 0;
193 xfs_bmbt_alloc_block(
194 struct xfs_btree_cur
*cur
,
195 union xfs_btree_ptr
*start
,
196 union xfs_btree_ptr
*new,
199 xfs_alloc_arg_t args
; /* block allocation args */
200 int error
; /* error return value */
202 memset(&args
, 0, sizeof(args
));
203 args
.tp
= cur
->bc_tp
;
204 args
.mp
= cur
->bc_mp
;
205 args
.fsbno
= cur
->bc_tp
->t_firstblock
;
206 xfs_rmap_ino_bmbt_owner(&args
.oinfo
, cur
->bc_ino
.ip
->i_ino
,
207 cur
->bc_ino
.whichfork
);
209 if (args
.fsbno
== NULLFSBLOCK
) {
210 args
.fsbno
= be64_to_cpu(start
->l
);
211 args
.type
= XFS_ALLOCTYPE_START_BNO
;
213 * Make sure there is sufficient room left in the AG to
214 * complete a full tree split for an extent insert. If
215 * we are converting the middle part of an extent then
216 * we may need space for two tree splits.
218 * We are relying on the caller to make the correct block
219 * reservation for this operation to succeed. If the
220 * reservation amount is insufficient then we may fail a
221 * block allocation here and corrupt the filesystem.
223 args
.minleft
= args
.tp
->t_blk_res
;
224 } else if (cur
->bc_tp
->t_flags
& XFS_TRANS_LOWMODE
) {
225 args
.type
= XFS_ALLOCTYPE_START_BNO
;
227 args
.type
= XFS_ALLOCTYPE_NEAR_BNO
;
230 args
.minlen
= args
.maxlen
= args
.prod
= 1;
231 args
.wasdel
= cur
->bc_ino
.flags
& XFS_BTCUR_BMBT_WASDEL
;
232 if (!args
.wasdel
&& args
.tp
->t_blk_res
== 0) {
236 error
= xfs_alloc_vextent(&args
);
240 if (args
.fsbno
== NULLFSBLOCK
&& args
.minleft
) {
242 * Could not find an AG with enough free space to satisfy
243 * a full btree split. Try again and if
244 * successful activate the lowspace algorithm.
247 args
.type
= XFS_ALLOCTYPE_FIRST_AG
;
248 error
= xfs_alloc_vextent(&args
);
251 cur
->bc_tp
->t_flags
|= XFS_TRANS_LOWMODE
;
253 if (WARN_ON_ONCE(args
.fsbno
== NULLFSBLOCK
)) {
258 ASSERT(args
.len
== 1);
259 cur
->bc_tp
->t_firstblock
= args
.fsbno
;
260 cur
->bc_ino
.allocated
++;
261 cur
->bc_ino
.ip
->i_d
.di_nblocks
++;
262 xfs_trans_log_inode(args
.tp
, cur
->bc_ino
.ip
, XFS_ILOG_CORE
);
263 xfs_trans_mod_dquot_byino(args
.tp
, cur
->bc_ino
.ip
,
264 XFS_TRANS_DQ_BCOUNT
, 1L);
266 new->l
= cpu_to_be64(args
.fsbno
);
277 struct xfs_btree_cur
*cur
,
280 struct xfs_mount
*mp
= cur
->bc_mp
;
281 struct xfs_inode
*ip
= cur
->bc_ino
.ip
;
282 struct xfs_trans
*tp
= cur
->bc_tp
;
283 xfs_fsblock_t fsbno
= XFS_DADDR_TO_FSB(mp
, XFS_BUF_ADDR(bp
));
284 struct xfs_owner_info oinfo
;
286 xfs_rmap_ino_bmbt_owner(&oinfo
, ip
->i_ino
, cur
->bc_ino
.whichfork
);
287 xfs_bmap_add_free(cur
->bc_tp
, fsbno
, 1, &oinfo
);
288 ip
->i_d
.di_nblocks
--;
290 xfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
291 xfs_trans_mod_dquot_byino(tp
, ip
, XFS_TRANS_DQ_BCOUNT
, -1L);
296 xfs_bmbt_get_minrecs(
297 struct xfs_btree_cur
*cur
,
300 if (level
== cur
->bc_nlevels
- 1) {
301 struct xfs_ifork
*ifp
;
303 ifp
= XFS_IFORK_PTR(cur
->bc_ino
.ip
,
304 cur
->bc_ino
.whichfork
);
306 return xfs_bmbt_maxrecs(cur
->bc_mp
,
307 ifp
->if_broot_bytes
, level
== 0) / 2;
310 return cur
->bc_mp
->m_bmap_dmnr
[level
!= 0];
314 xfs_bmbt_get_maxrecs(
315 struct xfs_btree_cur
*cur
,
318 if (level
== cur
->bc_nlevels
- 1) {
319 struct xfs_ifork
*ifp
;
321 ifp
= XFS_IFORK_PTR(cur
->bc_ino
.ip
,
322 cur
->bc_ino
.whichfork
);
324 return xfs_bmbt_maxrecs(cur
->bc_mp
,
325 ifp
->if_broot_bytes
, level
== 0);
328 return cur
->bc_mp
->m_bmap_dmxr
[level
!= 0];
333 * Get the maximum records we could store in the on-disk format.
335 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
336 * for the root node this checks the available space in the dinode fork
337 * so that we can resize the in-memory buffer to match it. After a
338 * resize to the maximum size this function returns the same value
339 * as xfs_bmbt_get_maxrecs for the root node, too.
342 xfs_bmbt_get_dmaxrecs(
343 struct xfs_btree_cur
*cur
,
346 if (level
!= cur
->bc_nlevels
- 1)
347 return cur
->bc_mp
->m_bmap_dmxr
[level
!= 0];
348 return xfs_bmdr_maxrecs(cur
->bc_ino
.forksize
, level
== 0);
352 xfs_bmbt_init_key_from_rec(
353 union xfs_btree_key
*key
,
354 union xfs_btree_rec
*rec
)
356 key
->bmbt
.br_startoff
=
357 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec
->bmbt
));
361 xfs_bmbt_init_high_key_from_rec(
362 union xfs_btree_key
*key
,
363 union xfs_btree_rec
*rec
)
365 key
->bmbt
.br_startoff
= cpu_to_be64(
366 xfs_bmbt_disk_get_startoff(&rec
->bmbt
) +
367 xfs_bmbt_disk_get_blockcount(&rec
->bmbt
) - 1);
371 xfs_bmbt_init_rec_from_cur(
372 struct xfs_btree_cur
*cur
,
373 union xfs_btree_rec
*rec
)
375 xfs_bmbt_disk_set_all(&rec
->bmbt
, &cur
->bc_rec
.b
);
379 xfs_bmbt_init_ptr_from_cur(
380 struct xfs_btree_cur
*cur
,
381 union xfs_btree_ptr
*ptr
)
388 struct xfs_btree_cur
*cur
,
389 union xfs_btree_key
*key
)
391 return (int64_t)be64_to_cpu(key
->bmbt
.br_startoff
) -
392 cur
->bc_rec
.b
.br_startoff
;
396 xfs_bmbt_diff_two_keys(
397 struct xfs_btree_cur
*cur
,
398 union xfs_btree_key
*k1
,
399 union xfs_btree_key
*k2
)
401 uint64_t a
= be64_to_cpu(k1
->bmbt
.br_startoff
);
402 uint64_t b
= be64_to_cpu(k2
->bmbt
.br_startoff
);
405 * Note: This routine previously casted a and b to int64 and subtracted
406 * them to generate a result. This lead to problems if b was the
407 * "maximum" key value (all ones) being signed incorrectly, hence this
408 * somewhat less efficient version.
417 static xfs_failaddr_t
421 struct xfs_mount
*mp
= bp
->b_mount
;
422 struct xfs_btree_block
*block
= XFS_BUF_TO_BLOCK(bp
);
426 if (!xfs_verify_magic(bp
, block
->bb_magic
))
427 return __this_address
;
429 if (xfs_sb_version_hascrc(&mp
->m_sb
)) {
431 * XXX: need a better way of verifying the owner here. Right now
432 * just make sure there has been one set.
434 fa
= xfs_btree_lblock_v5hdr_verify(bp
, XFS_RMAP_OWN_UNKNOWN
);
440 * numrecs and level verification.
442 * We don't know what fork we belong to, so just verify that the level
443 * is less than the maximum of the two. Later checks will be more
446 level
= be16_to_cpu(block
->bb_level
);
447 if (level
> max(mp
->m_bm_maxlevels
[0], mp
->m_bm_maxlevels
[1]))
448 return __this_address
;
450 return xfs_btree_lblock_verify(bp
, mp
->m_bmap_dmxr
[level
!= 0]);
454 xfs_bmbt_read_verify(
459 if (!xfs_btree_lblock_verify_crc(bp
))
460 xfs_verifier_error(bp
, -EFSBADCRC
, __this_address
);
462 fa
= xfs_bmbt_verify(bp
);
464 xfs_verifier_error(bp
, -EFSCORRUPTED
, fa
);
468 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
472 xfs_bmbt_write_verify(
477 fa
= xfs_bmbt_verify(bp
);
479 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
480 xfs_verifier_error(bp
, -EFSCORRUPTED
, fa
);
483 xfs_btree_lblock_calc_crc(bp
);
486 const struct xfs_buf_ops xfs_bmbt_buf_ops
= {
488 .magic
= { cpu_to_be32(XFS_BMAP_MAGIC
),
489 cpu_to_be32(XFS_BMAP_CRC_MAGIC
) },
490 .verify_read
= xfs_bmbt_read_verify
,
491 .verify_write
= xfs_bmbt_write_verify
,
492 .verify_struct
= xfs_bmbt_verify
,
497 xfs_bmbt_keys_inorder(
498 struct xfs_btree_cur
*cur
,
499 union xfs_btree_key
*k1
,
500 union xfs_btree_key
*k2
)
502 return be64_to_cpu(k1
->bmbt
.br_startoff
) <
503 be64_to_cpu(k2
->bmbt
.br_startoff
);
507 xfs_bmbt_recs_inorder(
508 struct xfs_btree_cur
*cur
,
509 union xfs_btree_rec
*r1
,
510 union xfs_btree_rec
*r2
)
512 return xfs_bmbt_disk_get_startoff(&r1
->bmbt
) +
513 xfs_bmbt_disk_get_blockcount(&r1
->bmbt
) <=
514 xfs_bmbt_disk_get_startoff(&r2
->bmbt
);
517 static const struct xfs_btree_ops xfs_bmbt_ops
= {
518 .rec_len
= sizeof(xfs_bmbt_rec_t
),
519 .key_len
= sizeof(xfs_bmbt_key_t
),
521 .dup_cursor
= xfs_bmbt_dup_cursor
,
522 .update_cursor
= xfs_bmbt_update_cursor
,
523 .alloc_block
= xfs_bmbt_alloc_block
,
524 .free_block
= xfs_bmbt_free_block
,
525 .get_maxrecs
= xfs_bmbt_get_maxrecs
,
526 .get_minrecs
= xfs_bmbt_get_minrecs
,
527 .get_dmaxrecs
= xfs_bmbt_get_dmaxrecs
,
528 .init_key_from_rec
= xfs_bmbt_init_key_from_rec
,
529 .init_high_key_from_rec
= xfs_bmbt_init_high_key_from_rec
,
530 .init_rec_from_cur
= xfs_bmbt_init_rec_from_cur
,
531 .init_ptr_from_cur
= xfs_bmbt_init_ptr_from_cur
,
532 .key_diff
= xfs_bmbt_key_diff
,
533 .diff_two_keys
= xfs_bmbt_diff_two_keys
,
534 .buf_ops
= &xfs_bmbt_buf_ops
,
535 .keys_inorder
= xfs_bmbt_keys_inorder
,
536 .recs_inorder
= xfs_bmbt_recs_inorder
,
540 * Allocate a new bmap btree cursor.
542 struct xfs_btree_cur
* /* new bmap btree cursor */
543 xfs_bmbt_init_cursor(
544 struct xfs_mount
*mp
, /* file system mount point */
545 struct xfs_trans
*tp
, /* transaction pointer */
546 struct xfs_inode
*ip
, /* inode owning the btree */
547 int whichfork
) /* data or attr fork */
549 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
550 struct xfs_btree_cur
*cur
;
551 ASSERT(whichfork
!= XFS_COW_FORK
);
553 cur
= kmem_cache_zalloc(xfs_btree_cur_zone
, GFP_NOFS
| __GFP_NOFAIL
);
557 cur
->bc_nlevels
= be16_to_cpu(ifp
->if_broot
->bb_level
) + 1;
558 cur
->bc_btnum
= XFS_BTNUM_BMAP
;
559 cur
->bc_blocklog
= mp
->m_sb
.sb_blocklog
;
560 cur
->bc_statoff
= XFS_STATS_CALC_INDEX(xs_bmbt_2
);
562 cur
->bc_ops
= &xfs_bmbt_ops
;
563 cur
->bc_flags
= XFS_BTREE_LONG_PTRS
| XFS_BTREE_ROOT_IN_INODE
;
564 if (xfs_sb_version_hascrc(&mp
->m_sb
))
565 cur
->bc_flags
|= XFS_BTREE_CRC_BLOCKS
;
567 cur
->bc_ino
.forksize
= XFS_IFORK_SIZE(ip
, whichfork
);
569 cur
->bc_ino
.allocated
= 0;
570 cur
->bc_ino
.flags
= 0;
571 cur
->bc_ino
.whichfork
= whichfork
;
577 * Calculate number of records in a bmap btree block.
581 struct xfs_mount
*mp
,
585 blocklen
-= XFS_BMBT_BLOCK_LEN(mp
);
588 return blocklen
/ sizeof(xfs_bmbt_rec_t
);
589 return blocklen
/ (sizeof(xfs_bmbt_key_t
) + sizeof(xfs_bmbt_ptr_t
));
593 * Calculate number of records in a bmap btree inode root.
600 blocklen
-= sizeof(xfs_bmdr_block_t
);
603 return blocklen
/ sizeof(xfs_bmdr_rec_t
);
604 return blocklen
/ (sizeof(xfs_bmdr_key_t
) + sizeof(xfs_bmdr_ptr_t
));
608 * Change the owner of a btree format fork fo the inode passed in. Change it to
609 * the owner of that is passed in so that we can change owners before or after
610 * we switch forks between inodes. The operation that the caller is doing will
611 * determine whether is needs to change owner before or after the switch.
613 * For demand paged transactional modification, the fork switch should be done
614 * after reading in all the blocks, modifying them and pinning them in the
615 * transaction. For modification when the buffers are already pinned in memory,
616 * the fork switch can be done before changing the owner as we won't need to
617 * validate the owner until the btree buffers are unpinned and writes can occur
620 * For recovery based ownership change, there is no transactional context and
621 * so a buffer list must be supplied so that we can record the buffers that we
622 * modified for the caller to issue IO on.
625 xfs_bmbt_change_owner(
626 struct xfs_trans
*tp
,
627 struct xfs_inode
*ip
,
630 struct list_head
*buffer_list
)
632 struct xfs_btree_cur
*cur
;
635 ASSERT(tp
|| buffer_list
);
636 ASSERT(!(tp
&& buffer_list
));
637 ASSERT(XFS_IFORK_PTR(ip
, whichfork
)->if_format
== XFS_DINODE_FMT_BTREE
);
639 cur
= xfs_bmbt_init_cursor(ip
->i_mount
, tp
, ip
, whichfork
);
640 cur
->bc_ino
.flags
|= XFS_BTCUR_BMBT_INVALID_OWNER
;
642 error
= xfs_btree_change_owner(cur
, new_owner
, buffer_list
);
643 xfs_btree_del_cursor(cur
, error
);
647 /* Calculate the bmap btree size for some records. */
650 struct xfs_mount
*mp
,
651 unsigned long long len
)
653 return xfs_btree_calc_size(mp
->m_bmap_dmnr
, len
);