1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
6 #include "libxfs_priv.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_trans.h"
14 #include "xfs_btree.h"
15 #include "xfs_bmap_btree.h"
17 #include "xfs_trace.h"
18 #include "xfs_attr_sf.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2_priv.h"
22 #include "xfs_attr_leaf.h"
23 #include "xfs_shared.h"
26 kmem_zone_t
*xfs_ifork_zone
;
28 STATIC
int xfs_iformat_local(xfs_inode_t
*, xfs_dinode_t
*, int, int);
29 STATIC
int xfs_iformat_extents(xfs_inode_t
*, xfs_dinode_t
*, int);
30 STATIC
int xfs_iformat_btree(xfs_inode_t
*, xfs_dinode_t
*, int);
33 * Copy inode type and data and attr format specific information from the
34 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
35 * and sockets this means set i_rdev to the proper value. For files,
36 * directories, and symlinks this means to bring in the in-line data or extent
37 * pointers as well as the attribute fork. For a fork in B-tree format, only
38 * the root is immediately brought in-core. The rest will be read in later when
39 * first referenced (see xfs_iread_extents()).
44 struct xfs_dinode
*dip
)
46 struct inode
*inode
= VFS_I(ip
);
47 struct xfs_attr_shortform
*atp
;
52 switch (inode
->i_mode
& S_IFMT
) {
58 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
64 switch (dip
->di_format
) {
65 case XFS_DINODE_FMT_LOCAL
:
66 di_size
= be64_to_cpu(dip
->di_size
);
68 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
, size
);
70 case XFS_DINODE_FMT_EXTENTS
:
71 error
= xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
73 case XFS_DINODE_FMT_BTREE
:
74 error
= xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
87 if (xfs_is_reflink_inode(ip
)) {
88 ASSERT(ip
->i_cowfp
== NULL
);
89 xfs_ifork_init_cow(ip
);
92 if (!XFS_DFORK_Q(dip
))
95 ASSERT(ip
->i_afp
== NULL
);
96 ip
->i_afp
= kmem_zone_zalloc(xfs_ifork_zone
, KM_SLEEP
| KM_NOFS
);
98 switch (dip
->di_aformat
) {
99 case XFS_DINODE_FMT_LOCAL
:
100 atp
= (xfs_attr_shortform_t
*)XFS_DFORK_APTR(dip
);
101 size
= be16_to_cpu(atp
->hdr
.totsize
);
103 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
, size
);
105 case XFS_DINODE_FMT_EXTENTS
:
106 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
108 case XFS_DINODE_FMT_BTREE
:
109 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
112 error
= -EFSCORRUPTED
;
116 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
119 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
121 xfs_idestroy_fork(ip
, XFS_DATA_FORK
);
128 struct xfs_inode
*ip
,
133 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
134 int mem_size
= size
, real_size
= 0;
138 * If we are using the local fork to store a symlink body we need to
139 * zero-terminate it so that we can pass it back to the VFS directly.
140 * Overallocate the in-memory fork by one for that and add a zero
141 * to terminate it below.
143 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
148 real_size
= roundup(mem_size
, 4);
149 ifp
->if_u1
.if_data
= kmem_alloc(real_size
, KM_SLEEP
| KM_NOFS
);
150 memcpy(ifp
->if_u1
.if_data
, data
, size
);
152 ifp
->if_u1
.if_data
[size
] = '\0';
154 ifp
->if_u1
.if_data
= NULL
;
157 ifp
->if_bytes
= size
;
158 ifp
->if_flags
&= ~(XFS_IFEXTENTS
| XFS_IFBROOT
);
159 ifp
->if_flags
|= XFS_IFINLINE
;
163 * The file is in-lined in the on-disk inode.
173 * If the size is unreasonable, then something
174 * is wrong and we just bail out rather than crash in
175 * kmem_alloc() or memcpy() below.
177 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
178 xfs_warn(ip
->i_mount
,
179 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
180 (unsigned long long) ip
->i_ino
, size
,
181 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
182 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
183 "xfs_iformat_local", dip
, sizeof(*dip
),
185 return -EFSCORRUPTED
;
188 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
193 * The file consists of a set of extents all of which fit into the on-disk
198 struct xfs_inode
*ip
,
199 struct xfs_dinode
*dip
,
202 struct xfs_mount
*mp
= ip
->i_mount
;
203 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
204 int state
= xfs_bmap_fork_to_state(whichfork
);
205 int nex
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
206 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
207 struct xfs_iext_cursor icur
;
208 struct xfs_bmbt_rec
*dp
;
209 struct xfs_bmbt_irec
new;
213 * If the number of extents is unreasonable, then something is wrong and
214 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
216 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
217 xfs_warn(ip
->i_mount
, "corrupt inode %Lu ((a)extents = %d).",
218 (unsigned long long) ip
->i_ino
, nex
);
219 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
220 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
222 return -EFSCORRUPTED
;
226 ifp
->if_u1
.if_root
= NULL
;
229 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
231 xfs_iext_first(ifp
, &icur
);
232 for (i
= 0; i
< nex
; i
++, dp
++) {
235 xfs_bmbt_disk_get_all(dp
, &new);
236 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
238 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
239 "xfs_iformat_extents(2)",
240 dp
, sizeof(*dp
), fa
);
241 return -EFSCORRUPTED
;
244 xfs_iext_insert(ip
, &icur
, &new, state
);
245 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
246 xfs_iext_next(ifp
, &icur
);
249 ifp
->if_flags
|= XFS_IFEXTENTS
;
254 * The file has too many extents to fit into
255 * the inode, so they are in B-tree format.
256 * Allocate a buffer for the root of the B-tree
257 * and copy the root into it. The i_extents
258 * field will remain NULL until all of the
259 * extents are read in (when they are needed).
267 struct xfs_mount
*mp
= ip
->i_mount
;
268 xfs_bmdr_block_t
*dfp
;
275 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
276 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
277 size
= XFS_BMAP_BROOT_SPACE(mp
, dfp
);
278 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
279 level
= be16_to_cpu(dfp
->bb_level
);
282 * blow out if -- fork has less extents than can fit in
283 * fork (fork shouldn't be a btree format), root btree
284 * block has more records than can fit into the fork,
285 * or the number of extents is greater than the number of
288 if (unlikely(XFS_IFORK_NEXTENTS(ip
, whichfork
) <=
289 XFS_IFORK_MAXEXT(ip
, whichfork
) ||
291 XFS_BMDR_SPACE_CALC(nrecs
) >
292 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
293 XFS_IFORK_NEXTENTS(ip
, whichfork
) > ip
->i_d
.di_nblocks
) ||
294 level
== 0 || level
> XFS_BTREE_MAXLEVELS
) {
295 xfs_warn(mp
, "corrupt inode %Lu (btree).",
296 (unsigned long long) ip
->i_ino
);
297 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
298 "xfs_iformat_btree", dfp
, size
,
300 return -EFSCORRUPTED
;
303 ifp
->if_broot_bytes
= size
;
304 ifp
->if_broot
= kmem_alloc(size
, KM_SLEEP
| KM_NOFS
);
305 ASSERT(ifp
->if_broot
!= NULL
);
307 * Copy and convert from the on-disk structure
308 * to the in-memory structure.
310 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
311 ifp
->if_broot
, size
);
312 ifp
->if_flags
&= ~XFS_IFEXTENTS
;
313 ifp
->if_flags
|= XFS_IFBROOT
;
316 ifp
->if_u1
.if_root
= NULL
;
322 * Reallocate the space for if_broot based on the number of records
323 * being added or deleted as indicated in rec_diff. Move the records
324 * and pointers in if_broot to fit the new size. When shrinking this
325 * will eliminate holes between the records and pointers created by
326 * the caller. When growing this will create holes to be filled in
329 * The caller must not request to add more records than would fit in
330 * the on-disk inode root. If the if_broot is currently NULL, then
331 * if we are adding records, one will be allocated. The caller must also
332 * not request that the number of records go below zero, although
335 * ip -- the inode whose if_broot area is changing
336 * ext_diff -- the change in the number of records, positive or negative,
337 * requested for the if_broot array.
345 struct xfs_mount
*mp
= ip
->i_mount
;
348 struct xfs_btree_block
*new_broot
;
355 * Handle the degenerate case quietly.
361 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
364 * If there wasn't any memory allocated before, just
365 * allocate it now and get out.
367 if (ifp
->if_broot_bytes
== 0) {
368 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, rec_diff
);
369 ifp
->if_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
370 ifp
->if_broot_bytes
= (int)new_size
;
375 * If there is already an existing if_broot, then we need
376 * to realloc() it and shift the pointers to their new
377 * location. The records don't change location because
378 * they are kept butted up against the btree block header.
380 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
381 new_max
= cur_max
+ rec_diff
;
382 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
383 ifp
->if_broot
= kmem_realloc(ifp
->if_broot
, new_size
,
385 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
386 ifp
->if_broot_bytes
);
387 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
389 ifp
->if_broot_bytes
= (int)new_size
;
390 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
391 XFS_IFORK_SIZE(ip
, whichfork
));
392 memmove(np
, op
, cur_max
* (uint
)sizeof(xfs_fsblock_t
));
397 * rec_diff is less than 0. In this case, we are shrinking the
398 * if_broot buffer. It must already exist. If we go to zero
399 * records, just get rid of the root and clear the status bit.
401 ASSERT((ifp
->if_broot
!= NULL
) && (ifp
->if_broot_bytes
> 0));
402 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
403 new_max
= cur_max
+ rec_diff
;
404 ASSERT(new_max
>= 0);
406 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
410 new_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
412 * First copy over the btree block header.
414 memcpy(new_broot
, ifp
->if_broot
,
415 XFS_BMBT_BLOCK_LEN(ip
->i_mount
));
418 ifp
->if_flags
&= ~XFS_IFBROOT
;
422 * Only copy the records and pointers if there are any.
426 * First copy the records.
428 op
= (char *)XFS_BMBT_REC_ADDR(mp
, ifp
->if_broot
, 1);
429 np
= (char *)XFS_BMBT_REC_ADDR(mp
, new_broot
, 1);
430 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_bmbt_rec_t
));
433 * Then copy the pointers.
435 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
436 ifp
->if_broot_bytes
);
437 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, new_broot
, 1,
439 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_fsblock_t
));
441 kmem_free(ifp
->if_broot
);
442 ifp
->if_broot
= new_broot
;
443 ifp
->if_broot_bytes
= (int)new_size
;
445 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
446 XFS_IFORK_SIZE(ip
, whichfork
));
452 * This is called when the amount of space needed for if_data
453 * is increased or decreased. The change in size is indicated by
454 * the number of bytes that need to be added or deleted in the
455 * byte_diff parameter.
457 * If the amount of space needed has decreased below the size of the
458 * inline buffer, then switch to using the inline buffer. Otherwise,
459 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
462 * ip -- the inode whose if_data area is changing
463 * byte_diff -- the change in the number of bytes, positive or negative,
464 * requested for the if_data array.
476 if (byte_diff
== 0) {
480 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
481 new_size
= (int)ifp
->if_bytes
+ byte_diff
;
482 ASSERT(new_size
>= 0);
485 kmem_free(ifp
->if_u1
.if_data
);
486 ifp
->if_u1
.if_data
= NULL
;
490 * Stuck with malloc/realloc.
491 * For inline data, the underlying buffer must be
492 * a multiple of 4 bytes in size so that it can be
493 * logged and stay on word boundaries. We enforce
496 real_size
= roundup(new_size
, 4);
497 if (ifp
->if_u1
.if_data
== NULL
) {
498 ifp
->if_u1
.if_data
= kmem_alloc(real_size
,
502 * Only do the realloc if the underlying size
503 * is really changing.
506 kmem_realloc(ifp
->if_u1
.if_data
,
511 ifp
->if_bytes
= new_size
;
512 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
522 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
523 if (ifp
->if_broot
!= NULL
) {
524 kmem_free(ifp
->if_broot
);
525 ifp
->if_broot
= NULL
;
529 * If the format is local, then we can't have an extents
530 * array so just look for an inline data array. If we're
531 * not local then we may or may not have an extents list,
532 * so check and free it up if we do.
534 if (XFS_IFORK_FORMAT(ip
, whichfork
) == XFS_DINODE_FMT_LOCAL
) {
535 if (ifp
->if_u1
.if_data
!= NULL
) {
536 kmem_free(ifp
->if_u1
.if_data
);
537 ifp
->if_u1
.if_data
= NULL
;
539 } else if ((ifp
->if_flags
& XFS_IFEXTENTS
) && ifp
->if_height
) {
540 xfs_iext_destroy(ifp
);
543 if (whichfork
== XFS_ATTR_FORK
) {
544 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
546 } else if (whichfork
== XFS_COW_FORK
) {
547 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
553 * Convert in-core extents to on-disk form
555 * In the case of the data fork, the in-core and on-disk fork sizes can be
556 * different due to delayed allocation extents. We only copy on-disk extents
557 * here, so callers must always use the physical fork size to determine the
558 * size of the buffer passed to this routine. We will return the size actually
563 struct xfs_inode
*ip
,
564 struct xfs_bmbt_rec
*dp
,
567 int state
= xfs_bmap_fork_to_state(whichfork
);
568 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
569 struct xfs_iext_cursor icur
;
570 struct xfs_bmbt_irec rec
;
573 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
));
574 ASSERT(ifp
->if_bytes
> 0);
576 for_each_xfs_iext(ifp
, &icur
, &rec
) {
577 if (isnullstartblock(rec
.br_startblock
))
579 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
580 xfs_bmbt_disk_set_all(dp
, &rec
);
581 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
582 copied
+= sizeof(struct xfs_bmbt_rec
);
587 ASSERT(copied
<= ifp
->if_bytes
);
592 * Each of the following cases stores data into the same region
593 * of the on-disk inode, so only one of them can be valid at
594 * any given time. While it is possible to have conflicting formats
595 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
596 * in EXTENTS format, this can only happen when the fork has
597 * changed formats after being modified but before being flushed.
598 * In these cases, the format always takes precedence, because the
599 * format indicates the current state of the fork.
605 xfs_inode_log_item_t
*iip
,
611 static const short brootflag
[2] =
612 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
613 static const short dataflag
[2] =
614 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
615 static const short extflag
[2] =
616 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
620 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
622 * This can happen if we gave up in iformat in an error path,
623 * for the attribute fork.
626 ASSERT(whichfork
== XFS_ATTR_FORK
);
629 cp
= XFS_DFORK_PTR(dip
, whichfork
);
631 switch (XFS_IFORK_FORMAT(ip
, whichfork
)) {
632 case XFS_DINODE_FMT_LOCAL
:
633 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
634 (ifp
->if_bytes
> 0)) {
635 ASSERT(ifp
->if_u1
.if_data
!= NULL
);
636 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
637 memcpy(cp
, ifp
->if_u1
.if_data
, ifp
->if_bytes
);
641 case XFS_DINODE_FMT_EXTENTS
:
642 ASSERT((ifp
->if_flags
& XFS_IFEXTENTS
) ||
643 !(iip
->ili_fields
& extflag
[whichfork
]));
644 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
645 (ifp
->if_bytes
> 0)) {
646 ASSERT(XFS_IFORK_NEXTENTS(ip
, whichfork
) > 0);
647 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
652 case XFS_DINODE_FMT_BTREE
:
653 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
654 (ifp
->if_broot_bytes
> 0)) {
655 ASSERT(ifp
->if_broot
!= NULL
);
656 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
657 XFS_IFORK_SIZE(ip
, whichfork
));
658 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
659 (xfs_bmdr_block_t
*)cp
,
660 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
664 case XFS_DINODE_FMT_DEV
:
665 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
666 ASSERT(whichfork
== XFS_DATA_FORK
);
667 xfs_dinode_put_rdev(dip
,
668 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
678 /* Convert bmap state flags to an inode fork. */
680 xfs_iext_state_to_fork(
681 struct xfs_inode
*ip
,
684 if (state
& BMAP_COWFORK
)
686 else if (state
& BMAP_ATTRFORK
)
692 * Initialize an inode's copy-on-write fork.
696 struct xfs_inode
*ip
)
701 ip
->i_cowfp
= kmem_zone_zalloc(xfs_ifork_zone
,
703 ip
->i_cowfp
->if_flags
= XFS_IFEXTENTS
;
704 ip
->i_cformat
= XFS_DINODE_FMT_EXTENTS
;
708 /* Default fork content verifiers. */
709 struct xfs_ifork_ops xfs_default_ifork_ops
= {
710 .verify_attr
= xfs_attr_shortform_verify
,
711 .verify_dir
= xfs_dir2_sf_verify
,
712 .verify_symlink
= xfs_symlink_shortform_verify
,
715 /* Verify the inline contents of the data fork of an inode. */
717 xfs_ifork_verify_data(
718 struct xfs_inode
*ip
,
719 struct xfs_ifork_ops
*ops
)
721 /* Non-local data fork, we're done. */
722 if (ip
->i_d
.di_format
!= XFS_DINODE_FMT_LOCAL
)
725 /* Check the inline data fork if there is one. */
726 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
728 return ops
->verify_dir(ip
);
730 return ops
->verify_symlink(ip
);
736 /* Verify the inline contents of the attr fork of an inode. */
738 xfs_ifork_verify_attr(
739 struct xfs_inode
*ip
,
740 struct xfs_ifork_ops
*ops
)
742 /* There has to be an attr fork allocated if aformat is local. */
743 if (ip
->i_d
.di_aformat
!= XFS_DINODE_FMT_LOCAL
)
745 if (!XFS_IFORK_PTR(ip
, XFS_ATTR_FORK
))
746 return __this_address
;
747 return ops
->verify_attr(ip
);