// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 */
6 #include "libxfs_priv.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_trans.h"
14 #include "xfs_btree.h"
15 #include "xfs_bmap_btree.h"
17 #include "xfs_trace.h"
18 #include "xfs_attr_sf.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2_priv.h"
22 #include "xfs_attr_leaf.h"
23 #include "xfs_shared.h"
26 kmem_zone_t
*xfs_ifork_zone
;
28 STATIC
int xfs_iformat_local(xfs_inode_t
*, xfs_dinode_t
*, int, int);
29 STATIC
int xfs_iformat_extents(xfs_inode_t
*, xfs_dinode_t
*, int);
30 STATIC
int xfs_iformat_btree(xfs_inode_t
*, xfs_dinode_t
*, int);
33 * Copy inode type and data and attr format specific information from the
34 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
35 * and sockets this means set i_rdev to the proper value. For files,
36 * directories, and symlinks this means to bring in the in-line data or extent
37 * pointers as well as the attribute fork. For a fork in B-tree format, only
38 * the root is immediately brought in-core. The rest will be read in later when
39 * first referenced (see xfs_iread_extents()).
44 struct xfs_dinode
*dip
)
46 struct inode
*inode
= VFS_I(ip
);
47 struct xfs_attr_shortform
*atp
;
52 switch (inode
->i_mode
& S_IFMT
) {
58 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
64 switch (dip
->di_format
) {
65 case XFS_DINODE_FMT_LOCAL
:
66 di_size
= be64_to_cpu(dip
->di_size
);
68 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
, size
);
70 case XFS_DINODE_FMT_EXTENTS
:
71 error
= xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
73 case XFS_DINODE_FMT_BTREE
:
74 error
= xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
87 if (xfs_is_reflink_inode(ip
)) {
88 ASSERT(ip
->i_cowfp
== NULL
);
89 xfs_ifork_init_cow(ip
);
92 if (!XFS_DFORK_Q(dip
))
95 ASSERT(ip
->i_afp
== NULL
);
96 ip
->i_afp
= kmem_zone_zalloc(xfs_ifork_zone
, KM_SLEEP
| KM_NOFS
);
98 switch (dip
->di_aformat
) {
99 case XFS_DINODE_FMT_LOCAL
:
100 atp
= (xfs_attr_shortform_t
*)XFS_DFORK_APTR(dip
);
101 size
= be16_to_cpu(atp
->hdr
.totsize
);
103 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
, size
);
105 case XFS_DINODE_FMT_EXTENTS
:
106 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
108 case XFS_DINODE_FMT_BTREE
:
109 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
112 error
= -EFSCORRUPTED
;
116 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
119 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
121 xfs_idestroy_fork(ip
, XFS_DATA_FORK
);
128 struct xfs_inode
*ip
,
133 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
134 int mem_size
= size
, real_size
= 0;
138 * If we are using the local fork to store a symlink body we need to
139 * zero-terminate it so that we can pass it back to the VFS directly.
140 * Overallocate the in-memory fork by one for that and add a zero
141 * to terminate it below.
143 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
148 real_size
= roundup(mem_size
, 4);
149 ifp
->if_u1
.if_data
= kmem_alloc(real_size
, KM_SLEEP
| KM_NOFS
);
150 memcpy(ifp
->if_u1
.if_data
, data
, size
);
152 ifp
->if_u1
.if_data
[size
] = '\0';
154 ifp
->if_u1
.if_data
= NULL
;
157 ifp
->if_bytes
= size
;
158 ifp
->if_flags
&= ~(XFS_IFEXTENTS
| XFS_IFBROOT
);
159 ifp
->if_flags
|= XFS_IFINLINE
;
163 * The file is in-lined in the on-disk inode.
173 * If the size is unreasonable, then something
174 * is wrong and we just bail out rather than crash in
175 * kmem_alloc() or memcpy() below.
177 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
178 xfs_warn(ip
->i_mount
,
179 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
180 (unsigned long long) ip
->i_ino
, size
,
181 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
182 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
183 "xfs_iformat_local", dip
, sizeof(*dip
),
185 return -EFSCORRUPTED
;
188 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
193 * The file consists of a set of extents all of which fit into the on-disk
198 struct xfs_inode
*ip
,
199 struct xfs_dinode
*dip
,
202 struct xfs_mount
*mp
= ip
->i_mount
;
203 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
204 int state
= xfs_bmap_fork_to_state(whichfork
);
205 int nex
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
206 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
207 struct xfs_iext_cursor icur
;
208 struct xfs_bmbt_rec
*dp
;
209 struct xfs_bmbt_irec
new;
213 * If the number of extents is unreasonable, then something is wrong and
214 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
216 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
217 xfs_warn(ip
->i_mount
, "corrupt inode %Lu ((a)extents = %d).",
218 (unsigned long long) ip
->i_ino
, nex
);
219 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
220 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
222 return -EFSCORRUPTED
;
226 ifp
->if_u1
.if_root
= NULL
;
229 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
231 xfs_iext_first(ifp
, &icur
);
232 for (i
= 0; i
< nex
; i
++, dp
++) {
235 xfs_bmbt_disk_get_all(dp
, &new);
236 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
238 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
239 "xfs_iformat_extents(2)",
240 dp
, sizeof(*dp
), fa
);
241 return -EFSCORRUPTED
;
244 xfs_iext_insert(ip
, &icur
, &new, state
);
245 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
246 xfs_iext_next(ifp
, &icur
);
249 ifp
->if_flags
|= XFS_IFEXTENTS
;
254 * The file has too many extents to fit into
255 * the inode, so they are in B-tree format.
256 * Allocate a buffer for the root of the B-tree
257 * and copy the root into it. The i_extents
258 * field will remain NULL until all of the
259 * extents are read in (when they are needed).
267 struct xfs_mount
*mp
= ip
->i_mount
;
268 xfs_bmdr_block_t
*dfp
;
269 struct xfs_ifork
*ifp
;
275 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
276 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
277 size
= XFS_BMAP_BROOT_SPACE(mp
, dfp
);
278 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
279 level
= be16_to_cpu(dfp
->bb_level
);
282 * blow out if -- fork has less extents than can fit in
283 * fork (fork shouldn't be a btree format), root btree
284 * block has more records than can fit into the fork,
285 * or the number of extents is greater than the number of
288 if (unlikely(XFS_IFORK_NEXTENTS(ip
, whichfork
) <=
289 XFS_IFORK_MAXEXT(ip
, whichfork
) ||
291 XFS_BMDR_SPACE_CALC(nrecs
) >
292 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
293 XFS_IFORK_NEXTENTS(ip
, whichfork
) > ip
->i_d
.di_nblocks
) ||
294 level
== 0 || level
> XFS_BTREE_MAXLEVELS
) {
295 xfs_warn(mp
, "corrupt inode %Lu (btree).",
296 (unsigned long long) ip
->i_ino
);
297 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
298 "xfs_iformat_btree", dfp
, size
,
300 return -EFSCORRUPTED
;
303 ifp
->if_broot_bytes
= size
;
304 ifp
->if_broot
= kmem_alloc(size
, KM_SLEEP
| KM_NOFS
);
305 ASSERT(ifp
->if_broot
!= NULL
);
307 * Copy and convert from the on-disk structure
308 * to the in-memory structure.
310 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
311 ifp
->if_broot
, size
);
312 ifp
->if_flags
&= ~XFS_IFEXTENTS
;
313 ifp
->if_flags
|= XFS_IFBROOT
;
316 ifp
->if_u1
.if_root
= NULL
;
322 * Reallocate the space for if_broot based on the number of records
323 * being added or deleted as indicated in rec_diff. Move the records
324 * and pointers in if_broot to fit the new size. When shrinking this
325 * will eliminate holes between the records and pointers created by
326 * the caller. When growing this will create holes to be filled in
329 * The caller must not request to add more records than would fit in
330 * the on-disk inode root. If the if_broot is currently NULL, then
331 * if we are adding records, one will be allocated. The caller must also
332 * not request that the number of records go below zero, although
335 * ip -- the inode whose if_broot area is changing
336 * ext_diff -- the change in the number of records, positive or negative,
337 * requested for the if_broot array.
345 struct xfs_mount
*mp
= ip
->i_mount
;
347 struct xfs_ifork
*ifp
;
348 struct xfs_btree_block
*new_broot
;
355 * Handle the degenerate case quietly.
361 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
364 * If there wasn't any memory allocated before, just
365 * allocate it now and get out.
367 if (ifp
->if_broot_bytes
== 0) {
368 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, rec_diff
);
369 ifp
->if_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
370 ifp
->if_broot_bytes
= (int)new_size
;
375 * If there is already an existing if_broot, then we need
376 * to realloc() it and shift the pointers to their new
377 * location. The records don't change location because
378 * they are kept butted up against the btree block header.
380 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
381 new_max
= cur_max
+ rec_diff
;
382 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
383 ifp
->if_broot
= kmem_realloc(ifp
->if_broot
, new_size
,
385 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
386 ifp
->if_broot_bytes
);
387 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
389 ifp
->if_broot_bytes
= (int)new_size
;
390 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
391 XFS_IFORK_SIZE(ip
, whichfork
));
392 memmove(np
, op
, cur_max
* (uint
)sizeof(xfs_fsblock_t
));
397 * rec_diff is less than 0. In this case, we are shrinking the
398 * if_broot buffer. It must already exist. If we go to zero
399 * records, just get rid of the root and clear the status bit.
401 ASSERT((ifp
->if_broot
!= NULL
) && (ifp
->if_broot_bytes
> 0));
402 cur_max
= xfs_bmbt_maxrecs(mp
, ifp
->if_broot_bytes
, 0);
403 new_max
= cur_max
+ rec_diff
;
404 ASSERT(new_max
>= 0);
406 new_size
= XFS_BMAP_BROOT_SPACE_CALC(mp
, new_max
);
410 new_broot
= kmem_alloc(new_size
, KM_SLEEP
| KM_NOFS
);
412 * First copy over the btree block header.
414 memcpy(new_broot
, ifp
->if_broot
,
415 XFS_BMBT_BLOCK_LEN(ip
->i_mount
));
418 ifp
->if_flags
&= ~XFS_IFBROOT
;
422 * Only copy the records and pointers if there are any.
426 * First copy the records.
428 op
= (char *)XFS_BMBT_REC_ADDR(mp
, ifp
->if_broot
, 1);
429 np
= (char *)XFS_BMBT_REC_ADDR(mp
, new_broot
, 1);
430 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_bmbt_rec_t
));
433 * Then copy the pointers.
435 op
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, ifp
->if_broot
, 1,
436 ifp
->if_broot_bytes
);
437 np
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp
, new_broot
, 1,
439 memcpy(np
, op
, new_max
* (uint
)sizeof(xfs_fsblock_t
));
441 kmem_free(ifp
->if_broot
);
442 ifp
->if_broot
= new_broot
;
443 ifp
->if_broot_bytes
= (int)new_size
;
445 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
446 XFS_IFORK_SIZE(ip
, whichfork
));
452 * This is called when the amount of space needed for if_data
453 * is increased or decreased. The change in size is indicated by
454 * the number of bytes that need to be added or deleted in the
455 * byte_diff parameter.
457 * If the amount of space needed has decreased below the size of the
458 * inline buffer, then switch to using the inline buffer. Otherwise,
459 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
462 * ip -- the inode whose if_data area is changing
463 * byte_diff -- the change in the number of bytes, positive or negative,
464 * requested for the if_data array.
468 struct xfs_inode
*ip
,
472 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
473 int new_size
= (int)ifp
->if_bytes
+ byte_diff
;
475 ASSERT(new_size
>= 0);
476 ASSERT(new_size
<= XFS_IFORK_SIZE(ip
, whichfork
));
482 kmem_free(ifp
->if_u1
.if_data
);
483 ifp
->if_u1
.if_data
= NULL
;
489 * For inline data, the underlying buffer must be a multiple of 4 bytes
490 * in size so that it can be logged and stay on word boundaries.
491 * We enforce that here.
493 ifp
->if_u1
.if_data
= kmem_realloc(ifp
->if_u1
.if_data
,
494 roundup(new_size
, 4), KM_SLEEP
| KM_NOFS
);
495 ifp
->if_bytes
= new_size
;
503 struct xfs_ifork
*ifp
;
505 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
506 if (ifp
->if_broot
!= NULL
) {
507 kmem_free(ifp
->if_broot
);
508 ifp
->if_broot
= NULL
;
512 * If the format is local, then we can't have an extents
513 * array so just look for an inline data array. If we're
514 * not local then we may or may not have an extents list,
515 * so check and free it up if we do.
517 if (XFS_IFORK_FORMAT(ip
, whichfork
) == XFS_DINODE_FMT_LOCAL
) {
518 if (ifp
->if_u1
.if_data
!= NULL
) {
519 kmem_free(ifp
->if_u1
.if_data
);
520 ifp
->if_u1
.if_data
= NULL
;
522 } else if ((ifp
->if_flags
& XFS_IFEXTENTS
) && ifp
->if_height
) {
523 xfs_iext_destroy(ifp
);
526 if (whichfork
== XFS_ATTR_FORK
) {
527 kmem_zone_free(xfs_ifork_zone
, ip
->i_afp
);
529 } else if (whichfork
== XFS_COW_FORK
) {
530 kmem_zone_free(xfs_ifork_zone
, ip
->i_cowfp
);
536 * Convert in-core extents to on-disk form
538 * In the case of the data fork, the in-core and on-disk fork sizes can be
539 * different due to delayed allocation extents. We only copy on-disk extents
540 * here, so callers must always use the physical fork size to determine the
541 * size of the buffer passed to this routine. We will return the size actually
546 struct xfs_inode
*ip
,
547 struct xfs_bmbt_rec
*dp
,
550 int state
= xfs_bmap_fork_to_state(whichfork
);
551 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
552 struct xfs_iext_cursor icur
;
553 struct xfs_bmbt_irec rec
;
556 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
));
557 ASSERT(ifp
->if_bytes
> 0);
559 for_each_xfs_iext(ifp
, &icur
, &rec
) {
560 if (isnullstartblock(rec
.br_startblock
))
562 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
563 xfs_bmbt_disk_set_all(dp
, &rec
);
564 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
565 copied
+= sizeof(struct xfs_bmbt_rec
);
570 ASSERT(copied
<= ifp
->if_bytes
);
575 * Each of the following cases stores data into the same region
576 * of the on-disk inode, so only one of them can be valid at
577 * any given time. While it is possible to have conflicting formats
578 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
579 * in EXTENTS format, this can only happen when the fork has
580 * changed formats after being modified but before being flushed.
581 * In these cases, the format always takes precedence, because the
582 * format indicates the current state of the fork.
588 xfs_inode_log_item_t
*iip
,
592 struct xfs_ifork
*ifp
;
594 static const short brootflag
[2] =
595 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
596 static const short dataflag
[2] =
597 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
598 static const short extflag
[2] =
599 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
603 ifp
= XFS_IFORK_PTR(ip
, whichfork
);
605 * This can happen if we gave up in iformat in an error path,
606 * for the attribute fork.
609 ASSERT(whichfork
== XFS_ATTR_FORK
);
612 cp
= XFS_DFORK_PTR(dip
, whichfork
);
614 switch (XFS_IFORK_FORMAT(ip
, whichfork
)) {
615 case XFS_DINODE_FMT_LOCAL
:
616 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
617 (ifp
->if_bytes
> 0)) {
618 ASSERT(ifp
->if_u1
.if_data
!= NULL
);
619 ASSERT(ifp
->if_bytes
<= XFS_IFORK_SIZE(ip
, whichfork
));
620 memcpy(cp
, ifp
->if_u1
.if_data
, ifp
->if_bytes
);
624 case XFS_DINODE_FMT_EXTENTS
:
625 ASSERT((ifp
->if_flags
& XFS_IFEXTENTS
) ||
626 !(iip
->ili_fields
& extflag
[whichfork
]));
627 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
628 (ifp
->if_bytes
> 0)) {
629 ASSERT(XFS_IFORK_NEXTENTS(ip
, whichfork
) > 0);
630 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
635 case XFS_DINODE_FMT_BTREE
:
636 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
637 (ifp
->if_broot_bytes
> 0)) {
638 ASSERT(ifp
->if_broot
!= NULL
);
639 ASSERT(XFS_BMAP_BMDR_SPACE(ifp
->if_broot
) <=
640 XFS_IFORK_SIZE(ip
, whichfork
));
641 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
642 (xfs_bmdr_block_t
*)cp
,
643 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
647 case XFS_DINODE_FMT_DEV
:
648 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
649 ASSERT(whichfork
== XFS_DATA_FORK
);
650 xfs_dinode_put_rdev(dip
,
651 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
661 /* Convert bmap state flags to an inode fork. */
663 xfs_iext_state_to_fork(
664 struct xfs_inode
*ip
,
667 if (state
& BMAP_COWFORK
)
669 else if (state
& BMAP_ATTRFORK
)
675 * Initialize an inode's copy-on-write fork.
679 struct xfs_inode
*ip
)
684 ip
->i_cowfp
= kmem_zone_zalloc(xfs_ifork_zone
,
686 ip
->i_cowfp
->if_flags
= XFS_IFEXTENTS
;
687 ip
->i_cformat
= XFS_DINODE_FMT_EXTENTS
;
691 /* Default fork content verifiers. */
692 struct xfs_ifork_ops xfs_default_ifork_ops
= {
693 .verify_attr
= xfs_attr_shortform_verify
,
694 .verify_dir
= xfs_dir2_sf_verify
,
695 .verify_symlink
= xfs_symlink_shortform_verify
,
698 /* Verify the inline contents of the data fork of an inode. */
700 xfs_ifork_verify_data(
701 struct xfs_inode
*ip
,
702 struct xfs_ifork_ops
*ops
)
704 /* Non-local data fork, we're done. */
705 if (ip
->i_d
.di_format
!= XFS_DINODE_FMT_LOCAL
)
708 /* Check the inline data fork if there is one. */
709 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
711 return ops
->verify_dir(ip
);
713 return ops
->verify_symlink(ip
);
719 /* Verify the inline contents of the attr fork of an inode. */
721 xfs_ifork_verify_attr(
722 struct xfs_inode
*ip
,
723 struct xfs_ifork_ops
*ops
)
725 /* There has to be an attr fork allocated if aformat is local. */
726 if (ip
->i_d
.di_aformat
!= XFS_DINODE_FMT_LOCAL
)
728 if (!XFS_IFORK_PTR(ip
, XFS_ATTR_FORK
))
729 return __this_address
;
730 return ops
->verify_attr(ip
);