1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
7 #include "libxfs_priv.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_btree.h"
17 #include "xfs_bmap_btree.h"
19 #include "xfs_trace.h"
20 #include "xfs_da_format.h"
21 #include "xfs_da_btree.h"
22 #include "xfs_dir2_priv.h"
23 #include "xfs_attr_leaf.h"
24 #include "xfs_types.h"
25 #include "xfs_errortag.h"
26 #include "xfs_health.h"
27 #include "xfs_symlink_remote.h"
28 #include "xfs_rtrmap_btree.h"
29 #include "xfs_rtrefcount_btree.h"
/*
 * kmem cache used by this file to allocate in-core struct xfs_ifork objects
 * (the copy-on-write fork is allocated from it with kmem_cache_zalloc()).
 */
struct kmem_cache	*xfs_ifork_cache;
40 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, whichfork
);
45 * If we are using the local fork to store a symlink body we need to
46 * zero-terminate it so that we can pass it back to the VFS directly.
47 * Overallocate the in-memory fork by one for that and add a zero
48 * to terminate it below.
50 zero_terminate
= S_ISLNK(VFS_I(ip
)->i_mode
);
55 char *new_data
= kmalloc(mem_size
,
56 GFP_KERNEL
| __GFP_NOLOCKDEP
| __GFP_NOFAIL
);
58 memcpy(new_data
, data
, size
);
60 new_data
[size
] = '\0';
62 ifp
->if_data
= new_data
;
71 * The file is in-lined in the on-disk inode.
76 struct xfs_dinode
*dip
,
81 * If the size is unreasonable, then something
82 * is wrong and we just bail out rather than crash in
83 * kmalloc() or memcpy() below.
85 if (unlikely(size
> XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
))) {
87 "corrupt inode %llu (bad size %d for local fork, size = %zd).",
88 (unsigned long long) ip
->i_ino
, size
,
89 XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
));
90 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
91 "xfs_iformat_local", dip
, sizeof(*dip
),
93 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
97 xfs_init_local_fork(ip
, whichfork
, XFS_DFORK_PTR(dip
, whichfork
), size
);
102 * The file consists of a set of extents all of which fit into the on-disk
107 struct xfs_inode
*ip
,
108 struct xfs_dinode
*dip
,
111 struct xfs_mount
*mp
= ip
->i_mount
;
112 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, whichfork
);
113 int state
= xfs_bmap_fork_to_state(whichfork
);
114 xfs_extnum_t nex
= xfs_dfork_nextents(dip
, whichfork
);
115 int size
= nex
* sizeof(xfs_bmbt_rec_t
);
116 struct xfs_iext_cursor icur
;
117 struct xfs_bmbt_rec
*dp
;
118 struct xfs_bmbt_irec
new;
122 * If the number of extents is unreasonable, then something is wrong and
123 * we just bail out rather than crash in kmalloc() or memcpy() below.
125 if (unlikely(size
< 0 || size
> XFS_DFORK_SIZE(dip
, mp
, whichfork
))) {
126 xfs_warn(ip
->i_mount
, "corrupt inode %llu ((a)extents = %llu).",
128 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
129 "xfs_iformat_extents(1)", dip
, sizeof(*dip
),
131 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
132 return -EFSCORRUPTED
;
139 dp
= (xfs_bmbt_rec_t
*) XFS_DFORK_PTR(dip
, whichfork
);
141 xfs_iext_first(ifp
, &icur
);
142 for (i
= 0; i
< nex
; i
++, dp
++) {
145 xfs_bmbt_disk_get_all(dp
, &new);
146 fa
= xfs_bmap_validate_extent(ip
, whichfork
, &new);
148 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
149 "xfs_iformat_extents(2)",
150 dp
, sizeof(*dp
), fa
);
151 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
152 return xfs_bmap_complain_bad_rec(ip
, whichfork
,
156 xfs_iext_insert(ip
, &icur
, &new, state
);
157 trace_xfs_read_extent(ip
, &icur
, state
, _THIS_IP_
);
158 xfs_iext_next(ifp
, &icur
);
165 * The file has too many extents to fit into
166 * the inode, so they are in B-tree format.
167 * Allocate a buffer for the root of the B-tree
168 * and copy the root into it. The i_extents
169 * field will remain NULL until all of the
170 * extents are read in (when they are needed).
174 struct xfs_inode
*ip
,
175 struct xfs_dinode
*dip
,
178 struct xfs_mount
*mp
= ip
->i_mount
;
179 xfs_bmdr_block_t
*dfp
;
180 struct xfs_ifork
*ifp
;
181 struct xfs_btree_block
*broot
;
186 ifp
= xfs_ifork_ptr(ip
, whichfork
);
187 dfp
= (xfs_bmdr_block_t
*)XFS_DFORK_PTR(dip
, whichfork
);
188 size
= xfs_bmap_broot_space(mp
, dfp
);
189 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
190 level
= be16_to_cpu(dfp
->bb_level
);
193 * blow out if -- fork has less extents than can fit in
194 * fork (fork shouldn't be a btree format), root btree
195 * block has more records than can fit into the fork,
196 * or the number of extents is greater than the number of
199 if (unlikely(ifp
->if_nextents
<= XFS_IFORK_MAXEXT(ip
, whichfork
) ||
201 xfs_bmdr_space_calc(nrecs
) >
202 XFS_DFORK_SIZE(dip
, mp
, whichfork
) ||
203 ifp
->if_nextents
> ip
->i_nblocks
) ||
204 level
== 0 || level
> XFS_BM_MAXLEVELS(mp
, whichfork
)) {
205 xfs_warn(mp
, "corrupt inode %llu (btree).",
206 (unsigned long long) ip
->i_ino
);
207 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
,
208 "xfs_iformat_btree", dfp
, size
,
210 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
211 return -EFSCORRUPTED
;
214 broot
= xfs_broot_alloc(ifp
, size
);
216 * Copy and convert from the on-disk structure
217 * to the in-memory structure.
219 xfs_bmdr_to_bmbt(ip
, dfp
, XFS_DFORK_SIZE(dip
, ip
->i_mount
, whichfork
),
229 xfs_iformat_data_fork(
230 struct xfs_inode
*ip
,
231 struct xfs_dinode
*dip
)
233 struct inode
*inode
= VFS_I(ip
);
237 * Initialize the extent count early, as the per-format routines may
238 * depend on it. Use release semantics to set needextents /after/ we
239 * set the format. This ensures that we can use acquire semantics on
240 * needextents in xfs_need_iread_extents() and be guaranteed to see a
241 * valid format value after that load.
243 ip
->i_df
.if_format
= dip
->di_format
;
244 ip
->i_df
.if_nextents
= xfs_dfork_data_extents(dip
);
245 smp_store_release(&ip
->i_df
.if_needextents
,
246 ip
->i_df
.if_format
== XFS_DINODE_FMT_BTREE
? 1 : 0);
248 switch (inode
->i_mode
& S_IFMT
) {
254 inode
->i_rdev
= xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip
));
259 switch (ip
->i_df
.if_format
) {
260 case XFS_DINODE_FMT_LOCAL
:
261 error
= xfs_iformat_local(ip
, dip
, XFS_DATA_FORK
,
262 be64_to_cpu(dip
->di_size
));
264 error
= xfs_ifork_verify_local_data(ip
);
266 case XFS_DINODE_FMT_EXTENTS
:
267 return xfs_iformat_extents(ip
, dip
, XFS_DATA_FORK
);
268 case XFS_DINODE_FMT_BTREE
:
269 return xfs_iformat_btree(ip
, dip
, XFS_DATA_FORK
);
270 case XFS_DINODE_FMT_META_BTREE
:
271 switch (ip
->i_metatype
) {
272 case XFS_METAFILE_RTRMAP
:
273 return xfs_iformat_rtrmap(ip
, dip
);
274 case XFS_METAFILE_RTREFCOUNT
:
275 return xfs_iformat_rtrefcount(ip
, dip
);
281 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
,
282 dip
, sizeof(*dip
), __this_address
);
283 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
284 return -EFSCORRUPTED
;
288 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, __func__
, dip
,
289 sizeof(*dip
), __this_address
);
290 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
291 return -EFSCORRUPTED
;
296 xfs_dfork_attr_shortform_size(
297 struct xfs_dinode
*dip
)
299 struct xfs_attr_sf_hdr
*sf
= XFS_DFORK_APTR(dip
);
301 return be16_to_cpu(sf
->totsize
);
306 struct xfs_inode
*ip
,
307 enum xfs_dinode_fmt format
,
308 xfs_extnum_t nextents
)
311 * Initialize the extent count early, as the per-format routines may
312 * depend on it. Use release semantics to set needextents /after/ we
313 * set the format. This ensures that we can use acquire semantics on
314 * needextents in xfs_need_iread_extents() and be guaranteed to see a
315 * valid format value after that load.
317 ip
->i_af
.if_format
= format
;
318 ip
->i_af
.if_nextents
= nextents
;
319 smp_store_release(&ip
->i_af
.if_needextents
,
320 ip
->i_af
.if_format
== XFS_DINODE_FMT_BTREE
? 1 : 0);
325 struct xfs_inode
*ip
)
327 xfs_idestroy_fork(&ip
->i_af
);
328 memset(&ip
->i_af
, 0, sizeof(struct xfs_ifork
));
329 ip
->i_af
.if_format
= XFS_DINODE_FMT_EXTENTS
;
333 xfs_iformat_attr_fork(
334 struct xfs_inode
*ip
,
335 struct xfs_dinode
*dip
)
337 xfs_extnum_t naextents
= xfs_dfork_attr_extents(dip
);
341 * Initialize the extent count early, as the per-format routines may
344 xfs_ifork_init_attr(ip
, dip
->di_aformat
, naextents
);
346 switch (ip
->i_af
.if_format
) {
347 case XFS_DINODE_FMT_LOCAL
:
348 error
= xfs_iformat_local(ip
, dip
, XFS_ATTR_FORK
,
349 xfs_dfork_attr_shortform_size(dip
));
351 error
= xfs_ifork_verify_local_attr(ip
);
353 case XFS_DINODE_FMT_EXTENTS
:
354 error
= xfs_iformat_extents(ip
, dip
, XFS_ATTR_FORK
);
356 case XFS_DINODE_FMT_BTREE
:
357 error
= xfs_iformat_btree(ip
, dip
, XFS_ATTR_FORK
);
360 xfs_inode_verifier_error(ip
, error
, __func__
, dip
,
361 sizeof(*dip
), __this_address
);
362 xfs_inode_mark_sick(ip
, XFS_SICK_INO_CORE
);
363 error
= -EFSCORRUPTED
;
368 xfs_ifork_zap_attr(ip
);
373 * Allocate the if_broot component of an inode fork so that it is @new_size
374 * bytes in size, using __GFP_NOLOCKDEP like all the other code that
375 * initializes a broot during inode load. Returns if_broot.
377 struct xfs_btree_block
*
379 struct xfs_ifork
*ifp
,
382 ASSERT(ifp
->if_broot
== NULL
);
384 ifp
->if_broot
= kmalloc(new_size
,
385 GFP_KERNEL
| __GFP_NOLOCKDEP
| __GFP_NOFAIL
);
386 ifp
->if_broot_bytes
= new_size
;
387 return ifp
->if_broot
;
391 * Reallocate the if_broot component of an inode fork so that it is @new_size
392 * bytes in size. Returns if_broot.
394 struct xfs_btree_block
*
396 struct xfs_ifork
*ifp
,
399 /* No size change? No action needed. */
400 if (new_size
== ifp
->if_broot_bytes
)
401 return ifp
->if_broot
;
403 /* New size is zero, free it. */
405 ifp
->if_broot_bytes
= 0;
406 kfree(ifp
->if_broot
);
407 ifp
->if_broot
= NULL
;
412 * Shrinking the iroot means we allocate a new smaller object and copy
413 * it. We don't trust krealloc not to nop on realloc-down.
415 if (ifp
->if_broot_bytes
> 0 && ifp
->if_broot_bytes
> new_size
) {
416 struct xfs_btree_block
*old_broot
= ifp
->if_broot
;
418 ifp
->if_broot
= kmalloc(new_size
, GFP_KERNEL
| __GFP_NOFAIL
);
419 ifp
->if_broot_bytes
= new_size
;
420 memcpy(ifp
->if_broot
, old_broot
, new_size
);
422 return ifp
->if_broot
;
426 * Growing the iroot means we can krealloc. This may get us the same
429 ifp
->if_broot
= krealloc(ifp
->if_broot
, new_size
,
430 GFP_KERNEL
| __GFP_NOFAIL
);
431 ifp
->if_broot_bytes
= new_size
;
432 return ifp
->if_broot
;
436 * This is called when the amount of space needed for if_data
437 * is increased or decreased. The change in size is indicated by
438 * the number of bytes that need to be added or deleted in the
439 * byte_diff parameter.
441 * If the amount of space needed has decreased below the size of the
442 * inline buffer, then switch to using the inline buffer. Otherwise,
443 * use krealloc() or kmalloc() to adjust the size of the buffer
446 * ip -- the inode whose if_data area is changing
447 * byte_diff -- the change in the number of bytes, positive or negative,
448 * requested for the if_data array.
452 struct xfs_inode
*ip
,
456 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, whichfork
);
457 int64_t new_size
= ifp
->if_bytes
+ byte_diff
;
459 ASSERT(new_size
>= 0);
460 ASSERT(new_size
<= xfs_inode_fork_size(ip
, whichfork
));
463 ifp
->if_data
= krealloc(ifp
->if_data
, new_size
,
464 GFP_KERNEL
| __GFP_NOFAIL
);
467 ifp
->if_bytes
= new_size
;
473 /* Free all memory and reset a fork back to its initial state. */
476 struct xfs_ifork
*ifp
)
478 if (ifp
->if_broot
!= NULL
) {
479 kfree(ifp
->if_broot
);
480 ifp
->if_broot
= NULL
;
483 switch (ifp
->if_format
) {
484 case XFS_DINODE_FMT_LOCAL
:
488 case XFS_DINODE_FMT_EXTENTS
:
489 case XFS_DINODE_FMT_BTREE
:
491 xfs_iext_destroy(ifp
);
497 * Convert in-core extents to on-disk form
499 * In the case of the data fork, the in-core and on-disk fork sizes can be
500 * different due to delayed allocation extents. We only copy on-disk extents
501 * here, so callers must always use the physical fork size to determine the
502 * size of the buffer passed to this routine. We will return the size actually
507 struct xfs_inode
*ip
,
508 struct xfs_bmbt_rec
*dp
,
511 int state
= xfs_bmap_fork_to_state(whichfork
);
512 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, whichfork
);
513 struct xfs_iext_cursor icur
;
514 struct xfs_bmbt_irec rec
;
517 xfs_assert_ilocked(ip
, XFS_ILOCK_EXCL
| XFS_ILOCK_SHARED
);
518 ASSERT(ifp
->if_bytes
> 0);
520 for_each_xfs_iext(ifp
, &icur
, &rec
) {
521 if (isnullstartblock(rec
.br_startblock
))
523 ASSERT(xfs_bmap_validate_extent(ip
, whichfork
, &rec
) == NULL
);
524 xfs_bmbt_disk_set_all(dp
, &rec
);
525 trace_xfs_write_extent(ip
, &icur
, state
, _RET_IP_
);
526 copied
+= sizeof(struct xfs_bmbt_rec
);
531 ASSERT(copied
<= ifp
->if_bytes
);
536 * Each of the following cases stores data into the same region
537 * of the on-disk inode, so only one of them can be valid at
538 * any given time. While it is possible to have conflicting formats
539 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
540 * in EXTENTS format, this can only happen when the fork has
541 * changed formats after being modified but before being flushed.
542 * In these cases, the format always takes precedence, because the
543 * format indicates the current state of the fork.
547 struct xfs_inode
*ip
,
548 struct xfs_dinode
*dip
,
549 struct xfs_inode_log_item
*iip
,
553 struct xfs_ifork
*ifp
;
555 static const short brootflag
[2] =
556 { XFS_ILOG_DBROOT
, XFS_ILOG_ABROOT
};
557 static const short dataflag
[2] =
558 { XFS_ILOG_DDATA
, XFS_ILOG_ADATA
};
559 static const short extflag
[2] =
560 { XFS_ILOG_DEXT
, XFS_ILOG_AEXT
};
564 ifp
= xfs_ifork_ptr(ip
, whichfork
);
566 * This can happen if we gave up in iformat in an error path,
567 * for the attribute fork.
570 ASSERT(whichfork
== XFS_ATTR_FORK
);
573 cp
= XFS_DFORK_PTR(dip
, whichfork
);
575 switch (ifp
->if_format
) {
576 case XFS_DINODE_FMT_LOCAL
:
577 if ((iip
->ili_fields
& dataflag
[whichfork
]) &&
578 (ifp
->if_bytes
> 0)) {
579 ASSERT(ifp
->if_data
!= NULL
);
580 ASSERT(ifp
->if_bytes
<= xfs_inode_fork_size(ip
, whichfork
));
581 memcpy(cp
, ifp
->if_data
, ifp
->if_bytes
);
585 case XFS_DINODE_FMT_EXTENTS
:
586 if ((iip
->ili_fields
& extflag
[whichfork
]) &&
587 (ifp
->if_bytes
> 0)) {
588 ASSERT(ifp
->if_nextents
> 0);
589 (void)xfs_iextents_copy(ip
, (xfs_bmbt_rec_t
*)cp
,
594 case XFS_DINODE_FMT_BTREE
:
595 if ((iip
->ili_fields
& brootflag
[whichfork
]) &&
596 (ifp
->if_broot_bytes
> 0)) {
597 ASSERT(ifp
->if_broot
!= NULL
);
598 ASSERT(xfs_bmap_bmdr_space(ifp
->if_broot
) <=
599 xfs_inode_fork_size(ip
, whichfork
));
600 xfs_bmbt_to_bmdr(mp
, ifp
->if_broot
, ifp
->if_broot_bytes
,
601 (xfs_bmdr_block_t
*)cp
,
602 XFS_DFORK_SIZE(dip
, mp
, whichfork
));
606 case XFS_DINODE_FMT_DEV
:
607 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
608 ASSERT(whichfork
== XFS_DATA_FORK
);
609 xfs_dinode_put_rdev(dip
,
610 linux_to_xfs_dev_t(VFS_I(ip
)->i_rdev
));
614 case XFS_DINODE_FMT_META_BTREE
:
615 ASSERT(whichfork
== XFS_DATA_FORK
);
617 if (!(iip
->ili_fields
& brootflag
[whichfork
]))
620 switch (ip
->i_metatype
) {
621 case XFS_METAFILE_RTRMAP
:
622 xfs_iflush_rtrmap(ip
, dip
);
624 case XFS_METAFILE_RTREFCOUNT
:
625 xfs_iflush_rtrefcount(ip
, dip
);
639 /* Convert bmap state flags to an inode fork. */
641 xfs_iext_state_to_fork(
642 struct xfs_inode
*ip
,
645 if (state
& BMAP_COWFORK
)
647 else if (state
& BMAP_ATTRFORK
)
653 * Initialize an inode's copy-on-write fork.
657 struct xfs_inode
*ip
)
662 ip
->i_cowfp
= kmem_cache_zalloc(xfs_ifork_cache
,
663 GFP_KERNEL
| __GFP_NOLOCKDEP
| __GFP_NOFAIL
);
664 ip
->i_cowfp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
667 /* Verify the inline contents of the data fork of an inode. */
669 xfs_ifork_verify_local_data(
670 struct xfs_inode
*ip
)
672 xfs_failaddr_t fa
= NULL
;
674 switch (VFS_I(ip
)->i_mode
& S_IFMT
) {
676 struct xfs_mount
*mp
= ip
->i_mount
;
677 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, XFS_DATA_FORK
);
678 struct xfs_dir2_sf_hdr
*sfp
= ifp
->if_data
;
680 fa
= xfs_dir2_sf_verify(mp
, sfp
, ifp
->if_bytes
);
684 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, XFS_DATA_FORK
);
686 fa
= xfs_symlink_shortform_verify(ifp
->if_data
, ifp
->if_bytes
);
694 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, "data fork",
695 ip
->i_df
.if_data
, ip
->i_df
.if_bytes
, fa
);
696 return -EFSCORRUPTED
;
702 /* Verify the inline contents of the attr fork of an inode. */
704 xfs_ifork_verify_local_attr(
705 struct xfs_inode
*ip
)
707 struct xfs_ifork
*ifp
= &ip
->i_af
;
710 if (!xfs_inode_has_attr_fork(ip
)) {
713 struct xfs_ifork
*ifp
= &ip
->i_af
;
715 ASSERT(ifp
->if_format
== XFS_DINODE_FMT_LOCAL
);
716 fa
= xfs_attr_shortform_verify(ifp
->if_data
, ifp
->if_bytes
);
719 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, "attr fork",
720 ifp
->if_data
, ifp
->if_bytes
, fa
);
721 return -EFSCORRUPTED
;
728 * Check if the inode fork supports adding nr_to_add more extents.
730 * If it doesn't but we can upgrade it to large extent counters, do the upgrade.
731 * If we can't upgrade or are already using big counters but still can't fit the
732 * additional extents, return -EFBIG.
735 xfs_iext_count_extend(
736 struct xfs_trans
*tp
,
737 struct xfs_inode
*ip
,
741 struct xfs_mount
*mp
= ip
->i_mount
;
743 xfs_inode_has_large_extent_counts(ip
);
744 struct xfs_ifork
*ifp
= xfs_ifork_ptr(ip
, whichfork
);
747 ASSERT(nr_to_add
<= XFS_MAX_EXTCNT_UPGRADE_NR
);
749 if (whichfork
== XFS_COW_FORK
)
752 /* no point in upgrading if if_nextents overflows */
753 nr_exts
= ifp
->if_nextents
+ nr_to_add
;
754 if (nr_exts
< ifp
->if_nextents
)
757 if (XFS_TEST_ERROR(false, mp
, XFS_ERRTAG_REDUCE_MAX_IEXTENTS
) &&
761 if (nr_exts
> xfs_iext_max_nextents(has_large
, whichfork
)) {
762 if (has_large
|| !xfs_has_large_extent_counts(mp
))
764 ip
->i_diflags2
|= XFS_DIFLAG2_NREXT64
;
765 xfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
770 /* Decide if a file mapping is on the realtime device or not. */
772 xfs_ifork_is_realtime(
773 struct xfs_inode
*ip
,
776 return XFS_IS_REALTIME_INODE(ip
) && whichfork
!= XFS_ATTR_FORK
;