// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 */
#include "libxfs_priv.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
21 * If we are doing readahead on an inode buffer, we might be in log recovery
22 * reading an inode allocation buffer that hasn't yet been replayed, and hence
23 * has not had the inode cores stamped into it. Hence for readahead, the buffer
24 * may be potentially invalid.
26 * If the readahead buffer is invalid, we need to mark it with an error and
27 * clear the DONE status of the buffer so that a followup read will re-read it
28 * from disk. We don't report the error otherwise to avoid warnings during log
29 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
30 * because all we want to do is say readahead failed; there is no-one to report
31 * the error to, so this will distinguish it from a non-ra verifier failure.
32 * Changes to this readahead error behaviour also need to be reflected in
33 * xfs_dquot_buf_readahead_verify().
40 struct xfs_mount
*mp
= bp
->b_mount
;
46 * Validate the magic number and version of every inode in the buffer
48 agno
= xfs_daddr_to_agno(mp
, XFS_BUF_ADDR(bp
));
49 ni
= XFS_BB_TO_FSB(mp
, bp
->b_length
) * mp
->m_sb
.sb_inopblock
;
50 for (i
= 0; i
< ni
; i
++) {
53 xfs_agino_t unlinked_ino
;
55 dip
= xfs_buf_offset(bp
, (i
<< mp
->m_sb
.sb_inodelog
));
56 unlinked_ino
= be32_to_cpu(dip
->di_next_unlinked
);
57 di_ok
= xfs_verify_magic16(bp
, dip
->di_magic
) &&
58 xfs_dinode_good_version(&mp
->m_sb
, dip
->di_version
) &&
59 xfs_verify_agino_or_null(mp
, agno
, unlinked_ino
);
60 if (unlikely(XFS_TEST_ERROR(!di_ok
, mp
,
61 XFS_ERRTAG_ITOBP_INOTOBP
))) {
63 bp
->b_flags
&= ~XBF_DONE
;
64 xfs_buf_ioerror(bp
, -EIO
);
70 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
71 (unsigned long long)bp
->b_bn
, i
,
72 be16_to_cpu(dip
->di_magic
));
74 xfs_buf_verifier_error(bp
, -EFSCORRUPTED
,
75 __func__
, dip
, sizeof(*dip
),
84 xfs_inode_buf_read_verify(
87 xfs_inode_buf_verify(bp
, false);
91 xfs_inode_buf_readahead_verify(
94 xfs_inode_buf_verify(bp
, true);
98 xfs_inode_buf_write_verify(
101 xfs_inode_buf_verify(bp
, false);
104 const struct xfs_buf_ops xfs_inode_buf_ops
= {
106 .magic16
= { cpu_to_be16(XFS_DINODE_MAGIC
),
107 cpu_to_be16(XFS_DINODE_MAGIC
) },
108 .verify_read
= xfs_inode_buf_read_verify
,
109 .verify_write
= xfs_inode_buf_write_verify
,
112 const struct xfs_buf_ops xfs_inode_buf_ra_ops
= {
113 .name
= "xfs_inode_ra",
114 .magic16
= { cpu_to_be16(XFS_DINODE_MAGIC
),
115 cpu_to_be16(XFS_DINODE_MAGIC
) },
116 .verify_read
= xfs_inode_buf_readahead_verify
,
117 .verify_write
= xfs_inode_buf_write_verify
,
122 * This routine is called to map an inode to the buffer containing the on-disk
123 * version of the inode. It returns a pointer to the buffer containing the
124 * on-disk inode in the bpp parameter.
128 struct xfs_mount
*mp
,
129 struct xfs_trans
*tp
,
130 struct xfs_imap
*imap
,
131 struct xfs_buf
**bpp
)
133 return xfs_trans_read_buf(mp
, tp
, mp
->m_ddev_targp
, imap
->im_blkno
,
134 imap
->im_len
, XBF_UNMAPPED
, bpp
,
138 static inline struct timespec64
xfs_inode_decode_bigtime(uint64_t ts
)
140 struct timespec64 tv
;
143 tv
.tv_sec
= xfs_bigtime_to_unix(div_u64_rem(ts
, NSEC_PER_SEC
, &n
));
149 /* Convert an ondisk timestamp to an incore timestamp. */
151 xfs_inode_from_disk_ts(
152 struct xfs_dinode
*dip
,
153 const xfs_timestamp_t ts
)
155 struct timespec64 tv
;
156 struct xfs_legacy_timestamp
*lts
;
158 if (xfs_dinode_has_bigtime(dip
))
159 return xfs_inode_decode_bigtime(be64_to_cpu(ts
));
161 lts
= (struct xfs_legacy_timestamp
*)&ts
;
162 tv
.tv_sec
= (int)be32_to_cpu(lts
->t_sec
);
163 tv
.tv_nsec
= (int)be32_to_cpu(lts
->t_nsec
);
170 struct xfs_inode
*ip
,
171 struct xfs_dinode
*from
)
173 struct xfs_icdinode
*to
= &ip
->i_d
;
174 struct inode
*inode
= VFS_I(ip
);
178 ASSERT(ip
->i_cowfp
== NULL
);
179 ASSERT(ip
->i_afp
== NULL
);
181 fa
= xfs_dinode_verify(ip
->i_mount
, ip
->i_ino
, from
);
183 xfs_inode_verifier_error(ip
, -EFSCORRUPTED
, "dinode", from
,
185 return -EFSCORRUPTED
;
189 * First get the permanent information that is needed to allocate an
190 * inode. If the inode is unused, mode is zero and we shouldn't mess
191 * with the uninitialized part of it.
193 to
->di_flushiter
= be16_to_cpu(from
->di_flushiter
);
194 inode
->i_generation
= be32_to_cpu(from
->di_gen
);
195 inode
->i_mode
= be16_to_cpu(from
->di_mode
);
200 * Convert v1 inodes immediately to v2 inode format as this is the
201 * minimum inode version format we support in the rest of the code.
202 * They will also be unconditionally written back to disk as v2 inodes.
204 if (unlikely(from
->di_version
== 1)) {
205 set_nlink(inode
, be16_to_cpu(from
->di_onlink
));
208 set_nlink(inode
, be32_to_cpu(from
->di_nlink
));
209 ip
->i_projid
= (prid_t
)be16_to_cpu(from
->di_projid_hi
) << 16 |
210 be16_to_cpu(from
->di_projid_lo
);
213 i_uid_write(inode
, be32_to_cpu(from
->di_uid
));
214 i_gid_write(inode
, be32_to_cpu(from
->di_gid
));
217 * Time is signed, so need to convert to signed 32 bit before
218 * storing in inode timestamp which may be 64 bit. Otherwise
219 * a time before epoch is converted to a time long after epoch
222 inode
->i_atime
= xfs_inode_from_disk_ts(from
, from
->di_atime
);
223 inode
->i_mtime
= xfs_inode_from_disk_ts(from
, from
->di_mtime
);
224 inode
->i_ctime
= xfs_inode_from_disk_ts(from
, from
->di_ctime
);
226 ip
->i_disk_size
= be64_to_cpu(from
->di_size
);
227 to
->di_nblocks
= be64_to_cpu(from
->di_nblocks
);
228 to
->di_extsize
= be32_to_cpu(from
->di_extsize
);
229 to
->di_forkoff
= from
->di_forkoff
;
230 to
->di_flags
= be16_to_cpu(from
->di_flags
);
232 if (from
->di_dmevmask
|| from
->di_dmstate
)
233 xfs_iflags_set(ip
, XFS_IPRESERVE_DM_FIELDS
);
235 if (xfs_sb_version_has_v3inode(&ip
->i_mount
->m_sb
)) {
236 inode_set_iversion_queried(inode
,
237 be64_to_cpu(from
->di_changecount
));
238 to
->di_crtime
= xfs_inode_from_disk_ts(from
, from
->di_crtime
);
239 to
->di_flags2
= be64_to_cpu(from
->di_flags2
);
240 to
->di_cowextsize
= be32_to_cpu(from
->di_cowextsize
);
243 error
= xfs_iformat_data_fork(ip
, from
);
246 if (from
->di_forkoff
) {
247 error
= xfs_iformat_attr_fork(ip
, from
);
249 goto out_destroy_data_fork
;
251 if (xfs_is_reflink_inode(ip
))
252 xfs_ifork_init_cow(ip
);
255 out_destroy_data_fork
:
256 xfs_idestroy_fork(&ip
->i_df
);
260 /* Convert an incore timestamp to an ondisk timestamp. */
261 static inline xfs_timestamp_t
262 xfs_inode_to_disk_ts(
263 struct xfs_inode
*ip
,
264 const struct timespec64 tv
)
266 struct xfs_legacy_timestamp
*lts
;
269 if (xfs_inode_has_bigtime(ip
))
270 return cpu_to_be64(xfs_inode_encode_bigtime(tv
));
272 lts
= (struct xfs_legacy_timestamp
*)&ts
;
273 lts
->t_sec
= cpu_to_be32(tv
.tv_sec
);
274 lts
->t_nsec
= cpu_to_be32(tv
.tv_nsec
);
281 struct xfs_inode
*ip
,
282 struct xfs_dinode
*to
,
285 struct xfs_icdinode
*from
= &ip
->i_d
;
286 struct inode
*inode
= VFS_I(ip
);
288 to
->di_magic
= cpu_to_be16(XFS_DINODE_MAGIC
);
291 to
->di_format
= xfs_ifork_format(&ip
->i_df
);
292 to
->di_uid
= cpu_to_be32(i_uid_read(inode
));
293 to
->di_gid
= cpu_to_be32(i_gid_read(inode
));
294 to
->di_projid_lo
= cpu_to_be16(ip
->i_projid
& 0xffff);
295 to
->di_projid_hi
= cpu_to_be16(ip
->i_projid
>> 16);
297 memset(to
->di_pad
, 0, sizeof(to
->di_pad
));
298 to
->di_atime
= xfs_inode_to_disk_ts(ip
, inode
->i_atime
);
299 to
->di_mtime
= xfs_inode_to_disk_ts(ip
, inode
->i_mtime
);
300 to
->di_ctime
= xfs_inode_to_disk_ts(ip
, inode
->i_ctime
);
301 to
->di_nlink
= cpu_to_be32(inode
->i_nlink
);
302 to
->di_gen
= cpu_to_be32(inode
->i_generation
);
303 to
->di_mode
= cpu_to_be16(inode
->i_mode
);
305 to
->di_size
= cpu_to_be64(ip
->i_disk_size
);
306 to
->di_nblocks
= cpu_to_be64(from
->di_nblocks
);
307 to
->di_extsize
= cpu_to_be32(from
->di_extsize
);
308 to
->di_nextents
= cpu_to_be32(xfs_ifork_nextents(&ip
->i_df
));
309 to
->di_anextents
= cpu_to_be16(xfs_ifork_nextents(ip
->i_afp
));
310 to
->di_forkoff
= from
->di_forkoff
;
311 to
->di_aformat
= xfs_ifork_format(ip
->i_afp
);
312 to
->di_flags
= cpu_to_be16(from
->di_flags
);
314 if (xfs_sb_version_has_v3inode(&ip
->i_mount
->m_sb
)) {
316 to
->di_changecount
= cpu_to_be64(inode_peek_iversion(inode
));
317 to
->di_crtime
= xfs_inode_to_disk_ts(ip
, from
->di_crtime
);
318 to
->di_flags2
= cpu_to_be64(from
->di_flags2
);
319 to
->di_cowextsize
= cpu_to_be32(from
->di_cowextsize
);
320 to
->di_ino
= cpu_to_be64(ip
->i_ino
);
321 to
->di_lsn
= cpu_to_be64(lsn
);
322 memset(to
->di_pad2
, 0, sizeof(to
->di_pad2
));
323 uuid_copy(&to
->di_uuid
, &ip
->i_mount
->m_sb
.sb_meta_uuid
);
324 to
->di_flushiter
= 0;
327 to
->di_flushiter
= cpu_to_be16(from
->di_flushiter
);
331 static xfs_failaddr_t
332 xfs_dinode_verify_fork(
333 struct xfs_dinode
*dip
,
334 struct xfs_mount
*mp
,
337 uint32_t di_nextents
= XFS_DFORK_NEXTENTS(dip
, whichfork
);
339 switch (XFS_DFORK_FORMAT(dip
, whichfork
)) {
340 case XFS_DINODE_FMT_LOCAL
:
342 * no local regular files yet
344 if (whichfork
== XFS_DATA_FORK
) {
345 if (S_ISREG(be16_to_cpu(dip
->di_mode
)))
346 return __this_address
;
347 if (be64_to_cpu(dip
->di_size
) >
348 XFS_DFORK_SIZE(dip
, mp
, whichfork
))
349 return __this_address
;
352 return __this_address
;
354 case XFS_DINODE_FMT_EXTENTS
:
355 if (di_nextents
> XFS_DFORK_MAXEXT(dip
, mp
, whichfork
))
356 return __this_address
;
358 case XFS_DINODE_FMT_BTREE
:
359 if (whichfork
== XFS_ATTR_FORK
) {
360 if (di_nextents
> MAXAEXTNUM
)
361 return __this_address
;
362 } else if (di_nextents
> MAXEXTNUM
) {
363 return __this_address
;
367 return __this_address
;
372 static xfs_failaddr_t
373 xfs_dinode_verify_forkoff(
374 struct xfs_dinode
*dip
,
375 struct xfs_mount
*mp
)
377 if (!dip
->di_forkoff
)
380 switch (dip
->di_format
) {
381 case XFS_DINODE_FMT_DEV
:
382 if (dip
->di_forkoff
!= (roundup(sizeof(xfs_dev_t
), 8) >> 3))
383 return __this_address
;
385 case XFS_DINODE_FMT_LOCAL
: /* fall through ... */
386 case XFS_DINODE_FMT_EXTENTS
: /* fall through ... */
387 case XFS_DINODE_FMT_BTREE
:
388 if (dip
->di_forkoff
>= (XFS_LITINO(mp
) >> 3))
389 return __this_address
;
392 return __this_address
;
399 struct xfs_mount
*mp
,
401 struct xfs_dinode
*dip
)
409 if (dip
->di_magic
!= cpu_to_be16(XFS_DINODE_MAGIC
))
410 return __this_address
;
412 /* Verify v3 integrity information first */
413 if (dip
->di_version
>= 3) {
414 if (!xfs_sb_version_has_v3inode(&mp
->m_sb
))
415 return __this_address
;
416 if (!xfs_verify_cksum((char *)dip
, mp
->m_sb
.sb_inodesize
,
418 return __this_address
;
419 if (be64_to_cpu(dip
->di_ino
) != ino
)
420 return __this_address
;
421 if (!uuid_equal(&dip
->di_uuid
, &mp
->m_sb
.sb_meta_uuid
))
422 return __this_address
;
425 /* don't allow invalid i_size */
426 di_size
= be64_to_cpu(dip
->di_size
);
427 if (di_size
& (1ULL << 63))
428 return __this_address
;
430 mode
= be16_to_cpu(dip
->di_mode
);
431 if (mode
&& xfs_mode_to_ftype(mode
) == XFS_DIR3_FT_UNKNOWN
)
432 return __this_address
;
434 /* No zero-length symlinks/dirs. */
435 if ((S_ISLNK(mode
) || S_ISDIR(mode
)) && di_size
== 0)
436 return __this_address
;
438 /* Fork checks carried over from xfs_iformat_fork */
440 be32_to_cpu(dip
->di_nextents
) + be16_to_cpu(dip
->di_anextents
) >
441 be64_to_cpu(dip
->di_nblocks
))
442 return __this_address
;
444 if (mode
&& XFS_DFORK_BOFF(dip
) > mp
->m_sb
.sb_inodesize
)
445 return __this_address
;
447 flags
= be16_to_cpu(dip
->di_flags
);
449 if (mode
&& (flags
& XFS_DIFLAG_REALTIME
) && !mp
->m_rtdev_targp
)
450 return __this_address
;
452 /* check for illegal values of forkoff */
453 fa
= xfs_dinode_verify_forkoff(dip
, mp
);
457 /* Do we have appropriate data fork formats for the mode? */
458 switch (mode
& S_IFMT
) {
463 if (dip
->di_format
!= XFS_DINODE_FMT_DEV
)
464 return __this_address
;
469 fa
= xfs_dinode_verify_fork(dip
, mp
, XFS_DATA_FORK
);
474 /* Uninitialized inode ok. */
477 return __this_address
;
480 if (dip
->di_forkoff
) {
481 fa
= xfs_dinode_verify_fork(dip
, mp
, XFS_ATTR_FORK
);
486 * If there is no fork offset, this may be a freshly-made inode
487 * in a new disk cluster, in which case di_aformat is zeroed.
488 * Otherwise, such an inode must be in EXTENTS format; this goes
489 * for freed inodes as well.
491 switch (dip
->di_aformat
) {
493 case XFS_DINODE_FMT_EXTENTS
:
496 return __this_address
;
498 if (dip
->di_anextents
)
499 return __this_address
;
502 /* extent size hint validation */
503 fa
= xfs_inode_validate_extsize(mp
, be32_to_cpu(dip
->di_extsize
),
508 /* only version 3 or greater inodes are extensively verified here */
509 if (dip
->di_version
< 3)
512 flags2
= be64_to_cpu(dip
->di_flags2
);
514 /* don't allow reflink/cowextsize if we don't have reflink */
515 if ((flags2
& (XFS_DIFLAG2_REFLINK
| XFS_DIFLAG2_COWEXTSIZE
)) &&
516 !xfs_sb_version_hasreflink(&mp
->m_sb
))
517 return __this_address
;
519 /* only regular files get reflink */
520 if ((flags2
& XFS_DIFLAG2_REFLINK
) && (mode
& S_IFMT
) != S_IFREG
)
521 return __this_address
;
523 /* don't let reflink and realtime mix */
524 if ((flags2
& XFS_DIFLAG2_REFLINK
) && (flags
& XFS_DIFLAG_REALTIME
))
525 return __this_address
;
527 /* COW extent size hint validation */
528 fa
= xfs_inode_validate_cowextsize(mp
, be32_to_cpu(dip
->di_cowextsize
),
529 mode
, flags
, flags2
);
533 /* bigtime iflag can only happen on bigtime filesystems */
534 if (xfs_dinode_has_bigtime(dip
) &&
535 !xfs_sb_version_hasbigtime(&mp
->m_sb
))
536 return __this_address
;
543 struct xfs_mount
*mp
,
544 struct xfs_dinode
*dip
)
548 if (dip
->di_version
< 3)
551 ASSERT(xfs_sb_version_hascrc(&mp
->m_sb
));
552 crc
= xfs_start_cksum_update((char *)dip
, mp
->m_sb
.sb_inodesize
,
554 dip
->di_crc
= xfs_end_cksum(crc
);
558 * Validate di_extsize hint.
560 * The rules are documented at xfs_ioctl_setattr_check_extsize().
561 * These functions must be kept in sync with each other.
564 xfs_inode_validate_extsize(
565 struct xfs_mount
*mp
,
573 uint32_t extsize_bytes
;
574 uint32_t blocksize_bytes
;
576 rt_flag
= (flags
& XFS_DIFLAG_REALTIME
);
577 hint_flag
= (flags
& XFS_DIFLAG_EXTSIZE
);
578 inherit_flag
= (flags
& XFS_DIFLAG_EXTSZINHERIT
);
579 extsize_bytes
= XFS_FSB_TO_B(mp
, extsize
);
582 blocksize_bytes
= mp
->m_sb
.sb_rextsize
<< mp
->m_sb
.sb_blocklog
;
584 blocksize_bytes
= mp
->m_sb
.sb_blocksize
;
586 if ((hint_flag
|| inherit_flag
) && !(S_ISDIR(mode
) || S_ISREG(mode
)))
587 return __this_address
;
589 if (hint_flag
&& !S_ISREG(mode
))
590 return __this_address
;
592 if (inherit_flag
&& !S_ISDIR(mode
))
593 return __this_address
;
595 if ((hint_flag
|| inherit_flag
) && extsize
== 0)
596 return __this_address
;
598 /* free inodes get flags set to zero but extsize remains */
599 if (mode
&& !(hint_flag
|| inherit_flag
) && extsize
!= 0)
600 return __this_address
;
602 if (extsize_bytes
% blocksize_bytes
)
603 return __this_address
;
605 if (extsize
> MAXEXTLEN
)
606 return __this_address
;
608 if (!rt_flag
&& extsize
> mp
->m_sb
.sb_agblocks
/ 2)
609 return __this_address
;
615 * Validate di_cowextsize hint.
617 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
618 * These functions must be kept in sync with each other.
621 xfs_inode_validate_cowextsize(
622 struct xfs_mount
*mp
,
630 uint32_t cowextsize_bytes
;
632 rt_flag
= (flags
& XFS_DIFLAG_REALTIME
);
633 hint_flag
= (flags2
& XFS_DIFLAG2_COWEXTSIZE
);
634 cowextsize_bytes
= XFS_FSB_TO_B(mp
, cowextsize
);
636 if (hint_flag
&& !xfs_sb_version_hasreflink(&mp
->m_sb
))
637 return __this_address
;
639 if (hint_flag
&& !(S_ISDIR(mode
) || S_ISREG(mode
)))
640 return __this_address
;
642 if (hint_flag
&& cowextsize
== 0)
643 return __this_address
;
645 /* free inodes get flags set to zero but cowextsize remains */
646 if (mode
&& !hint_flag
&& cowextsize
!= 0)
647 return __this_address
;
649 if (hint_flag
&& rt_flag
)
650 return __this_address
;
652 if (cowextsize_bytes
% mp
->m_sb
.sb_blocksize
)
653 return __this_address
;
655 if (cowextsize
> MAXEXTLEN
)
656 return __this_address
;
658 if (cowextsize
> mp
->m_sb
.sb_agblocks
/ 2)
659 return __this_address
;