]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blobdiff - libxfs/xfs_inode_buf.c
xfs: fix transaction leak on remote attr set/remove failure
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_inode_buf.c
index 036389d971cd8d3c7b3ac4fb5514b387001a1dca..03ab18bb1f6098a6c12f2e68d8d2a980ec61af96 100644 (file)
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "libxfs_priv.h"
 #include "xfs_fs.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_inode.h"
+#include "xfs_errortag.h"
 #include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_ialloc.h"
+#include "xfs_dir2.h"
 
 /*
  * Check that none of the inode's in the buffer have a next
@@ -87,40 +77,46 @@ xfs_inode_buf_verify(
        bool            readahead)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+       xfs_agnumber_t  agno;
        int             i;
        int             ni;
 
        /*
         * Validate the magic number and version of every inode in the buffer
         */
+       agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
        for (i = 0; i < ni; i++) {
                int             di_ok;
                xfs_dinode_t    *dip;
+               xfs_agino_t     unlinked_ino;
 
                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
+               unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-                       xfs_dinode_good_version(mp, dip->di_version);
+                       xfs_dinode_good_version(mp, dip->di_version) &&
+                       (unlinked_ino == NULLAGINO ||
+                        xfs_verify_agino(mp, agno, unlinked_ino));
                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-                                               XFS_ERRTAG_ITOBP_INOTOBP,
-                                               XFS_RANDOM_ITOBP_INOTOBP))) {
+                                               XFS_ERRTAG_ITOBP_INOTOBP))) {
                        if (readahead) {
                                bp->b_flags &= ~XBF_DONE;
                                xfs_buf_ioerror(bp, -EIO);
                                return;
                        }
 
-                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
-                       xfs_verifier_error(bp);
 #ifdef DEBUG
                        xfs_alert(mp,
                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
                                (unsigned long long)bp->b_bn, i,
                                be16_to_cpu(dip->di_magic));
 #endif
+                       xfs_buf_verifier_error(bp, -EFSCORRUPTED,
+                                       __func__, dip, sizeof(*dip),
+                                       NULL);
+                       return;
                }
        }
-       xfs_inobp_check(mp, bp);
 }
 
 
@@ -189,11 +185,6 @@ xfs_imap_to_bp(
                        ASSERT(buf_flags & XBF_TRYLOCK);
                        return error;
                }
-
-               if (error == -EFSCORRUPTED &&
-                   (iget_flags & XFS_IGET_UNTRUSTED))
-                       return -EINVAL;
-
                xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
                        __func__, error);
                return error;
@@ -261,10 +252,12 @@ xfs_inode_from_disk(
        to->di_flags    = be16_to_cpu(from->di_flags);
 
        if (to->di_version == 3) {
-               inode->i_version = be64_to_cpu(from->di_changecount);
+               inode_set_iversion_queried(inode,
+                                          be64_to_cpu(from->di_changecount));
                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
                to->di_flags2 = be64_to_cpu(from->di_flags2);
+               to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
        }
 }
 
@@ -310,11 +303,11 @@ xfs_inode_to_disk(
        to->di_flags = cpu_to_be16(from->di_flags);
 
        if (from->di_version == 3) {
-               to->di_changecount = cpu_to_be64(inode->i_version);
+               to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
                to->di_flags2 = cpu_to_be64(from->di_flags2);
-
+               to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
                to->di_ino = cpu_to_be64(ip->i_ino);
                to->di_lsn = cpu_to_be64(lsn);
                memset(to->di_pad2, 0, sizeof(to->di_pad2));
@@ -366,6 +359,7 @@ xfs_log_dinode_to_disk(
                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
                to->di_flags2 = cpu_to_be64(from->di_flags2);
+               to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
                to->di_ino = cpu_to_be64(from->di_ino);
                to->di_lsn = cpu_to_be64(from->di_lsn);
                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
@@ -376,29 +370,183 @@ xfs_log_dinode_to_disk(
        }
 }
 
-bool
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+       struct xfs_dinode       *dip,
+       struct xfs_mount        *mp,
+       int                     whichfork)
+{
+       uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+       switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
+               /*
+                * no local regular files yet
+                */
+               if (whichfork == XFS_DATA_FORK) {
+                       if (S_ISREG(be16_to_cpu(dip->di_mode)))
+                               return __this_address;
+                       if (be64_to_cpu(dip->di_size) >
+                                       XFS_DFORK_SIZE(dip, mp, whichfork))
+                               return __this_address;
+               }
+               if (di_nextents)
+                       return __this_address;
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+                       return __this_address;
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               if (whichfork == XFS_ATTR_FORK) {
+                       if (di_nextents > MAXAEXTNUM)
+                               return __this_address;
+               } else if (di_nextents > MAXEXTNUM) {
+                       return __this_address;
+               }
+               break;
+       default:
+               return __this_address;
+       }
+       return NULL;
+}
+
+xfs_failaddr_t
 xfs_dinode_verify(
        struct xfs_mount        *mp,
        xfs_ino_t               ino,
        struct xfs_dinode       *dip)
 {
+       xfs_failaddr_t          fa;
+       uint16_t                mode;
+       uint16_t                flags;
+       uint64_t                flags2;
+       uint64_t                di_size;
+
        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
-               return false;
+               return __this_address;
+
+       /* Verify v3 integrity information first */
+       if (dip->di_version >= 3) {
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return __this_address;
+               if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                                     XFS_DINODE_CRC_OFF))
+                       return __this_address;
+               if (be64_to_cpu(dip->di_ino) != ino)
+                       return __this_address;
+               if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
+                       return __this_address;
+       }
+
+       /* don't allow invalid i_size */
+       di_size = be64_to_cpu(dip->di_size);
+       if (di_size & (1ULL << 63))
+               return __this_address;
+
+       mode = be16_to_cpu(dip->di_mode);
+       if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
+               return __this_address;
+
+       /* No zero-length symlinks/dirs. */
+       if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
+               return __this_address;
+
+       /* Fork checks carried over from xfs_iformat_fork */
+       if (mode &&
+           be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
+                       be64_to_cpu(dip->di_nblocks))
+               return __this_address;
+
+       if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
+               return __this_address;
+
+       flags = be16_to_cpu(dip->di_flags);
+
+       if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
+               return __this_address;
+
+       /* Do we have appropriate data fork formats for the mode? */
+       switch (mode & S_IFMT) {
+       case S_IFIFO:
+       case S_IFCHR:
+       case S_IFBLK:
+       case S_IFSOCK:
+               if (dip->di_format != XFS_DINODE_FMT_DEV)
+                       return __this_address;
+               break;
+       case S_IFREG:
+       case S_IFLNK:
+       case S_IFDIR:
+               fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+               if (fa)
+                       return fa;
+               break;
+       case 0:
+               /* Uninitialized inode ok. */
+               break;
+       default:
+               return __this_address;
+       }
+
+       if (XFS_DFORK_Q(dip)) {
+               fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+               if (fa)
+                       return fa;
+       } else {
+               /*
+                * If there is no fork offset, this may be a freshly-made inode
+                * in a new disk cluster, in which case di_aformat is zeroed.
+                * Otherwise, such an inode must be in EXTENTS format; this goes
+                * for freed inodes as well.
+                */
+               switch (dip->di_aformat) {
+               case 0:
+               case XFS_DINODE_FMT_EXTENTS:
+                       break;
+               default:
+                       return __this_address;
+               }
+               if (dip->di_anextents)
+                       return __this_address;
+       }
+
+       /* extent size hint validation */
+       fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
+                       mode, flags);
+       if (fa)
+               return fa;
 
        /* only version 3 or greater inodes are extensively verified here */
        if (dip->di_version < 3)
-               return true;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
-       if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             XFS_DINODE_CRC_OFF))
-               return false;
-       if (be64_to_cpu(dip->di_ino) != ino)
-               return false;
-       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
-               return false;
-       return true;
+               return NULL;
+
+       flags2 = be64_to_cpu(dip->di_flags2);
+
+       /* don't allow reflink/cowextsize if we don't have reflink */
+       if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
+            !xfs_sb_version_hasreflink(&mp->m_sb))
+               return __this_address;
+
+       /* only regular files get reflink */
+       if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
+               return __this_address;
+
+       /* don't let reflink and realtime mix */
+       if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
+               return __this_address;
+
+       /* don't let reflink and dax mix */
+       if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
+               return __this_address;
+
+       /* COW extent size hint validation */
+       fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
+                       mode, flags, flags2);
+       if (fa)
+               return fa;
+
+       return NULL;
 }
 
 void
@@ -406,13 +554,13 @@ xfs_dinode_calc_crc(
        struct xfs_mount        *mp,
        struct xfs_dinode       *dip)
 {
-       __uint32_t              crc;
+       uint32_t                crc;
 
        if (dip->di_version < 3)
                return;
 
        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
-       crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
+       crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
                              XFS_DINODE_CRC_OFF);
        dip->di_crc = xfs_end_cksum(crc);
 }
@@ -438,6 +586,7 @@ xfs_iread(
 {
        xfs_buf_t       *bp;
        xfs_dinode_t    *dip;
+       xfs_failaddr_t  fa;
        int             error;
 
        /*
@@ -454,10 +603,7 @@ xfs_iread(
                /* initialise the on-disk inode core */
                memset(&ip->i_d, 0, sizeof(ip->i_d));
                VFS_I(ip)->i_generation = prandom_u32();
-               if (xfs_sb_version_hascrc(&mp->m_sb))
-                       ip->i_d.di_version = 3;
-               else
-                       ip->i_d.di_version = 2;
+               ip->i_d.di_version = 3;
                return 0;
        }
 
@@ -469,11 +615,10 @@ xfs_iread(
                return error;
 
        /* even unallocated inodes are verified */
-       if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
-               xfs_alert(mp, "%s: validation failed for inode %lld failed",
-                               __func__, ip->i_ino);
-
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+       fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+       if (fa) {
+               xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
+                               sizeof(*dip), fa);
                error = -EFSCORRUPTED;
                goto out_brelse;
        }
@@ -515,7 +660,6 @@ xfs_iread(
        }
 
        ASSERT(ip->i_d.di_version >= 2);
-
        ip->i_delayed_blks = 0;
 
        /*
@@ -541,3 +685,110 @@ xfs_iread(
        xfs_trans_brelse(tp, bp);
        return error;
 }
+
+/*
+ * Validate di_extsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_extsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_extsize(
+       struct xfs_mount                *mp,
+       uint32_t                        extsize,
+       uint16_t                        mode,
+       uint16_t                        flags)
+{
+       bool                            rt_flag;
+       bool                            hint_flag;
+       bool                            inherit_flag;
+       uint32_t                        extsize_bytes;
+       uint32_t                        blocksize_bytes;
+
+       rt_flag = (flags & XFS_DIFLAG_REALTIME);
+       hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
+       inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
+       extsize_bytes = XFS_FSB_TO_B(mp, extsize);
+
+       if (rt_flag)
+               blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+       else
+               blocksize_bytes = mp->m_sb.sb_blocksize;
+
+       if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
+               return __this_address;
+
+       if (hint_flag && !S_ISREG(mode))
+               return __this_address;
+
+       if (inherit_flag && !S_ISDIR(mode))
+               return __this_address;
+
+       if ((hint_flag || inherit_flag) && extsize == 0)
+               return __this_address;
+
+       /* free inodes get flags set to zero but extsize remains */
+       if (mode && !(hint_flag || inherit_flag) && extsize != 0)
+               return __this_address;
+
+       if (extsize_bytes % blocksize_bytes)
+               return __this_address;
+
+       if (extsize > MAXEXTLEN)
+               return __this_address;
+
+       if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
+               return __this_address;
+
+       return NULL;
+}
+
+/*
+ * Validate di_cowextsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_cowextsize(
+       struct xfs_mount                *mp,
+       uint32_t                        cowextsize,
+       uint16_t                        mode,
+       uint16_t                        flags,
+       uint64_t                        flags2)
+{
+       bool                            rt_flag;
+       bool                            hint_flag;
+       uint32_t                        cowextsize_bytes;
+
+       rt_flag = (flags & XFS_DIFLAG_REALTIME);
+       hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
+       cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
+
+       if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
+               return __this_address;
+
+       if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
+               return __this_address;
+
+       if (hint_flag && cowextsize == 0)
+               return __this_address;
+
+       /* free inodes get flags set to zero but cowextsize remains */
+       if (mode && !hint_flag && cowextsize != 0)
+               return __this_address;
+
+       if (hint_flag && rt_flag)
+               return __this_address;
+
+       if (cowextsize_bytes % mp->m_sb.sb_blocksize)
+               return __this_address;
+
+       if (cowextsize > MAXEXTLEN)
+               return __this_address;
+
+       if (cowextsize > mp->m_sb.sb_agblocks / 2)
+               return __this_address;
+
+       return NULL;
+}