git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs: validate btree records on retrieval
author: Dave Chinner <dchinner@redhat.com>
Thu, 28 Jun 2018 20:11:58 +0000 (15:11 -0500)
committer: Eric Sandeen <sandeen@redhat.com>
Thu, 28 Jun 2018 20:11:58 +0000 (15:11 -0500)
Source kernel commit: 9e6c08d4a8fc21fc496bf4543e5b2360fc610866

We don't currently check the validity of records as we walk the btree.
When there are corrupt records in the free space btree (e.g. a zero
startblock/length, or an extent that runs beyond the end of the AG) we
just blindly use them and things go bad from there. That leads to
assert failures on debug kernels like this:

XFS: Assertion failed: fs_is_ok, file: fs/xfs/libxfs/xfs_alloc.c, line: 450
....
Call Trace:
xfs_alloc_fixup_trees+0x368/0x5c0
xfs_alloc_ag_vextent_near+0x79a/0xe20
xfs_alloc_ag_vextent+0x1d3/0x330
xfs_alloc_vextent+0x5e9/0x870

Or crashes like this:

XFS (loop0): xfs_buf_find: daddr 0x7fb28 out of range, EOFS 0x8000
.....
BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8
....
Call Trace:
xfs_bmap_add_extent_hole_real+0x67d/0x930
xfs_bmapi_write+0x934/0xc90
xfs_da_grow_inode_int+0x27e/0x2f0
xfs_dir2_grow_inode+0x55/0x130
xfs_dir2_sf_to_block+0x94/0x5d0
xfs_dir2_sf_addname+0xd0/0x590
xfs_dir_createname+0x168/0x1a0
xfs_rename+0x658/0x9b0

By checking that free space records pulled from the trees are
within the valid range, we catch many of these corruptions before
they can do damage.

This is a generic btree record checking deficiency. We need to
validate the records we fetch from all the different btrees before
we use them to catch corruptions like this.

With this patch, fetching a corrupt record emits an error message and
returns -EFSCORRUPTED; the higher layers catch that error and abort:

XFS (loop0): Size Freespace BTree record corruption in AG 0 detected!
XFS (loop0): start block 0x0 block count 0x0
XFS (loop0): Internal error xfs_trans_cancel at line 1012 of file fs/xfs/xfs_trans.c.  Caller xfs_create+0x42a/0x670
.....
Call Trace:
dump_stack+0x85/0xcb
xfs_trans_cancel+0x19f/0x1c0
xfs_create+0x42a/0x670
xfs_generic_create+0x1f6/0x2c0
vfs_create+0xf9/0x180
do_mknodat+0x1f9/0x210
do_syscall_64+0x5a/0x180
entry_SYSCALL_64_after_hwframe+0x49/0xbe
.....
XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1013 of file fs/xfs/xfs_trans.c.  Return address = ffffffff81500868
XFS (loop0): Corruption of in-memory data detected.  Shutting down filesystem

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
libxfs/libxfs_priv.h
libxfs/util.c
libxfs/xfs_alloc.c
libxfs/xfs_ialloc.c
libxfs/xfs_refcount.c
libxfs/xfs_rmap.c

index 31c13cec1bf4606a523dbb79e99aba5b1fd438b6..705380f99b5577b6cc1716414b0352d2334d684c 100644 (file)
@@ -579,6 +579,8 @@ bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
 
 typedef unsigned char u8;
 unsigned int hweight8(unsigned int w);
+unsigned int hweight32(unsigned int w);
+unsigned int hweight64(__u64 w);
 
 #define BIT_MASK(nr)   (1UL << ((nr) % BITS_PER_LONG))
 #define BIT_WORD(nr)   ((nr) / BITS_PER_LONG)
index 611ab9c02b7fa9a54cc5ebf8a5a1af6afcd3e83a..cb8100fa3b49ea4db4b93dc6d3484285f6e229a8 100644 (file)
@@ -841,3 +841,21 @@ hweight8(unsigned int w)
        res = (res & 0x33) + ((res >> 2) & 0x33);
        return (res + (res >> 4)) & 0x0F;
 }
+
+unsigned int
+hweight32(unsigned int w)
+{
+       unsigned int res = w - ((w >> 1) & 0x55555555);
+       res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+       res = (res + (res >> 4)) & 0x0F0F0F0F;
+       res = res + (res >> 8);
+       return (res + (res >> 16)) & 0x000000FF;
+}
+
+unsigned int
+hweight64(__u64 w)
+{
+       return hweight32((unsigned int)w) +
+              hweight32((unsigned int)(w >> 32));
+}
+
index 24db8152e624dfa9bd7acb6fd427ce5836466dda..c5fb9a64572e40152f9bd65bc2664c176c77a19b 100644 (file)
@@ -223,6 +223,8 @@ xfs_alloc_get_rec(
        xfs_extlen_t            *len,   /* output: length of extent */
        int                     *stat)  /* output: success/failure */
 {
+       struct xfs_mount        *mp = cur->bc_mp;
+       xfs_agnumber_t          agno = cur->bc_private.a.agno;
        union xfs_btree_rec     *rec;
        int                     error;
 
@@ -230,12 +232,28 @@ xfs_alloc_get_rec(
        if (error || !(*stat))
                return error;
        if (rec->alloc.ar_blockcount == 0)
-               return -EFSCORRUPTED;
+               goto out_bad_rec;
 
        *bno = be32_to_cpu(rec->alloc.ar_startblock);
        *len = be32_to_cpu(rec->alloc.ar_blockcount);
 
-       return error;
+       /* check for valid extent range, including overflow */
+       if (!xfs_verify_agbno(mp, agno, *bno))
+               goto out_bad_rec;
+       if (*bno > *bno + *len)
+               goto out_bad_rec;
+       if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
+               goto out_bad_rec;
+
+       return 0;
+
+out_bad_rec:
+       xfs_warn(mp,
+               "%s Freespace BTree record corruption in AG %d detected!",
+               cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
+       xfs_warn(mp,
+               "start block 0x%x block count 0x%x", *bno, *len);
+       return -EFSCORRUPTED;
 }
 
 /*
index 2eeded274cc92a23db1e98522926ad8620a7c463..e209bf8c9c69860a5008e955ee70647cf01274b8 100644 (file)
@@ -127,16 +127,45 @@ xfs_inobt_get_rec(
        struct xfs_inobt_rec_incore     *irec,
        int                             *stat)
 {
+       struct xfs_mount                *mp = cur->bc_mp;
+       xfs_agnumber_t                  agno = cur->bc_private.a.agno;
        union xfs_btree_rec             *rec;
        int                             error;
+       uint64_t                        realfree;
 
        error = xfs_btree_get_rec(cur, &rec, stat);
        if (error || *stat == 0)
                return error;
 
-       xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
+       xfs_inobt_btrec_to_irec(mp, rec, irec);
+
+       if (!xfs_verify_agino(mp, agno, irec->ir_startino))
+               goto out_bad_rec;
+       if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
+           irec->ir_count > XFS_INODES_PER_CHUNK)
+               goto out_bad_rec;
+       if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
+               goto out_bad_rec;
+
+       /* if there are no holes, return the first available offset */
+       if (!xfs_inobt_issparse(irec->ir_holemask))
+               realfree = irec->ir_free;
+       else
+               realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
+       if (hweight64(realfree) != irec->ir_freecount)
+               goto out_bad_rec;
 
        return 0;
+
+out_bad_rec:
+       xfs_warn(mp,
+               "%s Inode BTree record corruption in AG %d detected!",
+               cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", agno);
+       xfs_warn(mp,
+"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
+               irec->ir_startino, irec->ir_count, irec->ir_freecount,
+               irec->ir_free, irec->ir_holemask);
+       return -EFSCORRUPTED;
 }
 
 /*
index bc03b30ae1e63cd856ed6c8b74648092c9ab41da..7f64ec900c84a669a9be68dd00f404a7102f8139 100644 (file)
@@ -124,16 +124,53 @@ xfs_refcount_get_rec(
        struct xfs_refcount_irec        *irec,
        int                             *stat)
 {
+       struct xfs_mount                *mp = cur->bc_mp;
+       xfs_agnumber_t                  agno = cur->bc_private.a.agno;
        union xfs_btree_rec             *rec;
        int                             error;
+       xfs_agblock_t                   realstart;
 
        error = xfs_btree_get_rec(cur, &rec, stat);
-       if (!error && *stat == 1) {
-               xfs_refcount_btrec_to_irec(rec, irec);
-               trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno,
-                               irec);
+       if (error || !*stat)
+               return error;
+
+       xfs_refcount_btrec_to_irec(rec, irec);
+
+       agno = cur->bc_private.a.agno;
+       if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
+               goto out_bad_rec;
+
+       /* handle special COW-staging state */
+       realstart = irec->rc_startblock;
+       if (realstart & XFS_REFC_COW_START) {
+               if (irec->rc_refcount != 1)
+                       goto out_bad_rec;
+               realstart &= ~XFS_REFC_COW_START;
+       } else if (irec->rc_refcount < 2) {
+               goto out_bad_rec;
        }
-       return error;
+
+       /* check for valid extent range, including overflow */
+       if (!xfs_verify_agbno(mp, agno, realstart))
+               goto out_bad_rec;
+       if (realstart > realstart + irec->rc_blockcount)
+               goto out_bad_rec;
+       if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
+               goto out_bad_rec;
+
+       if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
+               goto out_bad_rec;
+
+       trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec);
+       return 0;
+
+out_bad_rec:
+       xfs_warn(mp,
+               "Refcount BTree record corruption in AG %d detected!", agno);
+       xfs_warn(mp,
+               "Start block 0x%x, block count 0x%x, references 0x%x",
+               irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
+       return -EFSCORRUPTED;
 }
 
 /*
index 4f905adfe52f64f7345edc6d6b13729a882a691f..4a169a4c470d73e01b773d6f7145542e39d7b451 100644 (file)
@@ -37,6 +37,7 @@
 #include "xfs_errortag.h"
 #include "xfs_bmap.h"
 #include "xfs_inode.h"
+#include "xfs_ialloc.h"
 
 /*
  * Lookup the first record less than or equal to [bno, len, owner, offset]
@@ -201,6 +202,8 @@ xfs_rmap_get_rec(
        struct xfs_rmap_irec    *irec,
        int                     *stat)
 {
+       struct xfs_mount        *mp = cur->bc_mp;
+       xfs_agnumber_t          agno = cur->bc_private.a.agno;
        union xfs_btree_rec     *rec;
        int                     error;
 
@@ -208,7 +211,43 @@ xfs_rmap_get_rec(
        if (error || !*stat)
                return error;
 
-       return xfs_rmap_btrec_to_irec(rec, irec);
+       if (xfs_rmap_btrec_to_irec(rec, irec))
+               goto out_bad_rec;
+
+       if (irec->rm_blockcount == 0)
+               goto out_bad_rec;
+       if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) {
+               if (irec->rm_owner != XFS_RMAP_OWN_FS)
+                       goto out_bad_rec;
+               if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
+                       goto out_bad_rec;
+       } else {
+               /* check for valid extent range, including overflow */
+               if (!xfs_verify_agbno(mp, agno, irec->rm_startblock))
+                       goto out_bad_rec;
+               if (irec->rm_startblock >
+                               irec->rm_startblock + irec->rm_blockcount)
+                       goto out_bad_rec;
+               if (!xfs_verify_agbno(mp, agno,
+                               irec->rm_startblock + irec->rm_blockcount - 1))
+                       goto out_bad_rec;
+       }
+
+       if (!(xfs_verify_ino(mp, irec->rm_owner) ||
+             (irec->rm_owner <= XFS_RMAP_OWN_FS &&
+              irec->rm_owner >= XFS_RMAP_OWN_MIN)))
+               goto out_bad_rec;
+
+       return 0;
+out_bad_rec:
+       xfs_warn(mp,
+               "Reverse Mapping BTree record corruption in AG %d detected!",
+               agno);
+       xfs_warn(mp,
+               "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
+               irec->rm_owner, irec->rm_flags, irec->rm_startblock,
+               irec->rm_blockcount);
+       return -EFSCORRUPTED;
 }
 
 struct xfs_find_left_neighbor_info {