From a8bbb6284c9319a8a4f08cb9927e53f07c45a9a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 13 Mar 2020 23:00:30 -0400 Subject: [PATCH] xfs: fix memory corruption during remote attr value buffer invalidation Source kernel commit: e8db2aafcedb7d88320ab83f1000f1606b26d4d7 While running generic/103, I observed what looks like memory corruption and (with slub debugging turned on) a slub redzone warning on i386 when inactivating an inode with a 64k remote attr value. On a v5 filesystem, maximally sized remote attr values require one block more than 64k worth of space to hold both the remote attribute value header (64 bytes). On a 4k block filesystem this results in a 68k buffer; on a 64k block filesystem, this would be a 128k buffer. Note that even though we'll never use more than 65,600 bytes of this buffer, XFS_MAX_BLOCKSIZE is 64k. This is a problem because the definition of struct xfs_buf_log_format allows for XFS_MAX_BLOCKSIZE worth of dirty bitmap (64k). On i386 when we invalidate a remote attribute, xfs_trans_binval zeroes all 68k worth of the dirty map, writing right off the end of the log item and corrupting memory. We've gotten away with this on x86_64 for years because the compiler inserts a u32 padding on the end of struct xfs_buf_log_format. Fortunately for us, remote attribute values are written to disk with xfs_bwrite(), which is to say that they are not logged. Fix the problem by removing all places where we could end up creating a buffer log item for a remote attribute value and leave a note explaining why. Next, replace the open-coded buffer invalidation with a call to the helper we created in the previous patch that does better checking for bad metadata before marking the buffer stale. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Eric Sandeen --- libxfs/libxfs_priv.h | 2 ++ libxfs/xfs_attr_remote.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 763880134..6d5e47608 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -124,6 +124,8 @@ enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC }; #define xfs_err(mp,fmt,args...) cmn_err(CE_ALERT,fmt, ## args) #define xfs_alert(mp,fmt,args...) cmn_err(CE_ALERT,fmt, ## args) +#define xfs_buf_ioerror_alert(bp,f) ((void) 0); + #define xfs_hex_dump(d,n) ((void) 0) #define xfs_stack_trace() ((void) 0) diff --git a/libxfs/xfs_attr_remote.c b/libxfs/xfs_attr_remote.c index 8a409f21b..c7a4be356 100644 --- a/libxfs/xfs_attr_remote.c +++ b/libxfs/xfs_attr_remote.c @@ -24,6 +24,23 @@ #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ +/* + * Remote Attribute Values + * ======================= + * + * Remote extended attribute values are conceptually simple -- they're written + * to data blocks mapped by an inode's attribute fork, and they have an upper + * size limit of 64k. Setting a value does not involve the XFS log. + * + * However, on a v5 filesystem, maximally sized remote attr values require one + * block more than 64k worth of space to hold both the remote attribute value + * header (64 bytes). On a 4k block filesystem this results in a 68k buffer; + * on a 64k block filesystem, this would be a 128k buffer. Note that the log + * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k). + * Therefore, we /must/ ensure that remote attribute value buffers never touch + * the logging system and therefore never have a log item. + */ + /* * Each contiguous block has a header, so it is not just a simple attribute * length to FSB conversion. @@ -400,17 +417,25 @@ xfs_attr_rmtval_get( (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_trans_read_buf(mp, args->trans, - mp->m_ddev_targp, - dblkno, dblkcnt, 0, &bp, - &xfs_attr3_rmt_buf_ops); - if (error) + bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, + &xfs_attr3_rmt_buf_ops); + if (!bp) + return -ENOMEM; + error = bp->b_error; + if (error) { + xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; + } error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, &dst); - xfs_trans_brelse(args->trans, bp); + xfs_buf_relse(bp); if (error) return error; -- 2.47.3