From d4e8eb2e40ed705d27877c04333405c64aae3201 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 13 Jun 2017 14:55:31 -0500 Subject: [PATCH] xfs: avoid mount-time deadlock in CoW extent recovery Source kernel commit: 3ecb3ac7b950ff8f6c6a61e8b7b0d6e3546429a0 If a malicious user corrupts the refcount btree to cause a cycle between different levels of the tree, the next mount attempt will deadlock in the CoW recovery routine while grabbing buffer locks. We can use the ability to re-grab a buffer that was previously locked to a transaction to avoid deadlocks, so do that here. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Eric Sandeen --- include/xfs_trans.h | 1 + libxfs/libxfs_api_defs.h | 1 + libxfs/trans.c | 22 ++++++++++++++++++++ libxfs/xfs_refcount.c | 43 +++++++++++++++++++++++++++++----------- 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/include/xfs_trans.h b/include/xfs_trans.h index 44deebb05..d9f9ec8e4 100644 --- a/include/xfs_trans.h +++ b/include/xfs_trans.h @@ -90,6 +90,7 @@ int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int libxfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp); +int xfs_trans_alloc_empty(struct xfs_mount *mp, struct xfs_trans **tpp); int libxfs_trans_commit(struct xfs_trans *); void libxfs_trans_cancel(struct xfs_trans *); struct xfs_buf *libxfs_trans_getsb(struct xfs_trans *, struct xfs_mount *, int); diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index 31239caad..34716c49c 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -32,6 +32,7 @@ #define xfs_fs_cmn_err libxfs_fs_cmn_err #define xfs_trans_alloc libxfs_trans_alloc +#define xfs_trans_alloc_empty libxfs_trans_alloc_empty #define xfs_trans_add_item libxfs_trans_add_item #define xfs_trans_bhold libxfs_trans_bhold #define xfs_trans_binval libxfs_trans_binval diff --git a/libxfs/trans.c b/libxfs/trans.c 
index ea60d032a..229fe5615 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -193,6 +193,28 @@ libxfs_trans_alloc( return 0; } +/* + * Create an empty transaction with no reservation. This is a defensive + * mechanism for routines that query metadata without actually modifying + * them -- if the metadata being queried is somehow cross-linked (think a + * btree block pointer that points higher in the tree), we risk deadlock. + * However, blocks grabbed as part of a transaction can be re-grabbed. + * The verifiers will notice the corrupt block and the operation will fail + * back to userspace without deadlocking. + * + * Note the zero-length reservation; this transaction MUST be cancelled + * without any dirty data. + */ +int +xfs_trans_alloc_empty( + struct xfs_mount *mp, + struct xfs_trans **tpp) +{ + struct xfs_trans_res resv = {0}; + + return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); +} + void libxfs_trans_cancel( xfs_trans_t *tp) diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c index 0508ec345..713cfc934 100644 --- a/libxfs/xfs_refcount.c +++ b/libxfs/xfs_refcount.c @@ -1628,13 +1628,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. 
+ */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1644,10 +1659,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1675,8 +1691,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1687,11 +1711,6 @@ out_free: out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } -- 2.47.2