From 128ba9ce6eb8704c13520df8e6502112af63c387 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Jun 2022 14:28:52 -0500 Subject: [PATCH] xfs: share xattr name and value buffers when logging xattr updates Source kernel commit: 4183e4f27f402d712bccab30588a6fe7575963c0 While running xfs/297 and generic/642, I noticed a crash in xfs_attri_item_relog when it tries to copy the attr name to the new xattri log item. I think what happened here was that we called ->iop_commit on the old attri item (which nulls out the pointers) as part of a log force at the same time that a chained attr operation was ongoing. The system was busy enough that at some later point, the defer ops operation decided it was necessary to relog the attri log item, but as we've detached the name buffer from the old attri log item, we can't copy it to the new one, and kaboom. I think there's a broader refcounting problem with LARP mode -- the setxattr code can return to userspace before the CIL actually formats and commits the log item, which results in a UAF bug. Therefore, the xattr log item needs to be able to retain a reference to the name and value buffers until the log items have completely cleared the log. Furthermore, each time we create an intent log item, we allocate new memory and (re)copy the contents; sharing here would be very useful. Solve the UAF and the unnecessary memory allocations by having the log code create a single refcounted buffer to contain the name and value contents. This buffer can be passed from old to new during a relog operation, and the logging code can (optionally) attach it to the xfs_attr_item for reuse when LARP mode is enabled. This also fixes a problem where the xfs_attri_log_item objects weren't being freed back to the same cache where they came from. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Eric Sandeen --- libxfs/Makefile | 1 + libxfs/libxfs_priv.h | 1 + libxfs/linux-err.h | 60 ++++++++++++++++++++++++++++++++++++++++++++ libxfs/xfs_attr.h | 8 ++++++ libxfs/xfs_defer.c | 59 ++++++++++++++++++++++++++++++++++--------- 5 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 libxfs/linux-err.h diff --git a/libxfs/Makefile b/libxfs/Makefile index 3e3c4bd0c..010ee68e2 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -24,6 +24,7 @@ HFILES = \ libxfs_api_defs.h \ init.h \ libxfs_priv.h \ + linux-err.h \ topology.h \ xfs_ag_resv.h \ xfs_alloc.h \ diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 678db44c1..fcdcfebf6 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -49,6 +49,7 @@ #include "libfrog/radix-tree.h" #include "atomic.h" #include "spinlock.h" +#include "linux-err.h" #include "xfs_types.h" #include "xfs_arch.h" diff --git a/libxfs/linux-err.h b/libxfs/linux-err.h new file mode 100644 index 000000000..8344b63cf --- /dev/null +++ b/libxfs/linux-err.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ERR_H +#define _LINUX_ERR_H + +/* Adapted from include/linux/err.h */ + +/* + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a normal + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + */ +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) + +static inline void * ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline bool IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline bool IS_ERR_OR_NULL(const void *ptr) +{ + return !ptr || IS_ERR_VALUE((unsigned long)ptr); +} + +/** + * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type + * @ptr: The pointer to cast. + * + * Explicitly cast an error-valued pointer to another pointer type in such a + * way as to make it clear that's what's going on. + */ +static inline void * ERR_CAST(const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} + +static inline int PTR_ERR_OR_ZERO(const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#endif /* _LINUX_ERR_H */ diff --git a/libxfs/xfs_attr.h b/libxfs/xfs_attr.h index 3cd9cbb68..e329da3e7 100644 --- a/libxfs/xfs_attr.h +++ b/libxfs/xfs_attr.h @@ -502,6 +502,8 @@ enum xfs_delattr_state { { XFS_DAS_NODE_REMOVE_ATTR, "XFS_DAS_NODE_REMOVE_ATTR" }, \ { XFS_DAS_DONE, "XFS_DAS_DONE" } +struct xfs_attri_log_nameval; + /* * Context used for keeping track of delayed attribute operations */ @@ -517,6 +519,12 @@ struct xfs_attr_intent { struct xfs_da_args *xattri_da_args; + /* + * Shared buffer containing the attr name and value so that the logging + * code can share large memory buffers between log items. + */ + struct xfs_attri_log_nameval *xattri_nameval; + /* * Used by xfs_attr_set to hold a leaf buffer across a transaction roll */ diff --git a/libxfs/xfs_defer.c b/libxfs/xfs_defer.c index 8382e8a17..c4f0269d6 100644 --- a/libxfs/xfs_defer.c +++ b/libxfs/xfs_defer.c @@ -186,35 +186,56 @@ static const struct xfs_defer_op_type *defer_op_types[] = { [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type, }; -static bool +/* + * Ensure there's a log intent item associated with this deferred work item if + * the operation must be restarted on crash. Returns 1 if there's a log item; + * 0 if there isn't; or a negative errno. + */ +static int xfs_defer_create_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp, bool sort) { const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; + struct xfs_log_item *lip; - if (!dfp->dfp_intent) - dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, - dfp->dfp_count, sort); - return dfp->dfp_intent != NULL; + if (dfp->dfp_intent) + return 1; + + lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort); + if (!lip) + return 0; + if (IS_ERR(lip)) + return PTR_ERR(lip); + + dfp->dfp_intent = lip; + return 1; } /* * For each pending item in the intake list, log its intent item and the * associated extents, then add the entire intake list to the end of * the pending list. + * + * Returns 1 if at least one log item was associated with the deferred work; + * 0 if there are no log items; or a negative errno. */ -static bool +static int xfs_defer_create_intents( struct xfs_trans *tp) { struct xfs_defer_pending *dfp; - bool ret = false; + int ret = 0; list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { + int ret2; + trace_xfs_defer_create_intent(tp->t_mountp, dfp); - ret |= xfs_defer_create_intent(tp, dfp, true); + ret2 = xfs_defer_create_intent(tp, dfp, true); + if (ret2 < 0) + return ret2; + ret |= ret2; } return ret; } @@ -452,6 +473,8 @@ xfs_defer_finish_one( dfp->dfp_count--; error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { + int ret; + /* * Caller wants a fresh transaction; put the work item * back on the list and log a new log intent item to @@ -462,7 +485,9 @@ xfs_defer_finish_one( dfp->dfp_count++; dfp->dfp_done = NULL; dfp->dfp_intent = NULL; - xfs_defer_create_intent(tp, dfp, false); + ret = xfs_defer_create_intent(tp, dfp, false); + if (ret < 0) + error = ret; } if (error) @@ -509,10 +534,14 @@ xfs_defer_finish_noroll( * of time that any one intent item can stick around in memory, * pinning the log tail. */ - bool has_intents = xfs_defer_create_intents(*tp); + int has_intents = xfs_defer_create_intents(*tp); list_splice_init(&(*tp)->t_dfops, &dop_pending); + if (has_intents < 0) { + error = has_intents; + goto out_shutdown; + } if (has_intents || dfp) { error = xfs_defer_trans_roll(tp); if (error) @@ -671,13 +700,15 @@ xfs_defer_ops_capture( if (list_empty(&tp->t_dfops)) return NULL; + error = xfs_defer_create_intents(tp); + if (error < 0) + return ERR_PTR(error); + /* Create an object to capture the defer ops. */ dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS); INIT_LIST_HEAD(&dfc->dfc_list); INIT_LIST_HEAD(&dfc->dfc_dfops); - xfs_defer_create_intents(tp); - /* Move the dfops chain and transaction state to the capture struct. */ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops); dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE; @@ -754,6 +785,10 @@ xfs_defer_ops_capture_and_commit( /* If we don't capture anything, commit transaction and exit. */ dfc = xfs_defer_ops_capture(tp); + if (IS_ERR(dfc)) { + xfs_trans_cancel(tp); + return PTR_ERR(dfc); + } if (!dfc) return xfs_trans_commit(tp); -- 2.47.2