From: Greg Kroah-Hartman Date: Wed, 24 Apr 2019 16:51:24 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v3.18.139~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1f407f5af81880366fea175da3123d1b9f4099cf;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch --- diff --git a/queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch b/queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch new file mode 100644 index 00000000000..deafe3142c2 --- /dev/null +++ b/queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch @@ -0,0 +1,113 @@ +From 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Mon, 8 Jan 2018 10:41:39 -0800 +Subject: iomap: report collisions between directio and buffered writes to userspace + +From: Darrick J. Wong + +commit 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea upstream. + +If two programs simultaneously try to write to the same part of a file +via direct IO and buffered IO, there's a chance that the post-diowrite +pagecache invalidation will fail on the dirty page. When this happens, +the dio write succeeded, which means that the page cache is no longer +coherent with the disk! + +Programs are not supposed to mix IO types and this is a clear case of +data corruption, so store an EIO which will be reflected to userspace +during the next fsync. Replace the WARN_ON with a ratelimited pr_crit +so that the developers have /some/ kind of breadcrumb to track down the +offending program(s) and file(s) involved. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Liu Bo +Signed-off-by: Greg Kroah-Hartman +Cc: Zubin Mithra + + +--- + fs/direct-io.c | 24 +++++++++++++++++++++++- + fs/iomap.c | 12 ++++++++++-- + include/linux/fs.h | 1 + + 3 files changed, 34 insertions(+), 3 deletions(-) + +--- a/fs/direct-io.c ++++ b/fs/direct-io.c +@@ -219,6 +219,27 @@ static inline struct page *dio_get_page( + return dio->pages[sdio->head]; + } + ++/* ++ * Warn about a page cache invalidation failure during a direct io write. ++ */ ++void dio_warn_stale_pagecache(struct file *filp) ++{ ++ static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); ++ char pathname[128]; ++ struct inode *inode = file_inode(filp); ++ char *path; ++ ++ errseq_set(&inode->i_mapping->wb_err, -EIO); ++ if (__ratelimit(&_rs)) { ++ path = file_path(filp, pathname, sizeof(pathname)); ++ if (IS_ERR(path)) ++ path = "(unknown)"; ++ pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); ++ pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, ++ current->comm); ++ } ++} ++ + /** + * dio_complete() - called when all DIO BIO I/O has been completed + * @offset: the byte offset in the file of the completed operation +@@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio * + err = invalidate_inode_pages2_range(dio->inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + ret - 1) >> PAGE_SHIFT); +- WARN_ON_ONCE(err); ++ if (err) ++ dio_warn_stale_pagecache(dio->iocb->ki_filp); + } + + if (!(dio->flags & DIO_SKIP_DIO_COUNT)) +--- a/fs/iomap.c ++++ b/fs/iomap.c +@@ -753,7 +753,8 @@ static ssize_t iomap_dio_complete(struct + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); +- WARN_ON_ONCE(err); ++ if (err) ++ dio_warn_stale_pagecache(iocb->ki_filp); + } + + inode_dio_end(file_inode(iocb->ki_filp)); +@@ -1010,9 +1011,16 @@ iomap_dio_rw(struct kiocb *iocb, struct + if (ret) + goto out_free_dio; + ++ 
/* ++ * Try to invalidate cache pages for the range we're direct ++ * writing. If this invalidation fails, tough, the write will ++ * still work, but racing two incompatible write paths is a ++ * pretty crazy thing to do, so we don't support it 100%. ++ */ + ret = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); +- WARN_ON_ONCE(ret); ++ if (ret) ++ dio_warn_stale_pagecache(iocb->ki_filp); + ret = 0; + + if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2965,6 +2965,7 @@ enum { + }; + + void dio_end_io(struct bio *bio); ++void dio_warn_stale_pagecache(struct file *filp); + + ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, diff --git a/queue-4.14/series b/queue-4.14/series index ffeee61ff08..ab45d91faf3 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -64,3 +64,6 @@ mm-vmstat.c-fix-proc-vmstat-format-for-config_debug_tlbflush-y-config_smp-n.patc alsa-info-fix-racy-addition-deletion-of-nodes.patch percpu-stop-printing-kernel-addresses.patch tools-include-adopt-linux-bits.h.patch +iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch +xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch +xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch diff --git a/queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch b/queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch new file mode 100644 index 00000000000..9d270019c14 --- /dev/null +++ b/queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch @@ -0,0 +1,123 @@ +From b7b2846fe26f2c0d7f317c874a13d3ecf22670ff Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Thu, 7 Dec 2017 19:07:02 -0800 +Subject: xfs: add the ability to join a held buffer to a defer_ops + +From: Darrick J. 
Wong + +commit b7b2846fe26f2c0d7f317c874a13d3ecf22670ff upstream. + +In certain cases, defer_ops callers will lock a buffer and want to hold +the lock across transaction rolls. Similar to ijoined inodes, we want +to dirty & join the buffer with each transaction roll in defer_finish so +that afterwards the caller still owns the buffer lock and we haven't +inadvertently pinned the log. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Alex Lyakas +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/libxfs/xfs_defer.c | 39 ++++++++++++++++++++++++++++++++++++--- + fs/xfs/libxfs/xfs_defer.h | 5 ++++- + 2 files changed, 40 insertions(+), 4 deletions(-) + +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -249,6 +249,10 @@ xfs_defer_trans_roll( + for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) + xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + ++ /* Hold the (previously bjoin'd) buffer locked across the roll. */ ++ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) ++ xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); ++ + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); + + /* Roll the transaction. */ +@@ -264,6 +268,12 @@ xfs_defer_trans_roll( + for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) + xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + ++ /* Rejoin the buffers and dirty them so the log moves forward. */ ++ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { ++ xfs_trans_bjoin(*tp, dop->dop_bufs[i]); ++ xfs_trans_bhold(*tp, dop->dop_bufs[i]); ++ } ++ + return error; + } + +@@ -295,6 +305,31 @@ xfs_defer_ijoin( + } + } + ++ ASSERT(0); ++ return -EFSCORRUPTED; ++} ++ ++/* ++ * Add this buffer to the deferred op. Each joined buffer is relogged ++ * each time we roll the transaction. 
++ */ ++int ++xfs_defer_bjoin( ++ struct xfs_defer_ops *dop, ++ struct xfs_buf *bp) ++{ ++ int i; ++ ++ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { ++ if (dop->dop_bufs[i] == bp) ++ return 0; ++ else if (dop->dop_bufs[i] == NULL) { ++ dop->dop_bufs[i] = bp; ++ return 0; ++ } ++ } ++ ++ ASSERT(0); + return -EFSCORRUPTED; + } + +@@ -493,9 +528,7 @@ xfs_defer_init( + struct xfs_defer_ops *dop, + xfs_fsblock_t *fbp) + { +- dop->dop_committed = false; +- dop->dop_low = false; +- memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); ++ memset(dop, 0, sizeof(struct xfs_defer_ops)); + *fbp = NULLFSBLOCK; + INIT_LIST_HEAD(&dop->dop_intake); + INIT_LIST_HEAD(&dop->dop_pending); +--- a/fs/xfs/libxfs/xfs_defer.h ++++ b/fs/xfs/libxfs/xfs_defer.h +@@ -59,6 +59,7 @@ enum xfs_defer_ops_type { + }; + + #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ ++#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ + + struct xfs_defer_ops { + bool dop_committed; /* did any trans commit? */ +@@ -66,8 +67,9 @@ struct xfs_defer_ops { + struct list_head dop_intake; /* unlogged pending work */ + struct list_head dop_pending; /* logged pending work */ + +- /* relog these inodes with each roll */ ++ /* relog these with each roll */ + struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; ++ struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; + }; + + void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, +@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_o + void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); + bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); + int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); ++int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); + + /* Description of a deferred type. 
*/ + struct xfs_defer_op_type { diff --git a/queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch b/queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch new file mode 100644 index 00000000000..dc8e22ecf83 --- /dev/null +++ b/queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch @@ -0,0 +1,130 @@ +From 6e643cd094de3bd0f97edcc1db0089afa24d909f Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Thu, 7 Dec 2017 19:07:02 -0800 +Subject: xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute + +From: Darrick J. Wong + +commit 6e643cd094de3bd0f97edcc1db0089afa24d909f upstream. + +The new attribute leaf buffer is not held locked across the transaction +roll between the shortform->leaf modification and the addition of the +new entry. As a result, the attribute buffer modification being made is +not atomic from an operational perspective. Hence the AIL push can grab +it in the transient state of "just created" after the initial +transaction is rolled, because the buffer has been released. This leads +to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating +this as in-memory corruption, and shutting down the filesystem. + +Darrick ported the original patch to 4.15 and reworked it to use the +xfs_defer_bjoin helper and hold/join the buffer correctly across the +second transaction roll. + +Signed-off-by: Alex Lyakas +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Alex Lyakas +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/libxfs/xfs_attr.c | 20 +++++++++++++++----- + fs/xfs/libxfs/xfs_attr_leaf.c | 9 ++++++--- + fs/xfs/libxfs/xfs_attr_leaf.h | 3 ++- + 3 files changed, 23 insertions(+), 9 deletions(-) + +--- a/fs/xfs/libxfs/xfs_attr.c ++++ b/fs/xfs/libxfs/xfs_attr.c +@@ -212,6 +212,7 @@ xfs_attr_set( + int flags) + { + struct xfs_mount *mp = dp->i_mount; ++ struct xfs_buf *leaf_bp = NULL; + struct xfs_da_args args; + struct xfs_defer_ops dfops; + struct xfs_trans_res tres; +@@ -327,9 +328,16 @@ xfs_attr_set( + * GROT: another possible req'mt for a double-split btree op. + */ + xfs_defer_init(args.dfops, args.firstblock); +- error = xfs_attr_shortform_to_leaf(&args); ++ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); + if (error) + goto out_defer_cancel; ++ /* ++ * Prevent the leaf buffer from being unlocked so that a ++ * concurrent AIL push cannot grab the half-baked leaf ++ * buffer and run into problems with the write verifier. ++ */ ++ xfs_trans_bhold(args.trans, leaf_bp); ++ xfs_defer_bjoin(args.dfops, leaf_bp); + xfs_defer_ijoin(args.dfops, dp); + error = xfs_defer_finish(&args.trans, args.dfops); + if (error) +@@ -337,13 +345,14 @@ xfs_attr_set( + + /* + * Commit the leaf transformation. We'll need another (linked) +- * transaction to add the new attribute to the leaf. ++ * transaction to add the new attribute to the leaf, which ++ * means that we have to hold & join the leaf buffer here too. 
+ */ +- + error = xfs_trans_roll_inode(&args.trans, dp); + if (error) + goto out; +- ++ xfs_trans_bjoin(args.trans, leaf_bp); ++ leaf_bp = NULL; + } + + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) +@@ -374,8 +383,9 @@ xfs_attr_set( + + out_defer_cancel: + xfs_defer_cancel(&dfops); +- args.trans = NULL; + out: ++ if (leaf_bp) ++ xfs_trans_brelse(args.trans, leaf_bp); + if (args.trans) + xfs_trans_cancel(args.trans); + xfs_iunlock(dp, XFS_ILOCK_EXCL); +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -739,10 +739,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_ + } + + /* +- * Convert from using the shortform to the leaf. ++ * Convert from using the shortform to the leaf. On success, return the ++ * buffer so that we can keep it locked until we're totally done with it. + */ + int +-xfs_attr_shortform_to_leaf(xfs_da_args_t *args) ++xfs_attr_shortform_to_leaf( ++ struct xfs_da_args *args, ++ struct xfs_buf **leaf_bp) + { + xfs_inode_t *dp; + xfs_attr_shortform_t *sf; +@@ -821,7 +824,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t + sfe = XFS_ATTR_SF_NEXTENTRY(sfe); + } + error = 0; +- ++ *leaf_bp = bp; + out: + kmem_free(tmpbuffer); + return error; +--- a/fs/xfs/libxfs/xfs_attr_leaf.h ++++ b/fs/xfs/libxfs/xfs_attr_leaf.h +@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xf + void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); + int xfs_attr_shortform_lookup(struct xfs_da_args *args); + int xfs_attr_shortform_getvalue(struct xfs_da_args *args); +-int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); ++int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, ++ struct xfs_buf **leaf_bp); + int xfs_attr_shortform_remove(struct xfs_da_args *args); + int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); + int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);