git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 24 Apr 2019 16:51:24 +0000 (18:51 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 24 Apr 2019 16:51:24 +0000 (18:51 +0200)
added patches:
iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch
xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch
xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch

queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch [new file with mode: 0644]
queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch [new file with mode: 0644]

diff --git a/queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch b/queue-4.14/iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch
new file mode 100644 (file)
index 0000000..deafe31
--- /dev/null
@@ -0,0 +1,113 @@
+From 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Mon, 8 Jan 2018 10:41:39 -0800
+Subject: iomap: report collisions between directio and buffered writes to userspace
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea upstream.
+
+If two programs simultaneously try to write to the same part of a file
+via direct IO and buffered IO, there's a chance that the post-diowrite
+pagecache invalidation will fail on the dirty page.  When this happens,
+the dio write succeeded, which means that the page cache is no longer
+coherent with the disk!
+
+Programs are not supposed to mix IO types and this is a clear case of
+data corruption, so store an EIO which will be reflected to userspace
+during the next fsync.  Replace the WARN_ON with a ratelimited pr_crit
+so that the developers have /some/ kind of breadcrumb to track down the
+offending program(s) and file(s) involved.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Zubin Mithra <zsm@chromium.org>
+
+
+---
+ fs/direct-io.c     |   24 +++++++++++++++++++++++-
+ fs/iomap.c         |   12 ++++++++++--
+ include/linux/fs.h |    1 +
+ 3 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/fs/direct-io.c
++++ b/fs/direct-io.c
+@@ -219,6 +219,27 @@ static inline struct page *dio_get_page(
+       return dio->pages[sdio->head];
+ }
++/*
++ * Warn about a page cache invalidation failure during a direct io write.
++ */
++void dio_warn_stale_pagecache(struct file *filp)
++{
++      static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
++      char pathname[128];
++      struct inode *inode = file_inode(filp);
++      char *path;
++
++      errseq_set(&inode->i_mapping->wb_err, -EIO);
++      if (__ratelimit(&_rs)) {
++              path = file_path(filp, pathname, sizeof(pathname));
++              if (IS_ERR(path))
++                      path = "(unknown)";
++              pr_crit("Page cache invalidation failure on direct I/O.  Possible data corruption due to collision with buffered I/O!\n");
++              pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
++                      current->comm);
++      }
++}
++
+ /**
+  * dio_complete() - called when all DIO BIO I/O has been completed
+  * @offset: the byte offset in the file of the completed operation
+@@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *
+               err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+                                       offset >> PAGE_SHIFT,
+                                       (offset + ret - 1) >> PAGE_SHIFT);
+-              WARN_ON_ONCE(err);
++              if (err)
++                      dio_warn_stale_pagecache(dio->iocb->ki_filp);
+       }
+       if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+--- a/fs/iomap.c
++++ b/fs/iomap.c
+@@ -753,7 +753,8 @@ static ssize_t iomap_dio_complete(struct
+               err = invalidate_inode_pages2_range(inode->i_mapping,
+                               offset >> PAGE_SHIFT,
+                               (offset + dio->size - 1) >> PAGE_SHIFT);
+-              WARN_ON_ONCE(err);
++              if (err)
++                      dio_warn_stale_pagecache(iocb->ki_filp);
+       }
+       inode_dio_end(file_inode(iocb->ki_filp));
+@@ -1010,9 +1011,16 @@ iomap_dio_rw(struct kiocb *iocb, struct
+       if (ret)
+               goto out_free_dio;
++      /*
++       * Try to invalidate cache pages for the range we're direct
++       * writing.  If this invalidation fails, tough, the write will
++       * still work, but racing two incompatible write paths is a
++       * pretty crazy thing to do, so we don't support it 100%.
++       */
+       ret = invalidate_inode_pages2_range(mapping,
+                       start >> PAGE_SHIFT, end >> PAGE_SHIFT);
+-      WARN_ON_ONCE(ret);
++      if (ret)
++              dio_warn_stale_pagecache(iocb->ki_filp);
+       ret = 0;
+       if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2965,6 +2965,7 @@ enum {
+ };
+ void dio_end_io(struct bio *bio);
++void dio_warn_stale_pagecache(struct file *filp);
+ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+                            struct block_device *bdev, struct iov_iter *iter,
index ffeee61ff08bce552ccc5da56d93337c8254707a..ab45d91faf3f3e7434c528b2947c1ba5d81bee44 100644 (file)
@@ -64,3 +64,6 @@ mm-vmstat.c-fix-proc-vmstat-format-for-config_debug_tlbflush-y-config_smp-n.patc
 alsa-info-fix-racy-addition-deletion-of-nodes.patch
 percpu-stop-printing-kernel-addresses.patch
 tools-include-adopt-linux-bits.h.patch
+iomap-report-collisions-between-directio-and-buffered-writes-to-userspace.patch
+xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch
+xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch
diff --git a/queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch b/queue-4.14/xfs-add-the-ability-to-join-a-held-buffer-to-a-defer_ops.patch
new file mode 100644 (file)
index 0000000..9d27001
--- /dev/null
@@ -0,0 +1,123 @@
+From b7b2846fe26f2c0d7f317c874a13d3ecf22670ff Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Thu, 7 Dec 2017 19:07:02 -0800
+Subject: xfs: add the ability to join a held buffer to a defer_ops
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit b7b2846fe26f2c0d7f317c874a13d3ecf22670ff upstream.
+
+In certain cases, defer_ops callers will lock a buffer and want to hold
+the lock across transaction rolls.  Similar to ijoined inodes, we want
+to dirty & join the buffer with each transaction roll in defer_finish so
+that afterwards the caller still owns the buffer lock and we haven't
+inadvertently pinned the log.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Alex Lyakas <alex@zadara.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_defer.c |   39 ++++++++++++++++++++++++++++++++++++---
+ fs/xfs/libxfs/xfs_defer.h |    5 ++++-
+ 2 files changed, 40 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_defer.c
++++ b/fs/xfs/libxfs/xfs_defer.c
+@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
+       for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
+               xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
++      /* Hold the (previously bjoin'd) buffer locked across the roll. */
++      for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
++              xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
++
+       trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
+       /* Roll the transaction. */
+@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
+       for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
+               xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
++      /* Rejoin the buffers and dirty them so the log moves forward. */
++      for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
++              xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
++              xfs_trans_bhold(*tp, dop->dop_bufs[i]);
++      }
++
+       return error;
+ }
+@@ -295,6 +305,31 @@ xfs_defer_ijoin(
+               }
+       }
++      ASSERT(0);
++      return -EFSCORRUPTED;
++}
++
++/*
++ * Add this buffer to the deferred op.  Each joined buffer is relogged
++ * each time we roll the transaction.
++ */
++int
++xfs_defer_bjoin(
++      struct xfs_defer_ops            *dop,
++      struct xfs_buf                  *bp)
++{
++      int                             i;
++
++      for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
++              if (dop->dop_bufs[i] == bp)
++                      return 0;
++              else if (dop->dop_bufs[i] == NULL) {
++                      dop->dop_bufs[i] = bp;
++                      return 0;
++              }
++      }
++
++      ASSERT(0);
+       return -EFSCORRUPTED;
+ }
+@@ -493,9 +528,7 @@ xfs_defer_init(
+       struct xfs_defer_ops            *dop,
+       xfs_fsblock_t                   *fbp)
+ {
+-      dop->dop_committed = false;
+-      dop->dop_low = false;
+-      memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
++      memset(dop, 0, sizeof(struct xfs_defer_ops));
+       *fbp = NULLFSBLOCK;
+       INIT_LIST_HEAD(&dop->dop_intake);
+       INIT_LIST_HEAD(&dop->dop_pending);
+--- a/fs/xfs/libxfs/xfs_defer.h
++++ b/fs/xfs/libxfs/xfs_defer.h
+@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
+ };
+ #define XFS_DEFER_OPS_NR_INODES       2       /* join up to two inodes */
++#define XFS_DEFER_OPS_NR_BUFS 2       /* join up to two buffers */
+ struct xfs_defer_ops {
+       bool                    dop_committed;  /* did any trans commit? */
+@@ -66,8 +67,9 @@ struct xfs_defer_ops {
+       struct list_head        dop_intake;     /* unlogged pending work */
+       struct list_head        dop_pending;    /* logged pending work */
+-      /* relog these inodes with each roll */
++      /* relog these with each roll */
+       struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
++      struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
+ };
+ void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
+@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_o
+ void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
+ bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
+ int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
++int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
+ /* Description of a deferred type. */
+ struct xfs_defer_op_type {
diff --git a/queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch b/queue-4.14/xfs-hold-xfs_buf-locked-between-shortform-leaf-conversion-and-the-addition-of-an-attribute.patch
new file mode 100644 (file)
index 0000000..dc8e22e
--- /dev/null
@@ -0,0 +1,130 @@
+From 6e643cd094de3bd0f97edcc1db0089afa24d909f Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Thu, 7 Dec 2017 19:07:02 -0800
+Subject: xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 6e643cd094de3bd0f97edcc1db0089afa24d909f upstream.
+
+The new attribute leaf buffer is not held locked across the transaction
+roll between the shortform->leaf modification and the addition of the
+new entry.  As a result, the attribute buffer modification being made is
+not atomic from an operational perspective.  Hence the AIL push can grab
+it in the transient state of "just created" after the initial
+transaction is rolled, because the buffer has been released.  This leads
+to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating
+this as in-memory corruption, and shutting down the filesystem.
+
+Darrick ported the original patch to 4.15 and reworked it to use the
+xfs_defer_bjoin helper and hold/join the buffer correctly across the
+second transaction roll.
+
+Signed-off-by: Alex Lyakas <alex@zadarastorage.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Alex Lyakas <alex@zadara.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_attr.c      |   20 +++++++++++++++-----
+ fs/xfs/libxfs/xfs_attr_leaf.c |    9 ++++++---
+ fs/xfs/libxfs/xfs_attr_leaf.h |    3 ++-
+ 3 files changed, 23 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -212,6 +212,7 @@ xfs_attr_set(
+       int                     flags)
+ {
+       struct xfs_mount        *mp = dp->i_mount;
++      struct xfs_buf          *leaf_bp = NULL;
+       struct xfs_da_args      args;
+       struct xfs_defer_ops    dfops;
+       struct xfs_trans_res    tres;
+@@ -327,9 +328,16 @@ xfs_attr_set(
+                * GROT: another possible req'mt for a double-split btree op.
+                */
+               xfs_defer_init(args.dfops, args.firstblock);
+-              error = xfs_attr_shortform_to_leaf(&args);
++              error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
+               if (error)
+                       goto out_defer_cancel;
++              /*
++               * Prevent the leaf buffer from being unlocked so that a
++               * concurrent AIL push cannot grab the half-baked leaf
++               * buffer and run into problems with the write verifier.
++               */
++              xfs_trans_bhold(args.trans, leaf_bp);
++              xfs_defer_bjoin(args.dfops, leaf_bp);
+               xfs_defer_ijoin(args.dfops, dp);
+               error = xfs_defer_finish(&args.trans, args.dfops);
+               if (error)
+@@ -337,13 +345,14 @@ xfs_attr_set(
+               /*
+                * Commit the leaf transformation.  We'll need another (linked)
+-               * transaction to add the new attribute to the leaf.
++               * transaction to add the new attribute to the leaf, which
++               * means that we have to hold & join the leaf buffer here too.
+                */
+-
+               error = xfs_trans_roll_inode(&args.trans, dp);
+               if (error)
+                       goto out;
+-
++              xfs_trans_bjoin(args.trans, leaf_bp);
++              leaf_bp = NULL;
+       }
+       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+@@ -374,8 +383,9 @@ xfs_attr_set(
+ out_defer_cancel:
+       xfs_defer_cancel(&dfops);
+-      args.trans = NULL;
+ out:
++      if (leaf_bp)
++              xfs_trans_brelse(args.trans, leaf_bp);
+       if (args.trans)
+               xfs_trans_cancel(args.trans);
+       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -739,10 +739,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_
+ }
+ /*
+- * Convert from using the shortform to the leaf.
++ * Convert from using the shortform to the leaf.  On success, return the
++ * buffer so that we can keep it locked until we're totally done with it.
+  */
+ int
+-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
++xfs_attr_shortform_to_leaf(
++      struct xfs_da_args      *args,
++      struct xfs_buf          **leaf_bp)
+ {
+       xfs_inode_t *dp;
+       xfs_attr_shortform_t *sf;
+@@ -821,7 +824,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t
+               sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+       }
+       error = 0;
+-
++      *leaf_bp = bp;
+ out:
+       kmem_free(tmpbuffer);
+       return error;
+--- a/fs/xfs/libxfs/xfs_attr_leaf.h
++++ b/fs/xfs/libxfs/xfs_attr_leaf.h
+@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xf
+ void  xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
+ int   xfs_attr_shortform_lookup(struct xfs_da_args *args);
+ int   xfs_attr_shortform_getvalue(struct xfs_da_args *args);
+-int   xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
++int   xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
++                      struct xfs_buf **leaf_bp);
+ int   xfs_attr_shortform_remove(struct xfs_da_args *args);
+ int   xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
+ int   xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);