git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.11-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 5 Jun 2017 14:07:58 +0000 (16:07 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 5 Jun 2017 14:07:58 +0000 (16:07 +0200)
added patches:
xfs-fix-missed-holes-in-seek_hole-implementation.patch
xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch

queue-4.11/series
queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch [new file with mode: 0644]
queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch [new file with mode: 0644]
queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch [new file with mode: 0644]
queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch [new file with mode: 0644]

index b0ecc875a0de84a6b9495482cf3cbfa2c91eb4e2..afca4905eadfdf35c448913e09dcdf947aeda296 100644 (file)
@@ -90,3 +90,7 @@ x86-boot-use-cross_compile-prefix-for-readelf.patch
 ksm-prevent-crash-after-write_protect_page-fails.patch
 slub-memcg-cure-the-brainless-abuse-of-sysfs-attributes.patch
 drm-gma500-psb-actually-use-vbt-mode-when-it-is-found.patch
+xfs-fix-missed-holes-in-seek_hole-implementation.patch
+xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
+xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
+xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
diff --git a/queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch b/queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch
new file mode 100644 (file)
index 0000000..23feea0
--- /dev/null
@@ -0,0 +1,87 @@
+From 5375023ae1266553a7baa0845e82917d8803f48c Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 18 May 2017 16:36:22 -0700
+Subject: xfs: Fix missed holes in SEEK_HOLE implementation
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5375023ae1266553a7baa0845e82917d8803f48c upstream.
+
+XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as
+can be seen by the following command:
+
+xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k"
+       -c "seek -h 0" file
+wrote 57344/57344 bytes at offset 0
+56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec)
+wrote 8192/8192 bytes at offset 131072
+8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec)
+Whence Result
+HOLE   139264
+
+Here we can see that the hole at offset 56k was simply ignored by the
+SEEK_HOLE implementation. The bug is in xfs_find_get_desired_pgoff(),
+which does not properly detect the case when pages are not contiguous.
+
+Fix the problem by properly detecting when the found page has a larger
+offset than expected.
+
+Fixes: d126d43f631f996daeee5006714fed914be32368
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |   29 +++++++++--------------------
+ 1 file changed, 9 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1076,17 +1076,6 @@ xfs_find_get_desired_pgoff(
+                       break;
+               }
+-              /*
+-               * At lease we found one page.  If this is the first time we
+-               * step into the loop, and if the first page index offset is
+-               * greater than the given search offset, a hole was found.
+-               */
+-              if (type == HOLE_OFF && lastoff == startoff &&
+-                  lastoff < page_offset(pvec.pages[0])) {
+-                      found = true;
+-                      break;
+-              }
+-
+               for (i = 0; i < nr_pages; i++) {
+                       struct page     *page = pvec.pages[i];
+                       loff_t          b_offset;
+@@ -1098,18 +1087,18 @@ xfs_find_get_desired_pgoff(
+                        * file mapping. However, page->index will not change
+                        * because we have a reference on the page.
+                        *
+-                       * Searching done if the page index is out of range.
+-                       * If the current offset is not reaches the end of
+-                       * the specified search range, there should be a hole
+-                       * between them.
++                       * If current page offset is beyond where we've ended,
++                       * we've found a hole.
+                        */
+-                      if (page->index > end) {
+-                              if (type == HOLE_OFF && lastoff < endoff) {
+-                                      *offset = lastoff;
+-                                      found = true;
+-                              }
++                      if (type == HOLE_OFF && lastoff < endoff &&
++                          lastoff < page_offset(pvec.pages[i])) {
++                              found = true;
++                              *offset = lastoff;
+                               goto out;
+                       }
++                      /* Searching done if the page index is out of range. */
++                      if (page->index > end)
++                              goto out;
+                       lock_page(page);
+                       /*
diff --git a/queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch b/queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch
new file mode 100644 (file)
index 0000000..d696711
--- /dev/null
@@ -0,0 +1,54 @@
+From 8affebe16d79ebefb1d9d6d56a46dc89716f9453 Mon Sep 17 00:00:00 2001
+From: Eryu Guan <eguan@redhat.com>
+Date: Tue, 23 May 2017 08:30:46 -0700
+Subject: xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff()
+
+From: Eryu Guan <eguan@redhat.com>
+
+commit 8affebe16d79ebefb1d9d6d56a46dc89716f9453 upstream.
+
+xfs_find_get_desired_pgoff() is used to search for offset of hole or
+data in page range [index, end] (both inclusive), and the max number
+of pages to search should be at least one, if end == index.
+Otherwise the only page is missed and no hole or data is found,
+which is not correct.
+
+When block size is smaller than page size, this can be demonstrated
+by preallocating a file with size smaller than page size and writing
+data to the last block. E.g. run this xfs_io command on a 1k block
+size XFS on x86_64 host.
+
+  # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \
+           -c "seek -d 0" /mnt/xfs/testfile
+  wrote 1024/1024 bytes at offset 2048
+  1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec)
+  Whence  Result
+  DATA    EOF
+
+Data at offset 2k was missed, and lseek(2) returned ENXIO.
+
+This was uncovered by generic/285 subtests 07 and 08 on a ppc64 host,
+where the page size is 64k, because a recent change to generic/285
+reduced the preallocated file size to smaller than 64k.
+
+Signed-off-by: Eryu Guan <eguan@redhat.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1049,7 +1049,7 @@ xfs_find_get_desired_pgoff(
+               unsigned        nr_pages;
+               unsigned int    i;
+-              want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
++              want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
+               nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+                                         want);
+               /*
diff --git a/queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch b/queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch
new file mode 100644 (file)
index 0000000..a7eb287
--- /dev/null
@@ -0,0 +1,155 @@
+From 63db7c815bc0997c29e484d2409684fdd9fcd93b Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Wed, 31 May 2017 08:22:52 -0700
+Subject: xfs: use ->b_state to fix buffer I/O accounting release race
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 63db7c815bc0997c29e484d2409684fdd9fcd93b upstream.
+
+We've had user reports of unmount hangs in xfs_wait_buftarg() that
+analysis shows are due to btp->bt_io_count == -1. bt_io_count
+represents the count of in-flight asynchronous buffers and thus
+should always be >= 0. xfs_wait_buftarg() waits for this value to
+stabilize to zero in order to ensure that all untracked (with
+respect to the lru) buffers have completed I/O processing before
+unmount proceeds to tear down in-core data structures.
+
+The value of -1 implies an I/O accounting decrement race. Indeed,
+the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele()
+(where the buffer lock is no longer held) means that bp->b_flags can
+be updated from an unsafe context. While a user-level reproducer is
+currently not available, some intrusive hacks to run racing buffer
+lookups/ioacct/releases from multiple threads were used to
+successfully manufacture this problem.
+
+Existing callers do not expect to acquire the buffer lock from
+xfs_buf_rele(). Therefore, we can not safely update ->b_flags from
+this context. It turns out that we already have separate buffer
+state bits and associated serialization for dealing with buffer LRU
+state in the form of ->b_state and ->b_lock. Therefore, replace the
+_XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O
+accounting wrappers appropriately and make sure they are used with
+the correct locking. This ensures that buffer in-flight state can be
+modified at buffer release time without racing with modifications
+from a buffer lock holder.
+
+Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Tested-by: Libor Pechacek <lpechacek@suse.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_buf.c |   38 ++++++++++++++++++++++++++------------
+ fs/xfs/xfs_buf.h |    5 ++---
+ 2 files changed, 28 insertions(+), 15 deletions(-)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -97,12 +97,16 @@ static inline void
+ xfs_buf_ioacct_inc(
+       struct xfs_buf  *bp)
+ {
+-      if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
++      if (bp->b_flags & XBF_NO_IOACCT)
+               return;
+       ASSERT(bp->b_flags & XBF_ASYNC);
+-      bp->b_flags |= _XBF_IN_FLIGHT;
+-      percpu_counter_inc(&bp->b_target->bt_io_count);
++      spin_lock(&bp->b_lock);
++      if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
++              bp->b_state |= XFS_BSTATE_IN_FLIGHT;
++              percpu_counter_inc(&bp->b_target->bt_io_count);
++      }
++      spin_unlock(&bp->b_lock);
+ }
+ /*
+@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc(
+  * freed and unaccount from the buftarg.
+  */
+ static inline void
+-xfs_buf_ioacct_dec(
++__xfs_buf_ioacct_dec(
+       struct xfs_buf  *bp)
+ {
+-      if (!(bp->b_flags & _XBF_IN_FLIGHT))
+-              return;
++      ASSERT(spin_is_locked(&bp->b_lock));
+-      bp->b_flags &= ~_XBF_IN_FLIGHT;
+-      percpu_counter_dec(&bp->b_target->bt_io_count);
++      if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
++              bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
++              percpu_counter_dec(&bp->b_target->bt_io_count);
++      }
++}
++
++static inline void
++xfs_buf_ioacct_dec(
++      struct xfs_buf  *bp)
++{
++      spin_lock(&bp->b_lock);
++      __xfs_buf_ioacct_dec(bp);
++      spin_unlock(&bp->b_lock);
+ }
+ /*
+@@ -149,9 +163,9 @@ xfs_buf_stale(
+        * unaccounted (released to LRU) before that occurs. Drop in-flight
+        * status now to preserve accounting consistency.
+        */
+-      xfs_buf_ioacct_dec(bp);
+-
+       spin_lock(&bp->b_lock);
++      __xfs_buf_ioacct_dec(bp);
++
+       atomic_set(&bp->b_lru_ref, 0);
+       if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
+           (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
+@@ -979,12 +993,12 @@ xfs_buf_rele(
+                * ensures the decrement occurs only once per-buf.
+                */
+               if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
+-                      xfs_buf_ioacct_dec(bp);
++                      __xfs_buf_ioacct_dec(bp);
+               goto out_unlock;
+       }
+       /* the last reference has been dropped ... */
+-      xfs_buf_ioacct_dec(bp);
++      __xfs_buf_ioacct_dec(bp);
+       if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+               /*
+                * If the buffer is added to the LRU take a new reference to the
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -63,7 +63,6 @@ typedef enum {
+ #define _XBF_KMEM      (1 << 21)/* backed by heap memory */
+ #define _XBF_DELWRI_Q  (1 << 22)/* buffer on a delwri queue */
+ #define _XBF_COMPOUND  (1 << 23)/* compound buffer */
+-#define _XBF_IN_FLIGHT         (1 << 25) /* I/O in flight, for accounting purposes */
+ typedef unsigned int xfs_buf_flags_t;
+@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
+       { _XBF_PAGES,           "PAGES" }, \
+       { _XBF_KMEM,            "KMEM" }, \
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
+-      { _XBF_COMPOUND,        "COMPOUND" }, \
+-      { _XBF_IN_FLIGHT,       "IN_FLIGHT" }
++      { _XBF_COMPOUND,        "COMPOUND" }
+ /*
+  * Internal state flags.
+  */
+ #define XFS_BSTATE_DISPOSE     (1 << 0)       /* buffer being discarded */
++#define XFS_BSTATE_IN_FLIGHT   (1 << 1)       /* I/O in flight */
+ /*
+  * The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch b/queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch
new file mode 100644 (file)
index 0000000..fa1cf41
--- /dev/null
@@ -0,0 +1,102 @@
+From 696a562072e3c14bcd13ae5acc19cdf27679e865 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Tue, 28 Mar 2017 14:51:44 -0700
+Subject: xfs: use dedicated log worker wq to avoid deadlock with cil wq
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 696a562072e3c14bcd13ae5acc19cdf27679e865 upstream.
+
+The log covering background task used to be part of the xfssyncd
+workqueue. That workqueue was removed as of commit 5889608df ("xfs:
+syncd workqueue is no more") and the associated work item scheduled
+to the xfs-log wq. The latter is used for log buffer I/O completion.
+
+Since xfs_log_worker() can invoke a log flush, a deadlock is
+possible between the xfs-log and xfs-cil workqueues. Consider the
+following codepath from xfs_log_worker():
+
+xfs_log_worker()
+  xfs_log_force()
+    _xfs_log_force()
+      xlog_cil_force()
+        xlog_cil_force_lsn()
+          xlog_cil_push_now()
+            flush_work()
+
+The above is in xfs-log wq context and blocked waiting on the
+completion of an xfs-cil work item. Concurrently, the cil push in
+progress can end up blocked here:
+
+xlog_cil_push_work()
+  xlog_cil_push()
+    xlog_write()
+      xlog_state_get_iclog_space()
+        xlog_wait(&log->l_flush_wait, ...)
+
+The above is in xfs-cil context waiting on log buffer I/O
+completion, which executes in xfs-log wq context. In this scenario
+both workqueues are deadlocked waiting on each other.
+
+Add a new workqueue specifically for the high level log covering and
+ail pushing worker, as was the case prior to commit 5889608df.
+
+Diagnosed-by: David Jeffery <djeffery@redhat.com>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_log.c   |    2 +-
+ fs/xfs/xfs_mount.h |    1 +
+ fs/xfs/xfs_super.c |    8 ++++++++
+ 3 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1293,7 +1293,7 @@ void
+ xfs_log_work_queue(
+       struct xfs_mount        *mp)
+ {
+-      queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work,
++      queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
+                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
+ }
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -183,6 +183,7 @@ typedef struct xfs_mount {
+       struct workqueue_struct *m_reclaim_workqueue;
+       struct workqueue_struct *m_log_workqueue;
+       struct workqueue_struct *m_eofblocks_workqueue;
++      struct workqueue_struct *m_sync_workqueue;
+       /*
+        * Generation of the filesysyem layout.  This is incremented by each
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -877,8 +877,15 @@ xfs_init_mount_workqueues(
+       if (!mp->m_eofblocks_workqueue)
+               goto out_destroy_log;
++      mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
++                                             mp->m_fsname);
++      if (!mp->m_sync_workqueue)
++              goto out_destroy_eofb;
++
+       return 0;
++out_destroy_eofb:
++      destroy_workqueue(mp->m_eofblocks_workqueue);
+ out_destroy_log:
+       destroy_workqueue(mp->m_log_workqueue);
+ out_destroy_reclaim:
+@@ -899,6 +906,7 @@ STATIC void
+ xfs_destroy_mount_workqueues(
+       struct xfs_mount        *mp)
+ {
++      destroy_workqueue(mp->m_sync_workqueue);
+       destroy_workqueue(mp->m_eofblocks_workqueue);
+       destroy_workqueue(mp->m_log_workqueue);
+       destroy_workqueue(mp->m_reclaim_workqueue);