From: Greg Kroah-Hartman Date: Mon, 5 Jun 2017 14:07:58 +0000 (+0200) Subject: 4.11-stable patches X-Git-Tag: v3.18.56~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4351ceb28fd7b027a82af3c166c5f7d458fd4c45;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: xfs-fix-missed-holes-in-seek_hole-implementation.patch xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch --- diff --git a/queue-4.11/series b/queue-4.11/series index b0ecc875a0d..afca4905ead 100644 --- a/queue-4.11/series +++ b/queue-4.11/series @@ -90,3 +90,7 @@ x86-boot-use-cross_compile-prefix-for-readelf.patch ksm-prevent-crash-after-write_protect_page-fails.patch slub-memcg-cure-the-brainless-abuse-of-sysfs-attributes.patch drm-gma500-psb-actually-use-vbt-mode-when-it-is-found.patch +xfs-fix-missed-holes-in-seek_hole-implementation.patch +xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch +xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch +xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch diff --git a/queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch b/queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch new file mode 100644 index 00000000000..23feea0bb0e --- /dev/null +++ b/queue-4.11/xfs-fix-missed-holes-in-seek_hole-implementation.patch @@ -0,0 +1,87 @@ +From 5375023ae1266553a7baa0845e82917d8803f48c Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 18 May 2017 16:36:22 -0700 +Subject: xfs: Fix missed holes in SEEK_HOLE implementation + +From: Jan Kara + +commit 5375023ae1266553a7baa0845e82917d8803f48c upstream. 
+ +XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as +can be seen by the following command: + +xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k" + -c "seek -h 0" file +wrote 57344/57344 bytes at offset 0 +56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec) +wrote 8192/8192 bytes at offset 131072 +8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec) +Whence Result +HOLE 139264 + +Where we can see that hole at offset 56k was just ignored by SEEK_HOLE +implementation. The bug is in xfs_find_get_desired_pgoff() which does +not properly detect the case when pages are not contiguous. + +Fix the problem by properly detecting when found page has larger offset +than expected. + +Fixes: d126d43f631f996daeee5006714fed914be32368 +Signed-off-by: Jan Kara +Reviewed-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_file.c | 29 +++++++++-------------------- + 1 file changed, 9 insertions(+), 20 deletions(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1076,17 +1076,6 @@ xfs_find_get_desired_pgoff( + break; + } + +- /* +- * At lease we found one page. If this is the first time we +- * step into the loop, and if the first page index offset is +- * greater than the given search offset, a hole was found. +- */ +- if (type == HOLE_OFF && lastoff == startoff && +- lastoff < page_offset(pvec.pages[0])) { +- found = true; +- break; +- } +- + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + loff_t b_offset; +@@ -1098,18 +1087,18 @@ xfs_find_get_desired_pgoff( + * file mapping. However, page->index will not change + * because we have a reference on the page. + * +- * Searching done if the page index is out of range. +- * If the current offset is not reaches the end of +- * the specified search range, there should be a hole +- * between them. 
++ * If current page offset is beyond where we've ended, ++ * we've found a hole. + */ +- if (page->index > end) { +- if (type == HOLE_OFF && lastoff < endoff) { +- *offset = lastoff; +- found = true; +- } ++ if (type == HOLE_OFF && lastoff < endoff && ++ lastoff < page_offset(pvec.pages[i])) { ++ found = true; ++ *offset = lastoff; + goto out; + } ++ /* Searching done if the page index is out of range. */ ++ if (page->index > end) ++ goto out; + + lock_page(page); + /* diff --git a/queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch b/queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch new file mode 100644 index 00000000000..d6967110df6 --- /dev/null +++ b/queue-4.11/xfs-fix-off-by-one-on-max-nr_pages-in-xfs_find_get_desired_pgoff.patch @@ -0,0 +1,54 @@ +From 8affebe16d79ebefb1d9d6d56a46dc89716f9453 Mon Sep 17 00:00:00 2001 +From: Eryu Guan +Date: Tue, 23 May 2017 08:30:46 -0700 +Subject: xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff() + +From: Eryu Guan + +commit 8affebe16d79ebefb1d9d6d56a46dc89716f9453 upstream. + +xfs_find_get_desired_pgoff() is used to search for offset of hole or +data in page range [index, end] (both inclusive), and the max number +of pages to search should be at least one, if end == index. +Otherwise the only page is missed and no hole or data is found, +which is not correct. + +When block size is smaller than page size, this can be demonstrated +by preallocating a file with size smaller than page size and writing +data to the last block. E.g. run this xfs_io command on a 1k block +size XFS on x86_64 host. + + # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \ + -c "seek -d 0" /mnt/xfs/testfile + wrote 1024/1024 bytes at offset 2048 + 1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec) + Whence Result + DATA EOF + +Data at offset 2k was missed, and lseek(2) returned ENXIO. 
+ +This is uncovered by generic/285 subtest 07 and 08 on ppc64 host, +where pagesize is 64k. Because a recent change to generic/285 +reduced the preallocated file size to smaller than 64k. + +Signed-off-by: Eryu Guan +Reviewed-by: Jan Kara +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1049,7 +1049,7 @@ xfs_find_get_desired_pgoff( + unsigned nr_pages; + unsigned int i; + +- want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); ++ want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; + nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, + want); + /* diff --git a/queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch b/queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch new file mode 100644 index 00000000000..a7eb287c581 --- /dev/null +++ b/queue-4.11/xfs-use-b_state-to-fix-buffer-i-o-accounting-release-race.patch @@ -0,0 +1,155 @@ +From 63db7c815bc0997c29e484d2409684fdd9fcd93b Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Wed, 31 May 2017 08:22:52 -0700 +Subject: xfs: use ->b_state to fix buffer I/O accounting release race + +From: Brian Foster + +commit 63db7c815bc0997c29e484d2409684fdd9fcd93b upstream. + +We've had user reports of unmount hangs in xfs_wait_buftarg() that +analysis shows is due to btp->bt_io_count == -1. bt_io_count +represents the count of in-flight asynchronous buffers and thus +should always be >= 0. xfs_wait_buftarg() waits for this value to +stabilize to zero in order to ensure that all untracked (with +respect to the lru) buffers have completed I/O processing before +unmount proceeds to tear down in-core data structures. + +The value of -1 implies an I/O accounting decrement race. 
Indeed, +the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele() +(where the buffer lock is no longer held) means that bp->b_flags can +be updated from an unsafe context. While a user-level reproducer is +currently not available, some intrusive hacks to run racing buffer +lookups/ioacct/releases from multiple threads were used to +successfully manufacture this problem. + +Existing callers do not expect to acquire the buffer lock from +xfs_buf_rele(). Therefore, we can not safely update ->b_flags from +this context. It turns out that we already have separate buffer +state bits and associated serialization for dealing with buffer LRU +state in the form of ->b_state and ->b_lock. Therefore, replace the +_XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O +accounting wrappers appropriately and make sure they are used with +the correct locking. This ensures that buffer in-flight state can be +modified at buffer release time without racing with modifications +from a buffer lock holder. + +Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount") +Signed-off-by: Brian Foster +Reviewed-by: Nikolay Borisov +Tested-by: Libor Pechacek +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J.
Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_buf.c | 38 ++++++++++++++++++++++++++------------ + fs/xfs/xfs_buf.h | 5 ++--- + 2 files changed, 28 insertions(+), 15 deletions(-) + +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -97,12 +97,16 @@ static inline void + xfs_buf_ioacct_inc( + struct xfs_buf *bp) + { +- if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) ++ if (bp->b_flags & XBF_NO_IOACCT) + return; + + ASSERT(bp->b_flags & XBF_ASYNC); +- bp->b_flags |= _XBF_IN_FLIGHT; +- percpu_counter_inc(&bp->b_target->bt_io_count); ++ spin_lock(&bp->b_lock); ++ if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { ++ bp->b_state |= XFS_BSTATE_IN_FLIGHT; ++ percpu_counter_inc(&bp->b_target->bt_io_count); ++ } ++ spin_unlock(&bp->b_lock); + } + + /* +@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc( + * freed and unaccount from the buftarg. + */ + static inline void +-xfs_buf_ioacct_dec( ++__xfs_buf_ioacct_dec( + struct xfs_buf *bp) + { +- if (!(bp->b_flags & _XBF_IN_FLIGHT)) +- return; ++ ASSERT(spin_is_locked(&bp->b_lock)); + +- bp->b_flags &= ~_XBF_IN_FLIGHT; +- percpu_counter_dec(&bp->b_target->bt_io_count); ++ if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { ++ bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; ++ percpu_counter_dec(&bp->b_target->bt_io_count); ++ } ++} ++ ++static inline void ++xfs_buf_ioacct_dec( ++ struct xfs_buf *bp) ++{ ++ spin_lock(&bp->b_lock); ++ __xfs_buf_ioacct_dec(bp); ++ spin_unlock(&bp->b_lock); + } + + /* +@@ -149,9 +163,9 @@ xfs_buf_stale( + * unaccounted (released to LRU) before that occurs. Drop in-flight + * status now to preserve accounting consistency. + */ +- xfs_buf_ioacct_dec(bp); +- + spin_lock(&bp->b_lock); ++ __xfs_buf_ioacct_dec(bp); ++ + atomic_set(&bp->b_lru_ref, 0); + if (!(bp->b_state & XFS_BSTATE_DISPOSE) && + (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) +@@ -979,12 +993,12 @@ xfs_buf_rele( + * ensures the decrement occurs only once per-buf. 
+ */ + if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) +- xfs_buf_ioacct_dec(bp); ++ __xfs_buf_ioacct_dec(bp); + goto out_unlock; + } + + /* the last reference has been dropped ... */ +- xfs_buf_ioacct_dec(bp); ++ __xfs_buf_ioacct_dec(bp); + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + /* + * If the buffer is added to the LRU take a new reference to the +--- a/fs/xfs/xfs_buf.h ++++ b/fs/xfs/xfs_buf.h +@@ -63,7 +63,6 @@ typedef enum { + #define _XBF_KMEM (1 << 21)/* backed by heap memory */ + #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ + #define _XBF_COMPOUND (1 << 23)/* compound buffer */ +-#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ + + typedef unsigned int xfs_buf_flags_t; + +@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; + { _XBF_PAGES, "PAGES" }, \ + { _XBF_KMEM, "KMEM" }, \ + { _XBF_DELWRI_Q, "DELWRI_Q" }, \ +- { _XBF_COMPOUND, "COMPOUND" }, \ +- { _XBF_IN_FLIGHT, "IN_FLIGHT" } ++ { _XBF_COMPOUND, "COMPOUND" } + + + /* + * Internal state flags. + */ + #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ ++#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ + + /* + * The xfs_buftarg contains 2 notions of "sector size" - diff --git a/queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch b/queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch new file mode 100644 index 00000000000..fa1cf413e1c --- /dev/null +++ b/queue-4.11/xfs-use-dedicated-log-worker-wq-to-avoid-deadlock-with-cil-wq.patch @@ -0,0 +1,102 @@ +From 696a562072e3c14bcd13ae5acc19cdf27679e865 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Tue, 28 Mar 2017 14:51:44 -0700 +Subject: xfs: use dedicated log worker wq to avoid deadlock with cil wq + +From: Brian Foster + +commit 696a562072e3c14bcd13ae5acc19cdf27679e865 upstream. + +The log covering background task used to be part of the xfssyncd +workqueue. 
That workqueue was removed as of commit 5889608df ("xfs: +syncd workqueue is no more") and the associated work item scheduled +to the xfs-log wq. The latter is used for log buffer I/O completion. + +Since xfs_log_worker() can invoke a log flush, a deadlock is +possible between the xfs-log and xfs-cil workqueues. Consider the +following codepath from xfs_log_worker(): + +xfs_log_worker() + xfs_log_force() + _xfs_log_force() + xlog_cil_force() + xlog_cil_force_lsn() + xlog_cil_push_now() + flush_work() + +The above is in xfs-log wq context and blocked waiting on the +completion of an xfs-cil work item. Concurrently, the cil push in +progress can end up blocked here: + +xlog_cil_push_work() + xlog_cil_push() + xlog_write() + xlog_state_get_iclog_space() + xlog_wait(&log->l_flush_wait, ...) + +The above is in xfs-cil context waiting on log buffer I/O +completion, which executes in xfs-log wq context. In this scenario +both workqueues are deadlocked waiting on each other. + +Add a new workqueue specifically for the high level log covering and +ail pushing worker, as was the case prior to commit 5889608df. + +Diagnosed-by: David Jeffery +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J.
Wong +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_log.c | 2 +- + fs/xfs/xfs_mount.h | 1 + + fs/xfs/xfs_super.c | 8 ++++++++ + 3 files changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1293,7 +1293,7 @@ void + xfs_log_work_queue( + struct xfs_mount *mp) + { +- queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, ++ queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); + } + +--- a/fs/xfs/xfs_mount.h ++++ b/fs/xfs/xfs_mount.h +@@ -183,6 +183,7 @@ typedef struct xfs_mount { + struct workqueue_struct *m_reclaim_workqueue; + struct workqueue_struct *m_log_workqueue; + struct workqueue_struct *m_eofblocks_workqueue; ++ struct workqueue_struct *m_sync_workqueue; + + /* + * Generation of the filesysyem layout. This is incremented by each +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -877,8 +877,15 @@ xfs_init_mount_workqueues( + if (!mp->m_eofblocks_workqueue) + goto out_destroy_log; + ++ mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, ++ mp->m_fsname); ++ if (!mp->m_sync_workqueue) ++ goto out_destroy_eofb; ++ + return 0; + ++out_destroy_eofb: ++ destroy_workqueue(mp->m_eofblocks_workqueue); + out_destroy_log: + destroy_workqueue(mp->m_log_workqueue); + out_destroy_reclaim: +@@ -899,6 +906,7 @@ STATIC void + xfs_destroy_mount_workqueues( + struct xfs_mount *mp) + { ++ destroy_workqueue(mp->m_sync_workqueue); + destroy_workqueue(mp->m_eofblocks_workqueue); + destroy_workqueue(mp->m_log_workqueue); + destroy_workqueue(mp->m_reclaim_workqueue);