From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 5 Jul 2018 16:59:38 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v4.14.54~12
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1d8ba106271ebdbdce7db8a95b00a807fd9e3ef6;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	md-allow-metadata-update-while-suspending.patch
	md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
	md-don-t-call-bitmap_create-while-array-is-quiesced.patch
	md-move-suspend_hi-lo-handling-into-core-md-code.patch
	md-remove-special-meaning-of-quiesce-..-2.patch
	md-use-mddev_suspend-resume-instead-of-quiesce.patch
---

diff --git a/queue-4.14/md-allow-metadata-update-while-suspending.patch b/queue-4.14/md-allow-metadata-update-while-suspending.patch
new file mode 100644
index 00000000000..ce2141fc289
--- /dev/null
+++ b/queue-4.14/md-allow-metadata-update-while-suspending.patch
@@ -0,0 +1,86 @@
+From 35bfc52187f6df8779d0f1cebdb52b7f797baf4e Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: allow metadata update while suspending.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream.
+
+There are various deadlocks that can occur
+when a thread holds reconfig_mutex and calls
+->quiesce(mddev, 1).
+As some write requests block waiting for
+metadata to be updated (e.g. to record device
+failure), and as the md thread updates the metadata
+while the reconfig mutex is held, holding the mutex
+can stop write requests completing, and this prevents
+->quiesce(mddev, 1) from completing.
+
+->quiesce() is now usually called from mddev_suspend(),
+and it is always called with reconfig_mutex held.  So
+at this time it is safe for the thread to update metadata
+without explicitly taking the lock.
+
+So add 2 new flags, one which says the unlocked update is
+allowed, and one which says it is happening.  Then allow it
+while the quiesce completes, and then wait for it to finish.
+
+Reported-and-tested-by: Xiao Ni <xni@redhat.com>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   14 ++++++++++++++
+ drivers/md/md.h |    6 ++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev)
+ 		return;
+ 	synchronize_rcu();
+ 	wake_up(&mddev->sb_wait);
++	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
++	smp_mb__after_atomic();
+ 	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ 	mddev->pers->quiesce(mddev, 1);
++	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
++	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
+ 
+ 	del_timer_sync(&mddev->safemode_timer);
+ }
+@@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mdd
+ 	unlock:
+ 		wake_up(&mddev->sb_wait);
+ 		mddev_unlock(mddev);
++	} else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
++		/* Write superblock - thread that called mddev_suspend()
++		 * holds reconfig_mutex for us.
++		 */
++		set_bit(MD_UPDATING_SB, &mddev->flags);
++		smp_mb__after_atomic();
++		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
++			md_update_sb(mddev, 0);
++		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
++		wake_up(&mddev->sb_wait);
+ 	}
+ }
+ EXPORT_SYMBOL(md_check_recovery);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -237,6 +237,12 @@ enum mddev_flags {
+ 				 */
+ 	MD_HAS_PPL,		/* The raid array has PPL feature set */
+ 	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
++	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
++				 * the metadata without taking reconfig_mutex.
++				 */
++	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
++				 * without explicitly holding reconfig_mutex.
++				 */
+ };
+ 
+ enum mddev_sb_flags {
diff --git a/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch b/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
new file mode 100644
index 00000000000..d44b31331a7
--- /dev/null
+++ b/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
@@ -0,0 +1,141 @@
+From 4d5324f760aacaefeb721b172aa14bf66045c332 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:17:16 +1100
+Subject: md: always hold reconfig_mutex when calling mddev_suspend()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream.
+
+Most often mddev_suspend() is called with
+reconfig_mutex held.  Make this a requirement in
+preparation for a subsequent patch.  Also require
+reconfig_mutex to be held for mddev_resume(),
+partly for symmetry and partly to guarantee
+no races with incr/decr of mddev->suspend.
+
+Taking the mutex in r5c_disable_writeback_async() is
+a little tricky as this is called from a work queue
+via log->disable_writeback_work, and flush_work()
+is called on that while holding ->reconfig_mutex.
+If the work item hasn't run before flush_work()
+is called, the work function will not be able to
+get the mutex.
+
+So we use mddev_trylock() inside the wait_event() call, and have that
+abort when conf->log is set to NULL, which happens before
+flush_work() is called.
+We wait in mddev->sb_wait and ensure this is woken
+when any of the conditions change.  This requires
+waking mddev->sb_wait in mddev_unlock().  This is only
+likely to trigger extra wake_ups of threads that needn't
+be woken when metadata is being written, and that
+doesn't happen often enough that the cost would be
+noticeable.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid.c     |   10 ++++++++--
+ drivers/md/md.c          |    3 +++
+ drivers/md/raid5-cache.c |   18 +++++++++++++-----
+ 3 files changed, 24 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_t
+ {
+ 	struct raid_set *rs = ti->private;
+ 
+-	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++		mddev_lock_nointr(&rs->md);
+ 		mddev_suspend(&rs->md);
++		mddev_unlock(&rs->md);
++	}
+ 
+ 	rs->md.ro = 1;
+ }
+@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target
+ 	if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
+ 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ 
+-	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++		mddev_lock_nointr(mddev);
+ 		mddev_resume(mddev);
++		mddev_unlock(mddev);
++	}
+ }
+ 
+ static struct target_type raid_target = {
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct r
+ void mddev_suspend(struct mddev *mddev)
+ {
+ 	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
++	lockdep_assert_held(&mddev->reconfig_mutex);
+ 	if (mddev->suspended++)
+ 		return;
+ 	synchronize_rcu();
+@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
+ 
+ void mddev_resume(struct mddev *mddev)
+ {
++	lockdep_assert_held(&mddev->reconfig_mutex);
+ 	if (--mddev->suspended)
+ 		return;
+ 	wake_up(&mddev->sb_wait);
+@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev)
+ 	 */
+ 	spin_lock(&pers_lock);
+ 	md_wakeup_thread(mddev->thread);
++	wake_up(&mddev->sb_wait);
+ 	spin_unlock(&pers_lock);
+ }
+ EXPORT_SYMBOL_GPL(mddev_unlock);
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(
+ 	struct r5l_log *log = container_of(work, struct r5l_log,
+ 					   disable_writeback_work);
+ 	struct mddev *mddev = log->rdev->mddev;
++	struct r5conf *conf = mddev->private;
++	int locked = 0;
+ 
+ 	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+ 		return;
+@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(
+ 
+ 	/* wait superblock change before suspend */
+ 	wait_event(mddev->sb_wait,
+-		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+-
+-	mddev_suspend(mddev);
+-	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+-	mddev_resume(mddev);
++		   conf->log == NULL ||
++		   (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
++		    (locked = mddev_trylock(mddev))));
++	if (locked) {
++		mddev_suspend(mddev);
++		log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
++		mddev_resume(mddev);
++		mddev_unlock(mddev);
++	}
+ }
+ 
+ static void r5l_submit_current_io(struct r5l_log *log)
+@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf)
+ 	conf->log = NULL;
+ 	synchronize_rcu();
+ 
++	/* Ensure disable_writeback_work wakes up and exits */
++	wake_up(&conf->mddev->sb_wait);
+ 	flush_work(&log->disable_writeback_work);
+ 	md_unregister_thread(&log->reclaim_thread);
+ 	mempool_destroy(log->meta_pool);
diff --git a/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch b/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch
new file mode 100644
index 00000000000..a857d3d770d
--- /dev/null
+++ b/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch
@@ -0,0 +1,68 @@
+From 52a0d49de3d592a3118e13f35985e3d99eaf43df Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: don't call bitmap_create() while array is quiesced.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream.
+
+bitmap_create() allocates memory with GFP_KERNEL and
+so can wait for IO.
+If called while the array is quiesced, it could wait indefinitely
+for write out to the array - deadlock.
+So call bitmap_create() before quiescing the array.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev
+ 		return -ENOENT; /* cannot remove what isn't there */
+ 	err = 0;
+ 	if (mddev->pers) {
+-		mddev->pers->quiesce(mddev, 1);
+ 		if (fd >= 0) {
+ 			struct bitmap *bitmap;
+ 
+ 			bitmap = bitmap_create(mddev, -1);
++			mddev->pers->quiesce(mddev, 1);
+ 			if (!IS_ERR(bitmap)) {
+ 				mddev->bitmap = bitmap;
+ 				err = bitmap_load(mddev);
+ 			} else
+ 				err = PTR_ERR(bitmap);
+-		}
+-		if (fd < 0 || err) {
++			if (err) {
++				bitmap_destroy(mddev);
++				fd = -1;
++			}
++			mddev->pers->quiesce(mddev, 0);
++		} else if (fd < 0) {
++			mddev->pers->quiesce(mddev, 1);
+ 			bitmap_destroy(mddev);
+-			fd = -1; /* make sure to put the file */
++			mddev->pers->quiesce(mddev, 0);
+ 		}
+-		mddev->pers->quiesce(mddev, 0);
+ 	}
+ 	if (fd < 0) {
+ 		struct file *f = mddev->bitmap_info.file;
+@@ -6944,8 +6948,8 @@ static int update_array_info(struct mdde
+ 				mddev->bitmap_info.default_offset;
+ 			mddev->bitmap_info.space =
+ 				mddev->bitmap_info.default_space;
+-			mddev->pers->quiesce(mddev, 1);
+ 			bitmap = bitmap_create(mddev, -1);
++			mddev->pers->quiesce(mddev, 1);
+ 			if (!IS_ERR(bitmap)) {
+ 				mddev->bitmap = bitmap;
+ 				rv = bitmap_load(mddev);
diff --git a/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch b/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch
new file mode 100644
index 00000000000..03c0aa099f5
--- /dev/null
+++ b/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch
@@ -0,0 +1,159 @@
+From b3143b9a38d5039bcd1f2d1c94039651bfba8043 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: move suspend_hi/lo handling into core md code
+
+From: NeilBrown <neilb@suse.com>
+
+commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream.
+
+responding to ->suspend_lo and ->suspend_hi is similar
+to responding to ->suspended.  It is best to wait in
+the common core code without incrementing ->active_io.
+This allows mddev_suspend()/mddev_resume() to work while
+requests are waiting for suspend_lo/hi to change.
+This will be important after a subsequent patch
+which uses mddev_suspend() to synchronize updating for
+suspend_lo/hi.
+
+So move the code for testing suspend_lo/hi out of raid1.c
+and raid5.c, and place it in md.c
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c    |   29 +++++++++++++++++++++++------
+ drivers/md/raid1.c |   14 +++++---------
+ drivers/md/raid5.c |   22 ----------------------
+ 3 files changed, 28 insertions(+), 37 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
+  * call has finished, the bio has been linked into some internal structure
+  * and so is visible to ->quiesce(), so we don't need the refcount any more.
+  */
++static bool is_suspended(struct mddev *mddev, struct bio *bio)
++{
++	if (mddev->suspended)
++		return true;
++	if (bio_data_dir(bio) != WRITE)
++		return false;
++	if (mddev->suspend_lo >= mddev->suspend_hi)
++		return false;
++	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
++		return false;
++	if (bio_end_sector(bio) < mddev->suspend_lo)
++		return false;
++	return true;
++}
++
+ void md_handle_request(struct mddev *mddev, struct bio *bio)
+ {
+ check_suspended:
+ 	rcu_read_lock();
+-	if (mddev->suspended) {
++	if (is_suspended(mddev, bio)) {
+ 		DEFINE_WAIT(__wait);
+ 		for (;;) {
+ 			prepare_to_wait(&mddev->sb_wait, &__wait,
+ 					TASK_UNINTERRUPTIBLE);
+-			if (!mddev->suspended)
++			if (!is_suspended(mddev, bio))
+ 				break;
+ 			rcu_read_unlock();
+ 			schedule();
+@@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, co
+ 		goto unlock;
+ 	old = mddev->suspend_lo;
+ 	mddev->suspend_lo = new;
+-	if (new >= old)
++	if (new >= old) {
+ 		/* Shrinking suspended region */
++		wake_up(&mddev->sb_wait);
+ 		mddev->pers->quiesce(mddev, 2);
+-	else {
++	} else {
+ 		/* Expanding suspended region - need to wait */
+ 		mddev->pers->quiesce(mddev, 1);
+ 		mddev->pers->quiesce(mddev, 0);
+@@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, co
+ 		goto unlock;
+ 	old = mddev->suspend_hi;
+ 	mddev->suspend_hi = new;
+-	if (new <= old)
++	if (new <= old) {
+ 		/* Shrinking suspended region */
++		wake_up(&mddev->sb_wait);
+ 		mddev->pers->quiesce(mddev, 2);
+-	else {
++	} else {
+ 		/* Expanding suspended region - need to wait */
+ 		mddev->pers->quiesce(mddev, 1);
+ 		mddev->pers->quiesce(mddev, 0);
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1298,11 +1298,9 @@ static void raid1_write_request(struct m
+ 	 */
+ 
+ 
+-	if ((bio_end_sector(bio) > mddev->suspend_lo &&
+-	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+-	    (mddev_is_clustered(mddev) &&
++	if (mddev_is_clustered(mddev) &&
+ 	     md_cluster_ops->area_resyncing(mddev, WRITE,
+-		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
++		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
+ 
+ 		/*
+ 		 * As the suspend_* range is controlled by userspace, we want
+@@ -1313,12 +1311,10 @@ static void raid1_write_request(struct m
+ 			sigset_t full, old;
+ 			prepare_to_wait(&conf->wait_barrier,
+ 					&w, TASK_INTERRUPTIBLE);
+-			if ((bio_end_sector(bio) <= mddev->suspend_lo ||
+-			     bio->bi_iter.bi_sector >= mddev->suspend_hi) &&
+-			    (!mddev_is_clustered(mddev) ||
+-			     !md_cluster_ops->area_resyncing(mddev, WRITE,
++			if (!mddev_is_clustered(mddev) ||
++			    !md_cluster_ops->area_resyncing(mddev, WRITE,
+ 							bio->bi_iter.bi_sector,
+-							bio_end_sector(bio))))
++							bio_end_sector(bio)))
+ 				break;
+ 			sigfillset(&full);
+ 			sigprocmask(SIG_BLOCK, &full, &old);
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct md
+ 				goto retry;
+ 			}
+ 
+-			if (rw == WRITE &&
+-			    logical_sector >= mddev->suspend_lo &&
+-			    logical_sector < mddev->suspend_hi) {
+-				raid5_release_stripe(sh);
+-				/* As the suspend_* range is controlled by
+-				 * userspace, we want an interruptible
+-				 * wait.
+-				 */
+-				prepare_to_wait(&conf->wait_for_overlap,
+-						&w, TASK_INTERRUPTIBLE);
+-				if (logical_sector >= mddev->suspend_lo &&
+-				    logical_sector < mddev->suspend_hi) {
+-					sigset_t full, old;
+-					sigfillset(&full);
+-					sigprocmask(SIG_BLOCK, &full, &old);
+-					schedule();
+-					sigprocmask(SIG_SETMASK, &old, NULL);
+-					do_prepare = true;
+-				}
+-				goto retry;
+-			}
+-
+ 			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+ 			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
+ 				/* Stripe is busy expanding or
diff --git a/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch b/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch
new file mode 100644
index 00000000000..de48c927ed3
--- /dev/null
+++ b/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch
@@ -0,0 +1,304 @@
+From b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:49:15 +1100
+Subject: md: remove special meaning of ->quiesce(.., 2)
+
+From: NeilBrown <neilb@suse.com>
+
+commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream.
+
+The '2' argument means "wake up anything that is waiting".
+This is an inelegant part of the design and was added
+to help support management of suspend_lo/suspend_hi setting.
+Now that suspend_lo/hi is managed in mddev_suspend/resume,
+that need is gone.
+There are still a couple of places where we call 'quiesce'
+with an argument of '2', but they can safely be changed to
+call ->quiesce(.., 1); ->quiesce(.., 0) which
+achieve the same result at the small cost of pausing IO
+briefly.
+
+This removes a small "optimization" from suspend_{hi,lo}_store,
+but it isn't clear that optimization served a useful purpose.
+The code now is a lot clearer.
+
+Suggested-by: Shaohua Li <shli@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md-cluster.c  |    6 +++---
+ drivers/md/md.c          |   34 ++++++++++------------------------
+ drivers/md/md.h          |    9 ++++-----
+ drivers/md/raid0.c       |    2 +-
+ drivers/md/raid1.c       |   13 +++----------
+ drivers/md/raid10.c      |   10 +++-------
+ drivers/md/raid5-cache.c |   12 ++++++------
+ drivers/md/raid5-log.h   |    2 +-
+ drivers/md/raid5.c       |   18 ++++++------------
+ 9 files changed, 37 insertions(+), 69 deletions(-)
+
+--- a/drivers/md/md-cluster.c
++++ b/drivers/md/md-cluster.c
+@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct
+ static void remove_suspend_info(struct mddev *mddev, int slot)
+ {
+ 	struct md_cluster_info *cinfo = mddev->cluster_info;
++	mddev->pers->quiesce(mddev, 1);
+ 	spin_lock_irq(&cinfo->suspend_lock);
+ 	__remove_suspend_info(cinfo, slot);
+ 	spin_unlock_irq(&cinfo->suspend_lock);
+-	mddev->pers->quiesce(mddev, 2);
++	mddev->pers->quiesce(mddev, 0);
+ }
+ 
+ 
+@@ -492,13 +493,12 @@ static void process_suspend_info(struct
+ 	s->lo = lo;
+ 	s->hi = hi;
+ 	mddev->pers->quiesce(mddev, 1);
+-	mddev->pers->quiesce(mddev, 0);
+ 	spin_lock_irq(&cinfo->suspend_lock);
+ 	/* Remove existing entry (if exists) before adding */
+ 	__remove_suspend_info(cinfo, slot);
+ 	list_add(&s->list, &cinfo->suspend_list);
+ 	spin_unlock_irq(&cinfo->suspend_lock);
+-	mddev->pers->quiesce(mddev, 2);
++	mddev->pers->quiesce(mddev, 0);
+ }
+ 
+ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+-	unsigned long long old, new;
++	unsigned long long new;
+ 	int err;
+ 
+ 	err = kstrtoull(buf, 10, &new);
+@@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, co
+ 	if (mddev->pers == NULL ||
+ 	    mddev->pers->quiesce == NULL)
+ 		goto unlock;
+-	old = mddev->suspend_lo;
++	mddev_suspend(mddev);
+ 	mddev->suspend_lo = new;
+-	if (new >= old) {
+-		/* Shrinking suspended region */
+-		wake_up(&mddev->sb_wait);
+-		mddev->pers->quiesce(mddev, 2);
+-	} else {
+-		/* Expanding suspended region - need to wait */
+-		mddev_suspend(mddev);
+-		mddev_resume(mddev);
+-	}
++	mddev_resume(mddev);
++
+ 	err = 0;
+ unlock:
+ 	mddev_unlock(mddev);
+@@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+-	unsigned long long old, new;
++	unsigned long long new;
+ 	int err;
+ 
+ 	err = kstrtoull(buf, 10, &new);
+@@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, co
+ 	if (err)
+ 		return err;
+ 	err = -EINVAL;
+-	if (mddev->pers == NULL ||
+-	    mddev->pers->quiesce == NULL)
++	if (mddev->pers == NULL)
+ 		goto unlock;
+-	old = mddev->suspend_hi;
++
++	mddev_suspend(mddev);
+ 	mddev->suspend_hi = new;
+-	if (new <= old) {
+-		/* Shrinking suspended region */
+-		wake_up(&mddev->sb_wait);
+-		mddev->pers->quiesce(mddev, 2);
+-	} else {
+-		/* Expanding suspended region - need to wait */
+-		mddev_suspend(mddev);
+-		mddev_resume(mddev);
+-	}
++	mddev_resume(mddev);
++
+ 	err = 0;
+ unlock:
+ 	mddev_unlock(mddev);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -546,12 +546,11 @@ struct md_personality
+ 	int (*check_reshape) (struct mddev *mddev);
+ 	int (*start_reshape) (struct mddev *mddev);
+ 	void (*finish_reshape) (struct mddev *mddev);
+-	/* quiesce moves between quiescence states
+-	 * 0 - fully active
+-	 * 1 - no new requests allowed
+-	 * others - reserved
++	/* quiesce suspends or resumes internal processing.
++	 * 1 - stop new actions and wait for action io to complete
++	 * 0 - return to normal behaviour
+ 	 */
+-	void (*quiesce) (struct mddev *mddev, int state);
++	void (*quiesce) (struct mddev *mddev, int quiesce);
+ 	/* takeover is used to transition an array from one
+ 	 * personality to another.  The new personality must be able
+ 	 * to handle the data in the current layout.
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev
+ 	return ERR_PTR(-EINVAL);
+ }
+ 
+-static void raid0_quiesce(struct mddev *mddev, int state)
++static void raid0_quiesce(struct mddev *mddev, int quiesce)
+ {
+ }
+ 
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *m
+ 	return 0;
+ }
+ 
+-static void raid1_quiesce(struct mddev *mddev, int state)
++static void raid1_quiesce(struct mddev *mddev, int quiesce)
+ {
+ 	struct r1conf *conf = mddev->private;
+ 
+-	switch(state) {
+-	case 2: /* wake for suspend */
+-		wake_up(&conf->wait_barrier);
+-		break;
+-	case 1:
++	if (quiesce)
+ 		freeze_array(conf, 0);
+-		break;
+-	case 0:
++	else
+ 		unfreeze_array(conf);
+-		break;
+-	}
+ }
+ 
+ static void *raid1_takeover(struct mddev *mddev)
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *md
+ 	kfree(conf);
+ }
+ 
+-static void raid10_quiesce(struct mddev *mddev, int state)
++static void raid10_quiesce(struct mddev *mddev, int quiesce)
+ {
+ 	struct r10conf *conf = mddev->private;
+ 
+-	switch(state) {
+-	case 1:
++	if (quiesce)
+ 		raise_barrier(conf, 0);
+-		break;
+-	case 0:
++	else
+ 		lower_barrier(conf);
+-		break;
+-	}
+ }
+ 
+ static int raid10_resize(struct mddev *mddev, sector_t sectors)
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *lo
+ 	md_wakeup_thread(log->reclaim_thread);
+ }
+ 
+-void r5l_quiesce(struct r5l_log *log, int state)
++void r5l_quiesce(struct r5l_log *log, int quiesce)
+ {
+ 	struct mddev *mddev;
+-	if (!log || state == 2)
++	if (!log)
+ 		return;
+-	if (state == 0)
+-		kthread_unpark(log->reclaim_thread->tsk);
+-	else if (state == 1) {
++
++	if (quiesce) {
+ 		/* make sure r5l_write_super_and_discard_space exits */
+ 		mddev = log->rdev->mddev;
+ 		wake_up(&mddev->sb_wait);
+ 		kthread_park(log->reclaim_thread->tsk);
+ 		r5l_wake_reclaim(log, MaxSector);
+ 		r5l_do_reclaim(log);
+-	}
++	} else
++		kthread_unpark(log->reclaim_thread->tsk);
+ }
+ 
+ bool r5l_log_disk_error(struct r5conf *conf)
+--- a/drivers/md/raid5-log.h
++++ b/drivers/md/raid5-log.h
+@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct
+ extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
+ extern void r5l_stripe_write_finished(struct stripe_head *sh);
+ extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
+-extern void r5l_quiesce(struct r5l_log *log, int state);
++extern void r5l_quiesce(struct r5l_log *log, int quiesce);
+ extern bool r5l_log_disk_error(struct r5conf *conf);
+ extern bool r5c_is_writeback(struct r5l_log *log);
+ extern int
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct
+ 	}
+ }
+ 
+-static void raid5_quiesce(struct mddev *mddev, int state)
++static void raid5_quiesce(struct mddev *mddev, int quiesce)
+ {
+ 	struct r5conf *conf = mddev->private;
+ 
+-	switch(state) {
+-	case 2: /* resume for a suspend */
+-		wake_up(&conf->wait_for_overlap);
+-		break;
+-
+-	case 1: /* stop all writes */
++	if (quiesce) {
++		/* stop all writes */
+ 		lock_all_device_hash_locks_irq(conf);
+ 		/* '2' tells resync/reshape to pause so that all
+ 		 * active stripes can drain
+@@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev *
+ 		unlock_all_device_hash_locks_irq(conf);
+ 		/* allow reshape to continue */
+ 		wake_up(&conf->wait_for_overlap);
+-		break;
+-
+-	case 0: /* re-enable writes */
++	} else {
++		/* re-enable writes */
+ 		lock_all_device_hash_locks_irq(conf);
+ 		conf->quiesce = 0;
+ 		wake_up(&conf->wait_for_quiescent);
+ 		wake_up(&conf->wait_for_overlap);
+ 		unlock_all_device_hash_locks_irq(conf);
+-		break;
+ 	}
+-	r5l_quiesce(conf->log, state);
++	r5l_quiesce(conf->log, quiesce);
+ }
+ 
+ static void *raid45_takeover_raid0(struct mddev *mddev, int level)
diff --git a/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch b/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch
new file mode 100644
index 00000000000..4d6cda463c4
--- /dev/null
+++ b/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch
@@ -0,0 +1,100 @@
+From 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: use mddev_suspend/resume instead of ->quiesce()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream.
+
+mddev_suspend() is a more general interface than
+calling ->quiesce() and so is more extensible.  A
+future patch will make use of this.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, co
+ 		mddev->pers->quiesce(mddev, 2);
+ 	} else {
+ 		/* Expanding suspended region - need to wait */
+-		mddev->pers->quiesce(mddev, 1);
+-		mddev->pers->quiesce(mddev, 0);
++		mddev_suspend(mddev);
++		mddev_resume(mddev);
+ 	}
+ 	err = 0;
+ unlock:
+@@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, co
+ 		mddev->pers->quiesce(mddev, 2);
+ 	} else {
+ 		/* Expanding suspended region - need to wait */
+-		mddev->pers->quiesce(mddev, 1);
+-		mddev->pers->quiesce(mddev, 0);
++		mddev_suspend(mddev);
++		mddev_resume(mddev);
+ 	}
+ 	err = 0;
+ unlock:
+@@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev
+ 			struct bitmap *bitmap;
+ 
+ 			bitmap = bitmap_create(mddev, -1);
+-			mddev->pers->quiesce(mddev, 1);
++			mddev_suspend(mddev);
+ 			if (!IS_ERR(bitmap)) {
+ 				mddev->bitmap = bitmap;
+ 				err = bitmap_load(mddev);
+@@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev
+ 				bitmap_destroy(mddev);
+ 				fd = -1;
+ 			}
+-			mddev->pers->quiesce(mddev, 0);
++			mddev_resume(mddev);
+ 		} else if (fd < 0) {
+-			mddev->pers->quiesce(mddev, 1);
++			mddev_suspend(mddev);
+ 			bitmap_destroy(mddev);
+-			mddev->pers->quiesce(mddev, 0);
++			mddev_resume(mddev);
+ 		}
+ 	}
+ 	if (fd < 0) {
+@@ -6966,7 +6966,7 @@ static int update_array_info(struct mdde
+ 			mddev->bitmap_info.space =
+ 				mddev->bitmap_info.default_space;
+ 			bitmap = bitmap_create(mddev, -1);
+-			mddev->pers->quiesce(mddev, 1);
++			mddev_suspend(mddev);
+ 			if (!IS_ERR(bitmap)) {
+ 				mddev->bitmap = bitmap;
+ 				rv = bitmap_load(mddev);
+@@ -6974,7 +6974,7 @@ static int update_array_info(struct mdde
+ 				rv = PTR_ERR(bitmap);
+ 			if (rv)
+ 				bitmap_destroy(mddev);
+-			mddev->pers->quiesce(mddev, 0);
++			mddev_resume(mddev);
+ 		} else {
+ 			/* remove the bitmap */
+ 			if (!mddev->bitmap) {
+@@ -6997,9 +6997,9 @@ static int update_array_info(struct mdde
+ 				mddev->bitmap_info.nodes = 0;
+ 				md_cluster_ops->leave(mddev);
+ 			}
+-			mddev->pers->quiesce(mddev, 1);
++			mddev_suspend(mddev);
+ 			bitmap_destroy(mddev);
+-			mddev->pers->quiesce(mddev, 0);
++			mddev_resume(mddev);
+ 			mddev->bitmap_info.offset = 0;
+ 		}
+ 	}
diff --git a/queue-4.14/series b/queue-4.14/series
index 5ad2728107b..437485c87f6 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -31,3 +31,9 @@ netfilter-nf_tables-increase-nft_counters_enabled-in-nft_chain_stats_replace.pat
 netfilter-nf_tables-fix-memory-leak-on-error-exit-return.patch
 netfilter-nf_tables-add-missing-netlink-attrs-to-policies.patch
 netfilter-nf_tables-fix-null-ptr-in-nf_tables_dump_obj.patch
+md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
+md-don-t-call-bitmap_create-while-array-is-quiesced.patch
+md-move-suspend_hi-lo-handling-into-core-md-code.patch
+md-use-mddev_suspend-resume-instead-of-quiesce.patch
+md-allow-metadata-update-while-suspending.patch
+md-remove-special-meaning-of-quiesce-..-2.patch