--- /dev/null
+From 35bfc52187f6df8779d0f1cebdb52b7f797baf4e Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: allow metadata update while suspending.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream.
+
+There are various deadlocks that can occur
+when a thread holds reconfig_mutex and calls
+->quiesce(mddev, 1).
+As some write requests block waiting for
+metadata to be updated (e.g. to record device
+failure), and as the md thread updates the metadata
+while the reconfig mutex is held, holding the mutex
+can stop write requests completing, and this prevents
+->quiesce(mddev, 1) from completing.
+
+->quiesce() is now usually called from mddev_suspend(),
+and it is always called with reconfig_mutex held. So
+at this time it is safe for the thread to update metadata
+without explicitly taking the lock.
+
+So add 2 new flags, one which says the unlocked update is
+allowed, and one which says it is happening. Then allow it
+while the quiesce completes, and then wait for it to finish.
+
+Reported-and-tested-by: Xiao Ni <xni@redhat.com>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c | 14 ++++++++++++++
+ drivers/md/md.h | 6 ++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev)
+ return;
+ synchronize_rcu();
+ wake_up(&mddev->sb_wait);
++ set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
++ smp_mb__after_atomic();
+ wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ mddev->pers->quiesce(mddev, 1);
++ clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
++ wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
+
+ del_timer_sync(&mddev->safemode_timer);
+ }
+@@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mdd
+ unlock:
+ wake_up(&mddev->sb_wait);
+ mddev_unlock(mddev);
++ } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
++ /* Write superblock - thread that called mddev_suspend()
++ * holds reconfig_mutex for us.
++ */
++ set_bit(MD_UPDATING_SB, &mddev->flags);
++ smp_mb__after_atomic();
++ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
++ md_update_sb(mddev, 0);
++ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
++ wake_up(&mddev->sb_wait);
+ }
+ }
+ EXPORT_SYMBOL(md_check_recovery);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -237,6 +237,12 @@ enum mddev_flags {
+ */
+ MD_HAS_PPL, /* The raid array has PPL feature set */
+ MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
++ MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
++ * the metadata without taking reconfig_mutex.
++ */
++ MD_UPDATING_SB, /* md_check_recovery is updating the metadata
++ * without explicitly holding reconfig_mutex.
++ */
+ };
+
+ enum mddev_sb_flags {
--- /dev/null
+From 4d5324f760aacaefeb721b172aa14bf66045c332 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:17:16 +1100
+Subject: md: always hold reconfig_mutex when calling mddev_suspend()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream.
+
+Most often mddev_suspend() is called with
+reconfig_mutex held. Make this a requirement in
+preparation for a subsequent patch. Also require
+reconfig_mutex to be held for mddev_resume(),
+partly for symmetry and partly to guarantee
+no races with incr/decr of mddev->suspend.
+
+Taking the mutex in r5c_disable_writeback_async() is
+a little tricky as this is called from a work queue
+via log->disable_writeback_work, and flush_work()
+is called on that while holding ->reconfig_mutex.
+If the work item hasn't run before flush_work()
+is called, the work function will not be able to
+get the mutex.
+
+So we use mddev_trylock() inside the wait_event() call, and have that
+abort when conf->log is set to NULL, which happens before
+flush_work() is called.
+We wait in mddev->sb_wait and ensure this is woken
+when any of the conditions change. This requires
+waking mddev->sb_wait in mddev_unlock(). This is only
+likely to trigger extra wake_ups of threads that needn't
+be woken when metadata is being written, and that
+doesn't happen often enough that the cost would be
+noticeable.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid.c | 10 ++++++++--
+ drivers/md/md.c | 3 +++
+ drivers/md/raid5-cache.c | 18 +++++++++++++-----
+ 3 files changed, 24 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_t
+ {
+ struct raid_set *rs = ti->private;
+
+- if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++ if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++ mddev_lock_nointr(&rs->md);
+ mddev_suspend(&rs->md);
++ mddev_unlock(&rs->md);
++ }
+
+ rs->md.ro = 1;
+ }
+@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target
+ if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+- if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++ if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++ mddev_lock_nointr(mddev);
+ mddev_resume(mddev);
++ mddev_unlock(mddev);
++ }
+ }
+
+ static struct target_type raid_target = {
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct r
+ void mddev_suspend(struct mddev *mddev)
+ {
+ WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
++ lockdep_assert_held(&mddev->reconfig_mutex);
+ if (mddev->suspended++)
+ return;
+ synchronize_rcu();
+@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
+
+ void mddev_resume(struct mddev *mddev)
+ {
++ lockdep_assert_held(&mddev->reconfig_mutex);
+ if (--mddev->suspended)
+ return;
+ wake_up(&mddev->sb_wait);
+@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev)
+ */
+ spin_lock(&pers_lock);
+ md_wakeup_thread(mddev->thread);
++ wake_up(&mddev->sb_wait);
+ spin_unlock(&pers_lock);
+ }
+ EXPORT_SYMBOL_GPL(mddev_unlock);
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(
+ struct r5l_log *log = container_of(work, struct r5l_log,
+ disable_writeback_work);
+ struct mddev *mddev = log->rdev->mddev;
++ struct r5conf *conf = mddev->private;
++ int locked = 0;
+
+ if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+ return;
+@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(
+
+ /* wait superblock change before suspend */
+ wait_event(mddev->sb_wait,
+- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+-
+- mddev_suspend(mddev);
+- log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+- mddev_resume(mddev);
++ conf->log == NULL ||
++ (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
++ (locked = mddev_trylock(mddev))));
++ if (locked) {
++ mddev_suspend(mddev);
++ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
++ mddev_resume(mddev);
++ mddev_unlock(mddev);
++ }
+ }
+
+ static void r5l_submit_current_io(struct r5l_log *log)
+@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf)
+ conf->log = NULL;
+ synchronize_rcu();
+
++ /* Ensure disable_writeback_work wakes up and exits */
++ wake_up(&conf->mddev->sb_wait);
+ flush_work(&log->disable_writeback_work);
+ md_unregister_thread(&log->reclaim_thread);
+ mempool_destroy(log->meta_pool);
--- /dev/null
+From 52a0d49de3d592a3118e13f35985e3d99eaf43df Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: don't call bitmap_create() while array is quiesced.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream.
+
+bitmap_create() allocates memory with GFP_KERNEL and
+so can wait for IO.
+If called while the array is quiesced, it could wait indefinitely
+for write out to the array - deadlock.
+So call bitmap_create() before quiescing the array.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev
+ return -ENOENT; /* cannot remove what isn't there */
+ err = 0;
+ if (mddev->pers) {
+- mddev->pers->quiesce(mddev, 1);
+ if (fd >= 0) {
+ struct bitmap *bitmap;
+
+ bitmap = bitmap_create(mddev, -1);
++ mddev->pers->quiesce(mddev, 1);
+ if (!IS_ERR(bitmap)) {
+ mddev->bitmap = bitmap;
+ err = bitmap_load(mddev);
+ } else
+ err = PTR_ERR(bitmap);
+- }
+- if (fd < 0 || err) {
++ if (err) {
++ bitmap_destroy(mddev);
++ fd = -1;
++ }
++ mddev->pers->quiesce(mddev, 0);
++ } else if (fd < 0) {
++ mddev->pers->quiesce(mddev, 1);
+ bitmap_destroy(mddev);
+- fd = -1; /* make sure to put the file */
++ mddev->pers->quiesce(mddev, 0);
+ }
+- mddev->pers->quiesce(mddev, 0);
+ }
+ if (fd < 0) {
+ struct file *f = mddev->bitmap_info.file;
+@@ -6944,8 +6948,8 @@ static int update_array_info(struct mdde
+ mddev->bitmap_info.default_offset;
+ mddev->bitmap_info.space =
+ mddev->bitmap_info.default_space;
+- mddev->pers->quiesce(mddev, 1);
+ bitmap = bitmap_create(mddev, -1);
++ mddev->pers->quiesce(mddev, 1);
+ if (!IS_ERR(bitmap)) {
+ mddev->bitmap = bitmap;
+ rv = bitmap_load(mddev);
--- /dev/null
+From b3143b9a38d5039bcd1f2d1c94039651bfba8043 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: move suspend_hi/lo handling into core md code
+
+From: NeilBrown <neilb@suse.com>
+
+commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream.
+
+responding to ->suspend_lo and ->suspend_hi is similar
+to responding to ->suspended. It is best to wait in
+the common core code without incrementing ->active_io.
+This allows mddev_suspend()/mddev_resume() to work while
+requests are waiting for suspend_lo/hi to change.
+This will be important after a subsequent patch
+which uses mddev_suspend() to synchronize updating for
+suspend_lo/hi.
+
+So move the code for testing suspend_lo/hi out of raid1.c
+and raid5.c, and place it in md.c
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c | 29 +++++++++++++++++++++++------
+ drivers/md/raid1.c | 14 +++++---------
+ drivers/md/raid5.c | 22 ----------------------
+ 3 files changed, 28 insertions(+), 37 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
++static bool is_suspended(struct mddev *mddev, struct bio *bio)
++{
++ if (mddev->suspended)
++ return true;
++ if (bio_data_dir(bio) != WRITE)
++ return false;
++ if (mddev->suspend_lo >= mddev->suspend_hi)
++ return false;
++ if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
++ return false;
++ if (bio_end_sector(bio) < mddev->suspend_lo)
++ return false;
++ return true;
++}
++
+ void md_handle_request(struct mddev *mddev, struct bio *bio)
+ {
+ check_suspended:
+ rcu_read_lock();
+- if (mddev->suspended) {
++ if (is_suspended(mddev, bio)) {
+ DEFINE_WAIT(__wait);
+ for (;;) {
+ prepare_to_wait(&mddev->sb_wait, &__wait,
+ TASK_UNINTERRUPTIBLE);
+- if (!mddev->suspended)
++ if (!is_suspended(mddev, bio))
+ break;
+ rcu_read_unlock();
+ schedule();
+@@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, co
+ goto unlock;
+ old = mddev->suspend_lo;
+ mddev->suspend_lo = new;
+- if (new >= old)
++ if (new >= old) {
+ /* Shrinking suspended region */
++ wake_up(&mddev->sb_wait);
+ mddev->pers->quiesce(mddev, 2);
+- else {
++ } else {
+ /* Expanding suspended region - need to wait */
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+@@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, co
+ goto unlock;
+ old = mddev->suspend_hi;
+ mddev->suspend_hi = new;
+- if (new <= old)
++ if (new <= old) {
+ /* Shrinking suspended region */
++ wake_up(&mddev->sb_wait);
+ mddev->pers->quiesce(mddev, 2);
+- else {
++ } else {
+ /* Expanding suspended region - need to wait */
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1298,11 +1298,9 @@ static void raid1_write_request(struct m
+ */
+
+
+- if ((bio_end_sector(bio) > mddev->suspend_lo &&
+- bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+- (mddev_is_clustered(mddev) &&
++ if (mddev_is_clustered(mddev) &&
+ md_cluster_ops->area_resyncing(mddev, WRITE,
+- bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
++ bio->bi_iter.bi_sector, bio_end_sector(bio))) {
+
+ /*
+ * As the suspend_* range is controlled by userspace, we want
+@@ -1313,12 +1311,10 @@ static void raid1_write_request(struct m
+ sigset_t full, old;
+ prepare_to_wait(&conf->wait_barrier,
+ &w, TASK_INTERRUPTIBLE);
+- if ((bio_end_sector(bio) <= mddev->suspend_lo ||
+- bio->bi_iter.bi_sector >= mddev->suspend_hi) &&
+- (!mddev_is_clustered(mddev) ||
+- !md_cluster_ops->area_resyncing(mddev, WRITE,
++ if (!mddev_is_clustered(mddev) ||
++ !md_cluster_ops->area_resyncing(mddev, WRITE,
+ bio->bi_iter.bi_sector,
+- bio_end_sector(bio))))
++ bio_end_sector(bio)))
+ break;
+ sigfillset(&full);
+ sigprocmask(SIG_BLOCK, &full, &old);
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct md
+ goto retry;
+ }
+
+- if (rw == WRITE &&
+- logical_sector >= mddev->suspend_lo &&
+- logical_sector < mddev->suspend_hi) {
+- raid5_release_stripe(sh);
+- /* As the suspend_* range is controlled by
+- * userspace, we want an interruptible
+- * wait.
+- */
+- prepare_to_wait(&conf->wait_for_overlap,
+- &w, TASK_INTERRUPTIBLE);
+- if (logical_sector >= mddev->suspend_lo &&
+- logical_sector < mddev->suspend_hi) {
+- sigset_t full, old;
+- sigfillset(&full);
+- sigprocmask(SIG_BLOCK, &full, &old);
+- schedule();
+- sigprocmask(SIG_SETMASK, &old, NULL);
+- do_prepare = true;
+- }
+- goto retry;
+- }
+-
+ if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+ !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
+ /* Stripe is busy expanding or
--- /dev/null
+From b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:49:15 +1100
+Subject: md: remove special meaning of ->quiesce(.., 2)
+
+From: NeilBrown <neilb@suse.com>
+
+commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream.
+
+The '2' argument means "wake up anything that is waiting".
+This is an inelegant part of the design and was added
+to help support management of suspend_lo/suspend_hi setting.
+Now that suspend_lo/hi is managed in mddev_suspend/resume,
+that need is gone.
+There are still a couple of places where we call 'quiesce'
+with an argument of '2', but they can safely be changed to
+call ->quiesce(.., 1); ->quiesce(.., 0) which
+achieve the same result at the small cost of pausing IO
+briefly.
+
+This removes a small "optimization" from suspend_{hi,lo}_store,
+but it isn't clear that optimization served a useful purpose.
+The code now is a lot clearer.
+
+Suggested-by: Shaohua Li <shli@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md-cluster.c | 6 +++---
+ drivers/md/md.c | 34 ++++++++++------------------------
+ drivers/md/md.h | 9 ++++-----
+ drivers/md/raid0.c | 2 +-
+ drivers/md/raid1.c | 13 +++----------
+ drivers/md/raid10.c | 10 +++-------
+ drivers/md/raid5-cache.c | 12 ++++++------
+ drivers/md/raid5-log.h | 2 +-
+ drivers/md/raid5.c | 18 ++++++------------
+ 9 files changed, 37 insertions(+), 69 deletions(-)
+
+--- a/drivers/md/md-cluster.c
++++ b/drivers/md/md-cluster.c
+@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct
+ static void remove_suspend_info(struct mddev *mddev, int slot)
+ {
+ struct md_cluster_info *cinfo = mddev->cluster_info;
++ mddev->pers->quiesce(mddev, 1);
+ spin_lock_irq(&cinfo->suspend_lock);
+ __remove_suspend_info(cinfo, slot);
+ spin_unlock_irq(&cinfo->suspend_lock);
+- mddev->pers->quiesce(mddev, 2);
++ mddev->pers->quiesce(mddev, 0);
+ }
+
+
+@@ -492,13 +493,12 @@ static void process_suspend_info(struct
+ s->lo = lo;
+ s->hi = hi;
+ mddev->pers->quiesce(mddev, 1);
+- mddev->pers->quiesce(mddev, 0);
+ spin_lock_irq(&cinfo->suspend_lock);
+ /* Remove existing entry (if exists) before adding */
+ __remove_suspend_info(cinfo, slot);
+ list_add(&s->list, &cinfo->suspend_list);
+ spin_unlock_irq(&cinfo->suspend_lock);
+- mddev->pers->quiesce(mddev, 2);
++ mddev->pers->quiesce(mddev, 0);
+ }
+
+ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+- unsigned long long old, new;
++ unsigned long long new;
+ int err;
+
+ err = kstrtoull(buf, 10, &new);
+@@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, co
+ if (mddev->pers == NULL ||
+ mddev->pers->quiesce == NULL)
+ goto unlock;
+- old = mddev->suspend_lo;
++ mddev_suspend(mddev);
+ mddev->suspend_lo = new;
+- if (new >= old) {
+- /* Shrinking suspended region */
+- wake_up(&mddev->sb_wait);
+- mddev->pers->quiesce(mddev, 2);
+- } else {
+- /* Expanding suspended region - need to wait */
+- mddev_suspend(mddev);
+- mddev_resume(mddev);
+- }
++ mddev_resume(mddev);
++
+ err = 0;
+ unlock:
+ mddev_unlock(mddev);
+@@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+- unsigned long long old, new;
++ unsigned long long new;
+ int err;
+
+ err = kstrtoull(buf, 10, &new);
+@@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, co
+ if (err)
+ return err;
+ err = -EINVAL;
+- if (mddev->pers == NULL ||
+- mddev->pers->quiesce == NULL)
++ if (mddev->pers == NULL)
+ goto unlock;
+- old = mddev->suspend_hi;
++
++ mddev_suspend(mddev);
+ mddev->suspend_hi = new;
+- if (new <= old) {
+- /* Shrinking suspended region */
+- wake_up(&mddev->sb_wait);
+- mddev->pers->quiesce(mddev, 2);
+- } else {
+- /* Expanding suspended region - need to wait */
+- mddev_suspend(mddev);
+- mddev_resume(mddev);
+- }
++ mddev_resume(mddev);
++
+ err = 0;
+ unlock:
+ mddev_unlock(mddev);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -546,12 +546,11 @@ struct md_personality
+ int (*check_reshape) (struct mddev *mddev);
+ int (*start_reshape) (struct mddev *mddev);
+ void (*finish_reshape) (struct mddev *mddev);
+- /* quiesce moves between quiescence states
+- * 0 - fully active
+- * 1 - no new requests allowed
+- * others - reserved
++ /* quiesce suspends or resumes internal processing.
++ * 1 - stop new actions and wait for action io to complete
++ * 0 - return to normal behaviour
+ */
+- void (*quiesce) (struct mddev *mddev, int state);
++ void (*quiesce) (struct mddev *mddev, int quiesce);
+ /* takeover is used to transition an array from one
+ * personality to another. The new personality must be able
+ * to handle the data in the current layout.
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev
+ return ERR_PTR(-EINVAL);
+ }
+
+-static void raid0_quiesce(struct mddev *mddev, int state)
++static void raid0_quiesce(struct mddev *mddev, int quiesce)
+ {
+ }
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *m
+ return 0;
+ }
+
+-static void raid1_quiesce(struct mddev *mddev, int state)
++static void raid1_quiesce(struct mddev *mddev, int quiesce)
+ {
+ struct r1conf *conf = mddev->private;
+
+- switch(state) {
+- case 2: /* wake for suspend */
+- wake_up(&conf->wait_barrier);
+- break;
+- case 1:
++ if (quiesce)
+ freeze_array(conf, 0);
+- break;
+- case 0:
++ else
+ unfreeze_array(conf);
+- break;
+- }
+ }
+
+ static void *raid1_takeover(struct mddev *mddev)
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *md
+ kfree(conf);
+ }
+
+-static void raid10_quiesce(struct mddev *mddev, int state)
++static void raid10_quiesce(struct mddev *mddev, int quiesce)
+ {
+ struct r10conf *conf = mddev->private;
+
+- switch(state) {
+- case 1:
++ if (quiesce)
+ raise_barrier(conf, 0);
+- break;
+- case 0:
++ else
+ lower_barrier(conf);
+- break;
+- }
+ }
+
+ static int raid10_resize(struct mddev *mddev, sector_t sectors)
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *lo
+ md_wakeup_thread(log->reclaim_thread);
+ }
+
+-void r5l_quiesce(struct r5l_log *log, int state)
++void r5l_quiesce(struct r5l_log *log, int quiesce)
+ {
+ struct mddev *mddev;
+- if (!log || state == 2)
++ if (!log)
+ return;
+- if (state == 0)
+- kthread_unpark(log->reclaim_thread->tsk);
+- else if (state == 1) {
++
++ if (quiesce) {
+ /* make sure r5l_write_super_and_discard_space exits */
+ mddev = log->rdev->mddev;
+ wake_up(&mddev->sb_wait);
+ kthread_park(log->reclaim_thread->tsk);
+ r5l_wake_reclaim(log, MaxSector);
+ r5l_do_reclaim(log);
+- }
++ } else
++ kthread_unpark(log->reclaim_thread->tsk);
+ }
+
+ bool r5l_log_disk_error(struct r5conf *conf)
+--- a/drivers/md/raid5-log.h
++++ b/drivers/md/raid5-log.h
+@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct
+ extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
+ extern void r5l_stripe_write_finished(struct stripe_head *sh);
+ extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
+-extern void r5l_quiesce(struct r5l_log *log, int state);
++extern void r5l_quiesce(struct r5l_log *log, int quiesce);
+ extern bool r5l_log_disk_error(struct r5conf *conf);
+ extern bool r5c_is_writeback(struct r5l_log *log);
+ extern int
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct
+ }
+ }
+
+-static void raid5_quiesce(struct mddev *mddev, int state)
++static void raid5_quiesce(struct mddev *mddev, int quiesce)
+ {
+ struct r5conf *conf = mddev->private;
+
+- switch(state) {
+- case 2: /* resume for a suspend */
+- wake_up(&conf->wait_for_overlap);
+- break;
+-
+- case 1: /* stop all writes */
++ if (quiesce) {
++ /* stop all writes */
+ lock_all_device_hash_locks_irq(conf);
+ /* '2' tells resync/reshape to pause so that all
+ * active stripes can drain
+@@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev *
+ unlock_all_device_hash_locks_irq(conf);
+ /* allow reshape to continue */
+ wake_up(&conf->wait_for_overlap);
+- break;
+-
+- case 0: /* re-enable writes */
++ } else {
++ /* re-enable writes */
+ lock_all_device_hash_locks_irq(conf);
+ conf->quiesce = 0;
+ wake_up(&conf->wait_for_quiescent);
+ wake_up(&conf->wait_for_overlap);
+ unlock_all_device_hash_locks_irq(conf);
+- break;
+ }
+- r5l_quiesce(conf->log, state);
++ r5l_quiesce(conf->log, quiesce);
+ }
+
+ static void *raid45_takeover_raid0(struct mddev *mddev, int level)
--- /dev/null
+From 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: use mddev_suspend/resume instead of ->quiesce()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream.
+
+mddev_suspend() is a more general interface than
+calling ->quiesce() and so is more extensible. A
+future patch will make use of this.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, co
+ mddev->pers->quiesce(mddev, 2);
+ } else {
+ /* Expanding suspended region - need to wait */
+- mddev->pers->quiesce(mddev, 1);
+- mddev->pers->quiesce(mddev, 0);
++ mddev_suspend(mddev);
++ mddev_resume(mddev);
+ }
+ err = 0;
+ unlock:
+@@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, co
+ mddev->pers->quiesce(mddev, 2);
+ } else {
+ /* Expanding suspended region - need to wait */
+- mddev->pers->quiesce(mddev, 1);
+- mddev->pers->quiesce(mddev, 0);
++ mddev_suspend(mddev);
++ mddev_resume(mddev);
+ }
+ err = 0;
+ unlock:
+@@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev
+ struct bitmap *bitmap;
+
+ bitmap = bitmap_create(mddev, -1);
+- mddev->pers->quiesce(mddev, 1);
++ mddev_suspend(mddev);
+ if (!IS_ERR(bitmap)) {
+ mddev->bitmap = bitmap;
+ err = bitmap_load(mddev);
+@@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev
+ bitmap_destroy(mddev);
+ fd = -1;
+ }
+- mddev->pers->quiesce(mddev, 0);
++ mddev_resume(mddev);
+ } else if (fd < 0) {
+- mddev->pers->quiesce(mddev, 1);
++ mddev_suspend(mddev);
+ bitmap_destroy(mddev);
+- mddev->pers->quiesce(mddev, 0);
++ mddev_resume(mddev);
+ }
+ }
+ if (fd < 0) {
+@@ -6966,7 +6966,7 @@ static int update_array_info(struct mdde
+ mddev->bitmap_info.space =
+ mddev->bitmap_info.default_space;
+ bitmap = bitmap_create(mddev, -1);
+- mddev->pers->quiesce(mddev, 1);
++ mddev_suspend(mddev);
+ if (!IS_ERR(bitmap)) {
+ mddev->bitmap = bitmap;
+ rv = bitmap_load(mddev);
+@@ -6974,7 +6974,7 @@ static int update_array_info(struct mdde
+ rv = PTR_ERR(bitmap);
+ if (rv)
+ bitmap_destroy(mddev);
+- mddev->pers->quiesce(mddev, 0);
++ mddev_resume(mddev);
+ } else {
+ /* remove the bitmap */
+ if (!mddev->bitmap) {
+@@ -6997,9 +6997,9 @@ static int update_array_info(struct mdde
+ mddev->bitmap_info.nodes = 0;
+ md_cluster_ops->leave(mddev);
+ }
+- mddev->pers->quiesce(mddev, 1);
++ mddev_suspend(mddev);
+ bitmap_destroy(mddev);
+- mddev->pers->quiesce(mddev, 0);
++ mddev_resume(mddev);
+ mddev->bitmap_info.offset = 0;
+ }
+ }
netfilter-nf_tables-fix-memory-leak-on-error-exit-return.patch
netfilter-nf_tables-add-missing-netlink-attrs-to-policies.patch
netfilter-nf_tables-fix-null-ptr-in-nf_tables_dump_obj.patch
+md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
+md-don-t-call-bitmap_create-while-array-is-quiesced.patch
+md-move-suspend_hi-lo-handling-into-core-md-code.patch
+md-use-mddev_suspend-resume-instead-of-quiesce.patch
+md-allow-metadata-update-while-suspending.patch
+md-remove-special-meaning-of-quiesce-..-2.patch