git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Jul 2018 16:59:38 +0000 (18:59 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Jul 2018 16:59:38 +0000 (18:59 +0200)
added patches:
md-allow-metadata-update-while-suspending.patch
md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
md-don-t-call-bitmap_create-while-array-is-quiesced.patch
md-move-suspend_hi-lo-handling-into-core-md-code.patch
md-remove-special-meaning-of-quiesce-..-2.patch
md-use-mddev_suspend-resume-instead-of-quiesce.patch

queue-4.14/md-allow-metadata-update-while-suspending.patch [new file with mode: 0644]
queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch [new file with mode: 0644]
queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch [new file with mode: 0644]
queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch [new file with mode: 0644]
queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch [new file with mode: 0644]
queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/md-allow-metadata-update-while-suspending.patch b/queue-4.14/md-allow-metadata-update-while-suspending.patch
new file mode 100644 (file)
index 0000000..ce2141f
--- /dev/null
@@ -0,0 +1,86 @@
+From 35bfc52187f6df8779d0f1cebdb52b7f797baf4e Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: allow metadata update while suspending.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream.
+
+There are various deadlocks that can occur
+when a thread holds reconfig_mutex and calls
+->quiesce(mddev, 1).
+As some write requests block waiting for
+metadata to be updated (e.g. to record device
+failure), and as the md thread updates the metadata
+while the reconfig mutex is held, holding the mutex
+can stop write requests completing, and this prevents
+->quiesce(mddev, 1) from completing.
+
+->quiesce() is now usually called from mddev_suspend(),
+and it is always called with reconfig_mutex held.  So
+at this time it is safe for the thread to update metadata
+without explicitly taking the lock.
+
+So add 2 new flags, one which says that unlocked updates are
+allowed, and one which says it is happening.  Then allow it
+while the quiesce completes, and then wait for it to finish.
+
+Reported-and-tested-by: Xiao Ni <xni@redhat.com>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   14 ++++++++++++++
+ drivers/md/md.h |    6 ++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev)
+               return;
+       synchronize_rcu();
+       wake_up(&mddev->sb_wait);
++      set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
++      smp_mb__after_atomic();
+       wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+       mddev->pers->quiesce(mddev, 1);
++      clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
++      wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
+       del_timer_sync(&mddev->safemode_timer);
+ }
+@@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mdd
+       unlock:
+               wake_up(&mddev->sb_wait);
+               mddev_unlock(mddev);
++      } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
++              /* Write superblock - thread that called mddev_suspend()
++               * holds reconfig_mutex for us.
++               */
++              set_bit(MD_UPDATING_SB, &mddev->flags);
++              smp_mb__after_atomic();
++              if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
++                      md_update_sb(mddev, 0);
++              clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
++              wake_up(&mddev->sb_wait);
+       }
+ }
+ EXPORT_SYMBOL(md_check_recovery);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -237,6 +237,12 @@ enum mddev_flags {
+                                */
+       MD_HAS_PPL,             /* The raid array has PPL feature set */
+       MD_HAS_MULTIPLE_PPLS,   /* The raid array has multiple PPLs feature set */
++      MD_ALLOW_SB_UPDATE,     /* md_check_recovery is allowed to update
++                               * the metadata without taking reconfig_mutex.
++                               */
++      MD_UPDATING_SB,         /* md_check_recovery is updating the metadata
++                               * without explicitly holding reconfig_mutex.
++                               */
+ };
+ enum mddev_sb_flags {
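
Read together, the two md.c hunks above form a small lockless handshake between the thread that suspends the array and the md thread that writes the superblock. A condensed sketch of the resulting 4.14 flow (surrounding code elided, not a literal listing):

    /* mddev_suspend() -- suspending thread, reconfig_mutex held */
    set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
    smp_mb__after_atomic();
    wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
    mddev->pers->quiesce(mddev, 1);
    clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
    wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

    /* md_check_recovery() -- md thread; the suspending thread holds
     * reconfig_mutex on its behalf, so it may write the superblock
     * without taking the mutex itself.
     */
    if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
            set_bit(MD_UPDATING_SB, &mddev->flags);
            smp_mb__after_atomic();
            if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
                    md_update_sb(mddev, 0);
            clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
            wake_up(&mddev->sb_wait);
    }

The intent is that either the md thread's update is seen and waited for via MD_UPDATING_SB, or, once MD_ALLOW_SB_UPDATE has been cleared, the re-check after the barrier stops a new unlocked update from starting.
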
diff --git a/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch b/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
new file mode 100644 (file)
index 0000000..d44b313
--- /dev/null
@@ -0,0 +1,141 @@
+From 4d5324f760aacaefeb721b172aa14bf66045c332 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:17:16 +1100
+Subject: md: always hold reconfig_mutex when calling mddev_suspend()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream.
+
+Most often mddev_suspend() is called with
+reconfig_mutex held.  Make this a requirement in
+preparation for a subsequent patch.  Also require
+reconfig_mutex to be held for mddev_resume(),
+partly for symmetry and partly to guarantee
+no races with incr/decr of mddev->suspended.
+
+Taking the mutex in r5c_disable_writeback_async() is
+a little tricky as this is called from a work queue
+via log->disable_writeback_work, and flush_work()
+is called on that while holding ->reconfig_mutex.
+If the work item hasn't run before flush_work()
+is called, the work function will not be able to
+get the mutex.
+
+So we use mddev_trylock() inside the wait_event() call, and have that
+abort when conf->log is set to NULL, which happens before
+flush_work() is called.
+We wait in mddev->sb_wait and ensure this is woken
+when any of the conditions change.  This requires
+waking mddev->sb_wait in mddev_unlock().  This is only
+likely to trigger extra wake_ups of threads that needn't
+be woken when metadata is being written, and that
+doesn't happen often enough that the cost would be
+noticeable.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-raid.c     |   10 ++++++++--
+ drivers/md/md.c          |    3 +++
+ drivers/md/raid5-cache.c |   18 +++++++++++++-----
+ 3 files changed, 24 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_t
+ {
+       struct raid_set *rs = ti->private;
+-      if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++      if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++              mddev_lock_nointr(&rs->md);
+               mddev_suspend(&rs->md);
++              mddev_unlock(&rs->md);
++      }
+       rs->md.ro = 1;
+ }
+@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target
+       if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+-      if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
++      if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
++              mddev_lock_nointr(mddev);
+               mddev_resume(mddev);
++              mddev_unlock(mddev);
++      }
+ }
+ static struct target_type raid_target = {
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct r
+ void mddev_suspend(struct mddev *mddev)
+ {
+       WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
++      lockdep_assert_held(&mddev->reconfig_mutex);
+       if (mddev->suspended++)
+               return;
+       synchronize_rcu();
+@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
+ void mddev_resume(struct mddev *mddev)
+ {
++      lockdep_assert_held(&mddev->reconfig_mutex);
+       if (--mddev->suspended)
+               return;
+       wake_up(&mddev->sb_wait);
+@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev)
+        */
+       spin_lock(&pers_lock);
+       md_wakeup_thread(mddev->thread);
++      wake_up(&mddev->sb_wait);
+       spin_unlock(&pers_lock);
+ }
+ EXPORT_SYMBOL_GPL(mddev_unlock);
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(
+       struct r5l_log *log = container_of(work, struct r5l_log,
+                                          disable_writeback_work);
+       struct mddev *mddev = log->rdev->mddev;
++      struct r5conf *conf = mddev->private;
++      int locked = 0;
+       if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+               return;
+@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(
+       /* wait superblock change before suspend */
+       wait_event(mddev->sb_wait,
+-                 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+-
+-      mddev_suspend(mddev);
+-      log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+-      mddev_resume(mddev);
++                 conf->log == NULL ||
++                 (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
++                  (locked = mddev_trylock(mddev))));
++      if (locked) {
++              mddev_suspend(mddev);
++              log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
++              mddev_resume(mddev);
++              mddev_unlock(mddev);
++      }
+ }
+ static void r5l_submit_current_io(struct r5l_log *log)
+@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf)
+       conf->log = NULL;
+       synchronize_rcu();
++      /* Ensure disable_writeback_work wakes up and exits */
++      wake_up(&conf->mddev->sb_wait);
+       flush_work(&log->disable_writeback_work);
+       md_unregister_thread(&log->reclaim_thread);
+       mempool_destroy(log->meta_pool);
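
The raid5-cache side of this change is easier to follow with the hunks folded together; roughly, r5c_disable_writeback_async() now does the following (a sketch of the 4.14 code above, error paths elided):

    struct r5conf *conf = mddev->private;
    int locked = 0;

    /* Wait until either the log is being torn down (r5l_exit_log()
     * sets conf->log = NULL before calling flush_work()), or there is
     * no pending superblock change and we get reconfig_mutex ourselves.
     */
    wait_event(mddev->sb_wait,
               conf->log == NULL ||
               (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
                (locked = mddev_trylock(mddev))));
    if (locked) {
            mddev_suspend(mddev);
            log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
            mddev_resume(mddev);
            mddev_unlock(mddev);
    }

Because the work item no longer sleeps on reconfig_mutex unconditionally, flush_work() called under that mutex in r5l_exit_log() cannot deadlock against it; the extra wake_up(&mddev->sb_wait) calls added to r5l_exit_log() and mddev_unlock() make sure the wait_event() above re-evaluates its condition.
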
diff --git a/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch b/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch
new file mode 100644 (file)
index 0000000..a857d3d
--- /dev/null
@@ -0,0 +1,68 @@
+From 52a0d49de3d592a3118e13f35985e3d99eaf43df Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: don't call bitmap_create() while array is quiesced.
+
+From: NeilBrown <neilb@suse.com>
+
+commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream.
+
+bitmap_create() allocates memory with GFP_KERNEL and
+so can wait for IO.
+If called while the array is quiesced, it could wait indefinitely
+for write out to the array - deadlock.
+So call bitmap_create() before quiescing the array.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev
+               return -ENOENT; /* cannot remove what isn't there */
+       err = 0;
+       if (mddev->pers) {
+-              mddev->pers->quiesce(mddev, 1);
+               if (fd >= 0) {
+                       struct bitmap *bitmap;
+                       bitmap = bitmap_create(mddev, -1);
++                      mddev->pers->quiesce(mddev, 1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
+                               err = bitmap_load(mddev);
+                       } else
+                               err = PTR_ERR(bitmap);
+-              }
+-              if (fd < 0 || err) {
++                      if (err) {
++                              bitmap_destroy(mddev);
++                              fd = -1;
++                      }
++                      mddev->pers->quiesce(mddev, 0);
++              } else if (fd < 0) {
++                      mddev->pers->quiesce(mddev, 1);
+                       bitmap_destroy(mddev);
+-                      fd = -1; /* make sure to put the file */
++                      mddev->pers->quiesce(mddev, 0);
+               }
+-              mddev->pers->quiesce(mddev, 0);
+       }
+       if (fd < 0) {
+               struct file *f = mddev->bitmap_info.file;
+@@ -6944,8 +6948,8 @@ static int update_array_info(struct mdde
+                               mddev->bitmap_info.default_offset;
+                       mddev->bitmap_info.space =
+                               mddev->bitmap_info.default_space;
+-                      mddev->pers->quiesce(mddev, 1);
+                       bitmap = bitmap_create(mddev, -1);
++                      mddev->pers->quiesce(mddev, 1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
+                               rv = bitmap_load(mddev);
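
The reordering in set_bitmap_file() is the whole fix: the allocation that may block on I/O has to happen while the array can still service that I/O. Roughly, attaching a bitmap now looks like this (sketch, error handling condensed):

    /* bitmap_create() allocates with GFP_KERNEL and so may wait for
     * writeback to the array; run it before quiescing.
     */
    bitmap = bitmap_create(mddev, -1);
    mddev->pers->quiesce(mddev, 1);         /* only now stop new requests */
    if (!IS_ERR(bitmap)) {
            mddev->bitmap = bitmap;
            err = bitmap_load(mddev);
    } else
            err = PTR_ERR(bitmap);
    if (err) {
            bitmap_destroy(mddev);
            fd = -1;
    }
    mddev->pers->quiesce(mddev, 0);

update_array_info() gets the same treatment: bitmap_create() is moved in front of the quiesce call there as well.
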
diff --git a/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch b/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch
new file mode 100644 (file)
index 0000000..03c0aa0
--- /dev/null
@@ -0,0 +1,159 @@
+From b3143b9a38d5039bcd1f2d1c94039651bfba8043 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: move suspend_hi/lo handling into core md code
+
+From: NeilBrown <neilb@suse.com>
+
+commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream.
+
+Responding to ->suspend_lo and ->suspend_hi is similar
+to responding to ->suspended.  It is best to wait in
+the common core code without incrementing ->active_io.
+This allows mddev_suspend()/mddev_resume() to work while
+requests are waiting for suspend_lo/hi to change.
+This will be important after a subsequent patch
+which uses mddev_suspend() to synchronize updating of
+suspend_lo/hi.
+
+So move the code for testing suspend_lo/hi out of raid1.c
+and raid5.c, and place it in md.c
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c    |   29 +++++++++++++++++++++++------
+ drivers/md/raid1.c |   14 +++++---------
+ drivers/md/raid5.c |   22 ----------------------
+ 3 files changed, 28 insertions(+), 37 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
+  * call has finished, the bio has been linked into some internal structure
+  * and so is visible to ->quiesce(), so we don't need the refcount any more.
+  */
++static bool is_suspended(struct mddev *mddev, struct bio *bio)
++{
++      if (mddev->suspended)
++              return true;
++      if (bio_data_dir(bio) != WRITE)
++              return false;
++      if (mddev->suspend_lo >= mddev->suspend_hi)
++              return false;
++      if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
++              return false;
++      if (bio_end_sector(bio) < mddev->suspend_lo)
++              return false;
++      return true;
++}
++
+ void md_handle_request(struct mddev *mddev, struct bio *bio)
+ {
+ check_suspended:
+       rcu_read_lock();
+-      if (mddev->suspended) {
++      if (is_suspended(mddev, bio)) {
+               DEFINE_WAIT(__wait);
+               for (;;) {
+                       prepare_to_wait(&mddev->sb_wait, &__wait,
+                                       TASK_UNINTERRUPTIBLE);
+-                      if (!mddev->suspended)
++                      if (!is_suspended(mddev, bio))
+                               break;
+                       rcu_read_unlock();
+                       schedule();
+@@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, co
+               goto unlock;
+       old = mddev->suspend_lo;
+       mddev->suspend_lo = new;
+-      if (new >= old)
++      if (new >= old) {
+               /* Shrinking suspended region */
++              wake_up(&mddev->sb_wait);
+               mddev->pers->quiesce(mddev, 2);
+-      else {
++      } else {
+               /* Expanding suspended region - need to wait */
+               mddev->pers->quiesce(mddev, 1);
+               mddev->pers->quiesce(mddev, 0);
+@@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, co
+               goto unlock;
+       old = mddev->suspend_hi;
+       mddev->suspend_hi = new;
+-      if (new <= old)
++      if (new <= old) {
+               /* Shrinking suspended region */
++              wake_up(&mddev->sb_wait);
+               mddev->pers->quiesce(mddev, 2);
+-      else {
++      } else {
+               /* Expanding suspended region - need to wait */
+               mddev->pers->quiesce(mddev, 1);
+               mddev->pers->quiesce(mddev, 0);
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1298,11 +1298,9 @@ static void raid1_write_request(struct m
+        */
+-      if ((bio_end_sector(bio) > mddev->suspend_lo &&
+-          bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+-          (mddev_is_clustered(mddev) &&
++      if (mddev_is_clustered(mddev) &&
+            md_cluster_ops->area_resyncing(mddev, WRITE,
+-                   bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
++                   bio->bi_iter.bi_sector, bio_end_sector(bio))) {
+               /*
+                * As the suspend_* range is controlled by userspace, we want
+@@ -1313,12 +1311,10 @@ static void raid1_write_request(struct m
+                       sigset_t full, old;
+                       prepare_to_wait(&conf->wait_barrier,
+                                       &w, TASK_INTERRUPTIBLE);
+-                      if ((bio_end_sector(bio) <= mddev->suspend_lo ||
+-                           bio->bi_iter.bi_sector >= mddev->suspend_hi) &&
+-                          (!mddev_is_clustered(mddev) ||
+-                           !md_cluster_ops->area_resyncing(mddev, WRITE,
++                      if (!mddev_is_clustered(mddev) ||
++                          !md_cluster_ops->area_resyncing(mddev, WRITE,
+                                                       bio->bi_iter.bi_sector,
+-                                                      bio_end_sector(bio))))
++                                                      bio_end_sector(bio)))
+                               break;
+                       sigfillset(&full);
+                       sigprocmask(SIG_BLOCK, &full, &old);
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct md
+                               goto retry;
+                       }
+-                      if (rw == WRITE &&
+-                          logical_sector >= mddev->suspend_lo &&
+-                          logical_sector < mddev->suspend_hi) {
+-                              raid5_release_stripe(sh);
+-                              /* As the suspend_* range is controlled by
+-                               * userspace, we want an interruptible
+-                               * wait.
+-                               */
+-                              prepare_to_wait(&conf->wait_for_overlap,
+-                                              &w, TASK_INTERRUPTIBLE);
+-                              if (logical_sector >= mddev->suspend_lo &&
+-                                  logical_sector < mddev->suspend_hi) {
+-                                      sigset_t full, old;
+-                                      sigfillset(&full);
+-                                      sigprocmask(SIG_BLOCK, &full, &old);
+-                                      schedule();
+-                                      sigprocmask(SIG_SETMASK, &old, NULL);
+-                                      do_prepare = true;
+-                              }
+-                              goto retry;
+-                      }
+-
+                       if (test_bit(STRIPE_EXPANDING, &sh->state) ||
+                           !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
+                               /* Stripe is busy expanding or
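
After this patch the suspend_lo/suspend_hi gating lives in one place: the new is_suspended() helper, which md_handle_request() checks before letting a bio through (sketch of the helper added above):

    static bool is_suspended(struct mddev *mddev, struct bio *bio)
    {
            if (mddev->suspended)
                    return true;            /* whole array suspended */
            if (bio_data_dir(bio) != WRITE)
                    return false;           /* reads are never held back */
            if (mddev->suspend_lo >= mddev->suspend_hi)
                    return false;           /* empty window */
            if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
                    return false;           /* starts past the window */
            if (bio_end_sector(bio) < mddev->suspend_lo)
                    return false;           /* ends before the window */
            return true;
    }

For illustration, with suspend_lo = 1000 and suspend_hi = 2000 (sector numbers chosen arbitrarily), a write covering sectors 1500-1507 sleeps in md_handle_request() until the window moves, a write starting at sector 2000 proceeds, and reads always proceed. Waiters sleep on mddev->sb_wait, which is why the sysfs stores now wake that queue when shrinking the suspended region.
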
diff --git a/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch b/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch
new file mode 100644 (file)
index 0000000..de48c92
--- /dev/null
@@ -0,0 +1,304 @@
+From b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Thu, 19 Oct 2017 12:49:15 +1100
+Subject: md: remove special meaning of ->quiesce(.., 2)
+
+From: NeilBrown <neilb@suse.com>
+
+commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream.
+
+The '2' argument means "wake up anything that is waiting".
+This is an inelegant part of the design and was added
+to help support management of suspend_lo/suspend_hi setting.
+Now that suspend_lo/hi is managed in mddev_suspend/resume,
+that need is gone.
+There are still a couple of places where we call 'quiesce'
+with an argument of '2', but they can safely be changed to
+call ->quiesce(.., 1); ->quiesce(.., 0) which
+achieve the same result at the small cost of pausing IO
+briefly.
+
+This removes a small "optimization" from suspend_{hi,lo}_store,
+but it isn't clear that optimization served a useful purpose.
+The code now is a lot clearer.
+
+Suggested-by: Shaohua Li <shli@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md-cluster.c  |    6 +++---
+ drivers/md/md.c          |   34 ++++++++++------------------------
+ drivers/md/md.h          |    9 ++++-----
+ drivers/md/raid0.c       |    2 +-
+ drivers/md/raid1.c       |   13 +++----------
+ drivers/md/raid10.c      |   10 +++-------
+ drivers/md/raid5-cache.c |   12 ++++++------
+ drivers/md/raid5-log.h   |    2 +-
+ drivers/md/raid5.c       |   18 ++++++------------
+ 9 files changed, 37 insertions(+), 69 deletions(-)
+
+--- a/drivers/md/md-cluster.c
++++ b/drivers/md/md-cluster.c
+@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct
+ static void remove_suspend_info(struct mddev *mddev, int slot)
+ {
+       struct md_cluster_info *cinfo = mddev->cluster_info;
++      mddev->pers->quiesce(mddev, 1);
+       spin_lock_irq(&cinfo->suspend_lock);
+       __remove_suspend_info(cinfo, slot);
+       spin_unlock_irq(&cinfo->suspend_lock);
+-      mddev->pers->quiesce(mddev, 2);
++      mddev->pers->quiesce(mddev, 0);
+ }
+@@ -492,13 +493,12 @@ static void process_suspend_info(struct
+       s->lo = lo;
+       s->hi = hi;
+       mddev->pers->quiesce(mddev, 1);
+-      mddev->pers->quiesce(mddev, 0);
+       spin_lock_irq(&cinfo->suspend_lock);
+       /* Remove existing entry (if exists) before adding */
+       __remove_suspend_info(cinfo, slot);
+       list_add(&s->list, &cinfo->suspend_list);
+       spin_unlock_irq(&cinfo->suspend_lock);
+-      mddev->pers->quiesce(mddev, 2);
++      mddev->pers->quiesce(mddev, 0);
+ }
+ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+-      unsigned long long old, new;
++      unsigned long long new;
+       int err;
+       err = kstrtoull(buf, 10, &new);
+@@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, co
+       if (mddev->pers == NULL ||
+           mddev->pers->quiesce == NULL)
+               goto unlock;
+-      old = mddev->suspend_lo;
++      mddev_suspend(mddev);
+       mddev->suspend_lo = new;
+-      if (new >= old) {
+-              /* Shrinking suspended region */
+-              wake_up(&mddev->sb_wait);
+-              mddev->pers->quiesce(mddev, 2);
+-      } else {
+-              /* Expanding suspended region - need to wait */
+-              mddev_suspend(mddev);
+-              mddev_resume(mddev);
+-      }
++      mddev_resume(mddev);
++
+       err = 0;
+ unlock:
+       mddev_unlock(mddev);
+@@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, cha
+ static ssize_t
+ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+-      unsigned long long old, new;
++      unsigned long long new;
+       int err;
+       err = kstrtoull(buf, 10, &new);
+@@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, co
+       if (err)
+               return err;
+       err = -EINVAL;
+-      if (mddev->pers == NULL ||
+-          mddev->pers->quiesce == NULL)
++      if (mddev->pers == NULL)
+               goto unlock;
+-      old = mddev->suspend_hi;
++
++      mddev_suspend(mddev);
+       mddev->suspend_hi = new;
+-      if (new <= old) {
+-              /* Shrinking suspended region */
+-              wake_up(&mddev->sb_wait);
+-              mddev->pers->quiesce(mddev, 2);
+-      } else {
+-              /* Expanding suspended region - need to wait */
+-              mddev_suspend(mddev);
+-              mddev_resume(mddev);
+-      }
++      mddev_resume(mddev);
++
+       err = 0;
+ unlock:
+       mddev_unlock(mddev);
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -546,12 +546,11 @@ struct md_personality
+       int (*check_reshape) (struct mddev *mddev);
+       int (*start_reshape) (struct mddev *mddev);
+       void (*finish_reshape) (struct mddev *mddev);
+-      /* quiesce moves between quiescence states
+-       * 0 - fully active
+-       * 1 - no new requests allowed
+-       * others - reserved
++      /* quiesce suspends or resumes internal processing.
++       * 1 - stop new actions and wait for action io to complete
++       * 0 - return to normal behaviour
+        */
+-      void (*quiesce) (struct mddev *mddev, int state);
++      void (*quiesce) (struct mddev *mddev, int quiesce);
+       /* takeover is used to transition an array from one
+        * personality to another.  The new personality must be able
+        * to handle the data in the current layout.
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev
+       return ERR_PTR(-EINVAL);
+ }
+-static void raid0_quiesce(struct mddev *mddev, int state)
++static void raid0_quiesce(struct mddev *mddev, int quiesce)
+ {
+ }
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *m
+       return 0;
+ }
+-static void raid1_quiesce(struct mddev *mddev, int state)
++static void raid1_quiesce(struct mddev *mddev, int quiesce)
+ {
+       struct r1conf *conf = mddev->private;
+-      switch(state) {
+-      case 2: /* wake for suspend */
+-              wake_up(&conf->wait_barrier);
+-              break;
+-      case 1:
++      if (quiesce)
+               freeze_array(conf, 0);
+-              break;
+-      case 0:
++      else
+               unfreeze_array(conf);
+-              break;
+-      }
+ }
+ static void *raid1_takeover(struct mddev *mddev)
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *md
+       kfree(conf);
+ }
+-static void raid10_quiesce(struct mddev *mddev, int state)
++static void raid10_quiesce(struct mddev *mddev, int quiesce)
+ {
+       struct r10conf *conf = mddev->private;
+-      switch(state) {
+-      case 1:
++      if (quiesce)
+               raise_barrier(conf, 0);
+-              break;
+-      case 0:
++      else
+               lower_barrier(conf);
+-              break;
+-      }
+ }
+ static int raid10_resize(struct mddev *mddev, sector_t sectors)
+--- a/drivers/md/raid5-cache.c
++++ b/drivers/md/raid5-cache.c
+@@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *lo
+       md_wakeup_thread(log->reclaim_thread);
+ }
+-void r5l_quiesce(struct r5l_log *log, int state)
++void r5l_quiesce(struct r5l_log *log, int quiesce)
+ {
+       struct mddev *mddev;
+-      if (!log || state == 2)
++      if (!log)
+               return;
+-      if (state == 0)
+-              kthread_unpark(log->reclaim_thread->tsk);
+-      else if (state == 1) {
++
++      if (quiesce) {
+               /* make sure r5l_write_super_and_discard_space exits */
+               mddev = log->rdev->mddev;
+               wake_up(&mddev->sb_wait);
+               kthread_park(log->reclaim_thread->tsk);
+               r5l_wake_reclaim(log, MaxSector);
+               r5l_do_reclaim(log);
+-      }
++      } else
++              kthread_unpark(log->reclaim_thread->tsk);
+ }
+ bool r5l_log_disk_error(struct r5conf *conf)
+--- a/drivers/md/raid5-log.h
++++ b/drivers/md/raid5-log.h
+@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct
+ extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
+ extern void r5l_stripe_write_finished(struct stripe_head *sh);
+ extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
+-extern void r5l_quiesce(struct r5l_log *log, int state);
++extern void r5l_quiesce(struct r5l_log *log, int quiesce);
+ extern bool r5l_log_disk_error(struct r5conf *conf);
+ extern bool r5c_is_writeback(struct r5l_log *log);
+ extern int
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct
+       }
+ }
+-static void raid5_quiesce(struct mddev *mddev, int state)
++static void raid5_quiesce(struct mddev *mddev, int quiesce)
+ {
+       struct r5conf *conf = mddev->private;
+-      switch(state) {
+-      case 2: /* resume for a suspend */
+-              wake_up(&conf->wait_for_overlap);
+-              break;
+-
+-      case 1: /* stop all writes */
++      if (quiesce) {
++              /* stop all writes */
+               lock_all_device_hash_locks_irq(conf);
+               /* '2' tells resync/reshape to pause so that all
+                * active stripes can drain
+@@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev *
+               unlock_all_device_hash_locks_irq(conf);
+               /* allow reshape to continue */
+               wake_up(&conf->wait_for_overlap);
+-              break;
+-
+-      case 0: /* re-enable writes */
++      } else {
++              /* re-enable writes */
+               lock_all_device_hash_locks_irq(conf);
+               conf->quiesce = 0;
+               wake_up(&conf->wait_for_quiescent);
+               wake_up(&conf->wait_for_overlap);
+               unlock_all_device_hash_locks_irq(conf);
+-              break;
+       }
+-      r5l_quiesce(conf->log, state);
++      r5l_quiesce(conf->log, quiesce);
+ }
+ static void *raid45_takeover_raid0(struct mddev *mddev, int level)
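
With the '2' state gone, ->quiesce() and all of its callers reduce to a plain boolean. The sysfs stores, for instance, collapse to the following core (sketch of the resulting suspend_lo_store()/suspend_hi_store()):

    /* No more shrinking-vs-expanding cases: suspend, update, resume.
     * mddev_suspend()/mddev_resume() map onto quiesce(mddev, 1) and
     * quiesce(mddev, 0) underneath.
     */
    mddev_suspend(mddev);
    mddev->suspend_lo = new;        /* or mddev->suspend_hi = new */
    mddev_resume(mddev);

In md-cluster the old quiesce(.., 2) wake-ups are likewise replaced with full quiesce(.., 1)/quiesce(.., 0) pairs, at the small cost of briefly pausing I/O.
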
diff --git a/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch b/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch
new file mode 100644 (file)
index 0000000..4d6cda4
--- /dev/null
@@ -0,0 +1,100 @@
+From 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Tue, 17 Oct 2017 13:46:43 +1100
+Subject: md: use mddev_suspend/resume instead of ->quiesce()
+
+From: NeilBrown <neilb@suse.com>
+
+commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream.
+
+mddev_suspend() is a more general interface than
+calling ->quiesce() and so is more extensible.  A
+future patch will make use of this.
+
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/md.c |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, co
+               mddev->pers->quiesce(mddev, 2);
+       } else {
+               /* Expanding suspended region - need to wait */
+-              mddev->pers->quiesce(mddev, 1);
+-              mddev->pers->quiesce(mddev, 0);
++              mddev_suspend(mddev);
++              mddev_resume(mddev);
+       }
+       err = 0;
+ unlock:
+@@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, co
+               mddev->pers->quiesce(mddev, 2);
+       } else {
+               /* Expanding suspended region - need to wait */
+-              mddev->pers->quiesce(mddev, 1);
+-              mddev->pers->quiesce(mddev, 0);
++              mddev_suspend(mddev);
++              mddev_resume(mddev);
+       }
+       err = 0;
+ unlock:
+@@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev
+                       struct bitmap *bitmap;
+                       bitmap = bitmap_create(mddev, -1);
+-                      mddev->pers->quiesce(mddev, 1);
++                      mddev_suspend(mddev);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
+                               err = bitmap_load(mddev);
+@@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev
+                               bitmap_destroy(mddev);
+                               fd = -1;
+                       }
+-                      mddev->pers->quiesce(mddev, 0);
++                      mddev_resume(mddev);
+               } else if (fd < 0) {
+-                      mddev->pers->quiesce(mddev, 1);
++                      mddev_suspend(mddev);
+                       bitmap_destroy(mddev);
+-                      mddev->pers->quiesce(mddev, 0);
++                      mddev_resume(mddev);
+               }
+       }
+       if (fd < 0) {
+@@ -6966,7 +6966,7 @@ static int update_array_info(struct mdde
+                       mddev->bitmap_info.space =
+                               mddev->bitmap_info.default_space;
+                       bitmap = bitmap_create(mddev, -1);
+-                      mddev->pers->quiesce(mddev, 1);
++                      mddev_suspend(mddev);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
+                               rv = bitmap_load(mddev);
+@@ -6974,7 +6974,7 @@ static int update_array_info(struct mdde
+                               rv = PTR_ERR(bitmap);
+                       if (rv)
+                               bitmap_destroy(mddev);
+-                      mddev->pers->quiesce(mddev, 0);
++                      mddev_resume(mddev);
+               } else {
+                       /* remove the bitmap */
+                       if (!mddev->bitmap) {
+@@ -6997,9 +6997,9 @@ static int update_array_info(struct mdde
+                               mddev->bitmap_info.nodes = 0;
+                               md_cluster_ops->leave(mddev);
+                       }
+-                      mddev->pers->quiesce(mddev, 1);
++                      mddev_suspend(mddev);
+                       bitmap_destroy(mddev);
+-                      mddev->pers->quiesce(mddev, 0);
++                      mddev_resume(mddev);
+                       mddev->bitmap_info.offset = 0;
+               }
+       }
diff --git a/queue-4.14/series b/queue-4.14/series
index 5ad2728107bfe71159468e9286b5244c338f98c6..437485c87f689d8309cb941be97f125484d8b03e 100644 (file)
@@ -31,3 +31,9 @@ netfilter-nf_tables-increase-nft_counters_enabled-in-nft_chain_stats_replace.pat
 netfilter-nf_tables-fix-memory-leak-on-error-exit-return.patch
 netfilter-nf_tables-add-missing-netlink-attrs-to-policies.patch
 netfilter-nf_tables-fix-null-ptr-in-nf_tables_dump_obj.patch
+md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch
+md-don-t-call-bitmap_create-while-array-is-quiesced.patch
+md-move-suspend_hi-lo-handling-into-core-md-code.patch
+md-use-mddev_suspend-resume-instead-of-quiesce.patch
+md-allow-metadata-update-while-suspending.patch
+md-remove-special-meaning-of-quiesce-..-2.patch