From: Greg Kroah-Hartman Date: Thu, 5 Jul 2018 16:59:38 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.14.54~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1d8ba106271ebdbdce7db8a95b00a807fd9e3ef6;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: md-allow-metadata-update-while-suspending.patch md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch md-don-t-call-bitmap_create-while-array-is-quiesced.patch md-move-suspend_hi-lo-handling-into-core-md-code.patch md-remove-special-meaning-of-quiesce-..-2.patch md-use-mddev_suspend-resume-instead-of-quiesce.patch --- diff --git a/queue-4.14/md-allow-metadata-update-while-suspending.patch b/queue-4.14/md-allow-metadata-update-while-suspending.patch new file mode 100644 index 00000000000..ce2141fc289 --- /dev/null +++ b/queue-4.14/md-allow-metadata-update-while-suspending.patch @@ -0,0 +1,86 @@ +From 35bfc52187f6df8779d0f1cebdb52b7f797baf4e Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Tue, 17 Oct 2017 13:46:43 +1100 +Subject: md: allow metadata update while suspending. + +From: NeilBrown + +commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream. + +There are various deadlocks that can occur +when a thread holds reconfig_mutex and calls +->quiesce(mddev, 1). +As some write requests block waiting for +metadata to be updated (e.g. to record device +failure), and as the md thread updates the metadata +while the reconfig mutex is held, holding the mutex +can stop write requests completing, and this prevents +->quiesce(mddev, 1) from completing. + +->quiesce() is now usually called from mddev_suspend(), +and it is always called with reconfig_mutex held. So +at this time it is safe for the thread to update metadata +without explicitly taking the lock. + +So add 2 new flags, one which says the unlocked update is +allowed, and one which says it is happening. Then allow it +while the quiesce completes, and then wait for it to finish. 
+ +Reported-and-tested-by: Xiao Ni +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md.c | 14 ++++++++++++++ + drivers/md/md.h | 6 ++++++ + 2 files changed, 20 insertions(+) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev) + return; + synchronize_rcu(); + wake_up(&mddev->sb_wait); ++ set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); ++ smp_mb__after_atomic(); + wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); + mddev->pers->quiesce(mddev, 1); ++ clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); ++ wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); + + del_timer_sync(&mddev->safemode_timer); + } +@@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mdd + unlock: + wake_up(&mddev->sb_wait); + mddev_unlock(mddev); ++ } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { ++ /* Write superblock - thread that called mddev_suspend() ++ * holds reconfig_mutex for us. ++ */ ++ set_bit(MD_UPDATING_SB, &mddev->flags); ++ smp_mb__after_atomic(); ++ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) ++ md_update_sb(mddev, 0); ++ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); ++ wake_up(&mddev->sb_wait); + } + } + EXPORT_SYMBOL(md_check_recovery); +--- a/drivers/md/md.h ++++ b/drivers/md/md.h +@@ -237,6 +237,12 @@ enum mddev_flags { + */ + MD_HAS_PPL, /* The raid array has PPL feature set */ + MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */ ++ MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update ++ * the metadata without taking reconfig_mutex. ++ */ ++ MD_UPDATING_SB, /* md_check_recovery is updating the metadata ++ * without explicitly holding reconfig_mutex. 
++ */ + }; + + enum mddev_sb_flags { diff --git a/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch b/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch new file mode 100644 index 00000000000..d44b31331a7 --- /dev/null +++ b/queue-4.14/md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch @@ -0,0 +1,141 @@ +From 4d5324f760aacaefeb721b172aa14bf66045c332 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Thu, 19 Oct 2017 12:17:16 +1100 +Subject: md: always hold reconfig_mutex when calling mddev_suspend() + +From: NeilBrown + +commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream. + +Most often mddev_suspend() is called with +reconfig_mutex held. Make this a requirement in +preparation for a subsequent patch. Also require +reconfig_mutex to be held for mddev_resume(), +partly for symmetry and partly to guarantee +no races with incr/decr of mddev->suspended. + +Taking the mutex in r5c_disable_writeback_async() is +a little tricky as this is called from a work queue +via log->disable_writeback_work, and flush_work() +is called on that while holding ->reconfig_mutex. +If the work item hasn't run before flush_work() +is called, the work function will not be able to +get the mutex. + +So we use mddev_trylock() inside the wait_event() call, and have that +abort when conf->log is set to NULL, which happens before +flush_work() is called. +We wait in mddev->sb_wait and ensure this is woken +when any of the conditions change. This requires +waking mddev->sb_wait in mddev_unlock(). This is only +likely to trigger extra wake_ups of threads that needn't +be woken when metadata is being written, and that +doesn't happen often enough that the cost would be +noticeable. 
+ +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-raid.c | 10 ++++++++-- + drivers/md/md.c | 3 +++ + drivers/md/raid5-cache.c | 18 +++++++++++++----- + 3 files changed, 24 insertions(+), 7 deletions(-) + +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_t + { + struct raid_set *rs = ti->private; + +- if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) ++ if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { ++ mddev_lock_nointr(&rs->md); + mddev_suspend(&rs->md); ++ mddev_unlock(&rs->md); ++ } + + rs->md.ro = 1; + } +@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target + if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + +- if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) ++ if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { ++ mddev_lock_nointr(mddev); + mddev_resume(mddev); ++ mddev_unlock(mddev); ++ } + } + + static struct target_type raid_target = { +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct r + void mddev_suspend(struct mddev *mddev) + { + WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk); ++ lockdep_assert_held(&mddev->reconfig_mutex); + if (mddev->suspended++) + return; + synchronize_rcu(); +@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend); + + void mddev_resume(struct mddev *mddev) + { ++ lockdep_assert_held(&mddev->reconfig_mutex); + if (--mddev->suspended) + return; + wake_up(&mddev->sb_wait); +@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev) + */ + spin_lock(&pers_lock); + md_wakeup_thread(mddev->thread); ++ wake_up(&mddev->sb_wait); + spin_unlock(&pers_lock); + } + EXPORT_SYMBOL_GPL(mddev_unlock); +--- a/drivers/md/raid5-cache.c ++++ b/drivers/md/raid5-cache.c +@@ -693,6 +693,8 @@ static void 
r5c_disable_writeback_async( + struct r5l_log *log = container_of(work, struct r5l_log, + disable_writeback_work); + struct mddev *mddev = log->rdev->mddev; ++ struct r5conf *conf = mddev->private; ++ int locked = 0; + + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) + return; +@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async( + + /* wait superblock change before suspend */ + wait_event(mddev->sb_wait, +- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); +- +- mddev_suspend(mddev); +- log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; +- mddev_resume(mddev); ++ conf->log == NULL || ++ (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && ++ (locked = mddev_trylock(mddev)))); ++ if (locked) { ++ mddev_suspend(mddev); ++ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; ++ mddev_resume(mddev); ++ mddev_unlock(mddev); ++ } + } + + static void r5l_submit_current_io(struct r5l_log *log) +@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf) + conf->log = NULL; + synchronize_rcu(); + ++ /* Ensure disable_writeback_work wakes up and exits */ ++ wake_up(&conf->mddev->sb_wait); + flush_work(&log->disable_writeback_work); + md_unregister_thread(&log->reclaim_thread); + mempool_destroy(log->meta_pool); diff --git a/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch b/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch new file mode 100644 index 00000000000..a857d3d770d --- /dev/null +++ b/queue-4.14/md-don-t-call-bitmap_create-while-array-is-quiesced.patch @@ -0,0 +1,68 @@ +From 52a0d49de3d592a3118e13f35985e3d99eaf43df Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Tue, 17 Oct 2017 13:46:43 +1100 +Subject: md: don't call bitmap_create() while array is quiesced. + +From: NeilBrown + +commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream. + +bitmap_create() allocates memory with GFP_KERNEL and +so can wait for IO. 
+If called while the array is quiesced, it could wait indefinitely +for write out to the array - deadlock. +So call bitmap_create() before quiescing the array. + +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev + return -ENOENT; /* cannot remove what isn't there */ + err = 0; + if (mddev->pers) { +- mddev->pers->quiesce(mddev, 1); + if (fd >= 0) { + struct bitmap *bitmap; + + bitmap = bitmap_create(mddev, -1); ++ mddev->pers->quiesce(mddev, 1); + if (!IS_ERR(bitmap)) { + mddev->bitmap = bitmap; + err = bitmap_load(mddev); + } else + err = PTR_ERR(bitmap); +- } +- if (fd < 0 || err) { ++ if (err) { ++ bitmap_destroy(mddev); ++ fd = -1; ++ } ++ mddev->pers->quiesce(mddev, 0); ++ } else if (fd < 0) { ++ mddev->pers->quiesce(mddev, 1); + bitmap_destroy(mddev); +- fd = -1; /* make sure to put the file */ ++ mddev->pers->quiesce(mddev, 0); + } +- mddev->pers->quiesce(mddev, 0); + } + if (fd < 0) { + struct file *f = mddev->bitmap_info.file; +@@ -6944,8 +6948,8 @@ static int update_array_info(struct mdde + mddev->bitmap_info.default_offset; + mddev->bitmap_info.space = + mddev->bitmap_info.default_space; +- mddev->pers->quiesce(mddev, 1); + bitmap = bitmap_create(mddev, -1); ++ mddev->pers->quiesce(mddev, 1); + if (!IS_ERR(bitmap)) { + mddev->bitmap = bitmap; + rv = bitmap_load(mddev); diff --git a/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch b/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch new file mode 100644 index 00000000000..03c0aa099f5 --- /dev/null +++ b/queue-4.14/md-move-suspend_hi-lo-handling-into-core-md-code.patch @@ -0,0 +1,159 @@ +From b3143b9a38d5039bcd1f2d1c94039651bfba8043 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Tue, 17 Oct 2017 
13:46:43 +1100 +Subject: md: move suspend_hi/lo handling into core md code + +From: NeilBrown + +commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream. + +responding to ->suspend_lo and ->suspend_hi is similar +to responding to ->suspended. It is best to wait in +the common core code without incrementing ->active_io. +This allows mddev_suspend()/mddev_resume() to work while +requests are waiting for suspend_lo/hi to change. +This will be important after a subsequent patch +which uses mddev_suspend() to synchronize updating for +suspend_lo/hi. + +So move the code for testing suspend_lo/hi out of raid1.c +and raid5.c, and place it in md.c + +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md.c | 29 +++++++++++++++++++++++------ + drivers/md/raid1.c | 14 +++++--------- + drivers/md/raid5.c | 22 ---------------------- + 3 files changed, 28 insertions(+), 37 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock); + * call has finished, the bio has been linked into some internal structure + * and so is visible to ->quiesce(), so we don't need the refcount any more. 
+ */ ++static bool is_suspended(struct mddev *mddev, struct bio *bio) ++{ ++ if (mddev->suspended) ++ return true; ++ if (bio_data_dir(bio) != WRITE) ++ return false; ++ if (mddev->suspend_lo >= mddev->suspend_hi) ++ return false; ++ if (bio->bi_iter.bi_sector >= mddev->suspend_hi) ++ return false; ++ if (bio_end_sector(bio) < mddev->suspend_lo) ++ return false; ++ return true; ++} ++ + void md_handle_request(struct mddev *mddev, struct bio *bio) + { + check_suspended: + rcu_read_lock(); +- if (mddev->suspended) { ++ if (is_suspended(mddev, bio)) { + DEFINE_WAIT(__wait); + for (;;) { + prepare_to_wait(&mddev->sb_wait, &__wait, + TASK_UNINTERRUPTIBLE); +- if (!mddev->suspended) ++ if (!is_suspended(mddev, bio)) + break; + rcu_read_unlock(); + schedule(); +@@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, co + goto unlock; + old = mddev->suspend_lo; + mddev->suspend_lo = new; +- if (new >= old) ++ if (new >= old) { + /* Shrinking suspended region */ ++ wake_up(&mddev->sb_wait); + mddev->pers->quiesce(mddev, 2); +- else { ++ } else { + /* Expanding suspended region - need to wait */ + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); +@@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, co + goto unlock; + old = mddev->suspend_hi; + mddev->suspend_hi = new; +- if (new <= old) ++ if (new <= old) { + /* Shrinking suspended region */ ++ wake_up(&mddev->sb_wait); + mddev->pers->quiesce(mddev, 2); +- else { ++ } else { + /* Expanding suspended region - need to wait */ + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -1298,11 +1298,9 @@ static void raid1_write_request(struct m + */ + + +- if ((bio_end_sector(bio) > mddev->suspend_lo && +- bio->bi_iter.bi_sector < mddev->suspend_hi) || +- (mddev_is_clustered(mddev) && ++ if (mddev_is_clustered(mddev) && + md_cluster_ops->area_resyncing(mddev, WRITE, +- bio->bi_iter.bi_sector, bio_end_sector(bio)))) { ++ 
bio->bi_iter.bi_sector, bio_end_sector(bio))) { + + /* + * As the suspend_* range is controlled by userspace, we want +@@ -1313,12 +1311,10 @@ static void raid1_write_request(struct m + sigset_t full, old; + prepare_to_wait(&conf->wait_barrier, + &w, TASK_INTERRUPTIBLE); +- if ((bio_end_sector(bio) <= mddev->suspend_lo || +- bio->bi_iter.bi_sector >= mddev->suspend_hi) && +- (!mddev_is_clustered(mddev) || +- !md_cluster_ops->area_resyncing(mddev, WRITE, ++ if (!mddev_is_clustered(mddev) || ++ !md_cluster_ops->area_resyncing(mddev, WRITE, + bio->bi_iter.bi_sector, +- bio_end_sector(bio)))) ++ bio_end_sector(bio))) + break; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct md + goto retry; + } + +- if (rw == WRITE && +- logical_sector >= mddev->suspend_lo && +- logical_sector < mddev->suspend_hi) { +- raid5_release_stripe(sh); +- /* As the suspend_* range is controlled by +- * userspace, we want an interruptible +- * wait. 
+- */ +- prepare_to_wait(&conf->wait_for_overlap, +- &w, TASK_INTERRUPTIBLE); +- if (logical_sector >= mddev->suspend_lo && +- logical_sector < mddev->suspend_hi) { +- sigset_t full, old; +- sigfillset(&full); +- sigprocmask(SIG_BLOCK, &full, &old); +- schedule(); +- sigprocmask(SIG_SETMASK, &old, NULL); +- do_prepare = true; +- } +- goto retry; +- } +- + if (test_bit(STRIPE_EXPANDING, &sh->state) || + !add_stripe_bio(sh, bi, dd_idx, rw, previous)) { + /* Stripe is busy expanding or diff --git a/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch b/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch new file mode 100644 index 00000000000..de48c927ed3 --- /dev/null +++ b/queue-4.14/md-remove-special-meaning-of-quiesce-..-2.patch @@ -0,0 +1,304 @@ +From b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Thu, 19 Oct 2017 12:49:15 +1100 +Subject: md: remove special meaning of ->quiesce(.., 2) + +From: NeilBrown + +commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream. + +The '2' argument means "wake up anything that is waiting". +This is an inelegant part of the design and was added +to help support management of suspend_lo/suspend_hi setting. +Now that suspend_lo/hi is managed in mddev_suspend/resume, +that need is gone. +There are still a couple of places where we call 'quiesce' +with an argument of '2', but they can safely be changed to +call ->quiesce(.., 1); ->quiesce(.., 0) which +achieve the same result at the small cost of pausing IO +briefly. + +This removes a small "optimization" from suspend_{hi,lo}_store, +but it isn't clear that optimization served a useful purpose. +The code now is a lot clearer. 
+ +Suggested-by: Shaohua Li +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md-cluster.c | 6 +++--- + drivers/md/md.c | 34 ++++++++++------------------------ + drivers/md/md.h | 9 ++++----- + drivers/md/raid0.c | 2 +- + drivers/md/raid1.c | 13 +++---------- + drivers/md/raid10.c | 10 +++------- + drivers/md/raid5-cache.c | 12 ++++++------ + drivers/md/raid5-log.h | 2 +- + drivers/md/raid5.c | 18 ++++++------------ + 9 files changed, 37 insertions(+), 69 deletions(-) + +--- a/drivers/md/md-cluster.c ++++ b/drivers/md/md-cluster.c +@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct + static void remove_suspend_info(struct mddev *mddev, int slot) + { + struct md_cluster_info *cinfo = mddev->cluster_info; ++ mddev->pers->quiesce(mddev, 1); + spin_lock_irq(&cinfo->suspend_lock); + __remove_suspend_info(cinfo, slot); + spin_unlock_irq(&cinfo->suspend_lock); +- mddev->pers->quiesce(mddev, 2); ++ mddev->pers->quiesce(mddev, 0); + } + + +@@ -492,13 +493,12 @@ static void process_suspend_info(struct + s->lo = lo; + s->hi = hi; + mddev->pers->quiesce(mddev, 1); +- mddev->pers->quiesce(mddev, 0); + spin_lock_irq(&cinfo->suspend_lock); + /* Remove existing entry (if exists) before adding */ + __remove_suspend_info(cinfo, slot); + list_add(&s->list, &cinfo->suspend_list); + spin_unlock_irq(&cinfo->suspend_lock); +- mddev->pers->quiesce(mddev, 2); ++ mddev->pers->quiesce(mddev, 0); + } + + static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, cha + static ssize_t + suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) + { +- unsigned long long old, new; ++ unsigned long long new; + int err; + + err = kstrtoull(buf, 10, &new); +@@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, co + if (mddev->pers == NULL || + mddev->pers->quiesce 
== NULL) + goto unlock; +- old = mddev->suspend_lo; ++ mddev_suspend(mddev); + mddev->suspend_lo = new; +- if (new >= old) { +- /* Shrinking suspended region */ +- wake_up(&mddev->sb_wait); +- mddev->pers->quiesce(mddev, 2); +- } else { +- /* Expanding suspended region - need to wait */ +- mddev_suspend(mddev); +- mddev_resume(mddev); +- } ++ mddev_resume(mddev); ++ + err = 0; + unlock: + mddev_unlock(mddev); +@@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, cha + static ssize_t + suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) + { +- unsigned long long old, new; ++ unsigned long long new; + int err; + + err = kstrtoull(buf, 10, &new); +@@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, co + if (err) + return err; + err = -EINVAL; +- if (mddev->pers == NULL || +- mddev->pers->quiesce == NULL) ++ if (mddev->pers == NULL) + goto unlock; +- old = mddev->suspend_hi; ++ ++ mddev_suspend(mddev); + mddev->suspend_hi = new; +- if (new <= old) { +- /* Shrinking suspended region */ +- wake_up(&mddev->sb_wait); +- mddev->pers->quiesce(mddev, 2); +- } else { +- /* Expanding suspended region - need to wait */ +- mddev_suspend(mddev); +- mddev_resume(mddev); +- } ++ mddev_resume(mddev); ++ + err = 0; + unlock: + mddev_unlock(mddev); +--- a/drivers/md/md.h ++++ b/drivers/md/md.h +@@ -546,12 +546,11 @@ struct md_personality + int (*check_reshape) (struct mddev *mddev); + int (*start_reshape) (struct mddev *mddev); + void (*finish_reshape) (struct mddev *mddev); +- /* quiesce moves between quiescence states +- * 0 - fully active +- * 1 - no new requests allowed +- * others - reserved ++ /* quiesce suspends or resumes internal processing. ++ * 1 - stop new actions and wait for action io to complete ++ * 0 - return to normal behaviour + */ +- void (*quiesce) (struct mddev *mddev, int state); ++ void (*quiesce) (struct mddev *mddev, int quiesce); + /* takeover is used to transition an array from one + * personality to another. 
The new personality must be able + * to handle the data in the current layout. +--- a/drivers/md/raid0.c ++++ b/drivers/md/raid0.c +@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev + return ERR_PTR(-EINVAL); + } + +-static void raid0_quiesce(struct mddev *mddev, int state) ++static void raid0_quiesce(struct mddev *mddev, int quiesce) + { + } + +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *m + return 0; + } + +-static void raid1_quiesce(struct mddev *mddev, int state) ++static void raid1_quiesce(struct mddev *mddev, int quiesce) + { + struct r1conf *conf = mddev->private; + +- switch(state) { +- case 2: /* wake for suspend */ +- wake_up(&conf->wait_barrier); +- break; +- case 1: ++ if (quiesce) + freeze_array(conf, 0); +- break; +- case 0: ++ else + unfreeze_array(conf); +- break; +- } + } + + static void *raid1_takeover(struct mddev *mddev) +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *md + kfree(conf); + } + +-static void raid10_quiesce(struct mddev *mddev, int state) ++static void raid10_quiesce(struct mddev *mddev, int quiesce) + { + struct r10conf *conf = mddev->private; + +- switch(state) { +- case 1: ++ if (quiesce) + raise_barrier(conf, 0); +- break; +- case 0: ++ else + lower_barrier(conf); +- break; +- } + } + + static int raid10_resize(struct mddev *mddev, sector_t sectors) +--- a/drivers/md/raid5-cache.c ++++ b/drivers/md/raid5-cache.c +@@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *lo + md_wakeup_thread(log->reclaim_thread); + } + +-void r5l_quiesce(struct r5l_log *log, int state) ++void r5l_quiesce(struct r5l_log *log, int quiesce) + { + struct mddev *mddev; +- if (!log || state == 2) ++ if (!log) + return; +- if (state == 0) +- kthread_unpark(log->reclaim_thread->tsk); +- else if (state == 1) { ++ ++ if (quiesce) { + /* make sure r5l_write_super_and_discard_space exits */ + mddev = 
log->rdev->mddev; + wake_up(&mddev->sb_wait); + kthread_park(log->reclaim_thread->tsk); + r5l_wake_reclaim(log, MaxSector); + r5l_do_reclaim(log); +- } ++ } else ++ kthread_unpark(log->reclaim_thread->tsk); + } + + bool r5l_log_disk_error(struct r5conf *conf) +--- a/drivers/md/raid5-log.h ++++ b/drivers/md/raid5-log.h +@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct + extern void r5l_flush_stripe_to_raid(struct r5l_log *log); + extern void r5l_stripe_write_finished(struct stripe_head *sh); + extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); +-extern void r5l_quiesce(struct r5l_log *log, int state); ++extern void r5l_quiesce(struct r5l_log *log, int quiesce); + extern bool r5l_log_disk_error(struct r5conf *conf); + extern bool r5c_is_writeback(struct r5l_log *log); + extern int +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct + } + } + +-static void raid5_quiesce(struct mddev *mddev, int state) ++static void raid5_quiesce(struct mddev *mddev, int quiesce) + { + struct r5conf *conf = mddev->private; + +- switch(state) { +- case 2: /* resume for a suspend */ +- wake_up(&conf->wait_for_overlap); +- break; +- +- case 1: /* stop all writes */ ++ if (quiesce) { ++ /* stop all writes */ + lock_all_device_hash_locks_irq(conf); + /* '2' tells resync/reshape to pause so that all + * active stripes can drain +@@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev * + unlock_all_device_hash_locks_irq(conf); + /* allow reshape to continue */ + wake_up(&conf->wait_for_overlap); +- break; +- +- case 0: /* re-enable writes */ ++ } else { ++ /* re-enable writes */ + lock_all_device_hash_locks_irq(conf); + conf->quiesce = 0; + wake_up(&conf->wait_for_quiescent); + wake_up(&conf->wait_for_overlap); + unlock_all_device_hash_locks_irq(conf); +- break; + } +- r5l_quiesce(conf->log, state); ++ r5l_quiesce(conf->log, quiesce); + } + + static void *raid45_takeover_raid0(struct mddev 
*mddev, int level) diff --git a/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch b/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch new file mode 100644 index 00000000000..4d6cda463c4 --- /dev/null +++ b/queue-4.14/md-use-mddev_suspend-resume-instead-of-quiesce.patch @@ -0,0 +1,100 @@ +From 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Tue, 17 Oct 2017 13:46:43 +1100 +Subject: md: use mddev_suspend/resume instead of ->quiesce() + +From: NeilBrown + +commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream. + +mddev_suspend() is a more general interface than +calling ->quiesce() and is so more extensible. A +future patch will make use of this. + +Signed-off-by: NeilBrown +Signed-off-by: Shaohua Li +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, co + mddev->pers->quiesce(mddev, 2); + } else { + /* Expanding suspended region - need to wait */ +- mddev->pers->quiesce(mddev, 1); +- mddev->pers->quiesce(mddev, 0); ++ mddev_suspend(mddev); ++ mddev_resume(mddev); + } + err = 0; + unlock: +@@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, co + mddev->pers->quiesce(mddev, 2); + } else { + /* Expanding suspended region - need to wait */ +- mddev->pers->quiesce(mddev, 1); +- mddev->pers->quiesce(mddev, 0); ++ mddev_suspend(mddev); ++ mddev_resume(mddev); + } + err = 0; + unlock: +@@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev + struct bitmap *bitmap; + + bitmap = bitmap_create(mddev, -1); +- mddev->pers->quiesce(mddev, 1); ++ mddev_suspend(mddev); + if (!IS_ERR(bitmap)) { + mddev->bitmap = bitmap; + err = bitmap_load(mddev); +@@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev + bitmap_destroy(mddev); + fd = -1; + } +- 
mddev->pers->quiesce(mddev, 0); ++ mddev_resume(mddev); + } else if (fd < 0) { +- mddev->pers->quiesce(mddev, 1); ++ mddev_suspend(mddev); + bitmap_destroy(mddev); +- mddev->pers->quiesce(mddev, 0); ++ mddev_resume(mddev); + } + } + if (fd < 0) { +@@ -6966,7 +6966,7 @@ static int update_array_info(struct mdde + mddev->bitmap_info.space = + mddev->bitmap_info.default_space; + bitmap = bitmap_create(mddev, -1); +- mddev->pers->quiesce(mddev, 1); ++ mddev_suspend(mddev); + if (!IS_ERR(bitmap)) { + mddev->bitmap = bitmap; + rv = bitmap_load(mddev); +@@ -6974,7 +6974,7 @@ static int update_array_info(struct mdde + rv = PTR_ERR(bitmap); + if (rv) + bitmap_destroy(mddev); +- mddev->pers->quiesce(mddev, 0); ++ mddev_resume(mddev); + } else { + /* remove the bitmap */ + if (!mddev->bitmap) { +@@ -6997,9 +6997,9 @@ static int update_array_info(struct mdde + mddev->bitmap_info.nodes = 0; + md_cluster_ops->leave(mddev); + } +- mddev->pers->quiesce(mddev, 1); ++ mddev_suspend(mddev); + bitmap_destroy(mddev); +- mddev->pers->quiesce(mddev, 0); ++ mddev_resume(mddev); + mddev->bitmap_info.offset = 0; + } + } diff --git a/queue-4.14/series b/queue-4.14/series index 5ad2728107b..437485c87f6 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -31,3 +31,9 @@ netfilter-nf_tables-increase-nft_counters_enabled-in-nft_chain_stats_replace.pat netfilter-nf_tables-fix-memory-leak-on-error-exit-return.patch netfilter-nf_tables-add-missing-netlink-attrs-to-policies.patch netfilter-nf_tables-fix-null-ptr-in-nf_tables_dump_obj.patch +md-always-hold-reconfig_mutex-when-calling-mddev_suspend.patch +md-don-t-call-bitmap_create-while-array-is-quiesced.patch +md-move-suspend_hi-lo-handling-into-core-md-code.patch +md-use-mddev_suspend-resume-instead-of-quiesce.patch +md-allow-metadata-update-while-suspending.patch +md-remove-special-meaning-of-quiesce-..-2.patch