git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 15 Dec 2019 12:12:03 +0000 (13:12 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 15 Dec 2019 12:12:03 +0000 (13:12 +0100)
added patches:
workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch
workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch

queue-4.4/series
queue-4.4/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch [new file with mode: 0644]
queue-4.4/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch [new file with mode: 0644]

index f68bf4d30d8978662c90d6d47fa7e681199ec6c5..a21a0d67fd7189e6076639930953aaeeb6eb74b1 100644 (file)
@@ -102,3 +102,5 @@ rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch
 rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch
 rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch
 lib-raid6-fix-awk-build-warnings.patch
+workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch
+workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch
diff --git a/queue-4.4/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch b/queue-4.4/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch
new file mode 100644 (file)
index 0000000..0596833
--- /dev/null
@@ -0,0 +1,60 @@
+From e66b39af00f426b3356b96433d620cb3367ba1ff Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Wed, 25 Sep 2019 06:59:15 -0700
+Subject: workqueue: Fix pwq ref leak in rescuer_thread()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit e66b39af00f426b3356b96433d620cb3367ba1ff upstream.
+
+008847f66c3 ("workqueue: allow rescuer thread to do more work.") made
+the rescuer worker requeue the pwq immediately if there may be more
+work items which need rescuing instead of waiting for the next mayday
+timer expiration.  Unfortunately, it doesn't check whether the pwq is
+already on the mayday list and unconditionally gets the ref and moves
+it onto the list.  This doesn't corrupt the list but creates an
+additional reference to the pwq.  It got queued twice but will only be
+removed once.
+
+This leak can later trigger a pwq refcnt warning on workqueue
+destruction and prevent the workqueue from being freed.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: "Williams, Gerald S" <gerald.s.williams@intel.com>
+Cc: NeilBrown <neilb@suse.de>
+Cc: stable@vger.kernel.org # v3.19+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/workqueue.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -2308,8 +2308,14 @@ repeat:
+                        */
+                       if (need_to_create_worker(pool)) {
+                               spin_lock(&wq_mayday_lock);
+-                              get_pwq(pwq);
+-                              list_move_tail(&pwq->mayday_node, &wq->maydays);
++                              /*
++                               * Queue iff we aren't racing destruction
++                               * and somebody else hasn't queued it already.
++                               */
++                              if (wq->rescuer && list_empty(&pwq->mayday_node)) {
++                                      get_pwq(pwq);
++                                      list_add_tail(&pwq->mayday_node, &wq->maydays);
++                              }
+                               spin_unlock(&wq_mayday_lock);
+                       }
+               }
+@@ -4276,7 +4282,8 @@ static void show_pwq(struct pool_workque
+       pr_info("  pwq %d:", pool->id);
+       pr_cont_pool_info(pool);
+-      pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
++      pr_cont(" active=%d/%d refcnt=%d%s\n",
++              pwq->nr_active, pwq->max_active, pwq->refcnt,
+               !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+       hash_for_each(pool->busy_hash, bkt, worker, hentry) {
diff --git a/queue-4.4/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch b/queue-4.4/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch
new file mode 100644 (file)
index 0000000..466d8ba
--- /dev/null
@@ -0,0 +1,83 @@
+From def98c84b6cdf2eeea19ec5736e90e316df5206b Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Wed, 18 Sep 2019 18:43:40 -0700
+Subject: workqueue: Fix spurious sanity check failures in destroy_workqueue()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit def98c84b6cdf2eeea19ec5736e90e316df5206b upstream.
+
+Before actually destroying a workqueue, destroy_workqueue() checks
+whether it's actually idle.  If it isn't, it prints out a bunch of
+warning messages and leaves the workqueue dangling.  It unfortunately
+has a couple of issues.
+
+* Mayday list queueing increments the pwq's refcnt, which gets detected
+  as busy and fails the sanity checks.  However, because mayday list
+  queueing is asynchronous, this condition can happen without any
+  actual work items left in the workqueue.
+
+* Sanity check failure leaves the sysfs interface behind too which can
+  lead to init failure of newer instances of the workqueue.
+
+This patch fixes the above two by
+
+* If a workqueue has a rescuer, disable and kill the rescuer before
+  sanity checks.  Disabling and killing is guaranteed to flush the
+  existing mayday list.
+
+* Remove sysfs interface before sanity checks.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Marcin Pawlowski <mpawlowski@fb.com>
+Reported-by: "Williams, Gerald S" <gerald.s.williams@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/workqueue.c |   24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -3949,9 +3949,28 @@ void destroy_workqueue(struct workqueue_
+       struct pool_workqueue *pwq;
+       int node;
++      /*
++       * Remove it from sysfs first so that sanity check failure doesn't
++       * lead to sysfs name conflicts.
++       */
++      workqueue_sysfs_unregister(wq);
++
+       /* drain it before proceeding with destruction */
+       drain_workqueue(wq);
++      /* kill rescuer, if sanity checks fail, leave it w/o rescuer */
++      if (wq->rescuer) {
++              struct worker *rescuer = wq->rescuer;
++
++              /* this prevents new queueing */
++              spin_lock_irq(&wq_mayday_lock);
++              wq->rescuer = NULL;
++              spin_unlock_irq(&wq_mayday_lock);
++
++              /* rescuer will empty maydays list before exiting */
++              kthread_stop(rescuer->task);
++      }
++
+       /* sanity checks */
+       mutex_lock(&wq->mutex);
+       for_each_pwq(pwq, wq) {
+@@ -3981,11 +4000,6 @@ void destroy_workqueue(struct workqueue_
+       list_del_rcu(&wq->list);
+       mutex_unlock(&wq_pool_mutex);
+-      workqueue_sysfs_unregister(wq);
+-
+-      if (wq->rescuer)
+-              kthread_stop(wq->rescuer->task);
+-
+       if (!(wq->flags & WQ_UNBOUND)) {
+               /*
+                * The base ref is never dropped on per-cpu pwqs.  Directly