git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 23 Aug 2013 17:55:13 +0000 (10:55 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 23 Aug 2013 17:55:13 +0000 (10:55 -0700)
added patches:
workqueue-consider-work-function-when-searching-for-busy-work-items.patch
workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch

queue-3.4/series
queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch [new file with mode: 0644]
queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch [new file with mode: 0644]

diff --git a/queue-3.4/series b/queue-3.4/series
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fcb054863ac8ee84988feeb63ae06224a8e6be54 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -0,0 +1,2 @@
+workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
+workqueue-consider-work-function-when-searching-for-busy-work-items.patch
diff --git a/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch b/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch
new file mode 100644
index 0000000..ec8542d
--- /dev/null
+++ b/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch
@@ -0,0 +1,146 @@
+From a2c1c57be8d9fd5b716113c8991d3d702eeacf77 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 18 Dec 2012 10:35:02 -0800
+Subject: workqueue: consider work function when searching for busy work items
+
+From: Tejun Heo <tj@kernel.org>
+
+commit a2c1c57be8d9fd5b716113c8991d3d702eeacf77 upstream.
+
+To avoid executing the same work item concurrently, workqueue hashes
+currently busy workers according to their current work items and looks
+up the table when it wants to execute a new work item.  If there
+already is a worker which is executing the new work item, the new item
+is queued to the found worker so that it gets executed only after the
+current execution finishes.
+
+Unfortunately, a work item may be freed while being executed and thus
+recycled for different purposes.  If it gets recycled for a different
+work item and queued while the previous execution is still in
+progress, workqueue may make the new work item wait for the old one
+although the two aren't really related in any way.
+
+In extreme cases, this false dependency may lead to deadlock although
+it's extremely unlikely given that there aren't too many self-freeing
+work item users and they usually don't wait for other work items.
+
+To alleviate the problem, record the current work function in each
+busy worker and match it together with the work item address in
+find_worker_executing_work().  While this isn't complete, it ensures
+that unrelated work items don't interact with each other, and in the
+very unlikely case where a twisted wq user triggers it, it's always
+onto itself, making the culprit easy to spot.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Andrey Isakov <andy51@gmx.ru>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=51701
+[lizf: Backported to 3.4:
+ - Adjust context
+ - Incorporate earlier logging cleanup in process_one_work() from
+   044c782ce3a9 ('workqueue: fix checkpatch issues')]
+Signed-off-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   42 +++++++++++++++++++++++++++++++-----------
+ 1 file changed, 31 insertions(+), 11 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -128,6 +128,7 @@ struct worker {
+       };
+       struct work_struct      *current_work;  /* L: work being processed */
++      work_func_t             current_func;   /* L: current_work's fn */
+       struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+       struct list_head        scheduled;      /* L: scheduled works */
+       struct task_struct      *task;          /* I: worker task */
+@@ -838,7 +839,8 @@ static struct worker *__find_worker_exec
+       struct hlist_node *tmp;
+       hlist_for_each_entry(worker, tmp, bwh, hentry)
+-              if (worker->current_work == work)
++              if (worker->current_work == work &&
++                  worker->current_func == work->func)
+                       return worker;
+       return NULL;
+ }
+@@ -848,9 +850,27 @@ static struct worker *__find_worker_exec
+  * @gcwq: gcwq of interest
+  * @work: work to find worker for
+  *
+- * Find a worker which is executing @work on @gcwq.  This function is
+- * identical to __find_worker_executing_work() except that this
+- * function calculates @bwh itself.
++ * Find a worker which is executing @work on @gcwq by searching
++ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
++ * to match, its current execution should match the address of @work and
++ * its work function.  This is to avoid unwanted dependency between
++ * unrelated work executions through a work item being recycled while still
++ * being executed.
++ *
++ * This is a bit tricky.  A work item may be freed once its execution
++ * starts and nothing prevents the freed area from being recycled for
++ * another work item.  If the same work item address ends up being reused
++ * before the original execution finishes, workqueue will identify the
++ * recycled work item as currently executing and make it wait until the
++ * current execution finishes, introducing an unwanted dependency.
++ *
++ * This function checks the work item address, work function and workqueue
++ * to avoid false positives.  Note that this isn't complete as one may
++ * construct a work function which can introduce dependency onto itself
++ * through a recycled work item.  Well, if somebody wants to shoot oneself
++ * in the foot that badly, there's only so much we can do, and if such
++ * deadlock actually occurs, it should be easy to locate the culprit work
++ * function.
+  *
+  * CONTEXT:
+  * spin_lock_irq(gcwq->lock).
+@@ -1811,7 +1831,6 @@ __acquires(&gcwq->lock)
+       struct global_cwq *gcwq = cwq->gcwq;
+       struct hlist_head *bwh = busy_worker_head(gcwq, work);
+       bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
+-      work_func_t f = work->func;
+       int work_color;
+       struct worker *collision;
+ #ifdef CONFIG_LOCKDEP
+@@ -1840,6 +1859,7 @@ __acquires(&gcwq->lock)
+       debug_work_deactivate(work);
+       hlist_add_head(&worker->hentry, bwh);
+       worker->current_work = work;
++      worker->current_func = work->func;
+       worker->current_cwq = cwq;
+       work_color = get_work_color(work);
+@@ -1877,7 +1897,7 @@ __acquires(&gcwq->lock)
+       lock_map_acquire_read(&cwq->wq->lockdep_map);
+       lock_map_acquire(&lockdep_map);
+       trace_workqueue_execute_start(work);
+-      f(work);
++      worker->current_func(work);
+       /*
+        * While we must be careful to not use "work" after this, the trace
+        * point will only record its address.
+@@ -1887,11 +1907,10 @@ __acquires(&gcwq->lock)
+       lock_map_release(&cwq->wq->lockdep_map);
+       if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
+-              printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
+-                     "%s/0x%08x/%d\n",
+-                     current->comm, preempt_count(), task_pid_nr(current));
+-              printk(KERN_ERR "    last function: ");
+-              print_symbol("%s\n", (unsigned long)f);
++              pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
++                     "     last function: %pf\n",
++                     current->comm, preempt_count(), task_pid_nr(current),
++                     worker->current_func);
+               debug_show_held_locks(current);
+               dump_stack();
+       }
+@@ -1905,6 +1924,7 @@ __acquires(&gcwq->lock)
+       /* we're done with it, release */
+       hlist_del_init(&worker->hentry);
+       worker->current_work = NULL;
++      worker->current_func = NULL;
+       worker->current_cwq = NULL;
+       cwq_dec_nr_in_flight(cwq, work_color, false);
+ }
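
The recycling scenario described in the changelog above can be shown with a
minimal userspace sketch (plain C, not kernel code; the struct and function
names below are invented for illustration).  A lookup keyed only by the work
item's address still matches a freed-and-reallocated item, while the
address-plus-function check the patch adds does not:

/*
 * Toy userspace model of the recycled-work-item problem: the busy-worker
 * lookup keyed only by the work item's address mistakes a freed-and-recycled
 * item for the one still executing; matching the work function as well
 * tells the two apart.
 */
#include <stdio.h>

typedef void (*work_func_t)(void *);

struct work_item {
	work_func_t func;
};

/* one "busy worker" slot: what it is currently executing */
struct busy_worker {
	struct work_item *current_work;
	work_func_t current_func;
};

static void old_func(void *arg) { }
static void new_func(void *arg) { }

/* pre-patch check: address only */
static int matches_by_address(struct busy_worker *w, struct work_item *work)
{
	return w->current_work == work;
}

/* post-patch check: address and work function */
static int matches_by_address_and_func(struct busy_worker *w,
				       struct work_item *work)
{
	return w->current_work == work && w->current_func == work->func;
}

int main(void)
{
	struct work_item slot = { .func = old_func };
	struct busy_worker worker = {
		.current_work = &slot,	/* still "executing" the old item */
		.current_func = old_func,
	};

	/* the memory is freed and recycled for an unrelated work item */
	slot.func = new_func;

	printf("address-only match:     %d (false dependency)\n",
	       matches_by_address(&worker, &slot));
	printf("address+function match: %d (no false dependency)\n",
	       matches_by_address_and_func(&worker, &slot));
	return 0;
}

The real workqueue keys a per-gcwq busy hash rather than a single slot, but
the matching rule added by the patch is the same pair of comparisons.
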
diff --git a/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch b/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
new file mode 100644
index 0000000..a2b7793
--- /dev/null
+++ b/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
@@ -0,0 +1,93 @@
+From 3aa62497594430ea522050b75c033f71f2c60ee6 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+Date: Tue, 18 Sep 2012 10:40:00 -0700
+Subject: workqueue: fix possible stall on try_to_grab_pending() of a delayed work item
+
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+
+commit 3aa62497594430ea522050b75c033f71f2c60ee6 upstream.
+
+Currently, when try_to_grab_pending() grabs a delayed work item, it
+leaves its linked work items alone on the delayed_works list.  The
+linked work items are always NO_COLOR and will cause a future
+cwq_activate_first_delayed() to increase cwq->nr_active incorrectly, and
+may cause the whole cwq to stall.  For example,
+
+state: cwq->max_active = 1, cwq->nr_active = 1
+       one work in cwq->pool, many in cwq->delayed_works.
+
+step1: try_to_grab_pending() removes a work item from delayed_works
+       but leaves its NO_COLOR linked work items on it.
+
+step2: Later on, cwq_activate_first_delayed() activates the linked
+       work item increasing ->nr_active.
+
+step3: cwq->nr_active = 1, but all activated work items of the cwq are
+       NO_COLOR.  When they finish, cwq->nr_active will not be
+       decreased due to NO_COLOR, and no further work items will be
+       activated from cwq->delayed_works.  The cwq stalls.
+
+Fix it by ensuring the target work item is activated before stealing
+PENDING in try_to_grab_pending().  This ensures that all the linked
+work items are activated without incorrectly bumping cwq->nr_active.
+
+tj: Updated comment and description.
+
+Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[lizf: backported to 3.4: adjust context]
+Signed-off-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   25 ++++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1721,10 +1721,9 @@ static void move_linked_works(struct wor
+               *nextp = n;
+ }
+-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
++static void cwq_activate_delayed_work(struct work_struct *work)
+ {
+-      struct work_struct *work = list_first_entry(&cwq->delayed_works,
+-                                                  struct work_struct, entry);
++      struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+       struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
+       trace_workqueue_activate_work(work);
+@@ -1733,6 +1732,14 @@ static void cwq_activate_first_delayed(s
+       cwq->nr_active++;
+ }
++static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
++{
++      struct work_struct *work = list_first_entry(&cwq->delayed_works,
++                                                  struct work_struct, entry);
++
++      cwq_activate_delayed_work(work);
++}
++
+ /**
+  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
+  * @cwq: cwq of interest
+@@ -2625,6 +2632,18 @@ static int try_to_grab_pending(struct wo
+               smp_rmb();
+               if (gcwq == get_work_gcwq(work)) {
+                       debug_work_deactivate(work);
++
++                      /*
++                       * A delayed work item cannot be grabbed directly
++                       * because it might have linked NO_COLOR work items
++                       * which, if left on the delayed_list, will confuse
++                       * cwq->nr_active management later on and cause
++                       * stall.  Make sure the work item is activated
++                       * before grabbing.
++                       */
++                      if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
++                              cwq_activate_delayed_work(work);
++
+                       list_del_init(&work->entry);
+                       cwq_dec_nr_in_flight(get_work_cwq(work),
+                               get_work_color(work),
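
The nr_active accounting behind the stall can be modelled with a small
userspace sketch (plain C, not kernel code; toy_cwq and the helpers below
are invented for illustration).  Once the leftover NO_COLOR item is
activated it occupies the only max_active slot, finishing it never gives
the slot back, and nothing further is ever activated:

/*
 * Toy model of the stall from the changelog: max_active = 1, the one
 * in-flight colored item has already finished (nr_active back to 0),
 * which triggers activation of the first delayed item -- the leftover
 * NO_COLOR item that try_to_grab_pending() left behind.
 */
#include <stdio.h>
#include <stdbool.h>

struct toy_cwq {
	int max_active;
	int nr_active;
	int nr_delayed;		/* normal (colored) items still waiting */
};

/* a colored item releases its nr_active slot on completion; NO_COLOR does not */
static void finish_item(struct toy_cwq *cwq, bool no_color)
{
	if (!no_color)
		cwq->nr_active--;
}

static bool can_activate(const struct toy_cwq *cwq)
{
	return cwq->nr_active < cwq->max_active && cwq->nr_delayed > 0;
}

int main(void)
{
	struct toy_cwq cwq = { .max_active = 1, .nr_active = 0, .nr_delayed = 3 };

	/* step2 from the changelog: the leftover NO_COLOR item is activated */
	cwq.nr_active++;

	/* step3: it finishes, but NO_COLOR means nr_active is not decreased */
	finish_item(&cwq, true);

	printf("nr_active = %d, can activate more? %s -> the cwq stalls\n",
	       cwq.nr_active, can_activate(&cwq) ? "yes" : "no");
	return 0;
}

Activating the delayed work item (and its linked NO_COLOR items) before
stealing PENDING, as the patch does, keeps this counter consistent.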