From: Greg Kroah-Hartman
Date: Fri, 23 Aug 2013 17:55:13 +0000 (-0700)
Subject: 3.4-stable patches
X-Git-Tag: v3.0.94~10
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=086d370c04d18e17541ce4c7d3a44c00e00bce52;p=thirdparty%2Fkernel%2Fstable-queue.git

3.4-stable patches

added patches:
	workqueue-consider-work-function-when-searching-for-busy-work-items.patch
	workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
---

diff --git a/queue-3.4/series b/queue-3.4/series
index e69de29bb2d..fcb054863ac 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -0,0 +1,2 @@
+workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
+workqueue-consider-work-function-when-searching-for-busy-work-items.patch
diff --git a/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch b/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch
new file mode 100644
index 00000000000..ec8542dec47
--- /dev/null
+++ b/queue-3.4/workqueue-consider-work-function-when-searching-for-busy-work-items.patch
@@ -0,0 +1,146 @@
+From a2c1c57be8d9fd5b716113c8991d3d702eeacf77 Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Tue, 18 Dec 2012 10:35:02 -0800
+Subject: workqueue: consider work function when searching for busy work items
+
+From: Tejun Heo
+
+commit a2c1c57be8d9fd5b716113c8991d3d702eeacf77 upstream.
+
+To avoid executing the same work item concurrently, workqueue hashes
+currently busy workers according to their current work items and looks
+up the table when it wants to execute a new work item. If there
+already is a worker which is executing the new work item, the new item
+is queued to the found worker so that it gets executed only after the
+current execution finishes.
+
+Unfortunately, a work item may be freed while being executed and thus
+recycled for different purposes. If it gets recycled for a different
+work item and queued while the previous execution is still in
+progress, workqueue may make the new work item wait for the old one
+although the two aren't really related in any way.
+
+In extreme cases, this false dependency may lead to deadlock, although
+it's extremely unlikely given that there aren't too many self-freeing
+work item users and they usually don't wait for other work items.
+
+To alleviate the problem, record the current work function in each
+busy worker and match it together with the work item address in
+find_worker_executing_work(). While this isn't complete, it ensures
+that unrelated work items don't interact with each other and in the
+very unlikely case where a twisted wq user triggers it, it's always
+onto itself, making the culprit easy to spot.
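To make the recycling hazard and the fix concrete, here is a minimal
standalone C sketch (illustrative only; none of these names are the
kernel's, and the real lookup is __find_worker_executing_work() in the
hunks below). A busy worker snapshots both the work item's address and
its function, and the lookup matches on the pair, so a recycled
allocation that now carries a different function no longer collides:

/*
 * Standalone model, not kernel code: why the busy-worker lookup must
 * compare the work function as well as the work item address.
 */
#include <stdio.h>

typedef void (*work_func_t)(void *);

struct work {
	work_func_t func;
};

struct worker {
	const struct work *current_work;	/* item being executed */
	work_func_t current_func;		/* snapshot of its function */
};

/* Pre-fix lookup: the address alone identifies the running item. */
static int busy_by_addr(const struct worker *w, const struct work *work)
{
	return w->current_work == work;
}

/* Post-fix lookup: address and function must both match. */
static int busy_by_addr_and_func(const struct worker *w,
				 const struct work *work)
{
	return w->current_work == work && w->current_func == work->func;
}

static void old_fn(void *arg) { (void)arg; }
static void new_fn(void *arg) { (void)arg; }

int main(void)
{
	struct work item = { old_fn };
	struct worker busy = { &item, old_fn };	/* old_fn still running */

	/* The item is freed mid-execution and its memory is recycled
	 * for an unrelated work item at the same address. */
	item.func = new_fn;

	printf("address only:       %s\n",
	       busy_by_addr(&busy, &item) ?
	       "match -> false dependency" : "no match");
	printf("address + function: %s\n",
	       busy_by_addr_and_func(&busy, &item) ?
	       "match" : "no match -> runs independently");
	return 0;
}

Compiled standalone, the address-only lookup reports a collision after
the recycle while the paired lookup correctly reports none, mirroring
the one-line change to __find_worker_executing_work() below.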
+
+Signed-off-by: Tejun Heo
+Reported-by: Andrey Isakov
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=51701
+[lizf: Backported to 3.4:
+ - Adjust context
+ - Incorporate earlier logging cleanup in process_one_work() from
+   044c782ce3a9 ('workqueue: fix checkpatch issues')]
+Signed-off-by: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/workqueue.c |   42 +++++++++++++++++++++++++++++++-----------
+ 1 file changed, 31 insertions(+), 11 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -128,6 +128,7 @@ struct worker {
+ 	};
+ 
+ 	struct work_struct	*current_work;	/* L: work being processed */
++	work_func_t		current_func;	/* L: current_work's fn */
+ 	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+ 	struct list_head	scheduled;	/* L: scheduled works */
+ 	struct task_struct	*task;		/* I: worker task */
+@@ -838,7 +839,8 @@ static struct worker *__find_worker_exec
+ 	struct hlist_node *tmp;
+ 
+ 	hlist_for_each_entry(worker, tmp, bwh, hentry)
+-		if (worker->current_work == work)
++		if (worker->current_work == work &&
++		    worker->current_func == work->func)
+ 			return worker;
+ 	return NULL;
+ }
+@@ -848,9 +850,27 @@ static struct worker *__find_worker_exec
+  * @gcwq: gcwq of interest
+  * @work: work to find worker for
+  *
+- * Find a worker which is executing @work on @gcwq.  This function is
+- * identical to __find_worker_executing_work() except that this
+- * function calculates @bwh itself.
++ * Find a worker which is executing @work on @gcwq by searching
++ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
++ * to match, its current execution should match the address of @work and
++ * its work function.  This is to avoid unwanted dependency between
++ * unrelated work executions through a work item being recycled while still
++ * being executed.
++ *
++ * This is a bit tricky.  A work item may be freed once its execution
++ * starts and nothing prevents the freed area from being recycled for
++ * another work item.  If the same work item address ends up being reused
++ * before the original execution finishes, workqueue will identify the
++ * recycled work item as currently executing and make it wait until the
++ * current execution finishes, introducing an unwanted dependency.
++ *
++ * This function checks the work item address, work function and workqueue
++ * to avoid false positives.  Note that this isn't complete as one may
++ * construct a work function which can introduce dependency onto itself
++ * through a recycled work item.  Well, if somebody wants to shoot oneself
++ * in the foot that badly, there's only so much we can do, and if such
++ * deadlock actually occurs, it should be easy to locate the culprit work
++ * function.
+  *
+  * CONTEXT:
+  * spin_lock_irq(gcwq->lock).
+@@ -1811,7 +1831,6 @@ __acquires(&gcwq->lock)
+ 	struct global_cwq *gcwq = cwq->gcwq;
+ 	struct hlist_head *bwh = busy_worker_head(gcwq, work);
+ 	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
+-	work_func_t f = work->func;
+ 	int work_color;
+ 	struct worker *collision;
+ #ifdef CONFIG_LOCKDEP
+@@ -1840,6 +1859,7 @@ __acquires(&gcwq->lock)
+ 	debug_work_deactivate(work);
+ 	hlist_add_head(&worker->hentry, bwh);
+ 	worker->current_work = work;
++	worker->current_func = work->func;
+ 	worker->current_cwq = cwq;
+ 	work_color = get_work_color(work);
+ 
+@@ -1877,7 +1897,7 @@ __acquires(&gcwq->lock)
+ 	lock_map_acquire_read(&cwq->wq->lockdep_map);
+ 	lock_map_acquire(&lockdep_map);
+ 	trace_workqueue_execute_start(work);
+-	f(work);
++	worker->current_func(work);
+ 	/*
+ 	 * While we must be careful to not use "work" after this, the trace
+ 	 * point will only record its address.
+@@ -1887,11 +1907,10 @@ __acquires(&gcwq->lock)
+ 	lock_map_release(&cwq->wq->lockdep_map);
+ 
+ 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
+-		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
+-		       "%s/0x%08x/%d\n",
+-		       current->comm, preempt_count(), task_pid_nr(current));
+-		printk(KERN_ERR "    last function: ");
+-		print_symbol("%s\n", (unsigned long)f);
++		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
++		       "      last function: %pf\n",
++		       current->comm, preempt_count(), task_pid_nr(current),
++		       worker->current_func);
+ 		debug_show_held_locks(current);
+ 		dump_stack();
+ 	}
+@@ -1905,6 +1924,7 @@ __acquires(&gcwq->lock)
+ 	/* we're done with it, release */
+ 	hlist_del_init(&worker->hentry);
+ 	worker->current_work = NULL;
++	worker->current_func = NULL;
+ 	worker->current_cwq = NULL;
+ 	cwq_dec_nr_in_flight(cwq, work_color, false);
+ }
diff --git a/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch b/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
new file mode 100644
index 00000000000..a2b779358a8
--- /dev/null
+++ b/queue-3.4/workqueue-fix-possible-stall-on-try_to_grab_pending-of-a-delayed-work-item.patch
@@ -0,0 +1,93 @@
+From 3aa62497594430ea522050b75c033f71f2c60ee6 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan
+Date: Tue, 18 Sep 2012 10:40:00 -0700
+Subject: workqueue: fix possible stall on try_to_grab_pending() of a delayed work item
+
+From: Lai Jiangshan
+
+commit 3aa62497594430ea522050b75c033f71f2c60ee6 upstream.
+
+Currently, when try_to_grab_pending() grabs a delayed work item, it
+leaves its linked work items alone on the delayed_works.  The linked
+work items are always NO_COLOR and will cause future
+cwq_activate_first_delayed() to increase cwq->nr_active incorrectly, and
+may cause the whole cwq to stall.  For example,
+
+state: cwq->max_active = 1, cwq->nr_active = 1
+       one work in cwq->pool, many in cwq->delayed_works.
+
+step1: try_to_grab_pending() removes a work item from delayed_works
+       but leaves its NO_COLOR linked work items on it.
+
+step2: Later on, cwq_activate_first_delayed() activates the linked
+       work item, increasing ->nr_active.
+
+step3: cwq->nr_active = 1, but all activated work items of the cwq are
+       NO_COLOR.  When they finish, cwq->nr_active will not be
+       decreased due to NO_COLOR, and no further work items will be
+       activated from cwq->delayed_works.  The cwq stalls.
+
+Fix it by ensuring the target work item is activated before stealing
+PENDING in try_to_grab_pending().  This ensures that all the linked
+work items are activated without incorrectly bumping cwq->nr_active.
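The arithmetic behind steps 1-3 can be checked with a tiny standalone
counter model (illustrative C only, not kernel code; the helpers merely
mimic the cwq->nr_active bookkeeping described above, in particular
that NO_COLOR completions skip the decrement):

/*
 * Standalone model of the stall: once a stranded NO_COLOR item is
 * activated, nr_active is pinned at max_active and nothing further
 * is ever activated.
 */
#include <stdio.h>

#define MAX_ACTIVE	1
#define NO_COLOR	1
#define NORMAL		0

static int nr_active;		/* models cwq->nr_active */
static int nr_delayed = 3;	/* normal items on cwq->delayed_works */

/* models cwq_activate_first_delayed(): always bumps nr_active */
static void activate(int color)
{
	(void)color;
	nr_active++;
}

/* models cwq_dec_nr_in_flight(): NO_COLOR skips the decrement */
static void finish(int color)
{
	if (color != NO_COLOR)
		nr_active--;
}

int main(void)
{
	/* initial state: one normal item executing, queue saturated */
	nr_active = 1;

	/* step1 (buggy path): try_to_grab_pending() steals a delayed
	 * item but strands its NO_COLOR linked item on delayed_works */
	nr_delayed--;

	finish(NORMAL);		/* the running item completes ... */
	activate(NO_COLOR);	/* step2: the stranded NO_COLOR item is
				 * activated, nr_active returns to 1 */
	finish(NO_COLOR);	/* step3: it completes, but nr_active
				 * is NOT decremented */

	printf("nr_active=%d/%d, delayed=%d -> %s\n",
	       nr_active, MAX_ACTIVE, nr_delayed,
	       nr_active >= MAX_ACTIVE ? "stalled" : "ok");
	return 0;
}

This prints "nr_active=1/1, delayed=2 -> stalled": the remaining
delayed items can never be activated. Activating linked items before
stealing PENDING, as the patch below does, means step1 never strands
them in the first place.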
+
+tj: Updated comment and description.
+
+Signed-off-by: Lai Jiangshan
+Signed-off-by: Tejun Heo
+[lizf: backported to 3.4: adjust context]
+Signed-off-by: Li Zefan
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/workqueue.c |   25 ++++++++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1721,10 +1721,9 @@ static void move_linked_works(struct wor
+ 		*nextp = n;
+ }
+ 
+-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
++static void cwq_activate_delayed_work(struct work_struct *work)
+ {
+-	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+-					    struct work_struct, entry);
++	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+ 	struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
+ 
+ 	trace_workqueue_activate_work(work);
+@@ -1733,6 +1732,14 @@ static void cwq_activate_first_delayed(s
+ 	cwq->nr_active++;
+ }
+ 
++static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
++{
++	struct work_struct *work = list_first_entry(&cwq->delayed_works,
++					    struct work_struct, entry);
++
++	cwq_activate_delayed_work(work);
++}
++
+ /**
+  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
+  * @cwq: cwq of interest
+@@ -2625,6 +2632,18 @@ static int try_to_grab_pending(struct wo
+ 		smp_rmb();
+ 		if (gcwq == get_work_gcwq(work)) {
+ 			debug_work_deactivate(work);
++
++			/*
++			 * A delayed work item cannot be grabbed directly
++			 * because it might have linked NO_COLOR work items
++			 * which, if left on the delayed_list, will confuse
++			 * cwq->nr_active management later on and cause
++			 * stall.  Make sure the work item is activated
++			 * before grabbing.
++			 */
++			if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
++				cwq_activate_delayed_work(work);
++
+ 			list_del_init(&work->entry);
+ 			cwq_dec_nr_in_flight(get_work_cwq(work),
+ 				get_work_color(work),