From: Greg Kroah-Hartman Date: Thu, 8 Aug 2019 09:13:02 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v5.2.8~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=78f08ed1a2b96c058024a2403075732eea548bc7;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: cgroup-call-cgroup_release-before-__exit_signal.patch cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch cgroup-implement-css_task_iter_skip.patch cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch --- diff --git a/queue-4.19/cgroup-call-cgroup_release-before-__exit_signal.patch b/queue-4.19/cgroup-call-cgroup_release-before-__exit_signal.patch new file mode 100644 index 00000000000..71fcba8682c --- /dev/null +++ b/queue-4.19/cgroup-call-cgroup_release-before-__exit_signal.patch @@ -0,0 +1,42 @@ +From 6b115bf58e6f013ca75e7115aabcbd56c20ff31d Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 31 May 2019 10:38:57 -0700 +Subject: cgroup: Call cgroup_release() before __exit_signal() + +From: Tejun Heo + +commit 6b115bf58e6f013ca75e7115aabcbd56c20ff31d upstream. + +cgroup_release() calls cgroup_subsys->release() which is used by the +pids controller to uncharge its pid. We want to use it to manage +iteration of dying tasks which requires putting it before +__unhash_process(). Move cgroup_release() above __exit_signal(). +While this makes it uncharge before the pid is freed, pid is RCU freed +anyway and the window is very narrow. + +Signed-off-by: Tejun Heo +Cc: Oleg Nesterov +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/exit.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -194,6 +194,7 @@ repeat: + rcu_read_unlock(); + + proc_flush_task(p); ++ cgroup_release(p); + + write_lock_irq(&tasklist_lock); + ptrace_release_task(p); +@@ -219,7 +220,6 @@ repeat: + } + + write_unlock_irq(&tasklist_lock); +- cgroup_release(p); + release_thread(p); + call_rcu(&p->rcu, delayed_put_task_struct); + diff --git a/queue-4.19/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch b/queue-4.19/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch new file mode 100644 index 00000000000..f249aa88525 --- /dev/null +++ b/queue-4.19/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch @@ -0,0 +1,42 @@ +From cee0c33c546a93957a52ae9ab6bebadbee765ec5 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 5 Jun 2019 09:54:34 -0700 +Subject: cgroup: css_task_iter_skip()'d iterators must be advanced before accessed + +From: Tejun Heo + +commit cee0c33c546a93957a52ae9ab6bebadbee765ec5 upstream. + +b636fd38dc40 ("cgroup: Implement css_task_iter_skip()") introduced +css_task_iter_skip() which is used to fix task iterations skipping +dying threadgroup leaders with live threads. Skipping is implemented +as a subportion of full advancing but css_task_iter_next() forgot to +fully advance a skipped iterator before determining the next task to +visit causing it to return invalid task pointers. + +Fix it by making css_task_iter_next() fully advance the iterator if it +has been skipped since the previous iteration. + +Signed-off-by: Tejun Heo +Reported-by: syzbot +Link: http://lkml.kernel.org/r/00000000000097025d058a7fd785@google.com +Fixes: b636fd38dc40 ("cgroup: Implement css_task_iter_skip()") +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup/cgroup.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -4303,6 +4303,10 @@ struct task_struct *css_task_iter_next(s + + spin_lock_irq(&css_set_lock); + ++ /* @it may be half-advanced by skips, finish advancing */ ++ if (it->flags & CSS_TASK_ITER_SKIPPED) ++ css_task_iter_advance(it); ++ + if (it->task_pos) { + it->cur_task = list_entry(it->task_pos, struct task_struct, + cg_list); diff --git a/queue-4.19/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch b/queue-4.19/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch new file mode 100644 index 00000000000..683382577f9 --- /dev/null +++ b/queue-4.19/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch @@ -0,0 +1,36 @@ +From c596687a008b579c503afb7a64fcacc7270fae9e Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Mon, 10 Jun 2019 09:08:27 -0700 +Subject: cgroup: Fix css_task_iter_advance_css_set() cset skip condition + +From: Tejun Heo + +commit c596687a008b579c503afb7a64fcacc7270fae9e upstream. + +While adding handling for dying task group leaders c03cd7738a83 +("cgroup: Include dying leaders with live threads in PROCS +iterations") added an inverted cset skip condition to +css_task_iter_advance_css_set(). It should skip cset if it's +completely empty but was incorrectly testing for the inverse condition +for the dying_tasks list. Fix it. + +Signed-off-by: Tejun Heo +Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") +Reported-by: syzbot+d4bba5ccd4f9a2a68681@syzkaller.appspotmail.com +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup/cgroup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -4154,7 +4154,7 @@ static void css_task_iter_advance_css_se + it->task_pos = NULL; + return; + } +- } while (!css_set_populated(cset) && !list_empty(&cset->dying_tasks)); ++ } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); + + if (!list_empty(&cset->tasks)) + it->task_pos = cset->tasks.next; diff --git a/queue-4.19/cgroup-implement-css_task_iter_skip.patch b/queue-4.19/cgroup-implement-css_task_iter_skip.patch new file mode 100644 index 00000000000..fe5ac7b76f2 --- /dev/null +++ b/queue-4.19/cgroup-implement-css_task_iter_skip.patch @@ -0,0 +1,150 @@ +From b636fd38dc40113f853337a7d2a6885ad23b8811 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 31 May 2019 10:38:58 -0700 +Subject: cgroup: Implement css_task_iter_skip() + +From: Tejun Heo + +commit b636fd38dc40113f853337a7d2a6885ad23b8811 upstream. + +When a task is moved out of a cset, task iterators pointing to the +task are advanced using the normal css_task_iter_advance() call. This +is fine but we'll be tracking dying tasks on csets and thus moving +tasks from cset->tasks to (to be added) cset->dying_tasks. When we +remove a task from cset->tasks, if we advance the iterators, they may +move over to the next cset before we had the chance to add the task +back on the dying list, which can allow the task to escape iteration. + +This patch separates out skipping from advancing. Skipping only moves +the affected iterators to the next pointer rather than fully advancing +it and the following advancing will recognize that the cursor has +already been moved forward and do the rest of advancing. This ensures +that when a task moves from one list to another in its cset, as long +as it moves in the right direction, it's always visible to iteration. + +This doesn't cause any visible behavior changes. + +Signed-off-by: Tejun Heo +Cc: Oleg Nesterov +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/cgroup.h | 3 ++ + kernel/cgroup/cgroup.c | 60 +++++++++++++++++++++++++++++-------------------- + 2 files changed, 39 insertions(+), 24 deletions(-) + +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -43,6 +43,9 @@ + /* walk all threaded css_sets in the domain */ + #define CSS_TASK_ITER_THREADED (1U << 1) + ++/* internal flags */ ++#define CSS_TASK_ITER_SKIPPED (1U << 16) ++ + /* a css_task_iter should be treated as an opaque object */ + struct css_task_iter { + struct cgroup_subsys *ss; +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -212,7 +212,8 @@ static struct cftype cgroup_base_files[] + + static int cgroup_apply_control(struct cgroup *cgrp); + static void cgroup_finalize_control(struct cgroup *cgrp, int ret); +-static void css_task_iter_advance(struct css_task_iter *it); ++static void css_task_iter_skip(struct css_task_iter *it, ++ struct task_struct *task); + static int cgroup_destroy_locked(struct cgroup *cgrp); + static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, + struct cgroup_subsys *ss); +@@ -775,6 +776,21 @@ static void css_set_update_populated(str + cgroup_update_populated(link->cgrp, populated); + } + ++/* ++ * @task is leaving, advance task iterators which are pointing to it so ++ * that they can resume at the next position. Advancing an iterator might ++ * remove it from the list, use safe walk. See css_task_iter_skip() for ++ * details. ++ */ ++static void css_set_skip_task_iters(struct css_set *cset, ++ struct task_struct *task) ++{ ++ struct css_task_iter *it, *pos; ++ ++ list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node) ++ css_task_iter_skip(it, task); ++} ++ + /** + * css_set_move_task - move a task from one css_set to another + * @task: task being moved +@@ -800,22 +816,9 @@ static void css_set_move_task(struct tas + css_set_update_populated(to_cset, true); + + if (from_cset) { +- struct css_task_iter *it, *pos; +- + WARN_ON_ONCE(list_empty(&task->cg_list)); + +- /* +- * @task is leaving, advance task iterators which are +- * pointing to it so that they can resume at the next +- * position. Advancing an iterator might remove it from +- * the list, use safe walk. See css_task_iter_advance*() +- * for details. +- */ +- list_for_each_entry_safe(it, pos, &from_cset->task_iters, +- iters_node) +- if (it->task_pos == &task->cg_list) +- css_task_iter_advance(it); +- ++ css_set_skip_task_iters(from_cset, task); + list_del_init(&task->cg_list); + if (!css_set_populated(from_cset)) + css_set_update_populated(from_cset, false); +@@ -4183,10 +4186,19 @@ static void css_task_iter_advance_css_se + list_add(&it->iters_node, &cset->task_iters); + } + +-static void css_task_iter_advance(struct css_task_iter *it) ++static void css_task_iter_skip(struct css_task_iter *it, ++ struct task_struct *task) + { +- struct list_head *next; ++ lockdep_assert_held(&css_set_lock); ++ ++ if (it->task_pos == &task->cg_list) { ++ it->task_pos = it->task_pos->next; ++ it->flags |= CSS_TASK_ITER_SKIPPED; ++ } ++} + ++static void css_task_iter_advance(struct css_task_iter *it) ++{ + lockdep_assert_held(&css_set_lock); + repeat: + if (it->task_pos) { +@@ -4195,15 +4207,15 @@ repeat: + * consumed first and then ->mg_tasks. After ->mg_tasks, + * we move onto the next cset. + */ +- next = it->task_pos->next; +- +- if (next == it->tasks_head) +- next = it->mg_tasks_head->next; ++ if (it->flags & CSS_TASK_ITER_SKIPPED) ++ it->flags &= ~CSS_TASK_ITER_SKIPPED; ++ else ++ it->task_pos = it->task_pos->next; + +- if (next == it->mg_tasks_head) ++ if (it->task_pos == it->tasks_head) ++ it->task_pos = it->mg_tasks_head->next; ++ if (it->task_pos == it->mg_tasks_head) + css_task_iter_advance_css_set(it); +- else +- it->task_pos = next; + } else { + /* called from start, proceed to the first cset */ + css_task_iter_advance_css_set(it); diff --git a/queue-4.19/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch b/queue-4.19/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch new file mode 100644 index 00000000000..906144d6f6e --- /dev/null +++ b/queue-4.19/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch @@ -0,0 +1,156 @@ +From c03cd7738a83b13739f00546166969342c8ff014 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 31 May 2019 10:38:58 -0700 +Subject: cgroup: Include dying leaders with live threads in PROCS iterations + +From: Tejun Heo + +commit c03cd7738a83b13739f00546166969342c8ff014 upstream. + +CSS_TASK_ITER_PROCS currently iterates live group leaders; however, +this means that a process with dying leader and live threads will be +skipped. IOW, cgroup.procs might be empty while cgroup.threads isn't, +which is confusing to say the least. + +Fix it by making cset track dying tasks and include dying leaders with +live threads in PROCS iteration. + +Signed-off-by: Tejun Heo +Reported-and-tested-by: Topi Miettinen +Cc: Oleg Nesterov +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/cgroup-defs.h | 1 + + include/linux/cgroup.h | 1 + + kernel/cgroup/cgroup.c | 44 +++++++++++++++++++++++++++++++++++++------- + 3 files changed, 39 insertions(+), 7 deletions(-) + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -207,6 +207,7 @@ struct css_set { + */ + struct list_head tasks; + struct list_head mg_tasks; ++ struct list_head dying_tasks; + + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -60,6 +60,7 @@ struct css_task_iter { + struct list_head *task_pos; + struct list_head *tasks_head; + struct list_head *mg_tasks_head; ++ struct list_head *dying_tasks_head; + + struct css_set *cur_cset; + struct css_set *cur_dcset; +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -673,6 +673,7 @@ struct css_set init_css_set = { + .dom_cset = &init_css_set, + .tasks = LIST_HEAD_INIT(init_css_set.tasks), + .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), ++ .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks), + .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), + .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), + .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), +@@ -1145,6 +1146,7 @@ static struct css_set *find_css_set(stru + cset->dom_cset = cset; + INIT_LIST_HEAD(&cset->tasks); + INIT_LIST_HEAD(&cset->mg_tasks); ++ INIT_LIST_HEAD(&cset->dying_tasks); + INIT_LIST_HEAD(&cset->task_iters); + INIT_LIST_HEAD(&cset->threaded_csets); + INIT_HLIST_NODE(&cset->hlist); +@@ -4152,15 +4154,18 @@ static void css_task_iter_advance_css_se + it->task_pos = NULL; + return; + } +- } while (!css_set_populated(cset)); ++ } while (!css_set_populated(cset) && !list_empty(&cset->dying_tasks)); + + if (!list_empty(&cset->tasks)) + it->task_pos = cset->tasks.next; +- else ++ else if (!list_empty(&cset->mg_tasks)) + it->task_pos = cset->mg_tasks.next; ++ else ++ it->task_pos = cset->dying_tasks.next; + + it->tasks_head = &cset->tasks; + it->mg_tasks_head = &cset->mg_tasks; ++ it->dying_tasks_head = &cset->dying_tasks; + + /* + * We don't keep css_sets locked across iteration steps and thus +@@ -4199,6 +4204,8 @@ static void css_task_iter_skip(struct cs + + static void css_task_iter_advance(struct css_task_iter *it) + { ++ struct task_struct *task; ++ + lockdep_assert_held(&css_set_lock); + repeat: + if (it->task_pos) { +@@ -4215,17 +4222,32 @@ repeat: + if (it->task_pos == it->tasks_head) + it->task_pos = it->mg_tasks_head->next; + if (it->task_pos == it->mg_tasks_head) ++ it->task_pos = it->dying_tasks_head->next; ++ if (it->task_pos == it->dying_tasks_head) + css_task_iter_advance_css_set(it); + } else { + /* called from start, proceed to the first cset */ + css_task_iter_advance_css_set(it); + } + +- /* if PROCS, skip over tasks which aren't group leaders */ +- if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && +- !thread_group_leader(list_entry(it->task_pos, struct task_struct, +- cg_list))) +- goto repeat; ++ if (!it->task_pos) ++ return; ++ ++ task = list_entry(it->task_pos, struct task_struct, cg_list); ++ ++ if (it->flags & CSS_TASK_ITER_PROCS) { ++ /* if PROCS, skip over tasks which aren't group leaders */ ++ if (!thread_group_leader(task)) ++ goto repeat; ++ ++ /* and dying leaders w/o live member threads */ ++ if (!atomic_read(&task->signal->live)) ++ goto repeat; ++ } else { ++ /* skip all dying ones */ ++ if (task->flags & PF_EXITING) ++ goto repeat; ++ } + } + + /** +@@ -5682,6 +5704,7 @@ void cgroup_exit(struct task_struct *tsk + if (!list_empty(&tsk->cg_list)) { + spin_lock_irq(&css_set_lock); + css_set_move_task(tsk, cset, NULL, false); ++ list_add_tail(&tsk->cg_list, &cset->dying_tasks); + cset->nr_tasks--; + spin_unlock_irq(&css_set_lock); + } else { +@@ -5702,6 +5725,13 @@ void cgroup_release(struct task_struct * + do_each_subsys_mask(ss, ssid, have_release_callback) { + ss->release(task); + } while_each_subsys_mask(); ++ ++ if (use_task_css_set_links) { ++ spin_lock_irq(&css_set_lock); ++ css_set_skip_task_iters(task_css_set(task), task); ++ list_del_init(&task->cg_list); ++ spin_unlock_irq(&css_set_lock); ++ } + } + + void cgroup_free(struct task_struct *task) diff --git a/queue-4.19/series b/queue-4.19/series index a69d3ab084c..0490800a6cf 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -37,3 +37,8 @@ net-mlx5-fix-modify_cq_in-alignment.patch net-mlx5e-prevent-encap-flow-counter-update-async-to-user-query.patch r8169-don-t-use-msi-before-rtl8168d.patch compat_ioctl-pppoe-fix-pppoeiocsfwd-handling.patch +cgroup-call-cgroup_release-before-__exit_signal.patch +cgroup-implement-css_task_iter_skip.patch +cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch +cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch +cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch