git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Aug 2019 09:12:46 +0000 (11:12 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Aug 2019 09:12:46 +0000 (11:12 +0200)
added patches:
cgroup-call-cgroup_release-before-__exit_signal.patch
cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch
cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch
cgroup-implement-css_task_iter_skip.patch
cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch

queue-4.14/cgroup-call-cgroup_release-before-__exit_signal.patch [new file with mode: 0644]
queue-4.14/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch [new file with mode: 0644]
queue-4.14/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch [new file with mode: 0644]
queue-4.14/cgroup-implement-css_task_iter_skip.patch [new file with mode: 0644]
queue-4.14/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/cgroup-call-cgroup_release-before-__exit_signal.patch b/queue-4.14/cgroup-call-cgroup_release-before-__exit_signal.patch
new file mode 100644 (file)
index 0000000..4e5ff2a
--- /dev/null
@@ -0,0 +1,42 @@
+From 6b115bf58e6f013ca75e7115aabcbd56c20ff31d Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 31 May 2019 10:38:57 -0700
+Subject: cgroup: Call cgroup_release() before __exit_signal()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 6b115bf58e6f013ca75e7115aabcbd56c20ff31d upstream.
+
+cgroup_release() calls cgroup_subsys->release() which is used by the
+pids controller to uncharge its pid.  We want to use it to manage
+iteration of dying tasks which requires putting it before
+__unhash_process().  Move cgroup_release() above __exit_signal().
+While this makes it uncharge before the pid is freed, pid is RCU freed
+anyway and the window is very narrow.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/exit.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -193,6 +193,7 @@ repeat:
+       rcu_read_unlock();
+       proc_flush_task(p);
++      cgroup_release(p);
+       write_lock_irq(&tasklist_lock);
+       ptrace_release_task(p);
+@@ -218,7 +219,6 @@ repeat:
+       }
+       write_unlock_irq(&tasklist_lock);
+-      cgroup_release(p);
+       release_thread(p);
+       call_rcu(&p->rcu, delayed_put_task_struct);
diff --git a/queue-4.14/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch b/queue-4.14/cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch
new file mode 100644 (file)
index 0000000..c87304f
--- /dev/null
@@ -0,0 +1,42 @@
+From cee0c33c546a93957a52ae9ab6bebadbee765ec5 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Wed, 5 Jun 2019 09:54:34 -0700
+Subject: cgroup: css_task_iter_skip()'d iterators must be advanced before accessed
+
+From: Tejun Heo <tj@kernel.org>
+
+commit cee0c33c546a93957a52ae9ab6bebadbee765ec5 upstream.
+
+b636fd38dc40 ("cgroup: Implement css_task_iter_skip()") introduced
+css_task_iter_skip() which is used to fix task iterations skipping
+dying threadgroup leaders with live threads.  Skipping is implemented
+as a subportion of full advancing but css_task_iter_next() forgot to
+fully advance a skipped iterator before determining the next task to
+visit causing it to return invalid task pointers.
+
+Fix it by making css_task_iter_next() fully advance the iterator if it
+has been skipped since the previous iteration.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: syzbot
+Link: http://lkml.kernel.org/r/00000000000097025d058a7fd785@google.com
+Fixes: b636fd38dc40 ("cgroup: Implement css_task_iter_skip()")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup/cgroup.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -4197,6 +4197,10 @@ struct task_struct *css_task_iter_next(s
+       spin_lock_irq(&css_set_lock);
++      /* @it may be half-advanced by skips, finish advancing */
++      if (it->flags & CSS_TASK_ITER_SKIPPED)
++              css_task_iter_advance(it);
++
+       if (it->task_pos) {
+               it->cur_task = list_entry(it->task_pos, struct task_struct,
+                                         cg_list);
diff --git a/queue-4.14/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch b/queue-4.14/cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch
new file mode 100644 (file)
index 0000000..2e5757a
--- /dev/null
@@ -0,0 +1,36 @@
+From c596687a008b579c503afb7a64fcacc7270fae9e Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Mon, 10 Jun 2019 09:08:27 -0700
+Subject: cgroup: Fix css_task_iter_advance_css_set() cset skip condition
+
+From: Tejun Heo <tj@kernel.org>
+
+commit c596687a008b579c503afb7a64fcacc7270fae9e upstream.
+
+While adding handling for dying task group leaders c03cd7738a83
+("cgroup: Include dying leaders with live threads in PROCS
+iterations") added an inverted cset skip condition to
+css_task_iter_advance_css_set().  It should skip cset if it's
+completely empty but was incorrectly testing for the inverse condition
+for the dying_tasks list.  Fix it.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations")
+Reported-by: syzbot+d4bba5ccd4f9a2a68681@syzkaller.appspotmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup/cgroup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -4048,7 +4048,7 @@ static void css_task_iter_advance_css_se
+                       it->task_pos = NULL;
+                       return;
+               }
+-      } while (!css_set_populated(cset) && !list_empty(&cset->dying_tasks));
++      } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
+       if (!list_empty(&cset->tasks))
+               it->task_pos = cset->tasks.next;
diff --git a/queue-4.14/cgroup-implement-css_task_iter_skip.patch b/queue-4.14/cgroup-implement-css_task_iter_skip.patch
new file mode 100644 (file)
index 0000000..408e332
--- /dev/null
@@ -0,0 +1,150 @@
+From b636fd38dc40113f853337a7d2a6885ad23b8811 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 31 May 2019 10:38:58 -0700
+Subject: cgroup: Implement css_task_iter_skip()
+
+From: Tejun Heo <tj@kernel.org>
+
+commit b636fd38dc40113f853337a7d2a6885ad23b8811 upstream.
+
+When a task is moved out of a cset, task iterators pointing to the
+task are advanced using the normal css_task_iter_advance() call.  This
+is fine but we'll be tracking dying tasks on csets and thus moving
+tasks from cset->tasks to (to be added) cset->dying_tasks.  When we
+remove a task from cset->tasks, if we advance the iterators, they may
+move over to the next cset before we had the chance to add the task
+back on the dying list, which can allow the task to escape iteration.
+
+This patch separates out skipping from advancing.  Skipping only moves
+the affected iterators to the next pointer rather than fully advancing
+it and the following advancing will recognize that the cursor has
+already been moved forward and do the rest of advancing.  This ensures
+that when a task moves from one list to another in its cset, as long
+as it moves in the right direction, it's always visible to iteration.
+
+This doesn't cause any visible behavior changes.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/cgroup.h |    3 ++
+ kernel/cgroup/cgroup.c |   60 +++++++++++++++++++++++++++++--------------------
+ 2 files changed, 39 insertions(+), 24 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -42,6 +42,9 @@
+ /* walk all threaded css_sets in the domain */
+ #define CSS_TASK_ITER_THREADED                (1U << 1)
++/* internal flags */
++#define CSS_TASK_ITER_SKIPPED         (1U << 16)
++
+ /* a css_task_iter should be treated as an opaque object */
+ struct css_task_iter {
+       struct cgroup_subsys            *ss;
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -204,7 +204,8 @@ static struct cftype cgroup_base_files[]
+ static int cgroup_apply_control(struct cgroup *cgrp);
+ static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
+-static void css_task_iter_advance(struct css_task_iter *it);
++static void css_task_iter_skip(struct css_task_iter *it,
++                             struct task_struct *task);
+ static int cgroup_destroy_locked(struct cgroup *cgrp);
+ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+                                             struct cgroup_subsys *ss);
+@@ -737,6 +738,21 @@ static void css_set_update_populated(str
+               cgroup_update_populated(link->cgrp, populated);
+ }
++/*
++ * @task is leaving, advance task iterators which are pointing to it so
++ * that they can resume at the next position.  Advancing an iterator might
++ * remove it from the list, use safe walk.  See css_task_iter_skip() for
++ * details.
++ */
++static void css_set_skip_task_iters(struct css_set *cset,
++                                  struct task_struct *task)
++{
++      struct css_task_iter *it, *pos;
++
++      list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
++              css_task_iter_skip(it, task);
++}
++
+ /**
+  * css_set_move_task - move a task from one css_set to another
+  * @task: task being moved
+@@ -762,22 +778,9 @@ static void css_set_move_task(struct tas
+               css_set_update_populated(to_cset, true);
+       if (from_cset) {
+-              struct css_task_iter *it, *pos;
+-
+               WARN_ON_ONCE(list_empty(&task->cg_list));
+-              /*
+-               * @task is leaving, advance task iterators which are
+-               * pointing to it so that they can resume at the next
+-               * position.  Advancing an iterator might remove it from
+-               * the list, use safe walk.  See css_task_iter_advance*()
+-               * for details.
+-               */
+-              list_for_each_entry_safe(it, pos, &from_cset->task_iters,
+-                                       iters_node)
+-                      if (it->task_pos == &task->cg_list)
+-                              css_task_iter_advance(it);
+-
++              css_set_skip_task_iters(from_cset, task);
+               list_del_init(&task->cg_list);
+               if (!css_set_populated(from_cset))
+                       css_set_update_populated(from_cset, false);
+@@ -4077,10 +4080,19 @@ static void css_task_iter_advance_css_se
+       list_add(&it->iters_node, &cset->task_iters);
+ }
+-static void css_task_iter_advance(struct css_task_iter *it)
++static void css_task_iter_skip(struct css_task_iter *it,
++                             struct task_struct *task)
+ {
+-      struct list_head *next;
++      lockdep_assert_held(&css_set_lock);
++
++      if (it->task_pos == &task->cg_list) {
++              it->task_pos = it->task_pos->next;
++              it->flags |= CSS_TASK_ITER_SKIPPED;
++      }
++}
++static void css_task_iter_advance(struct css_task_iter *it)
++{
+       lockdep_assert_held(&css_set_lock);
+ repeat:
+       if (it->task_pos) {
+@@ -4089,15 +4101,15 @@ repeat:
+                * consumed first and then ->mg_tasks.  After ->mg_tasks,
+                * we move onto the next cset.
+                */
+-              next = it->task_pos->next;
+-
+-              if (next == it->tasks_head)
+-                      next = it->mg_tasks_head->next;
++              if (it->flags & CSS_TASK_ITER_SKIPPED)
++                      it->flags &= ~CSS_TASK_ITER_SKIPPED;
++              else
++                      it->task_pos = it->task_pos->next;
+-              if (next == it->mg_tasks_head)
++              if (it->task_pos == it->tasks_head)
++                      it->task_pos = it->mg_tasks_head->next;
++              if (it->task_pos == it->mg_tasks_head)
+                       css_task_iter_advance_css_set(it);
+-              else
+-                      it->task_pos = next;
+       } else {
+               /* called from start, proceed to the first cset */
+               css_task_iter_advance_css_set(it);
diff --git a/queue-4.14/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch b/queue-4.14/cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch
new file mode 100644 (file)
index 0000000..5102ccf
--- /dev/null
@@ -0,0 +1,156 @@
+From c03cd7738a83b13739f00546166969342c8ff014 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 31 May 2019 10:38:58 -0700
+Subject: cgroup: Include dying leaders with live threads in PROCS iterations
+
+From: Tejun Heo <tj@kernel.org>
+
+commit c03cd7738a83b13739f00546166969342c8ff014 upstream.
+
+CSS_TASK_ITER_PROCS currently iterates live group leaders; however,
+this means that a process with dying leader and live threads will be
+skipped.  IOW, cgroup.procs might be empty while cgroup.threads isn't,
+which is confusing to say the least.
+
+Fix it by making cset track dying tasks and include dying leaders with
+live threads in PROCS iteration.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-and-tested-by: Topi Miettinen <toiwoton@gmail.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/cgroup-defs.h |    1 +
+ include/linux/cgroup.h      |    1 +
+ kernel/cgroup/cgroup.c      |   44 +++++++++++++++++++++++++++++++++++++-------
+ 3 files changed, 39 insertions(+), 7 deletions(-)
+
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -201,6 +201,7 @@ struct css_set {
+        */
+       struct list_head tasks;
+       struct list_head mg_tasks;
++      struct list_head dying_tasks;
+       /* all css_task_iters currently walking this cset */
+       struct list_head task_iters;
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -59,6 +59,7 @@ struct css_task_iter {
+       struct list_head                *task_pos;
+       struct list_head                *tasks_head;
+       struct list_head                *mg_tasks_head;
++      struct list_head                *dying_tasks_head;
+       struct css_set                  *cur_cset;
+       struct css_set                  *cur_dcset;
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -643,6 +643,7 @@ struct css_set init_css_set = {
+       .dom_cset               = &init_css_set,
+       .tasks                  = LIST_HEAD_INIT(init_css_set.tasks),
+       .mg_tasks               = LIST_HEAD_INIT(init_css_set.mg_tasks),
++      .dying_tasks            = LIST_HEAD_INIT(init_css_set.dying_tasks),
+       .task_iters             = LIST_HEAD_INIT(init_css_set.task_iters),
+       .threaded_csets         = LIST_HEAD_INIT(init_css_set.threaded_csets),
+       .cgrp_links             = LIST_HEAD_INIT(init_css_set.cgrp_links),
+@@ -1107,6 +1108,7 @@ static struct css_set *find_css_set(stru
+       cset->dom_cset = cset;
+       INIT_LIST_HEAD(&cset->tasks);
+       INIT_LIST_HEAD(&cset->mg_tasks);
++      INIT_LIST_HEAD(&cset->dying_tasks);
+       INIT_LIST_HEAD(&cset->task_iters);
+       INIT_LIST_HEAD(&cset->threaded_csets);
+       INIT_HLIST_NODE(&cset->hlist);
+@@ -4046,15 +4048,18 @@ static void css_task_iter_advance_css_se
+                       it->task_pos = NULL;
+                       return;
+               }
+-      } while (!css_set_populated(cset));
++      } while (!css_set_populated(cset) && !list_empty(&cset->dying_tasks));
+       if (!list_empty(&cset->tasks))
+               it->task_pos = cset->tasks.next;
+-      else
++      else if (!list_empty(&cset->mg_tasks))
+               it->task_pos = cset->mg_tasks.next;
++      else
++              it->task_pos = cset->dying_tasks.next;
+       it->tasks_head = &cset->tasks;
+       it->mg_tasks_head = &cset->mg_tasks;
++      it->dying_tasks_head = &cset->dying_tasks;
+       /*
+        * We don't keep css_sets locked across iteration steps and thus
+@@ -4093,6 +4098,8 @@ static void css_task_iter_skip(struct cs
+ static void css_task_iter_advance(struct css_task_iter *it)
+ {
++      struct task_struct *task;
++
+       lockdep_assert_held(&css_set_lock);
+ repeat:
+       if (it->task_pos) {
+@@ -4109,17 +4116,32 @@ repeat:
+               if (it->task_pos == it->tasks_head)
+                       it->task_pos = it->mg_tasks_head->next;
+               if (it->task_pos == it->mg_tasks_head)
++                      it->task_pos = it->dying_tasks_head->next;
++              if (it->task_pos == it->dying_tasks_head)
+                       css_task_iter_advance_css_set(it);
+       } else {
+               /* called from start, proceed to the first cset */
+               css_task_iter_advance_css_set(it);
+       }
+-      /* if PROCS, skip over tasks which aren't group leaders */
+-      if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
+-          !thread_group_leader(list_entry(it->task_pos, struct task_struct,
+-                                          cg_list)))
+-              goto repeat;
++      if (!it->task_pos)
++              return;
++
++      task = list_entry(it->task_pos, struct task_struct, cg_list);
++
++      if (it->flags & CSS_TASK_ITER_PROCS) {
++              /* if PROCS, skip over tasks which aren't group leaders */
++              if (!thread_group_leader(task))
++                      goto repeat;
++
++              /* and dying leaders w/o live member threads */
++              if (!atomic_read(&task->signal->live))
++                      goto repeat;
++      } else {
++              /* skip all dying ones */
++              if (task->flags & PF_EXITING)
++                      goto repeat;
++      }
+ }
+ /**
+@@ -5552,6 +5574,7 @@ void cgroup_exit(struct task_struct *tsk
+       if (!list_empty(&tsk->cg_list)) {
+               spin_lock_irq(&css_set_lock);
+               css_set_move_task(tsk, cset, NULL, false);
++              list_add_tail(&tsk->cg_list, &cset->dying_tasks);
+               cset->nr_tasks--;
+               spin_unlock_irq(&css_set_lock);
+       } else {
+@@ -5572,6 +5595,13 @@ void cgroup_release(struct task_struct *
+       do_each_subsys_mask(ss, ssid, have_release_callback) {
+               ss->release(task);
+       } while_each_subsys_mask();
++
++      if (use_task_css_set_links) {
++              spin_lock_irq(&css_set_lock);
++              css_set_skip_task_iters(task_css_set(task), task);
++              list_del_init(&task->cg_list);
++              spin_unlock_irq(&css_set_lock);
++      }
+ }
+ void cgroup_free(struct task_struct *task)
diff --git a/queue-4.14/series b/queue-4.14/series
index ce4c19a3161f540a5c9fb90a5482a471b193e378..5201910f3e4514db943a716e7c25802bff2f19a8 100644 (file)
@@ -25,3 +25,8 @@ net-mlx5e-prevent-encap-flow-counter-update-async-to-user-query.patch
 tun-mark-small-packets-as-owned-by-the-tap-sock.patch
 mvpp2-refactor-mtu-change-code.patch
 bnx2x-disable-multi-cos-feature.patch
+cgroup-call-cgroup_release-before-__exit_signal.patch
+cgroup-implement-css_task_iter_skip.patch
+cgroup-include-dying-leaders-with-live-threads-in-procs-iterations.patch
+cgroup-css_task_iter_skip-d-iterators-must-be-advanced-before-accessed.patch
+cgroup-fix-css_task_iter_advance_css_set-cset-skip-condition.patch