From: Greg Kroah-Hartman
Date: Mon, 4 Feb 2019 06:04:19 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v4.20.7~22
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=830167417e48e0e2bb5fe69a2ee1958c27934b8c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
      kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
      mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
      mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
      mm-oom-fix-use-after-free-in-oom_kill_process.patch
---

diff --git a/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch b/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
new file mode 100644
index 00000000000..b1894f2df1b
--- /dev/null
+++ b/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
@@ -0,0 +1,73 @@
+From 8fb335e078378c8426fabeed1ebee1fbf915690c Mon Sep 17 00:00:00 2001
+From: Andrei Vagin
+Date: Fri, 1 Feb 2019 14:20:24 -0800
+Subject: kernel/exit.c: release ptraced tasks before zap_pid_ns_processes
+
+From: Andrei Vagin
+
+commit 8fb335e078378c8426fabeed1ebee1fbf915690c upstream.
+
+Currently, exit_ptrace() adds all ptraced tasks in a dead list, then
+zap_pid_ns_processes() waits on all tasks in a current pidns, and only
+then are tasks from the dead list released.
+
+zap_pid_ns_processes() can get stuck on waiting tasks from the dead
+list. In this case, we will have one unkillable process with one or
+more dead children.
+
+Thanks to Oleg for the advice to release tasks in find_child_reaper().
+
+Link: http://lkml.kernel.org/r/20190110175200.12442-1-avagin@gmail.com
+Fixes: 7c8bd2322c7f ("exit: ptrace: shift "reap dead" code from exit_ptrace() to forget_original_parent()")
+Signed-off-by: Andrei Vagin
+Signed-off-by: Oleg Nesterov
+Cc: "Eric W. Biederman"
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/exit.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -525,12 +525,14 @@ static struct task_struct *find_alive_th
+ 	return NULL;
+ }
+ 
+-static struct task_struct *find_child_reaper(struct task_struct *father)
++static struct task_struct *find_child_reaper(struct task_struct *father,
++						struct list_head *dead)
+ 	__releases(&tasklist_lock)
+ 	__acquires(&tasklist_lock)
+ {
+ 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+ 	struct task_struct *reaper = pid_ns->child_reaper;
++	struct task_struct *p, *n;
+ 
+ 	if (likely(reaper != father))
+ 		return reaper;
+@@ -546,6 +548,12 @@ static struct task_struct *find_child_re
+ 		panic("Attempted to kill init! exitcode=0x%08x\n",
+ 			father->signal->group_exit_code ?: father->exit_code);
+ 	}
++
++	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
++		list_del_init(&p->ptrace_entry);
++		release_task(p);
++	}
++
+ 	zap_pid_ns_processes(pid_ns);
+ 	write_lock_irq(&tasklist_lock);
+ 
+@@ -632,7 +640,7 @@ static void forget_original_parent(struc
+ 	exit_ptrace(father, dead);
+ 
+ 	/* Can drop and reacquire tasklist_lock */
+-	reaper = find_child_reaper(father);
++	reaper = find_child_reaper(father, dead);
+ 	if (list_empty(&father->children))
+ 		return;
+ 
diff --git a/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch b/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
new file mode 100644
index 00000000000..7faf5babd15
--- /dev/null
+++ b/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
@@ -0,0 +1,58 @@
+From 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Fri, 1 Feb 2019 14:21:08 -0800
+Subject: mm: hwpoison: use do_send_sig_info() instead of force_sig()
+
+From: Naoya Horiguchi
+
+commit 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b upstream.
+
+Currently memory_failure() is racy against process's exiting, which
+results in kernel crash by null pointer dereference.
+
+The root cause is that memory_failure() uses force_sig() to forcibly
+kill asynchronous (meaning not in the current context) processes. As
+discussed in thread https://lkml.org/lkml/2010/6/8/236 years ago for OOM
+fixes, this is not a right thing to do. OOM solves this issue by using
+do_send_sig_info() as done in commit d2d393099de2 ("signal:
+oom_kill_task: use SEND_SIG_FORCED instead of force_sig()"), so this
+patch is suggesting to do the same for hwpoison. do_send_sig_info()
+properly accesses to siglock with lock_task_sighand(), so is free from
+the reported race.
+
+I confirmed that the reported bug reproduces with inserting some delay
+in kill_procs(), and it never reproduces with this patch.
+
+Note that memory_failure() can send another type of signal using
+force_sig_mceerr(), and the reported race shouldn't happen on it because
+force_sig_mceerr() is called only for synchronous processes (i.e.
+BUS_MCEERR_AR happens only when some process accesses to the corrupted
+memory.)
+
+Link: http://lkml.kernel.org/r/20190116093046.GA29835@hori1.linux.bs1.fc.nec.co.jp
+Signed-off-by: Naoya Horiguchi
+Reported-by: Jane Chu
+Reviewed-by: Dan Williams
+Reviewed-by: William Kucharski
+Cc: Oleg Nesterov
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory-failure.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -336,7 +336,8 @@ static void kill_procs(struct list_head
+ 			if (fail || tk->addr_valid == 0) {
+ 				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ 				       pfn, tk->tsk->comm, tk->tsk->pid);
+-				force_sig(SIGKILL, tk->tsk);
++				do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
++						 tk->tsk, PIDTYPE_PID);
+ 			}
+ 
+ 		/*
diff --git a/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch b/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
new file mode 100644
index 00000000000..728869581a9
--- /dev/null
+++ b/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
@@ -0,0 +1,97 @@
+From e0a352fabce61f730341d119fbedf71ffdb8663f Mon Sep 17 00:00:00 2001
+From: David Hildenbrand
+Date: Fri, 1 Feb 2019 14:21:19 -0800
+Subject: mm: migrate: don't rely on __PageMovable() of newpage after unlocking it
+
+From: David Hildenbrand
+
+commit e0a352fabce61f730341d119fbedf71ffdb8663f upstream.
+
+We had a race in the old balloon compaction code before b1123ea6d3b3
+("mm: balloon: use general non-lru movable page feature") refactored it
+that became visible after backporting 195a8c43e93d ("virtio-balloon:
+deflate via a page list") without the refactoring.
+
+The bug existed from commit d6d86c0a7f8d ("mm/balloon_compaction:
+redesign ballooned pages management") till b1123ea6d3b3 ("mm: balloon:
+use general non-lru movable page feature"). d6d86c0a7f8d
+("mm/balloon_compaction: redesign ballooned pages management") was
+backported to 3.12, so the broken kernels are stable kernels [3.12 -
+4.7].
+
+There was a subtle race between dropping the page lock of the newpage in
+__unmap_and_move() and checking for __is_movable_balloon_page(newpage).
+
+Just after dropping this page lock, virtio-balloon could go ahead and
+deflate the newpage, effectively dequeueing it and clearing PageBalloon,
+in turn making __is_movable_balloon_page(newpage) fail.
+
+This resulted in dropping the reference of the newpage via
+putback_lru_page(newpage) instead of put_page(newpage), leading to
+page->lru getting modified and a !LRU page ending up in the LRU lists.
+With 195a8c43e93d ("virtio-balloon: deflate via a page list")
+backported, one would suddenly get corrupted lists in
+release_pages_balloon():
+
+- WARNING: CPU: 13 PID: 6586 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
+- list_del corruption. prev->next should be ffffe253961090a0, but was dead000000000100
+
+Nowadays this race is no longer possible, but it is hidden behind very
+ugly handling of __ClearPageMovable() and __PageMovable().
+
+__ClearPageMovable() will not make __PageMovable() fail, only
+PageMovable(). So the new check (__PageMovable(newpage)) will still
+hold even after newpage was dequeued by virtio-balloon.
+
+If anybody would ever change that special handling, the BUG would be
+introduced again. So instead, make it explicit and use the information
+of the original isolated page before migration.
+
+This patch can be backported fairly easy to stable kernels (in contrast
+to the refactoring).
+
+Link: http://lkml.kernel.org/r/20190129233217.10747-1-david@redhat.com
+Fixes: d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management")
+Signed-off-by: David Hildenbrand
+Reported-by: Vratislav Bendel
+Acked-by: Michal Hocko
+Acked-by: Rafael Aquini
+Cc: Mel Gorman
+Cc: "Kirill A. Shutemov"
+Cc: Michal Hocko
+Cc: Naoya Horiguchi
+Cc: Jan Kara
+Cc: Andrea Arcangeli
+Cc: Dominik Brodowski
+Cc: Matthew Wilcox
+Cc: Vratislav Bendel
+Cc: Rafael Aquini
+Cc: Konstantin Khlebnikov
+Cc: Minchan Kim
+Cc: [3.12 - 4.7]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/migrate.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1044,10 +1044,13 @@ out:
+ 	 * If migration is successful, decrease refcount of the newpage
+ 	 * which will not free the page because new page owner increased
+ 	 * refcounter. As well, if it is LRU page, add the page to LRU
+-	 * list in here.
++	 * list in here. Use the old state of the isolated source page to
++	 * determine if we migrated a LRU page. newpage was already unlocked
++	 * and possibly modified by its owner - don't rely on the page
++	 * state.
+ 	 */
+ 	if (rc == MIGRATEPAGE_SUCCESS) {
+-		if (unlikely(__PageMovable(newpage)))
++		if (unlikely(!is_lru))
+ 			put_page(newpage);
+ 		else
+ 			putback_lru_page(newpage);
diff --git a/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch b/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch
new file mode 100644
index 00000000000..f0466b4b042
--- /dev/null
+++ b/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch
@@ -0,0 +1,70 @@
+From cefc7ef3c87d02fc9307835868ff721ea12cc597 Mon Sep 17 00:00:00 2001
+From: Shakeel Butt
+Date: Fri, 1 Feb 2019 14:20:54 -0800
+Subject: mm, oom: fix use-after-free in oom_kill_process
+
+From: Shakeel Butt
+
+commit cefc7ef3c87d02fc9307835868ff721ea12cc597 upstream.
+
+Syzbot instance running on upstream kernel found a use-after-free bug in
+oom_kill_process. On further inspection it seems like the process
+selected to be oom-killed has exited even before reaching
+read_lock(&tasklist_lock) in oom_kill_process(). More specifically the
+tsk->usage is 1 which is due to get_task_struct() in oom_evaluate_task()
+and the put_task_struct within for_each_thread() frees the tsk and
+for_each_thread() tries to access the tsk. The easiest fix is to do
+get/put across the for_each_thread() on the selected task.
+
+Now the next question is should we continue with the oom-kill as the
+previously selected task has exited? However before adding more
+complexity and heuristics, let's answer why we even look at the children
+of oom-kill selected task? The select_bad_process() has already selected
+the worst process in the system/memcg. Due to race, the selected
+process might not be the worst at the kill time but does that matter?
+The userspace can use the oom_score_adj interface to prefer children to
+be killed before the parent. I looked at the history but it seems like
+this is there before git history.
+
+Link: http://lkml.kernel.org/r/20190121215850.221745-1-shakeelb@google.com
+Reported-by: syzbot+7fbbfa368521945f0e3d@syzkaller.appspotmail.com
+Fixes: 6b0c81b3be11 ("mm, oom: reduce dependency on tasklist_lock")
+Signed-off-by: Shakeel Butt
+Reviewed-by: Roman Gushchin
+Acked-by: Michal Hocko
+Cc: David Rientjes
+Cc: Johannes Weiner
+Cc: Tetsuo Handa
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -861,6 +861,13 @@ static void oom_kill_process(struct oom_
+ 	 * still freeing memory.
+ 	 */
+ 	read_lock(&tasklist_lock);
++
++	/*
++	 * The task 'p' might have already exited before reaching here. The
++	 * put_task_struct() will free task_struct 'p' while the loop still try
++	 * to access the field of 'p', so, get an extra reference.
++	 */
++	get_task_struct(p);
+ 	for_each_thread(p, t) {
+ 		list_for_each_entry(child, &t->children, sibling) {
+ 			unsigned int child_points;
+@@ -880,6 +887,7 @@ static void oom_kill_process(struct oom_
+ 			}
+ 		}
+ 	}
++	put_task_struct(p);
+ 	read_unlock(&tasklist_lock);
+ 
+ 	p = find_lock_task_mm(victim);
diff --git a/queue-4.9/series b/queue-4.9/series
index 37a75cd1b81..4fb9a238424 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -20,3 +20,7 @@ gfs2-revert-fix-loop-in-gfs2_rbm_find.patch
 platform-x86-asus-nb-wmi-map-0x35-to-key_screenlock.patch
 platform-x86-asus-nb-wmi-drop-mapping-of-0x33-and-0x.patch
 mmc-sdhci-iproc-handle-mmc_of_parse-errors-during-probe.patch
+kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
+mm-oom-fix-use-after-free-in-oom_kill_process.patch
+mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
+mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch