From: Greg Kroah-Hartman
Date: Mon, 4 Feb 2019 06:04:19 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v4.20.7~22
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=830167417e48e0e2bb5fe69a2ee1958c27934b8c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
      kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
      mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
      mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
      mm-oom-fix-use-after-free-in-oom_kill_process.patch
---

diff --git a/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch b/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
new file mode 100644
index 00000000000..b1894f2df1b
--- /dev/null
+++ b/queue-4.9/kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
@@ -0,0 +1,73 @@
+From 8fb335e078378c8426fabeed1ebee1fbf915690c Mon Sep 17 00:00:00 2001
+From: Andrei Vagin
+Date: Fri, 1 Feb 2019 14:20:24 -0800
+Subject: kernel/exit.c: release ptraced tasks before zap_pid_ns_processes
+
+From: Andrei Vagin
+
+commit 8fb335e078378c8426fabeed1ebee1fbf915690c upstream.
+
+Currently, exit_ptrace() adds all ptraced tasks in a dead list, then
+zap_pid_ns_processes() waits on all tasks in a current pidns, and only
+then are tasks from the dead list released.
+
+zap_pid_ns_processes() can get stuck on waiting tasks from the dead
+list. In this case, we will have one unkillable process with one or
+more dead children.
+
+Thanks to Oleg for the advice to release tasks in find_child_reaper().
+
+Link: http://lkml.kernel.org/r/20190110175200.12442-1-avagin@gmail.com
+Fixes: 7c8bd2322c7f ("exit: ptrace: shift "reap dead" code from exit_ptrace() to forget_original_parent()")
+Signed-off-by: Andrei Vagin
+Signed-off-by: Oleg Nesterov
+Cc: "Eric W. Biederman"
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/exit.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -525,12 +525,14 @@ static struct task_struct *find_alive_th
+ 	return NULL;
+ }
+ 
+-static struct task_struct *find_child_reaper(struct task_struct *father)
++static struct task_struct *find_child_reaper(struct task_struct *father,
++						struct list_head *dead)
+ 	__releases(&tasklist_lock)
+ 	__acquires(&tasklist_lock)
+ {
+ 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+ 	struct task_struct *reaper = pid_ns->child_reaper;
++	struct task_struct *p, *n;
+ 
+ 	if (likely(reaper != father))
+ 		return reaper;
+@@ -546,6 +548,12 @@ static struct task_struct *find_child_re
+ 		panic("Attempted to kill init! exitcode=0x%08x\n",
+ 			father->signal->group_exit_code ?: father->exit_code);
+ 	}
++
++	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
++		list_del_init(&p->ptrace_entry);
++		release_task(p);
++	}
++
+ 	zap_pid_ns_processes(pid_ns);
+ 	write_lock_irq(&tasklist_lock);
+ 
+@@ -632,7 +640,7 @@ static void forget_original_parent(struc
+ 	exit_ptrace(father, dead);
+ 
+ 	/* Can drop and reacquire tasklist_lock */
+-	reaper = find_child_reaper(father);
++	reaper = find_child_reaper(father, dead);
+ 	if (list_empty(&father->children))
+ 		return;
+ 
diff --git a/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch b/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
new file mode 100644
index 00000000000..7faf5babd15
--- /dev/null
+++ b/queue-4.9/mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
@@ -0,0 +1,58 @@
+From 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi
+Date: Fri, 1 Feb 2019 14:21:08 -0800
+Subject: mm: hwpoison: use do_send_sig_info() instead of force_sig()
+
+From: Naoya Horiguchi
+
+commit 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b upstream.
+
+Currently memory_failure() is racy against process's exiting, which
+results in kernel crash by null pointer dereference.
+
+The root cause is that memory_failure() uses force_sig() to forcibly
+kill asynchronous (meaning not in the current context) processes. As
+discussed in thread https://lkml.org/lkml/2010/6/8/236 years ago for OOM
+fixes, this is not a right thing to do. OOM solves this issue by using
+do_send_sig_info() as done in commit d2d393099de2 ("signal:
+oom_kill_task: use SEND_SIG_FORCED instead of force_sig()"), so this
+patch is suggesting to do the same for hwpoison. do_send_sig_info()
+properly accesses to siglock with lock_task_sighand(), so is free from
+the reported race.
+
+I confirmed that the reported bug reproduces with inserting some delay
+in kill_procs(), and it never reproduces with this patch.
+
+Note that memory_failure() can send another type of signal using
+force_sig_mceerr(), and the reported race shouldn't happen on it because
+force_sig_mceerr() is called only for synchronous processes (i.e.
+BUS_MCEERR_AR happens only when some process accesses to the corrupted
+memory.)
+
+Link: http://lkml.kernel.org/r/20190116093046.GA29835@hori1.linux.bs1.fc.nec.co.jp
+Signed-off-by: Naoya Horiguchi
+Reported-by: Jane Chu
+Reviewed-by: Dan Williams
+Reviewed-by: William Kucharski
+Cc: Oleg Nesterov
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory-failure.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -336,7 +336,8 @@ static void kill_procs(struct list_head
+ 			if (fail || tk->addr_valid == 0) {
+ 				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ 				       pfn, tk->tsk->comm, tk->tsk->pid);
+-				force_sig(SIGKILL, tk->tsk);
++				do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
++						 tk->tsk, PIDTYPE_PID);
+ 			}
+ 
+ 		/*
diff --git a/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch b/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
new file mode 100644
index 00000000000..728869581a9
--- /dev/null
+++ b/queue-4.9/mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
@@ -0,0 +1,97 @@
+From e0a352fabce61f730341d119fbedf71ffdb8663f Mon Sep 17 00:00:00 2001
+From: David Hildenbrand
+Date: Fri, 1 Feb 2019 14:21:19 -0800
+Subject: mm: migrate: don't rely on __PageMovable() of newpage after unlocking it
+
+From: David Hildenbrand
+
+commit e0a352fabce61f730341d119fbedf71ffdb8663f upstream.
+
+We had a race in the old balloon compaction code before b1123ea6d3b3
+("mm: balloon: use general non-lru movable page feature") refactored it
+that became visible after backporting 195a8c43e93d ("virtio-balloon:
+deflate via a page list") without the refactoring.
+
+The bug existed from commit d6d86c0a7f8d ("mm/balloon_compaction:
+redesign ballooned pages management") till b1123ea6d3b3 ("mm: balloon:
+use general non-lru movable page feature"). d6d86c0a7f8d
+("mm/balloon_compaction: redesign ballooned pages management") was
+backported to 3.12, so the broken kernels are stable kernels [3.12 -
+4.7].
+
+There was a subtle race between dropping the page lock of the newpage in
+__unmap_and_move() and checking for __is_movable_balloon_page(newpage).
+
+Just after dropping this page lock, virtio-balloon could go ahead and
+deflate the newpage, effectively dequeueing it and clearing PageBalloon,
+in turn making __is_movable_balloon_page(newpage) fail.
+
+This resulted in dropping the reference of the newpage via
+putback_lru_page(newpage) instead of put_page(newpage), leading to
+page->lru getting modified and a !LRU page ending up in the LRU lists.
+With 195a8c43e93d ("virtio-balloon: deflate via a page list")
+backported, one would suddenly get corrupted lists in
+release_pages_balloon():
+
+- WARNING: CPU: 13 PID: 6586 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0
+- list_del corruption. prev->next should be ffffe253961090a0, but was dead000000000100
+
+Nowadays this race is no longer possible, but it is hidden behind very
+ugly handling of __ClearPageMovable() and __PageMovable().
+
+__ClearPageMovable() will not make __PageMovable() fail, only
+PageMovable(). So the new check (__PageMovable(newpage)) will still
+hold even after newpage was dequeued by virtio-balloon.
+
+If anybody would ever change that special handling, the BUG would be
+introduced again. So instead, make it explicit and use the information
+of the original isolated page before migration.
+
+This patch can be backported fairly easy to stable kernels (in contrast
+to the refactoring).
+
+Link: http://lkml.kernel.org/r/20190129233217.10747-1-david@redhat.com
+Fixes: d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management")
+Signed-off-by: David Hildenbrand
+Reported-by: Vratislav Bendel
+Acked-by: Michal Hocko
+Acked-by: Rafael Aquini
+Cc: Mel Gorman
+Cc: "Kirill A. Shutemov"
+Cc: Michal Hocko
+Cc: Naoya Horiguchi
+Cc: Jan Kara
+Cc: Andrea Arcangeli
+Cc: Dominik Brodowski
+Cc: Matthew Wilcox
+Cc: Vratislav Bendel
+Cc: Rafael Aquini
+Cc: Konstantin Khlebnikov
+Cc: Minchan Kim
+Cc: [3.12 - 4.7]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/migrate.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1044,10 +1044,13 @@ out:
+ 	 * If migration is successful, decrease refcount of the newpage
+ 	 * which will not free the page because new page owner increased
+ 	 * refcounter. As well, if it is LRU page, add the page to LRU
+-	 * list in here.
++	 * list in here. Use the old state of the isolated source page to
++	 * determine if we migrated a LRU page. newpage was already unlocked
++	 * and possibly modified by its owner - don't rely on the page
++	 * state.
+ 	 */
+ 	if (rc == MIGRATEPAGE_SUCCESS) {
+-		if (unlikely(__PageMovable(newpage)))
++		if (unlikely(!is_lru))
+ 			put_page(newpage);
+ 		else
+ 			putback_lru_page(newpage);
diff --git a/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch b/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch
new file mode 100644
index 00000000000..f0466b4b042
--- /dev/null
+++ b/queue-4.9/mm-oom-fix-use-after-free-in-oom_kill_process.patch
@@ -0,0 +1,70 @@
+From cefc7ef3c87d02fc9307835868ff721ea12cc597 Mon Sep 17 00:00:00 2001
+From: Shakeel Butt
+Date: Fri, 1 Feb 2019 14:20:54 -0800
+Subject: mm, oom: fix use-after-free in oom_kill_process
+
+From: Shakeel Butt
+
+commit cefc7ef3c87d02fc9307835868ff721ea12cc597 upstream.
+
+Syzbot instance running on upstream kernel found a use-after-free bug in
+oom_kill_process. On further inspection it seems like the process
+selected to be oom-killed has exited even before reaching
+read_lock(&tasklist_lock) in oom_kill_process(). More specifically the
+tsk->usage is 1 which is due to get_task_struct() in oom_evaluate_task()
+and the put_task_struct within for_each_thread() frees the tsk and
+for_each_thread() tries to access the tsk. The easiest fix is to do
+get/put across the for_each_thread() on the selected task.
+
+Now the next question is should we continue with the oom-kill as the
+previously selected task has exited? However before adding more
+complexity and heuristics, let's answer why we even look at the children
+of oom-kill selected task? The select_bad_process() has already selected
+the worst process in the system/memcg. Due to race, the selected
+process might not be the worst at the kill time but does that matter?
+The userspace can use the oom_score_adj interface to prefer children to
+be killed before the parent. I looked at the history but it seems like
+this is there before git history.
+
+Link: http://lkml.kernel.org/r/20190121215850.221745-1-shakeelb@google.com
+Reported-by: syzbot+7fbbfa368521945f0e3d@syzkaller.appspotmail.com
+Fixes: 6b0c81b3be11 ("mm, oom: reduce dependency on tasklist_lock")
+Signed-off-by: Shakeel Butt
+Reviewed-by: Roman Gushchin
+Acked-by: Michal Hocko
+Cc: David Rientjes
+Cc: Johannes Weiner
+Cc: Tetsuo Handa
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -861,6 +861,13 @@ static void oom_kill_process(struct oom_
+ 	 * still freeing memory.
+ 	 */
+ 	read_lock(&tasklist_lock);
++
++	/*
++	 * The task 'p' might have already exited before reaching here. The
++	 * put_task_struct() will free task_struct 'p' while the loop still try
++	 * to access the field of 'p', so, get an extra reference.
++	 */
++	get_task_struct(p);
+ 	for_each_thread(p, t) {
+ 		list_for_each_entry(child, &t->children, sibling) {
+ 			unsigned int child_points;
+@@ -880,6 +887,7 @@ static void oom_kill_process(struct oom_
+ 			}
+ 		}
+ 	}
++	put_task_struct(p);
+ 	read_unlock(&tasklist_lock);
+ 
+ 	p = find_lock_task_mm(victim);
diff --git a/queue-4.9/series b/queue-4.9/series
index 37a75cd1b81..4fb9a238424 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -20,3 +20,7 @@ gfs2-revert-fix-loop-in-gfs2_rbm_find.patch
 platform-x86-asus-nb-wmi-map-0x35-to-key_screenlock.patch
 platform-x86-asus-nb-wmi-drop-mapping-of-0x33-and-0x.patch
 mmc-sdhci-iproc-handle-mmc_of_parse-errors-during-probe.patch
+kernel-exit.c-release-ptraced-tasks-before-zap_pid_ns_processes.patch
+mm-oom-fix-use-after-free-in-oom_kill_process.patch
+mm-hwpoison-use-do_send_sig_info-instead-of-force_sig.patch
+mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch