From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 4 Oct 2016 07:07:59 +0000 (+0200)
Subject: 4.7-stable patches
X-Git-Tag: v4.8.1~35
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=81d3151f26d47ef0a1820e268a254951261b111c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.7-stable patches

added patches:
	cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch
	cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch
	mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch
	mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch
	scripts-recordmcount.c-account-for-.softirqentry.text.patch
---

diff --git a/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch b/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch
new file mode 100644
index 00000000000..fd1a8c0f623
--- /dev/null
+++ b/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch
@@ -0,0 +1,87 @@
+From 9157056da8f8c4a6305f15619e269f164b63a6de Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 23 Sep 2016 16:55:49 -0400
+Subject: cgroup: fix invalid controller enable rejections with cgroup namespace
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 9157056da8f8c4a6305f15619e269f164b63a6de upstream.
+
+On the v2 hierarchy, "cgroup.subtree_control" rejects controller
+enables if the cgroup has processes in it.  The enforcement of this
+logic assumes that the cgroup wouldn't have any css_sets associated
+with it if there are no tasks in the cgroup, which is no longer true
+since a79a908fd2b0 ("cgroup: introduce cgroup namespaces").
+
+When a cgroup namespace is created, it pins the css_set of the
+creating task to use it as the root css_set of the namespace.  This
+extra reference stays as long as the namespace is around and makes
+"cgroup.subtree_control" think that the namespace root cgroup is not
+empty even when it is and thus reject controller enables.
+
+Fix it by making cgroup_subtree_control() walk and test emptiness of
+each css_set instead of testing whether the list_head is empty.
+
+While at it, update the comment of cgroup_task_count() to indicate
+that the returned value may be higher than the number of tasks, which
+has always been true due to temporary references and doesn't break
+anything.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Evgeny Vereshchagin <evvers@ya.ru>
+Cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
+Cc: Aditya Kali <adityakali@google.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces")
+Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c |   29 +++++++++++++++++++++++++----
+ 1 file changed, 25 insertions(+), 4 deletions(-)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -3452,9 +3452,28 @@ static ssize_t cgroup_subtree_control_wr
+ 	 * Except for the root, subtree_control must be zero for a cgroup
+ 	 * with tasks so that child cgroups don't compete against tasks.
+ 	 */
+-	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
+-		ret = -EBUSY;
+-		goto out_unlock;
++	if (enable && cgroup_parent(cgrp)) {
++		struct cgrp_cset_link *link;
++
++		/*
++		 * Because namespaces pin csets too, @cgrp->cset_links
++		 * might not be empty even when @cgrp is empty.  Walk and
++		 * verify each cset.
++		 */
++		spin_lock_irq(&css_set_lock);
++
++		ret = 0;
++		list_for_each_entry(link, &cgrp->cset_links, cset_link) {
++			if (css_set_populated(link->cset)) {
++				ret = -EBUSY;
++				break;
++			}
++		}
++
++		spin_unlock_irq(&css_set_lock);
++
++		if (ret)
++			goto out_unlock;
+ 	}
+ 
+ 	/* save and update control masks and prepare csses */
+@@ -3905,7 +3924,9 @@ void cgroup_file_notify(struct cgroup_fi
+  * cgroup_task_count - count the number of tasks in a cgroup.
+  * @cgrp: the cgroup in question
+  *
+- * Return the number of tasks in the cgroup.
++ * Return the number of tasks in the cgroup.  The returned number can be
++ * higher than the actual number of tasks due to css_set references from
++ * namespace roots and temporary usages.
+  */
+ static int cgroup_task_count(const struct cgroup *cgrp)
+ {
diff --git a/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch b/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch
new file mode 100644
index 00000000000..5d4d9a9a05e
--- /dev/null
+++ b/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch
@@ -0,0 +1,108 @@
+From 28b89b9e6f7b6c8fef7b3af39828722bca20cfee Mon Sep 17 00:00:00 2001
+From: Joonwoo Park <joonwoop@codeaurora.org>
+Date: Sun, 11 Sep 2016 21:14:58 -0700
+Subject: cpuset: handle race between CPU hotplug and cpuset_hotplug_work
+
+From: Joonwoo Park <joonwoop@codeaurora.org>
+
+commit 28b89b9e6f7b6c8fef7b3af39828722bca20cfee upstream.
+
+A discrepancy between cpu_online_mask and cpuset's effective_cpus
+mask is inevitable during hotplug since cpuset defers updating of
+effective_cpus mask using a workqueue, during which time nothing
+prevents the system from more hotplug operations.  For that reason
+guarantee_online_cpus() walks up the cpuset hierarchy until it finds
+an intersection under the assumption that top cpuset's effective_cpus
+mask intersects with cpu_online_mask even with such a race occurring.
+
+However a sequence of CPU hotplugs can open a time window, during which
+none of the effective CPUs in the top cpuset intersect with
+cpu_online_mask.
+
+For example when there are 4 possible CPUs 0-3 and only CPU0 is online:
+
+  ========================  ===========================
+   cpu_online_mask           top_cpuset.effective_cpus
+  ========================  ===========================
+   echo 1 > cpu2/online.
+   CPU hotplug notifier woke up hotplug work but not yet scheduled.
+      [0,2]                     [0]
+
+   echo 0 > cpu0/online.
+   The workqueue is still runnable.
+      [2]                       [0]
+  ========================  ===========================
+
+  Now there is no intersection between cpu_online_mask and
+  top_cpuset.effective_cpus.  Thus invoking sys_sched_setaffinity() at
+  this moment can cause the following:
+
+   Unable to handle kernel NULL pointer dereference at virtual address 000000d0
+   ------------[ cut here ]------------
+   Kernel BUG at ffffffc0001389b0 [verbose debug info unavailable]
+   Internal error: Oops - BUG: 96000005 [#1] PREEMPT SMP
+   Modules linked in:
+   CPU: 2 PID: 1420 Comm: taskset Tainted: G        W       4.4.8+ #98
+   task: ffffffc06a5c4880 ti: ffffffc06e124000 task.ti: ffffffc06e124000
+   PC is at guarantee_online_cpus+0x2c/0x58
+   LR is at cpuset_cpus_allowed+0x4c/0x6c
+   <snip>
+   Process taskset (pid: 1420, stack limit = 0xffffffc06e124020)
+   Call trace:
+   [<ffffffc0001389b0>] guarantee_online_cpus+0x2c/0x58
+   [<ffffffc00013b208>] cpuset_cpus_allowed+0x4c/0x6c
+   [<ffffffc0000d61f0>] sched_setaffinity+0xc0/0x1ac
+   [<ffffffc0000d6374>] SyS_sched_setaffinity+0x98/0xac
+   [<ffffffc000085cb0>] el0_svc_naked+0x24/0x28
+
+The top cpuset's effective_cpus are guaranteed to be identical to
+cpu_online_mask eventually.  Hence fall back to cpu_online_mask when
+there is no intersection between top cpuset's effective_cpus and
+cpu_online_mask.
+
+Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
+Acked-by: Li Zefan <lizefan@huawei.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: cgroups@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cpuset.c |   17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs
+ /*
+  * Return in pmask the portion of a cpusets's cpus_allowed that
+  * are online.  If none are online, walk up the cpuset hierarchy
+- * until we find one that does have some online cpus.  The top
+- * cpuset always has some cpus online.
++ * until we find one that does have some online cpus.
+  *
+  * One way or another, we guarantee to return some non-empty subset
+  * of cpu_online_mask.
+@@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs
+  */
+ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+ {
+-	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
++	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+ 		cs = parent_cs(cs);
++		if (unlikely(!cs)) {
++			/*
++			 * The top cpuset doesn't have any online cpu as a
++			 * consequence of a race between cpuset_hotplug_work
++			 * and cpu hotplug notifier.  But we know the top
+			 * cpuset's effective_cpus is on its way to be
++			 * identical to cpu_online_mask.
++			 */
++			cpumask_copy(pmask, cpu_online_mask);
++			return;
++		}
++	}
+ 	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+ }
+ 
diff --git a/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch b/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch
new file mode 100644
index 00000000000..0791f4b7781
--- /dev/null
+++ b/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch
@@ -0,0 +1,75 @@
+From 5b398e416e880159fe55eefd93c6588fa072cd66 Mon Sep 17 00:00:00 2001
+From: zhong jiang <zhongjiang@huawei.com>
+Date: Wed, 28 Sep 2016 15:22:30 -0700
+Subject: mm,ksm: fix endless looping in allocating memory when ksm enable
+
+From: zhong jiang <zhongjiang@huawei.com>
+
+commit 5b398e416e880159fe55eefd93c6588fa072cd66 upstream.
+
+I hit the following hung task when running an OOM LTP test case with 4.1
+kernel.
+
+Call trace:
+[<ffffffc000086a88>] __switch_to+0x74/0x8c
+[<ffffffc000a1bae0>] __schedule+0x23c/0x7bc
+[<ffffffc000a1c09c>] schedule+0x3c/0x94
+[<ffffffc000a1eb84>] rwsem_down_write_failed+0x214/0x350
+[<ffffffc000a1e32c>] down_write+0x64/0x80
+[<ffffffc00021f794>] __ksm_exit+0x90/0x19c
+[<ffffffc0000be650>] mmput+0x118/0x11c
+[<ffffffc0000c3ec4>] do_exit+0x2dc/0xa74
+[<ffffffc0000c46f8>] do_group_exit+0x4c/0xe4
+[<ffffffc0000d0f34>] get_signal+0x444/0x5e0
+[<ffffffc000089fcc>] do_signal+0x1d8/0x450
+[<ffffffc00008a35c>] do_notify_resume+0x70/0x78
+
+The oom victim cannot terminate because it needs to take mmap_sem for
+write while the lock is held by ksmd for read which loops in the page
+allocator:
+
+ksm_do_scan
+	scan_get_next_rmap_item
+		down_read
+		get_next_rmap_item
+			alloc_rmap_item   #ksmd will loop permanently.
+
+There is no way forward because the oom victim cannot release any memory
+in a 4.1-based kernel.  Since 4.6 we have the oom reaper which would solve
+this problem because it would release the memory asynchronously.
+Nevertheless we can relax alloc_rmap_item requirements and use
+__GFP_NORETRY because the allocation failure is acceptable as ksm_do_scan
+would just retry later after the lock got dropped.
+
+Such a patch would be also easy to backport to older stable kernels which
+do not have oom_reaper.
+
+While we are at it add __GFP_NOWARN so the admin doesn't have to be alarmed
+by the allocation failure.
+
+Link: http://lkml.kernel.org/r/1474165570-44398-1-git-send-email-zhongjiang@huawei.com
+Signed-off-by: zhong jiang <zhongjiang@huawei.com>
+Suggested-by: Hugh Dickins <hughd@google.com>
+Suggested-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/ksm.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rm
+ {
+ 	struct rmap_item *rmap_item;
+ 
+-	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
++	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
++						__GFP_NORETRY | __GFP_NOWARN);
+ 	if (rmap_item)
+ 		ksm_rmap_items++;
+ 	return rmap_item;
diff --git a/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch b/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch
new file mode 100644
index 00000000000..01b7143bf8d
--- /dev/null
+++ b/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch
@@ -0,0 +1,53 @@
+From f6d7c1b5598b6407c3f1da795dd54acf99c1990c Mon Sep 17 00:00:00 2001
+From: Karl Beldan <kbeldan@baylibre.com>
+Date: Mon, 29 Aug 2016 07:45:49 +0000
+Subject: mtd: nand: davinci: Reinitialize the HW ECC engine in 4bit hwctl
+
+From: Karl Beldan <kbeldan@baylibre.com>
+
+commit f6d7c1b5598b6407c3f1da795dd54acf99c1990c upstream.
+
+This fixes subpage writes when using 4-bit HW ECC.
+
+There have been numerous reports about ECC errors with devices using this
+driver for a while.  Also the 4-bit ECC has been reported as broken with
+subpages in [1] and with 16 bits NANDs in the driver and in mach* board
+files both in mainline and in the vendor BSPs.
+
+What I saw with 4-bit ECC on a 16bits NAND (on an LCDK) which got me to
+try reinitializing the ECC engine:
+- R/W on whole pages properly generates/checks RS code
+- try writing the 1st subpage only of a blank page, the subpage is well
+  written and the RS code properly generated, re-reading the same page
+  the HW detects some ECC error, reading the same page again no ECC
+  error is detected
+
+Note that the ECC engine is already reinitialized in the 1-bit case.
+
+Tested on my LCDK with UBI+UBIFS using subpages.
+This could potentially get rid of the issue worked around in [1].
+
+[1] 28c015a9daab ("mtd: davinci-nand: disable subpage write for keystone-nand")
+
+Fixes: 6a4123e581b3 ("mtd: nand: davinci_nand, 4-bit ECC for smallpage")
+Signed-off-by: Karl Beldan <kbeldan@baylibre.com>
+Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Signed-off-by: Brian Norris <computersforpeace@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/davinci_nand.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/mtd/nand/davinci_nand.c
++++ b/drivers/mtd/nand/davinci_nand.c
+@@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(stru
+ 	unsigned long flags;
+ 	u32 val;
+ 
++	/* Reset ECC hardware */
++	davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
++
+ 	spin_lock_irqsave(&davinci_nand_lock, flags);
+ 
+ 	/* Start 4-bit ECC calculation for read/write */
diff --git a/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch b/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch
new file mode 100644
index 00000000000..a8e8569bb31
--- /dev/null
+++ b/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch
@@ -0,0 +1,47 @@
+From e436fd61a8f62cb7a16310a42b95ab076ff72eff Mon Sep 17 00:00:00 2001
+From: Dmitry Vyukov <dvyukov@google.com>
+Date: Wed, 28 Sep 2016 15:22:36 -0700
+Subject: scripts/recordmcount.c: account for .softirqentry.text
+
+From: Dmitry Vyukov <dvyukov@google.com>
+
+commit e436fd61a8f62cb7a16310a42b95ab076ff72eff upstream.
+
+be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into
+separate sections") added .softirqentry.text section, but it was not added
+to recordmcount.  So functions in the section are untraceable.  Add the
+section to scripts/recordmcount.c and scripts/recordmcount.pl.
+
+Fixes: be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections")
+Link: http://lkml.kernel.org/r/1474902626-73468-1-git-send-email-dvyukov@google.com
+Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Steve Rostedt <rostedt@goodmis.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ scripts/recordmcount.c  |    1 +
+ scripts/recordmcount.pl |    1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/scripts/recordmcount.c
++++ b/scripts/recordmcount.c
+@@ -363,6 +363,7 @@ is_mcounted_section_name(char const *con
+ 		strcmp(".sched.text",    txtname) == 0 ||
+ 		strcmp(".spinlock.text", txtname) == 0 ||
+ 		strcmp(".irqentry.text", txtname) == 0 ||
++		strcmp(".softirqentry.text", txtname) == 0 ||
+ 		strcmp(".kprobes.text", txtname) == 0 ||
+ 		strcmp(".text.unlikely", txtname) == 0;
+ }
+--- a/scripts/recordmcount.pl
++++ b/scripts/recordmcount.pl
+@@ -134,6 +134,7 @@ my %text_sections = (
+      ".sched.text" => 1,
+      ".spinlock.text" => 1,
+      ".irqentry.text" => 1,
++     ".softirqentry.text" => 1,
+      ".kprobes.text" => 1,
+      ".text.unlikely" => 1,
+ );
diff --git a/queue-4.7/series b/queue-4.7/series
index ca8a967d97e..6b59fb94849 100644
--- a/queue-4.7/series
+++ b/queue-4.7/series
@@ -2,3 +2,4 @@ cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch
 cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch
 scripts-recordmcount.c-account-for-.softirqentry.text.patch
 mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch
+mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch