From: Greg Kroah-Hartman Date: Tue, 4 Oct 2016 07:07:59 +0000 (+0200) Subject: 4.7-stable patches X-Git-Tag: v4.8.1~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=81d3151f26d47ef0a1820e268a254951261b111c;p=thirdparty%2Fkernel%2Fstable-queue.git 4.7-stable patches added patches: cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch scripts-recordmcount.c-account-for-.softirqentry.text.patch --- diff --git a/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch b/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch new file mode 100644 index 00000000000..fd1a8c0f623 --- /dev/null +++ b/queue-4.7/cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch @@ -0,0 +1,87 @@ +From 9157056da8f8c4a6305f15619e269f164b63a6de Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 23 Sep 2016 16:55:49 -0400 +Subject: cgroup: fix invalid controller enable rejections with cgroup namespace + +From: Tejun Heo + +commit 9157056da8f8c4a6305f15619e269f164b63a6de upstream. + +On the v2 hierarchy, "cgroup.subtree_control" rejects controller +enables if the cgroup has processes in it. The enforcement of this +logic assumes that the cgroup wouldn't have any css_sets associated +with it if there are no tasks in the cgroup, which is no longer true +since a79a908fd2b0 ("cgroup: introduce cgroup namespaces"). + +When a cgroup namespace is created, it pins the css_set of the +creating task to use it as the root css_set of the namespace. This +extra reference stays as long as the namespace is around and makes +"cgroup.subtree_control" think that the namespace root cgroup is not +empty even when it is and thus reject controller enables. 
+ +Fix it by making cgroup_subtree_control() walk and test emptiness of +each css_set instead of testing whether the list_head is empty. + +While at it, update the comment of cgroup_task_count() to indicate +that the returned value may be higher than the number of tasks, which +has always been true due to temporary references and doesn't break +anything. + +Signed-off-by: Tejun Heo +Reported-by: Evgeny Vereshchagin +Cc: Serge E. Hallyn +Cc: Aditya Kali +Cc: Eric W. Biederman +Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") +Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541 +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 29 +++++++++++++++++++++++++---- + 1 file changed, 25 insertions(+), 4 deletions(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -3452,9 +3452,28 @@ static ssize_t cgroup_subtree_control_wr + * Except for the root, subtree_control must be zero for a cgroup + * with tasks so that child cgroups don't compete against tasks. + */ +- if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { +- ret = -EBUSY; +- goto out_unlock; ++ if (enable && cgroup_parent(cgrp)) { ++ struct cgrp_cset_link *link; ++ ++ /* ++ * Because namespaces pin csets too, @cgrp->cset_links ++ * might not be empty even when @cgrp is empty. Walk and ++ * verify each cset. ++ */ ++ spin_lock_irq(&css_set_lock); ++ ++ ret = 0; ++ list_for_each_entry(link, &cgrp->cset_links, cset_link) { ++ if (css_set_populated(link->cset)) { ++ ret = -EBUSY; ++ break; ++ } ++ } ++ ++ spin_unlock_irq(&css_set_lock); ++ ++ if (ret) ++ goto out_unlock; + } + + /* save and update control masks and prepare csses */ +@@ -3905,7 +3924,9 @@ void cgroup_file_notify(struct cgroup_fi + * cgroup_task_count - count the number of tasks in a cgroup. + * @cgrp: the cgroup in question + * +- * Return the number of tasks in the cgroup. ++ * Return the number of tasks in the cgroup. 
The returned number can be ++ * higher than the actual number of tasks due to css_set references from ++ * namespace roots and temporary usages. + */ + static int cgroup_task_count(const struct cgroup *cgrp) + { diff --git a/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch b/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch new file mode 100644 index 00000000000..5d4d9a9a05e --- /dev/null +++ b/queue-4.7/cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch @@ -0,0 +1,108 @@ +From 28b89b9e6f7b6c8fef7b3af39828722bca20cfee Mon Sep 17 00:00:00 2001 +From: Joonwoo Park +Date: Sun, 11 Sep 2016 21:14:58 -0700 +Subject: cpuset: handle race between CPU hotplug and cpuset_hotplug_work + +From: Joonwoo Park + +commit 28b89b9e6f7b6c8fef7b3af39828722bca20cfee upstream. + +A discrepancy between cpu_online_mask and cpuset's effective_cpus +mask is inevitable during hotplug since cpuset defers updating of +effective_cpus mask using a workqueue, during which time nothing +prevents the system from more hotplug operations. For that reason +guarantee_online_cpus() walks up the cpuset hierarchy until it finds +an intersection under the assumption that top cpuset's effective_cpus +mask intersects with cpu_online_mask even with such a race occurring. + +However a sequence of CPU hotplugs can open a time window, during which +none of the effective CPUs in the top cpuset intersect with +cpu_online_mask. + +For example when there are 4 possible CPUs 0-3 and only CPU0 is online: + + ======================== =========================== + cpu_online_mask top_cpuset.effective_cpus + ======================== =========================== + echo 1 > cpu2/online. + CPU hotplug notifier woke up hotplug work but not yet scheduled. + [0,2] [0] + + echo 0 > cpu0/online. + The workqueue is still runnable. 
+ [2] [0] + ======================== =========================== + + Now there is no intersection between cpu_online_mask and + top_cpuset.effective_cpus. Thus invoking sys_sched_setaffinity() at + this moment can cause following: + + Unable to handle kernel NULL pointer dereference at virtual address 000000d0 + ------------[ cut here ]------------ + Kernel BUG at ffffffc0001389b0 [verbose debug info unavailable] + Internal error: Oops - BUG: 96000005 [#1] PREEMPT SMP + Modules linked in: + CPU: 2 PID: 1420 Comm: taskset Tainted: G W 4.4.8+ #98 + task: ffffffc06a5c4880 ti: ffffffc06e124000 task.ti: ffffffc06e124000 + PC is at guarantee_online_cpus+0x2c/0x58 + LR is at cpuset_cpus_allowed+0x4c/0x6c + + Process taskset (pid: 1420, stack limit = 0xffffffc06e124020) + Call trace: + [] guarantee_online_cpus+0x2c/0x58 + [] cpuset_cpus_allowed+0x4c/0x6c + [] sched_setaffinity+0xc0/0x1ac + [] SyS_sched_setaffinity+0x98/0xac + [] el0_svc_naked+0x24/0x28 + +The top cpuset's effective_cpus are guaranteed to be identical to +cpu_online_mask eventually. Hence fall back to cpu_online_mask when +there is no intersection between top cpuset's effective_cpus and +cpu_online_mask. + +Signed-off-by: Joonwoo Park +Acked-by: Li Zefan +Cc: Tejun Heo +Cc: cgroups@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cpuset.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs + /* + * Return in pmask the portion of a cpusets's cpus_allowed that + * are online. If none are online, walk up the cpuset hierarchy +- * until we find one that does have some online cpus. The top +- * cpuset always has some cpus online. ++ * until we find one that does have some online cpus. + * + * One way or another, we guarantee to return some non-empty subset + * of cpu_online_mask. 
+@@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs + */ + static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) + { +- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) ++ while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { + cs = parent_cs(cs); ++ if (unlikely(!cs)) { ++ /* ++ * The top cpuset doesn't have any online cpu as a ++ * consequence of a race between cpuset_hotplug_work ++ * and cpu hotplug notifier. But we know the top ++ * cpuset's effective_cpus is on its way to to be ++ * identical to cpu_online_mask. ++ */ ++ cpumask_copy(pmask, cpu_online_mask); ++ return; ++ } ++ } + cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); + } + diff --git a/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch b/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch new file mode 100644 index 00000000000..0791f4b7781 --- /dev/null +++ b/queue-4.7/mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch @@ -0,0 +1,75 @@ +From 5b398e416e880159fe55eefd93c6588fa072cd66 Mon Sep 17 00:00:00 2001 +From: zhong jiang +Date: Wed, 28 Sep 2016 15:22:30 -0700 +Subject: mm,ksm: fix endless looping in allocating memory when ksm enable + +From: zhong jiang + +commit 5b398e416e880159fe55eefd93c6588fa072cd66 upstream. + +I hit the following hung task when running an OOM LTP test case with 4.1 +kernel. 
+ +Call trace: +[] __switch_to+0x74/0x8c +[] __schedule+0x23c/0x7bc +[] schedule+0x3c/0x94 +[] rwsem_down_write_failed+0x214/0x350 +[] down_write+0x64/0x80 +[] __ksm_exit+0x90/0x19c +[] mmput+0x118/0x11c +[] do_exit+0x2dc/0xa74 +[] do_group_exit+0x4c/0xe4 +[] get_signal+0x444/0x5e0 +[] do_signal+0x1d8/0x450 +[] do_notify_resume+0x70/0x78 + +The oom victim cannot terminate because it needs to take mmap_sem for +write while the lock is held by ksmd for read which loops in the page +allocator + +ksm_do_scan + scan_get_next_rmap_item + down_read + get_next_rmap_item + alloc_rmap_item #ksmd will loop permanently. + +There is no way forward because the oom victim cannot release any memory +in 4.1 based kernel. Since 4.6 we have the oom reaper which would solve +this problem because it would release the memory asynchronously. +Nevertheless we can relax alloc_rmap_item requirements and use +__GFP_NORETRY because the allocation failure is acceptable as ksm_do_scan +would just retry later after the lock got dropped. + +Such a patch would be also easy to backport to older stable kernels which +do not have oom_reaper. + +While we are at it add __GFP_NOWARN so the admin doesn't have to be alarmed +by the allocation failure. 
+ +Link: http://lkml.kernel.org/r/1474165570-44398-1-git-send-email-zhongjiang@huawei.com +Signed-off-by: zhong jiang +Suggested-by: Hugh Dickins +Suggested-by: Michal Hocko +Acked-by: Michal Hocko +Acked-by: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/ksm.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rm + { + struct rmap_item *rmap_item; + +- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); ++ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | ++ __GFP_NORETRY | __GFP_NOWARN); + if (rmap_item) + ksm_rmap_items++; + return rmap_item; diff --git a/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch b/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch new file mode 100644 index 00000000000..01b7143bf8d --- /dev/null +++ b/queue-4.7/mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch @@ -0,0 +1,53 @@ +From f6d7c1b5598b6407c3f1da795dd54acf99c1990c Mon Sep 17 00:00:00 2001 +From: Karl Beldan +Date: Mon, 29 Aug 2016 07:45:49 +0000 +Subject: mtd: nand: davinci: Reinitialize the HW ECC engine in 4bit hwctl + +From: Karl Beldan + +commit f6d7c1b5598b6407c3f1da795dd54acf99c1990c upstream. + +This fixes subpage writes when using 4-bit HW ECC. + +There have been numerous reports about ECC errors with devices using this +driver for a while. Also the 4-bit ECC has been reported as broken with +subpages in [1] and with 16 bits NANDs in the driver and in mach* board +files both in mainline and in the vendor BSPs. 
+ +What I saw with 4-bit ECC on a 16bits NAND (on an LCDK) which got me to +try reinitializing the ECC engine: +- R/W on whole pages properly generates/checks RS code +- try writing the 1st subpage only of a blank page, the subpage is well + written and the RS code properly generated, re-reading the same page + the HW detects some ECC error, reading the same page again no ECC + error is detected + +Note that the ECC engine is already reinitialized in the 1-bit case. + +Tested on my LCDK with UBI+UBIFS using subpages. +This could potentially get rid of the issue worked around in [1]. + +[1] 28c015a9daab ("mtd: davinci-nand: disable subpage write for keystone-nand") + +Fixes: 6a4123e581b3 ("mtd: nand: davinci_nand, 4-bit ECC for smallpage") +Signed-off-by: Karl Beldan +Acked-by: Boris Brezillon +Signed-off-by: Brian Norris +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/davinci_nand.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/mtd/nand/davinci_nand.c ++++ b/drivers/mtd/nand/davinci_nand.c +@@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(stru + unsigned long flags; + u32 val; + ++ /* Reset ECC hardware */ ++ davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET); ++ + spin_lock_irqsave(&davinci_nand_lock, flags); + + /* Start 4-bit ECC calculation for read/write */ diff --git a/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch b/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch new file mode 100644 index 00000000000..a8e8569bb31 --- /dev/null +++ b/queue-4.7/scripts-recordmcount.c-account-for-.softirqentry.text.patch @@ -0,0 +1,47 @@ +From e436fd61a8f62cb7a16310a42b95ab076ff72eff Mon Sep 17 00:00:00 2001 +From: Dmitry Vyukov +Date: Wed, 28 Sep 2016 15:22:36 -0700 +Subject: scripts/recordmcount.c: account for .softirqentry.text + +From: Dmitry Vyukov + +commit e436fd61a8f62cb7a16310a42b95ab076ff72eff upstream. 
+ +be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into +separate sections") added .softirqentry.text section, but it was not added +to recordmcount. So functions in the section are untraceable. Add the +section to scripts/recordmcount.c and scripts/recordmcount.pl. + +Fixes: be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections") +Link: http://lkml.kernel.org/r/1474902626-73468-1-git-send-email-dvyukov@google.com +Signed-off-by: Dmitry Vyukov +Acked-by: Steve Rostedt +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + scripts/recordmcount.c | 1 + + scripts/recordmcount.pl | 1 + + 2 files changed, 2 insertions(+) + +--- a/scripts/recordmcount.c ++++ b/scripts/recordmcount.c +@@ -363,6 +363,7 @@ is_mcounted_section_name(char const *con + strcmp(".sched.text", txtname) == 0 || + strcmp(".spinlock.text", txtname) == 0 || + strcmp(".irqentry.text", txtname) == 0 || ++ strcmp(".softirqentry.text", txtname) == 0 || + strcmp(".kprobes.text", txtname) == 0 || + strcmp(".text.unlikely", txtname) == 0; + } +--- a/scripts/recordmcount.pl ++++ b/scripts/recordmcount.pl +@@ -134,6 +134,7 @@ my %text_sections = ( + ".sched.text" => 1, + ".spinlock.text" => 1, + ".irqentry.text" => 1, ++ ".softirqentry.text" => 1, + ".kprobes.text" => 1, + ".text.unlikely" => 1, + ); diff --git a/queue-4.7/series b/queue-4.7/series index ca8a967d97e..6b59fb94849 100644 --- a/queue-4.7/series +++ b/queue-4.7/series @@ -2,3 +2,4 @@ cpuset-handle-race-between-cpu-hotplug-and-cpuset_hotplug_work.patch cgroup-fix-invalid-controller-enable-rejections-with-cgroup-namespace.patch scripts-recordmcount.c-account-for-.softirqentry.text.patch mtd-nand-davinci-reinitialize-the-hw-ecc-engine-in-4bit-hwctl.patch +mm-ksm-fix-endless-looping-in-allocating-memory-when-ksm-enable.patch