From 9198d9e4086c35a951f64c76c1f7f30ccd2b0a3d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 16 Dec 2019 13:01:33 +0100 Subject: [PATCH] 5.3-stable patches added patches: mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch mfd-rk808-fix-rk818-id-template.patch mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch --- ...el-lpss-use-devm_ioremap_uc-for-mmio.patch | 49 ++++++++ .../mfd-rk808-fix-rk818-id-template.patch | 40 +++++++ ...lling-on-root-kmem_cache-destruction.patch | 110 ++++++++++++++++++ ...ate-and-f_seal_future_write-mappings.patch | 90 ++++++++++++++ ...e-pud-insertion-race-during-faulting.patch | 104 +++++++++++++++++ queue-5.3/series | 5 + 6 files changed, 398 insertions(+) create mode 100644 queue-5.3/mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch create mode 100644 queue-5.3/mfd-rk808-fix-rk818-id-template.patch create mode 100644 queue-5.3/mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch create mode 100644 queue-5.3/mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch create mode 100644 queue-5.3/mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch diff --git a/queue-5.3/mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch b/queue-5.3/mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch new file mode 100644 index 00000000000..50a3d3bf9ae --- /dev/null +++ b/queue-5.3/mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch @@ -0,0 +1,49 @@ +From a8ff78f7f773142eb8a8befe5a95dd6858ebd635 Mon Sep 17 00:00:00 2001 +From: Tuowen Zhao +Date: Wed, 16 Oct 2019 15:06:29 -0600 +Subject: mfd: intel-lpss: Use devm_ioremap_uc for MMIO + +From: Tuowen Zhao + +commit a8ff78f7f773142eb8a8befe5a95dd6858ebd635 upstream. + +Some BIOS erroneously specifies write-combining BAR for intel-lpss-pci +in MTRR. 
This will cause the system to hang during boot. If possible, +this bug could be corrected with a firmware update. + +This patch uses devm_ioremap_uc to overwrite/ignore the MTRR settings +by forcing the use of strongly uncachable pages for intel-lpss. + +The BIOS bug is present on Dell XPS 13 7390 2-in-1: + +[ 0.001734] 5 base 4000000000 mask 6000000000 write-combining + +4000000000-7fffffffff : PCI Bus 0000:00 + 4000000000-400fffffff : 0000:00:02.0 (i915) + 4010000000-4010000fff : 0000:00:15.0 (intel-lpss-pci) + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=203485 +Cc: stable@vger.kernel.org # v4.19+ +Tested-by: AceLan Kao +Signed-off-by: Tuowen Zhao +Acked-by: Mika Westerberg +Acked-by: Andy Shevchenko +Tested-by: Roman Gilg +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mfd/intel-lpss.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mfd/intel-lpss.c ++++ b/drivers/mfd/intel-lpss.c +@@ -395,7 +395,7 @@ int intel_lpss_probe(struct device *dev, + if (!lpss) + return -ENOMEM; + +- lpss->priv = devm_ioremap(dev, info->mem->start + LPSS_PRIV_OFFSET, ++ lpss->priv = devm_ioremap_uc(dev, info->mem->start + LPSS_PRIV_OFFSET, + LPSS_PRIV_SIZE); + if (!lpss->priv) + return -ENOMEM; diff --git a/queue-5.3/mfd-rk808-fix-rk818-id-template.patch b/queue-5.3/mfd-rk808-fix-rk818-id-template.patch new file mode 100644 index 00000000000..febd4ae586d --- /dev/null +++ b/queue-5.3/mfd-rk808-fix-rk818-id-template.patch @@ -0,0 +1,40 @@ +From 37ef8c2c15bdc1322b160e38986c187de2b877b2 Mon Sep 17 00:00:00 2001 +From: Daniel Schultz +Date: Tue, 17 Sep 2019 10:12:53 +0200 +Subject: mfd: rk808: Fix RK818 ID template + +From: Daniel Schultz + +commit 37ef8c2c15bdc1322b160e38986c187de2b877b2 upstream. + +The Rockchip PMIC driver can automatically detect connected component +versions by reading the ID_MSB and ID_LSB registers. The probe function +will always fail with RK818 PMICs because the ID_MSK is 0xFFF0 and the +RK818 template ID is 0x8181. 
+ +This patch changes this value to 0x8180. + +Fixes: 9d6105e19f61 ("mfd: rk808: Fix up the chip id get failed") +Cc: stable@vger.kernel.org +Cc: Elaine Zhang +Cc: Joseph Chen +Signed-off-by: Daniel Schultz +Signed-off-by: Heiko Stuebner +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mfd/rk808.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/mfd/rk808.h ++++ b/include/linux/mfd/rk808.h +@@ -610,7 +610,7 @@ enum { + RK808_ID = 0x0000, + RK809_ID = 0x8090, + RK817_ID = 0x8170, +- RK818_ID = 0x8181, ++ RK818_ID = 0x8180, + }; + + struct rk808 { diff --git a/queue-5.3/mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch b/queue-5.3/mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch new file mode 100644 index 00000000000..4a70a03eec9 --- /dev/null +++ b/queue-5.3/mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch @@ -0,0 +1,110 @@ +From a264df74df38855096393447f1b8f386069a94b9 Mon Sep 17 00:00:00 2001 +From: Roman Gushchin +Date: Wed, 4 Dec 2019 16:49:46 -0800 +Subject: mm: memcg/slab: wait for !root kmem_cache refcnt killing on root kmem_cache destruction + +From: Roman Gushchin + +commit a264df74df38855096393447f1b8f386069a94b9 upstream. 
+ +Christian reported a warning like the following obtained during running +some KVM-related tests on s390: + + WARNING: CPU: 8 PID: 208 at lib/percpu-refcount.c:108 percpu_ref_exit+0x50/0x58 + Modules linked in: kvm(-) xt_CHECKSUM xt_MASQUERADE bonding xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip6table_na> + CPU: 8 PID: 208 Comm: kworker/8:1 Not tainted 5.2.0+ #66 + Hardware name: IBM 2964 NC9 712 (LPAR) + Workqueue: events sysfs_slab_remove_workfn + Krnl PSW : 0704e00180000000 0000001529746850 (percpu_ref_exit+0x50/0x58) + R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 + Krnl GPRS: 00000000ffff8808 0000001529746740 000003f4e30e8e18 0036008100000000 + 0000001f00000000 0035008100000000 0000001fb3573ab8 0000000000000000 + 0000001fbdb6de00 0000000000000000 0000001529f01328 0000001fb3573b00 + 0000001fbb27e000 0000001fbdb69300 000003e009263d00 000003e009263cd0 + Krnl Code: 0000001529746842: f0a0000407fe srp 4(11,%r0),2046,0 + 0000001529746848: 47000700 bc 0,1792 + #000000152974684c: a7f40001 brc 15,152974684e + >0000001529746850: a7f4fff2 brc 15,1529746834 + 0000001529746854: 0707 bcr 0,%r7 + 0000001529746856: 0707 bcr 0,%r7 + 0000001529746858: eb8ff0580024 stmg %r8,%r15,88(%r15) + 000000152974685e: a738ffff lhi %r3,-1 + Call Trace: + ([<000003e009263d00>] 0x3e009263d00) + [<00000015293252ea>] slab_kmem_cache_release+0x3a/0x70 + [<0000001529b04882>] kobject_put+0xaa/0xe8 + [<000000152918cf28>] process_one_work+0x1e8/0x428 + [<000000152918d1b0>] worker_thread+0x48/0x460 + [<00000015291942c6>] kthread+0x126/0x160 + [<0000001529b22344>] ret_from_fork+0x28/0x30 + [<0000001529b2234c>] kernel_thread_starter+0x0/0x10 + Last Breaking-Event-Address: + [<000000152974684c>] percpu_ref_exit+0x4c/0x58 + ---[ end trace b035e7da5788eb09 ]--- + +The problem occurs because kmem_cache_destroy() is called immediately +after deleting of a memcg, so it races with the memcg kmem_cache +deactivation. 
+ +flush_memcg_workqueue() at the beginning of kmem_cache_destroy() is +supposed to guarantee that all deactivation processes are finished, but +failed to do so. It waits for an rcu grace period, after which all +children kmem_caches should be deactivated. During the deactivation +percpu_ref_kill() is called for non root kmem_cache refcounters, but it +requires yet another rcu grace period to finish the transition to the +atomic (dead) state. + +So in a rare case when not all children kmem_caches are destroyed at the +moment when the root kmem_cache is about to be gone, we need to wait +another rcu grace period before destroying the root kmem_cache. + +This issue can be triggered only with dynamically created kmem_caches +which are used with memcg accounting. In this case per-memcg child +kmem_caches are created. They are deactivated from the cgroup removing +path. If the destruction of the root kmem_cache is racing with the +removal of the cgroup (both are quite complicated multi-stage +processes), the described issue can occur. The only known way to +trigger it in the real life, is to unload some kernel module which +creates a dedicated kmem_cache, used from different memory cgroups with +GFP_ACCOUNT flag. If the unloading happens immediately after calling +rmdir on the corresponding cgroup, there is some chance to trigger the +issue. 
+ +Link: http://lkml.kernel.org/r/20191129025011.3076017-1-guro@fb.com +Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management") +Signed-off-by: Roman Gushchin +Reported-by: Christian Borntraeger +Tested-by: Christian Borntraeger +Reviewed-by: Shakeel Butt +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/slab_common.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct + * previous workitems on workqueue are processed. + */ + flush_workqueue(memcg_kmem_cache_wq); ++ ++ /* ++ * If we're racing with children kmem_cache deactivation, it might ++ * take another rcu grace period to complete their destruction. ++ * At this moment the corresponding percpu_ref_kill() call should be ++ * done, but it might take another rcu grace period to complete ++ * switching to the atomic mode. ++ * Please, note that we check without grabbing the slab_mutex. It's safe ++ * because at this moment the children list can't grow. ++ */ ++ if (!list_empty(&s->memcg_params.children)) ++ rcu_barrier(); + } + #else + static inline int shutdown_memcg_caches(struct kmem_cache *s) diff --git a/queue-5.3/mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch b/queue-5.3/mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch new file mode 100644 index 00000000000..884333da137 --- /dev/null +++ b/queue-5.3/mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch @@ -0,0 +1,90 @@ +From 05d351102dbe4e103d6bdac18b1122cd3cd04925 Mon Sep 17 00:00:00 2001 +From: Nicolas Geoffray +Date: Sat, 30 Nov 2019 17:53:28 -0800 +Subject: mm, memfd: fix COW issue on MAP_PRIVATE and F_SEAL_FUTURE_WRITE mappings + +From: Nicolas Geoffray + +commit 05d351102dbe4e103d6bdac18b1122cd3cd04925 upstream. 
+ +F_SEAL_FUTURE_WRITE has unexpected behavior when used with MAP_PRIVATE: +A private mapping created after the memfd file that gets sealed with +F_SEAL_FUTURE_WRITE loses the copy-on-write at fork behavior, meaning +children and parent share the same memory, even though the mapping is +private. + +The reason for this is due to the code below: + + static int shmem_mmap(struct file *file, struct vm_area_struct *vma) + { + struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + + if (info->seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED + * read-only mapping, take care to not allow mprotect to revert + * protections. + */ + vma->vm_flags &= ~(VM_MAYWRITE); + } + ... + } + +And for the mm to know if a mapping is copy-on-write: + + static inline bool is_cow_mapping(vm_flags_t flags) + { + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + } + +The patch fixes the issue by making the mprotect revert protection +happen only for shared mappings. For private mappings, using mprotect +will have no effect on the seal behavior. + +The F_SEAL_FUTURE_WRITE feature was introduced in v5.1 so v5.3.x stable +kernels would need a backport. 
+ +[akpm@linux-foundation.org: reflow comment, per Christoph] +Link: http://lkml.kernel.org/r/20191107195355.80608-1-joel@joelfernandes.org +Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") +Signed-off-by: Nicolas Geoffray +Signed-off-by: Joel Fernandes (Google) +Cc: Hugh Dickins +Cc: Shuah Khan +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/shmem.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2198,11 +2198,14 @@ static int shmem_mmap(struct file *file, + return -EPERM; + + /* +- * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED +- * read-only mapping, take care to not allow mprotect to revert +- * protections. ++ * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as ++ * MAP_SHARED and read-only, take care to not allow mprotect to ++ * revert protections on such mappings. Do this only for shared ++ * mappings. For private mappings, don't need to mask ++ * VM_MAYWRITE as we still want them to be COW-writable. + */ +- vma->vm_flags &= ~(VM_MAYWRITE); ++ if (vma->vm_flags & VM_SHARED) ++ vma->vm_flags &= ~(VM_MAYWRITE); + } + + file_accessed(file); diff --git a/queue-5.3/mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch b/queue-5.3/mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch new file mode 100644 index 00000000000..e2991cd68a3 --- /dev/null +++ b/queue-5.3/mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch @@ -0,0 +1,104 @@ +From 625110b5e9dae9074d8a7e67dd07f821a053eed7 Mon Sep 17 00:00:00 2001 +From: Thomas Hellstrom +Date: Sat, 30 Nov 2019 17:51:32 -0800 +Subject: mm/memory.c: fix a huge pud insertion race during faulting + +From: Thomas Hellstrom + +commit 625110b5e9dae9074d8a7e67dd07f821a053eed7 upstream. + +A huge pud page can theoretically be faulted in racing with pmd_alloc() +in __handle_mm_fault(). 
That will lead to pmd_alloc() returning an +invalid pmd pointer. + +Fix this by adding a pud_trans_unstable() function similar to +pmd_trans_unstable() and check whether the pud is really stable before +using the pmd pointer. + +Race: + Thread 1: Thread 2: Comment + create_huge_pud() Fallback - not taken. + create_huge_pud() Taken. + pmd_alloc() Returns an invalid pointer. + +This will result in user-visible huge page data corruption. + +Note that this was caught during a code audit rather than a real +experienced problem. It looks to me like the only implementation that +currently creates huge pud pagetable entries is dev_dax_huge_fault() +which doesn't appear to care much about private (COW) mappings or +write-tracking which is, I believe, a prerequisite for create_huge_pud() +falling back on thread 1, but not in thread 2. + +Link: http://lkml.kernel.org/r/20191115115808.21181-2-thomas_os@shipmail.org +Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") +Signed-off-by: Thomas Hellstrom +Acked-by: Kirill A. Shutemov +Cc: Arnd Bergmann +Cc: Matthew Wilcox +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/pgtable.h | 25 +++++++++++++++++++++++++ + mm/memory.c | 6 ++++++ + 2 files changed, 31 insertions(+) + +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -912,6 +912,31 @@ static inline int pud_trans_huge(pud_t p + } + #endif + ++/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */ ++static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud) ++{ ++ pud_t pudval = READ_ONCE(*pud); ++ ++ if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) ++ return 1; ++ if (unlikely(pud_bad(pudval))) { ++ pud_clear_bad(pud); ++ return 1; ++ } ++ return 0; ++} ++ ++/* See pmd_trans_unstable for discussion. 
*/ ++static inline int pud_trans_unstable(pud_t *pud) ++{ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ ++ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) ++ return pud_none_or_trans_huge_or_dev_or_clear_bad(pud); ++#else ++ return 0; ++#endif ++} ++ + #ifndef pmd_read_atomic + static inline pmd_t pmd_read_atomic(pmd_t *pmdp) + { +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3905,6 +3905,7 @@ static vm_fault_t __handle_mm_fault(stru + vmf.pud = pud_alloc(mm, p4d, address); + if (!vmf.pud) + return VM_FAULT_OOM; ++retry_pud: + if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pud(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) +@@ -3931,6 +3932,11 @@ static vm_fault_t __handle_mm_fault(stru + vmf.pmd = pmd_alloc(mm, vmf.pud, address); + if (!vmf.pmd) + return VM_FAULT_OOM; ++ ++ /* Huge pud page fault raced with pmd_alloc? */ ++ if (pud_trans_unstable(vmf.pud)) ++ goto retry_pud; ++ + if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pmd(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) diff --git a/queue-5.3/series b/queue-5.3/series index 86bccea11a3..309e3a62125 100644 --- a/queue-5.3/series +++ b/queue-5.3/series @@ -168,3 +168,8 @@ omap-pdata-quirks-revert-pandora-specific-gpiod-addi.patch omap-pdata-quirks-remove-openpandora-quirks-for-mmc3.patch powerpc-avoid-clang-warnings-around-setjmp-and-longj.patch powerpc-fix-vdso-clock_getres.patch +mm-memfd-fix-cow-issue-on-map_private-and-f_seal_future_write-mappings.patch +mfd-intel-lpss-use-devm_ioremap_uc-for-mmio.patch +mfd-rk808-fix-rk818-id-template.patch +mm-memory.c-fix-a-huge-pud-insertion-race-during-faulting.patch +mm-memcg-slab-wait-for-root-kmem_cache-refcnt-killing-on-root-kmem_cache-destruction.patch -- 2.47.3