From 2b8d1824fbff03ea48ba2f57576d06a470480784 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 16 Dec 2019 12:36:57 -0500 Subject: [PATCH] fixes for 4.19 Signed-off-by: Sasha Levin --- .../mfd-rk808-fix-rk818-id-template.patch | 44 +++++ ...a-huge-pud-insertion-race-during-fau.patch | 110 +++++++++++ ...-report-thp-eligibility-for-each-vma.patch | 186 ++++++++++++++++++ .../s390-smp-vdso-fix-asce-handling.patch | 84 ++++++++ queue-4.19/series | 4 + 5 files changed, 428 insertions(+) create mode 100644 queue-4.19/mfd-rk808-fix-rk818-id-template.patch create mode 100644 queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch create mode 100644 queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch create mode 100644 queue-4.19/s390-smp-vdso-fix-asce-handling.patch diff --git a/queue-4.19/mfd-rk808-fix-rk818-id-template.patch b/queue-4.19/mfd-rk808-fix-rk818-id-template.patch new file mode 100644 index 00000000000..5e24b9d6600 --- /dev/null +++ b/queue-4.19/mfd-rk808-fix-rk818-id-template.patch @@ -0,0 +1,44 @@ +From 6cea56c52a739cf01cc874a36ffc3f5f1adcdcd5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Sep 2019 10:12:53 +0200 +Subject: mfd: rk808: Fix RK818 ID template + +From: Daniel Schultz + +[ Upstream commit 37ef8c2c15bdc1322b160e38986c187de2b877b2 ] + +The Rockchip PMIC driver can automatically detect connected component +versions by reading the ID_MSB and ID_LSB registers. The probe function +will always fail with RK818 PMICs because the ID_MSK is 0xFFF0 and the +RK818 template ID is 0x8181. + +This patch changes this value to 0x8180. 
+ +Fixes: 9d6105e19f61 ("mfd: rk808: Fix up the chip id get failed") +Cc: stable@vger.kernel.org +Cc: Elaine Zhang +Cc: Joseph Chen +Signed-off-by: Daniel Schultz +Signed-off-by: Heiko Stuebner +Signed-off-by: Lee Jones +Signed-off-by: Sasha Levin +--- + include/linux/mfd/rk808.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h +index d3156594674c2..338e0f6e2226b 100644 +--- a/include/linux/mfd/rk808.h ++++ b/include/linux/mfd/rk808.h +@@ -443,7 +443,7 @@ enum { + enum { + RK805_ID = 0x8050, + RK808_ID = 0x0000, +- RK818_ID = 0x8181, ++ RK818_ID = 0x8180, + }; + + struct rk808 { +-- +2.20.1 + diff --git a/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch b/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch new file mode 100644 index 00000000000..bb1f5482fc9 --- /dev/null +++ b/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch @@ -0,0 +1,110 @@ +From 3e0a2ff638b34f322eb170b1ae4515f61cfe3b14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 30 Nov 2019 17:51:32 -0800 +Subject: mm/memory.c: fix a huge pud insertion race during faulting + +From: Thomas Hellstrom + +[ Upstream commit 625110b5e9dae9074d8a7e67dd07f821a053eed7 ] + +A huge pud page can theoretically be faulted in racing with pmd_alloc() +in __handle_mm_fault(). That will lead to pmd_alloc() returning an +invalid pmd pointer. + +Fix this by adding a pud_trans_unstable() function similar to +pmd_trans_unstable() and check whether the pud is really stable before +using the pmd pointer. + +Race: + Thread 1: Thread 2: Comment + create_huge_pud() Fallback - not taken. + create_huge_pud() Taken. + pmd_alloc() Returns an invalid pointer. + +This will result in user-visible huge page data corruption. + +Note that this was caught during a code audit rather than a real +experienced problem. 
It looks to me like the only implementation that +currently creates huge pud pagetable entries is dev_dax_huge_fault() +which doesn't appear to care much about private (COW) mappings or +write-tracking which is, I believe, a prerequisite for create_huge_pud() +falling back on thread 1, but not in thread 2. + +Link: http://lkml.kernel.org/r/20191115115808.21181-2-thomas_os@shipmail.org +Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") +Signed-off-by: Thomas Hellstrom +Acked-by: Kirill A. Shutemov +Cc: Arnd Bergmann +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + include/asm-generic/pgtable.h | 25 +++++++++++++++++++++++++ + mm/memory.c | 6 ++++++ + 2 files changed, 31 insertions(+) + +diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h +index 15fd0277ffa69..f94c39070dcca 100644 +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -874,6 +874,31 @@ static inline int pud_trans_huge(pud_t pud) + } + #endif + ++/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */ ++static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud) ++{ ++ pud_t pudval = READ_ONCE(*pud); ++ ++ if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) ++ return 1; ++ if (unlikely(pud_bad(pudval))) { ++ pud_clear_bad(pud); ++ return 1; ++ } ++ return 0; ++} ++ ++/* See pmd_trans_unstable for discussion. 
*/ ++static inline int pud_trans_unstable(pud_t *pud) ++{ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ ++ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) ++ return pud_none_or_trans_huge_or_dev_or_clear_bad(pud); ++#else ++ return 0; ++#endif ++} ++ + #ifndef pmd_read_atomic + static inline pmd_t pmd_read_atomic(pmd_t *pmdp) + { +diff --git a/mm/memory.c b/mm/memory.c +index bbf0cc4066c84..f910da42a1f01 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4106,6 +4106,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, + vmf.pud = pud_alloc(mm, p4d, address); + if (!vmf.pud) + return VM_FAULT_OOM; ++retry_pud: + if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pud(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) +@@ -4132,6 +4133,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, + vmf.pmd = pmd_alloc(mm, vmf.pud, address); + if (!vmf.pmd) + return VM_FAULT_OOM; ++ ++ /* Huge pud page fault raced with pmd_alloc? */ ++ if (pud_trans_unstable(vmf.pud)) ++ goto retry_pud; ++ + if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pmd(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) +-- +2.20.1 + diff --git a/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch b/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch new file mode 100644 index 00000000000..f12cc8e6e15 --- /dev/null +++ b/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch @@ -0,0 +1,186 @@ +From 346a9dd5a9f9a0306e988401d4d726ef1b668057 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Dec 2018 00:38:21 -0800 +Subject: mm, thp, proc: report THP eligibility for each vma + +From: Michal Hocko + +[ Upstream commit 7635d9cbe8327e131a1d3d8517dc186c2796ce2e ] + +Userspace falls short when trying to find out whether a specific memory +range is eligible for THP. 
There are use cases that would like to know +that +http://lkml.kernel.org/r/alpine.DEB.2.21.1809251248450.50347@chino.kir.corp.google.com +: This is used to identify heap mappings that should be able to fault thp +: but do not, and they normally point to a low-on-memory or fragmentation +: issue. + +The only way to deduce this now is to query for hg resp. nh flags and +confronting the state with the global setting. Except that there is also +PR_SET_THP_DISABLE that might change the picture. So the final logic is +not trivial. Moreover the eligibility of the vma depends on the type of +VMA as well. In the past we have supported only anonymous memory VMAs +but things have changed and shmem based vmas are supported as well these +days and the query logic gets even more complicated because the +eligibility depends on the mount option and another global configuration +knob. + +Simplify the current state and report the THP eligibility in +/proc/<pid>/smaps for each existing vma. Reuse +transparent_hugepage_enabled for this purpose. The original +implementation of this function assumes that the caller knows that the vma +itself is supported for THP so make the core checks into +__transparent_hugepage_enabled and use it for existing callers. +__show_smap just uses the new transparent_hugepage_enabled which also +checks the vma support status (please note that this one has to be out of +line due to include dependency issues).
+ +[mhocko@kernel.org: fix oops with NULL ->f_mapping] + Link: http://lkml.kernel.org/r/20181224185106.GC16738@dhcp22.suse.cz +Link: http://lkml.kernel.org/r/20181211143641.3503-3-mhocko@kernel.org +Signed-off-by: Michal Hocko +Acked-by: Vlastimil Babka +Cc: Dan Williams +Cc: David Rientjes +Cc: Jan Kara +Cc: Mike Rapoport +Cc: Paul Oppenheimer +Cc: William Kucharski +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/proc.txt | 3 +++ + fs/proc/task_mmu.c | 2 ++ + include/linux/huge_mm.h | 13 ++++++++++++- + mm/huge_memory.c | 12 +++++++++++- + mm/memory.c | 4 ++-- + 5 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt +index 06ac6dda9b345..0d0ecc7df2600 100644 +--- a/Documentation/filesystems/proc.txt ++++ b/Documentation/filesystems/proc.txt +@@ -425,6 +425,7 @@ SwapPss: 0 kB + KernelPageSize: 4 kB + MMUPageSize: 4 kB + Locked: 0 kB ++THPeligible: 0 + VmFlags: rd ex mr mw me dw + + the first of these lines shows the same information as is displayed for the +@@ -462,6 +463,8 @@ replaced by copy-on-write) part of the underlying shmem object out on swap. + "SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this + does not take into account swapped out page of underlying shmem objects. + "Locked" indicates whether the mapping is locked in memory or not. ++"THPeligible" indicates whether the mapping is eligible for THP pages - 1 if ++true, 0 otherwise. + + "VmFlags" field deserves a separate description. 
This member represents the kernel + flags associated with the particular virtual memory area in two letter encoded +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index 71aba44c4fa6d..efa6273c00067 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -800,6 +800,8 @@ static int show_smap(struct seq_file *m, void *v) + + __show_smap(m, &mss); + ++ seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); ++ + if (arch_pkeys_enabled()) + seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); + show_smap_vma_flags(m, vma); +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h +index 77227224ca880..e375f2249f520 100644 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -91,7 +91,11 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); + + extern unsigned long transparent_hugepage_flags; + +-static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) ++/* ++ * to be used on vmas which are known to support THP. 
++ * Use transparent_hugepage_enabled otherwise ++ */ ++static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) + { + if (vma->vm_flags & VM_NOHUGEPAGE) + return false; +@@ -115,6 +119,8 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) + return false; + } + ++bool transparent_hugepage_enabled(struct vm_area_struct *vma); ++ + #define transparent_hugepage_use_zero_page() \ + (transparent_hugepage_flags & \ + (1<ptl); + alloc: +- if (transparent_hugepage_enabled(vma) && ++ if (__transparent_hugepage_enabled(vma) && + !transparent_hugepage_debug_cow()) { + huge_gfp = alloc_hugepage_direct_gfpmask(vma); + new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); +diff --git a/mm/memory.c b/mm/memory.c +index fb5655b518c99..bbf0cc4066c84 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4106,7 +4106,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, + vmf.pud = pud_alloc(mm, p4d, address); + if (!vmf.pud) + return VM_FAULT_OOM; +- if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { ++ if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pud(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; +@@ -4132,7 +4132,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, + vmf.pmd = pmd_alloc(mm, vmf.pud, address); + if (!vmf.pmd) + return VM_FAULT_OOM; +- if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { ++ if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { + ret = create_huge_pmd(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; +-- +2.20.1 + diff --git a/queue-4.19/s390-smp-vdso-fix-asce-handling.patch b/queue-4.19/s390-smp-vdso-fix-asce-handling.patch new file mode 100644 index 00000000000..c5330cb03b6 --- /dev/null +++ b/queue-4.19/s390-smp-vdso-fix-asce-handling.patch @@ -0,0 +1,84 @@ +From ea46298700357a99313a408683bbfbd51306c347 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Nov 2019 
13:09:52 +0100 +Subject: s390/smp,vdso: fix ASCE handling + +From: Heiko Carstens + +[ Upstream commit a2308c11ecbc3471ebb7435ee8075815b1502ef0 ] + +When a secondary CPU is brought up it must initialize its control +registers. CPU A which triggers that a secondary CPU B is brought up +stores its control register contents into the lowcore of new CPU B, +which then loads these values on startup. + +This is problematic in various ways: the control register which +contains the home space ASCE will correctly contain the kernel ASCE; +however control registers for primary and secondary ASCEs are +initialized with whatever values were present in CPU A. + +Typically: +- the primary ASCE will contain the user process ASCE of the process + that triggered onlining of CPU B. +- the secondary ASCE will contain the percpu VDSO ASCE of CPU A. + +Due to lazy ASCE handling we may also end up with other combinations. + +When CPU B then switches to a different process (!= idle) it will +fixup the primary ASCE. However the problem is that the (wrong) ASCE +from CPU A was loaded into control register 1: as soon as an ASCE is +attached (aka loaded) a CPU is free to generate TLB entries using that +address space. +Even though it is very unlikely that CPU B will actually generate such +entries, this could result in TLB entries of the address space of the +process that ran on CPU A. These entries shouldn't exist at all and +could cause problems later on. + +Furthermore the secondary ASCE of CPU B will not be updated correctly. +This means that processes may see wrong results or even crash if they +access VDSO data on CPU B. The correct VDSO ASCE will eventually be +loaded on return to user space as soon as the kernel executed a call +to strnlen_user or an atomic futex operation on CPU B. + +Fix both issues by initializing the to be loaded control register +contents with the correct ASCEs and also enforce (re-)loading of the +ASCEs upon first context switch and return to user space.
+ +Fixes: 0aaba41b58bc ("s390: remove all code using the access register mode") +Cc: stable@vger.kernel.org # v4.15+ +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/smp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c +index da02f4087d61f..df2413f26a8f2 100644 +--- a/arch/s390/kernel/smp.c ++++ b/arch/s390/kernel/smp.c +@@ -261,9 +261,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) + lc->spinlock_index = 0; + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; ++ lc->user_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); ++ lc->cregs_save_area[1] = lc->kernel_asce; ++ lc->cregs_save_area[7] = lc->vdso_asce; + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + sizeof(lc->stfle_fac_list)); +@@ -810,6 +813,8 @@ static void smp_start_secondary(void *cpuvoid) + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); ++ set_cpu_flag(CIF_ASCE_PRIMARY); ++ set_cpu_flag(CIF_ASCE_SECONDARY); + cpu_init(); + preempt_disable(); + init_cpu_timer(); +-- +2.20.1 + diff --git a/queue-4.19/series b/queue-4.19/series index c727bb66364..414b1ca23b7 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -134,3 +134,7 @@ mm-shmem.c-cast-the-type-of-unmap_start-to-u64.patch rtc-disable-uie-before-setting-time-and-enable-after.patch splice-only-read-in-as-much-information-as-there-is-pipe-buffer-space.patch ext4-fix-a-bug-in-ext4_wait_for_tail_page_commit.patch +mfd-rk808-fix-rk818-id-template.patch +mm-thp-proc-report-thp-eligibility-for-each-vma.patch 
+mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch +s390-smp-vdso-fix-asce-handling.patch -- 2.47.3