fixes for 4.19

author Sasha Levin <sashal@kernel.org>

Mon, 16 Dec 2019 17:36:57 +0000 (12:36 -0500)

committer Sasha Levin <sashal@kernel.org>

Mon, 16 Dec 2019 17:37:22 +0000 (12:37 -0500)
author Sasha Levin <sashal@kernel.org>
Mon, 16 Dec 2019 17:36:57 +0000 (12:36 -0500)
committer Sasha Levin <sashal@kernel.org>
Mon, 16 Dec 2019 17:37:22 +0000 (12:37 -0500)
diff --git a/queue-4.19/mfd-rk808-fix-rk818-id-template.patch b/queue-4.19/mfd-rk808-fix-rk818-id-template.patch

new file mode 100644 (file)

index 0000000..5e24b9d
--- /dev/null
+++ b/queue-4.19/mfd-rk808-fix-rk818-id-template.patch
@@ -0,0 +1,44 @@
+From 6cea56c52a739cf01cc874a36ffc3f5f1adcdcd5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Sep 2019 10:12:53 +0200
+Subject: mfd: rk808: Fix RK818 ID template
+
+From: Daniel Schultz <d.schultz@phytec.de>
+
+[ Upstream commit 37ef8c2c15bdc1322b160e38986c187de2b877b2 ]
+
+The Rockchip PMIC driver can automatically detect connected component
+versions by reading the ID_MSB and ID_LSB registers. The probe function
+will always fail with RK818 PMICs because the ID_MSK is 0xFFF0 and the
+RK818 template ID is 0x8181.
+
+This patch changes this value to 0x8180.
+
+Fixes: 9d6105e19f61 ("mfd: rk808: Fix up the chip id get failed")
+Cc: stable@vger.kernel.org
+Cc: Elaine Zhang <zhangqing@rock-chips.com>
+Cc: Joseph Chen <chenjh@rock-chips.com>
+Signed-off-by: Daniel Schultz <d.schultz@phytec.de>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mfd/rk808.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h
+index d3156594674c2..338e0f6e2226b 100644
+--- a/include/linux/mfd/rk808.h
++++ b/include/linux/mfd/rk808.h
+@@ -443,7 +443,7 @@ enum {
+ enum {
+       RK805_ID = 0x8050,
+       RK808_ID = 0x0000,
+-      RK818_ID = 0x8181,
++      RK818_ID = 0x8180,
+ };
+ 
+ struct rk808 {
+-- 
+2.20.1
+
diff --git a/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch b/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch

new file mode 100644 (file)

index 0000000..bb1f548
--- /dev/null
+++ b/queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch
@@ -0,0 +1,110 @@
+From 3e0a2ff638b34f322eb170b1ae4515f61cfe3b14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Nov 2019 17:51:32 -0800
+Subject: mm/memory.c: fix a huge pud insertion race during faulting
+
+From: Thomas Hellstrom <thellstrom@vmware.com>
+
+[ Upstream commit 625110b5e9dae9074d8a7e67dd07f821a053eed7 ]
+
+A huge pud page can theoretically be faulted in racing with pmd_alloc()
+in __handle_mm_fault().  That will lead to pmd_alloc() returning an
+invalid pmd pointer.
+
+Fix this by adding a pud_trans_unstable() function similar to
+pmd_trans_unstable() and check whether the pud is really stable before
+using the pmd pointer.
+
+Race:
+  Thread 1:             Thread 2:                 Comment
+  create_huge_pud()                               Fallback - not taken.
+                        create_huge_pud()         Taken.
+  pmd_alloc()                                     Returns an invalid pointer.
+
+This will result in user-visible huge page data corruption.
+
+Note that this was caught during a code audit rather than a real
+experienced problem.  It looks to me like the only implementation that
+currently creates huge pud pagetable entries is dev_dax_huge_fault()
+which doesn't appear to care much about private (COW) mappings or
+write-tracking which is, I believe, a prerequisite for create_huge_pud()
+falling back on thread 1, but not in thread 2.
+
+Link: http://lkml.kernel.org/r/20191115115808.21181-2-thomas_os@shipmail.org
+Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
+Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/pgtable.h | 25 +++++++++++++++++++++++++
+ mm/memory.c                   |  6 ++++++
+ 2 files changed, 31 insertions(+)
+
+diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
+index 15fd0277ffa69..f94c39070dcca 100644
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -874,6 +874,31 @@ static inline int pud_trans_huge(pud_t pud)
+ }
+ #endif
+ 
++/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
++static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
++{
++      pud_t pudval = READ_ONCE(*pud);
++
++      if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
++              return 1;
++      if (unlikely(pud_bad(pudval))) {
++              pud_clear_bad(pud);
++              return 1;
++      }
++      return 0;
++}
++
++/* See pmd_trans_unstable for discussion. */
++static inline int pud_trans_unstable(pud_t *pud)
++{
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&                   \
++      defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
++      return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
++#else
++      return 0;
++#endif
++}
++
+ #ifndef pmd_read_atomic
+ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
+ {
+diff --git a/mm/memory.c b/mm/memory.c
+index bbf0cc4066c84..f910da42a1f01 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4106,6 +4106,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+       vmf.pud = pud_alloc(mm, p4d, address);
+       if (!vmf.pud)
+               return VM_FAULT_OOM;
++retry_pud:
+       if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
+               ret = create_huge_pud(&vmf);
+               if (!(ret & VM_FAULT_FALLBACK))
+@@ -4132,6 +4133,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+       vmf.pmd = pmd_alloc(mm, vmf.pud, address);
+       if (!vmf.pmd)
+               return VM_FAULT_OOM;
++
++      /* Huge pud page fault raced with pmd_alloc? */
++      if (pud_trans_unstable(vmf.pud))
++              goto retry_pud;
++
+       if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
+               ret = create_huge_pmd(&vmf);
+               if (!(ret & VM_FAULT_FALLBACK))
+-- 
+2.20.1
+
diff --git a/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch b/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch

new file mode 100644 (file)

index 0000000..f12cc8e
--- /dev/null
+++ b/queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch
@@ -0,0 +1,186 @@
+From 346a9dd5a9f9a0306e988401d4d726ef1b668057 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Dec 2018 00:38:21 -0800
+Subject: mm, thp, proc: report THP eligibility for each vma
+
+From: Michal Hocko <mhocko@suse.com>
+
+[ Upstream commit 7635d9cbe8327e131a1d3d8517dc186c2796ce2e ]
+
+Userspace falls short when trying to find out whether a specific memory
+range is eligible for THP.  There are usecases that would like to know
+that
+http://lkml.kernel.org/r/alpine.DEB.2.21.1809251248450.50347@chino.kir.corp.google.com
+: This is used to identify heap mappings that should be able to fault thp
+: but do not, and they normally point to a low-on-memory or fragmentation
+: issue.
+
+The only way to deduce this now is to query for hg resp.  nh flags and
+compare the state with the global setting.  Except that there is also
+PR_SET_THP_DISABLE that might change the picture.  So the final logic is
+not trivial.  Moreover the eligibility of the vma depends on the type of
+VMA as well.  In the past we have supported only anonymous memory VMAs
+but things have changed and shmem based vmas are supported as well these
+days and the query logic gets even more complicated because the
+eligibility depends on the mount option and another global configuration
+knob.
+
+Simplify the current state and report the THP eligibility in
+/proc/<pid>/smaps for each existing vma.  Reuse
+transparent_hugepage_enabled for this purpose.  The original
+implementation of this function assumes that the caller knows that the vma
+itself is supported for THP so make the core checks into
+__transparent_hugepage_enabled and use it for existing callers.
+__show_smap just uses the new transparent_hugepage_enabled which also
+checks the vma support status (please note that this one has to be out of
+line due to include dependency issues).
+
+[mhocko@kernel.org: fix oops with NULL ->f_mapping]
+  Link: http://lkml.kernel.org/r/20181224185106.GC16738@dhcp22.suse.cz
+Link: http://lkml.kernel.org/r/20181211143641.3503-3-mhocko@kernel.org
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Paul Oppenheimer <bepvte@gmail.com>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/proc.txt |  3 +++
+ fs/proc/task_mmu.c                 |  2 ++
+ include/linux/huge_mm.h            | 13 ++++++++++++-
+ mm/huge_memory.c                   | 12 +++++++++++-
+ mm/memory.c                        |  4 ++--
+ 5 files changed, 30 insertions(+), 4 deletions(-)
+
+diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
+index 06ac6dda9b345..0d0ecc7df2600 100644
+--- a/Documentation/filesystems/proc.txt
++++ b/Documentation/filesystems/proc.txt
+@@ -425,6 +425,7 @@ SwapPss:               0 kB
+ KernelPageSize:        4 kB
+ MMUPageSize:           4 kB
+ Locked:                0 kB
++THPeligible:           0
+ VmFlags: rd ex mr mw me dw
+ 
+ the first of these lines shows the same information as is displayed for the
+@@ -462,6 +463,8 @@ replaced by copy-on-write) part of the underlying shmem object out on swap.
+ "SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
+ does not take into account swapped out page of underlying shmem objects.
+ "Locked" indicates whether the mapping is locked in memory or not.
++"THPeligible" indicates whether the mapping is eligible for THP pages - 1 if
++true, 0 otherwise.
+ 
+ "VmFlags" field deserves a separate description. This member represents the kernel
+ flags associated with the particular virtual memory area in two letter encoded
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index 71aba44c4fa6d..efa6273c00067 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -800,6 +800,8 @@ static int show_smap(struct seq_file *m, void *v)
+ 
+       __show_smap(m, &mss);
+ 
++      seq_printf(m, "THPeligible:    %d\n", transparent_hugepage_enabled(vma));
++
+       if (arch_pkeys_enabled())
+               seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
+       show_smap_vma_flags(m, vma);
+diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
+index 77227224ca880..e375f2249f520 100644
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -91,7 +91,11 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
+ 
+ extern unsigned long transparent_hugepage_flags;
+ 
+-static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
++/*
++ * to be used on vmas which are known to support THP.
++ * Use transparent_hugepage_enabled otherwise
++ */
++static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
+ {
+       if (vma->vm_flags & VM_NOHUGEPAGE)
+               return false;
+@@ -115,6 +119,8 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+       return false;
+ }
+ 
++bool transparent_hugepage_enabled(struct vm_area_struct *vma);
++
+ #define transparent_hugepage_use_zero_page()                          \
+       (transparent_hugepage_flags &                                   \
+        (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
+@@ -255,6 +261,11 @@ static inline bool thp_migration_supported(void)
+ 
+ #define hpage_nr_pages(x) 1
+ 
++static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
++{
++      return false;
++}
++
+ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+ {
+       return false;
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 09ce8528bbdd9..5a1771bd5d04d 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -63,6 +63,16 @@ static struct shrinker deferred_split_shrinker;
+ static atomic_t huge_zero_refcount;
+ struct page *huge_zero_page __read_mostly;
+ 
++bool transparent_hugepage_enabled(struct vm_area_struct *vma)
++{
++      if (vma_is_anonymous(vma))
++              return __transparent_hugepage_enabled(vma);
++      if (vma_is_shmem(vma) && shmem_huge_enabled(vma))
++              return __transparent_hugepage_enabled(vma);
++
++      return false;
++}
++
+ static struct page *get_huge_zero_page(void)
+ {
+       struct page *zero_page;
+@@ -1329,7 +1339,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
+       get_page(page);
+       spin_unlock(vmf->ptl);
+ alloc:
+-      if (transparent_hugepage_enabled(vma) &&
++      if (__transparent_hugepage_enabled(vma) &&
+           !transparent_hugepage_debug_cow()) {
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+diff --git a/mm/memory.c b/mm/memory.c
+index fb5655b518c99..bbf0cc4066c84 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4106,7 +4106,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+       vmf.pud = pud_alloc(mm, p4d, address);
+       if (!vmf.pud)
+               return VM_FAULT_OOM;
+-      if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
++      if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
+               ret = create_huge_pud(&vmf);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
+@@ -4132,7 +4132,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+       vmf.pmd = pmd_alloc(mm, vmf.pud, address);
+       if (!vmf.pmd)
+               return VM_FAULT_OOM;
+-      if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
++      if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
+               ret = create_huge_pmd(&vmf);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
+-- 
+2.20.1
+
diff --git a/queue-4.19/s390-smp-vdso-fix-asce-handling.patch b/queue-4.19/s390-smp-vdso-fix-asce-handling.patch

new file mode 100644 (file)

index 0000000..c5330cb
--- /dev/null
+++ b/queue-4.19/s390-smp-vdso-fix-asce-handling.patch
@@ -0,0 +1,84 @@
+From ea46298700357a99313a408683bbfbd51306c347 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Nov 2019 13:09:52 +0100
+Subject: s390/smp,vdso: fix ASCE handling
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+[ Upstream commit a2308c11ecbc3471ebb7435ee8075815b1502ef0 ]
+
+When a secondary CPU is brought up it must initialize its control
+registers. CPU A which triggers that a secondary CPU B is brought up
+stores its control register contents into the lowcore of new CPU B,
+which then loads these values on startup.
+
+This is problematic in various ways: the control register which
+contains the home space ASCE will correctly contain the kernel ASCE;
+however control registers for primary and secondary ASCEs are
+initialized with whatever values were present in CPU A.
+
+Typically:
+- the primary ASCE will contain the user process ASCE of the process
+  that triggered onlining of CPU B.
+- the secondary ASCE will contain the percpu VDSO ASCE of CPU A.
+
+Due to lazy ASCE handling we may also end up with other combinations.
+
+When CPU B then switches to a different process (!= idle) it will
+fixup the primary ASCE. However the problem is that the (wrong) ASCE
+from CPU A was loaded into control register 1: as soon as an ASCE is
+attached (aka loaded) a CPU is free to generate TLB entries using that
+address space.
+Even though it is very unlikely that CPU B will actually generate such
+entries, this could result in TLB entries of the address space of the
+process that ran on CPU A. These entries shouldn't exist at all and
+could cause problems later on.
+
+Furthermore the secondary ASCE of CPU B will not be updated correctly.
+This means that processes may see wrong results or even crash if they
+access VDSO data on CPU B. The correct VDSO ASCE will eventually be
+loaded on return to user space as soon as the kernel executed a call
+to strnlen_user or an atomic futex operation on CPU B.
+
+Fix both issues by initializing the to-be-loaded control register
+contents with the correct ASCEs and also enforce (re-)loading of the
+ASCEs upon first context switch and return to user space.
+
+Fixes: 0aaba41b58bc ("s390: remove all code using the access register mode")
+Cc: stable@vger.kernel.org # v4.15+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/smp.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index da02f4087d61f..df2413f26a8f2 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -261,9 +261,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+       lc->spinlock_index = 0;
+       lc->percpu_offset = __per_cpu_offset[cpu];
+       lc->kernel_asce = S390_lowcore.kernel_asce;
++      lc->user_asce = S390_lowcore.kernel_asce;
+       lc->machine_flags = S390_lowcore.machine_flags;
+       lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+       __ctl_store(lc->cregs_save_area, 0, 15);
++      lc->cregs_save_area[1] = lc->kernel_asce;
++      lc->cregs_save_area[7] = lc->vdso_asce;
+       save_access_regs((unsigned int *) lc->access_regs_save_area);
+       memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+              sizeof(lc->stfle_fac_list));
+@@ -810,6 +813,8 @@ static void smp_start_secondary(void *cpuvoid)
+       restore_access_regs(S390_lowcore.access_regs_save_area);
+       __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+       __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
++      set_cpu_flag(CIF_ASCE_PRIMARY);
++      set_cpu_flag(CIF_ASCE_SECONDARY);
+       cpu_init();
+       preempt_disable();
+       init_cpu_timer();
+-- 
+2.20.1
+
diff --git a/queue-4.19/series b/queue-4.19/series

index c727bb66364859fd2e153a07bdb8d2be3e15299f..414b1ca23b7f3a6a610c00cf3eab0bf35ef10676 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -134,3 +134,7 @@ mm-shmem.c-cast-the-type-of-unmap_start-to-u64.patch
  rtc-disable-uie-before-setting-time-and-enable-after.patch
  splice-only-read-in-as-much-information-as-there-is-pipe-buffer-space.patch
  ext4-fix-a-bug-in-ext4_wait_for_tail_page_commit.patch
+mfd-rk808-fix-rk818-id-template.patch
+mm-thp-proc-report-thp-eligibility-for-each-vma.patch
+mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch
+s390-smp-vdso-fix-asce-handling.patch
author	Sasha Levin <sashal@kernel.org>
	Mon, 16 Dec 2019 17:36:57 +0000 (12:36 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Mon, 16 Dec 2019 17:37:22 +0000 (12:37 -0500)
queue-4.19/mfd-rk808-fix-rk818-id-template.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mm-memory.c-fix-a-huge-pud-insertion-race-during-fau.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/mm-thp-proc-report-thp-eligibility-for-each-vma.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/s390-smp-vdso-fix-asce-handling.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history