From 49fced61e13a5d67f3ca36816cd8fd4445a22330 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Jul 2021 13:58:11 +0200 Subject: [PATCH] 5.12-stable patches added patches: mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch powerpc-mm-fix-lockup-on-kernel-exec-fault.patch powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch --- ...-mode-when-moving-page-table-entries.patch | 98 +++++++++++++++++++ ...ollision-with-clang-s-__lwsync-macro.patch | 57 +++++++++++ ...c-mm-fix-lockup-on-kernel-exec-fault.patch | 67 +++++++++++++ ...eference-to-tgid-during-window-close.patch | 58 +++++++++++ queue-5.12/series | 4 + 5 files changed, 284 insertions(+) create mode 100644 queue-5.12/mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch create mode 100644 queue-5.12/powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch create mode 100644 queue-5.12/powerpc-mm-fix-lockup-on-kernel-exec-fault.patch create mode 100644 queue-5.12/powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch diff --git a/queue-5.12/mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch b/queue-5.12/mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch new file mode 100644 index 00000000000..38576e6964c --- /dev/null +++ b/queue-5.12/mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch @@ -0,0 +1,98 @@ +From 97113eb39fa7972722ff490b947d8af023e1f6a2 Mon Sep 17 00:00:00 2001 +From: "Aneesh Kumar K.V" +Date: Wed, 7 Jul 2021 18:10:15 -0700 +Subject: mm/mremap: hold the rmap lock in write mode when moving page table entries. + +From: Aneesh Kumar K.V + +commit 97113eb39fa7972722ff490b947d8af023e1f6a2 upstream. + +To avoid a race between rmap walk and mremap, mremap does +take_rmap_locks(). 
The lock was taken to ensure that rmap walk doesn't miss +a page table entry due to PTE moves via move_pagetables(). The kernel +does further optimization of this lock such that if we are going to find +the newly added vma after the old vma, the rmap lock is not taken. This +is because rmap walk would find the vmas in the same order and if we don't +find the page table attached to older vma we would find it with the new +vma which we would iterate later. + +As explained in commit eb66ae030829 ("mremap: properly flush TLB before +releasing the page") mremap is special in that it doesn't take ownership +of the page. The optimized version for PUD/PMD aligned mremap also +doesn't hold the ptl lock. This can result in stale TLB entries as shown +below. + +This patch updates the rmap locking requirement in mremap to handle the race condition +explained below with optimized mremap:: + +Optimized PMD move + + CPU 1 CPU 2 CPU 3 + + mremap(old_addr, new_addr) page_shrinker/try_to_unmap_one + + mmap_write_lock_killable() + + addr = old_addr + lock(pte_ptl) + lock(pmd_ptl) + pmd = *old_pmd + pmd_clear(old_pmd) + flush_tlb_range(old_addr) + + *new_pmd = pmd + *new_addr = 10; and fills + TLB with new addr + and old pfn + + unlock(pmd_ptl) + ptep_clear_flush() + old pfn is free. + Stale TLB entry + +Optimized PUD move also suffers from a similar race. Both of the above race +conditions can be fixed if we force mremap path to take rmap lock. + +Link: https://lkml.kernel.org/r/20210616045239.370802-7-aneesh.kumar@linux.ibm.com +Fixes: 2c91bd4a4e2e ("mm: speed up mremap by 20x on large regions") +Fixes: c49dd3401802 ("mm: speedup mremap on 1GB or larger regions") +Link: https://lore.kernel.org/linux-mm/CAHk-=wgXVR04eBNtxQfevontWnP6FDm+oj5vauQXP3S-huwbPw@mail.gmail.com +Signed-off-by: Aneesh Kumar K.V +Acked-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Christophe Leroy +Cc: Joel Fernandes +Cc: Kalesh Singh +Cc: Kirill A. 
Shutemov +Cc: Michael Ellerman +Cc: Nicholas Piggin +Cc: Stephen Rothwell +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mremap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -439,7 +439,7 @@ unsigned long move_page_tables(struct vm + if (!new_pud) + break; + if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr, +- old_pud, new_pud, need_rmap_locks)) ++ old_pud, new_pud, true)) + continue; + } + +@@ -466,7 +466,7 @@ unsigned long move_page_tables(struct vm + * moving at the PMD level if possible. + */ + if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr, +- old_pmd, new_pmd, need_rmap_locks)) ++ old_pmd, new_pmd, true)) + continue; + } + diff --git a/queue-5.12/powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch b/queue-5.12/powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch new file mode 100644 index 00000000000..a468ddfe809 --- /dev/null +++ b/queue-5.12/powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch @@ -0,0 +1,57 @@ +From 015d98149b326e0f1f02e44413112ca8b4330543 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Fri, 28 May 2021 11:27:52 -0700 +Subject: powerpc/barrier: Avoid collision with clang's __lwsync macro + +From: Nathan Chancellor + +commit 015d98149b326e0f1f02e44413112ca8b4330543 upstream. + +A change in clang 13 results in the __lwsync macro being defined as +__builtin_ppc_lwsync, which emits 'lwsync' or 'msync' depending on what +the target supports. 
This breaks the build because of -Werror in +arch/powerpc, along with thousands of warnings: + + In file included from arch/powerpc/kernel/pmc.c:12: + In file included from include/linux/bug.h:5: + In file included from arch/powerpc/include/asm/bug.h:109: + In file included from include/asm-generic/bug.h:20: + In file included from include/linux/kernel.h:12: + In file included from include/linux/bitops.h:32: + In file included from arch/powerpc/include/asm/bitops.h:62: + arch/powerpc/include/asm/barrier.h:49:9: error: '__lwsync' macro redefined [-Werror,-Wmacro-redefined] + #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") + ^ + :308:9: note: previous definition is here + #define __lwsync __builtin_ppc_lwsync + ^ + 1 error generated. + +Undefine this macro so that the runtime patching introduced by +commit 2d1b2027626d ("powerpc: Fixup lwsync at runtime") continues to +work properly with clang and the build no longer breaks. + +Cc: stable@vger.kernel.org +Signed-off-by: Nathan Chancellor +Reviewed-by: Nick Desaulniers +Signed-off-by: Michael Ellerman +Link: https://github.com/ClangBuiltLinux/linux/issues/1386 +Link: https://github.com/llvm/llvm-project/commit/62b5df7fe2b3fda1772befeda15598fbef96a614 +Link: https://lore.kernel.org/r/20210528182752.1852002-1-nathan@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/barrier.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/powerpc/include/asm/barrier.h ++++ b/arch/powerpc/include/asm/barrier.h +@@ -46,6 +46,8 @@ + # define SMPWMB eieio + #endif + ++/* clang defines this macro for a builtin, which will not work with runtime patching */ ++#undef __lwsync + #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") + #define dma_rmb() __lwsync() + #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") diff --git a/queue-5.12/powerpc-mm-fix-lockup-on-kernel-exec-fault.patch 
b/queue-5.12/powerpc-mm-fix-lockup-on-kernel-exec-fault.patch new file mode 100644 index 00000000000..e29082ec103 --- /dev/null +++ b/queue-5.12/powerpc-mm-fix-lockup-on-kernel-exec-fault.patch @@ -0,0 +1,67 @@ +From cd5d5e602f502895e47e18cd46804d6d7014e65c Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Thu, 1 Jul 2021 11:17:08 +0000 +Subject: powerpc/mm: Fix lockup on kernel exec fault + +From: Christophe Leroy + +commit cd5d5e602f502895e47e18cd46804d6d7014e65c upstream. + +The powerpc kernel is not prepared to handle exec faults from kernel. +Especially, the function is_exec_fault() will return 'false' when an +exec fault is taken by kernel, because the check is based on reading +current->thread.regs->trap which contains the trap from user. + +For instance, when provoking a LKDTM EXEC_USERSPACE test, +current->thread.regs->trap is set to SYSCALL trap (0xc00), and +the fault taken by the kernel is not seen as an exec fault by +set_access_flags_filter(). + +Commit d7df2443cd5f ("powerpc/mm: Fix spurious segfaults on radix +with autonuma") made it clear and handled it properly. But later on +commit d3ca587404b3 ("powerpc/mm: Fix reporting of kernel execute +faults") removed that handling, introducing test based on error_code. +And here is the problem, because on the 603 all upper bits of SRR1 +get cleared when the TLB instruction miss handler bails out to ISI. + +Until commit cbd7e6ca0210 ("powerpc/fault: Avoid heavy +search_exception_tables() verification"), an exec fault from kernel +at a userspace address was indirectly caught by the lack of entry for +that address in the exception tables. But after that commit the +kernel mainly relies on KUAP or on core mm handling to catch wrong +user accesses. Here the access is not wrong, so mm handles it. +It is a minor fault because PAGE_EXEC is not set, +set_access_flags_filter() should set PAGE_EXEC and voila. 
+But as is_exec_fault() returns false as explained in the beginning, +set_access_flags_filter() bails out without setting PAGE_EXEC flag, +which leads to a forever minor exec fault. + +As the kernel is not prepared to handle such exec faults, the thing to +do is to fire in bad_kernel_fault() for any exec fault taken by the +kernel, as it was prior to commit d3ca587404b3. + +Fixes: d3ca587404b3 ("powerpc/mm: Fix reporting of kernel execute faults") +Cc: stable@vger.kernel.org # v4.14+ +Signed-off-by: Christophe Leroy +Acked-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/024bb05105050f704743a0083fe3548702be5706.1625138205.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/fault.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -199,9 +199,7 @@ static bool bad_kernel_fault(struct pt_r + { + int is_exec = TRAP(regs) == 0x400; + +- /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ +- if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | +- DSISR_PROTFAULT))) { ++ if (is_exec) { + pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", + address >= TASK_SIZE ? "exec-protected" : "user", + address, diff --git a/queue-5.12/powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch b/queue-5.12/powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch new file mode 100644 index 00000000000..9058630e597 --- /dev/null +++ b/queue-5.12/powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch @@ -0,0 +1,58 @@ +From 91cdbb955aa94ee0841af4685be40937345d29b8 Mon Sep 17 00:00:00 2001 +From: Haren Myneni +Date: Thu, 17 Jun 2021 13:29:05 -0700 +Subject: powerpc/powernv/vas: Release reference to tgid during window close + +From: Haren Myneni + +commit 91cdbb955aa94ee0841af4685be40937345d29b8 upstream. 
+ +The kernel handles the NX fault by updating CSB or sending a +signal to the process. In multithread applications, children can +open VAS windows and can exit without closing them. But the +parent can continue to send NX requests with these windows. To +prevent pid reuse, references are taken on pid and tgid +when the window is opened and released during window close. + +The current code is not releasing the tgid reference which can +cause a pid leak and this patch fixes the issue. + +Fixes: db1c08a740635 ("powerpc/vas: Take reference to PID and mm for user space windows") +Cc: stable@vger.kernel.org # 5.8+ +Reported-by: Nicholas Piggin +Signed-off-by: Haren Myneni +Reviewed-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/6020fc4d444864fe20f7dcdc5edfe53e67480a1c.camel@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/platforms/powernv/vas-window.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/platforms/powernv/vas-window.c ++++ b/arch/powerpc/platforms/powernv/vas-window.c +@@ -1093,9 +1093,9 @@ struct vas_window *vas_tx_win_open(int v + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open +- * window and can exit without closing it. Expects parent +- * thread to use and close the window. So do not need +- * to take pid reference for parent thread. ++ * window and can exit without closing it. so takes tgid ++ * reference until window closed to make sure tgid is not ++ * reused. + */ + txwin->tgid = find_get_pid(task_tgid_vnr(current)); + /* +@@ -1339,8 +1339,9 @@ int vas_win_close(struct vas_window *win + /* if send window, drop reference to matching receive window */ + if (window->tx_win) { + if (window->user_win) { +- /* Drop references to pid and mm */ ++ /* Drop references to pid. 
tgid and mm */ + put_pid(window->pid); ++ put_pid(window->tgid); + if (window->mm) { + mm_context_remove_vas_window(window->mm); + mmdrop(window->mm); diff --git a/queue-5.12/series b/queue-5.12/series index d0c84421e5c..da56d9c4e2f 100644 --- a/queue-5.12/series +++ b/queue-5.12/series @@ -154,3 +154,7 @@ mips-ci20-reduce-clocksource-to-750-khz.patch pci-tegra194-fix-host-initialization-during-resume.patch selftests-resctrl-fix-incorrect-parsing-of-option-t.patch mips-mt-extensions-are-not-available-on-mips32r1.patch +mm-mremap-hold-the-rmap-lock-in-write-mode-when-moving-page-table-entries.patch +powerpc-mm-fix-lockup-on-kernel-exec-fault.patch +powerpc-barrier-avoid-collision-with-clang-s-__lwsync-macro.patch +powerpc-powernv-vas-release-reference-to-tgid-during-window-close.patch -- 2.47.3