From: Greg Kroah-Hartman Date: Wed, 3 Jul 2024 08:43:40 +0000 (+0200) Subject: remove some mm patches from 5.15 X-Git-Tag: v4.19.317~34 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=db2a3a20f5c65210328b70e5c851433135bfccc6;p=thirdparty%2Fkernel%2Fstable-queue.git remove some mm patches from 5.15 --- diff --git a/queue-5.15/mm-avoid-unnecessary-flush-on-change_huge_pmd.patch b/queue-5.15/mm-avoid-unnecessary-flush-on-change_huge_pmd.patch deleted file mode 100644 index dfb5deeeb96..00000000000 --- a/queue-5.15/mm-avoid-unnecessary-flush-on-change_huge_pmd.patch +++ /dev/null @@ -1,154 +0,0 @@ -From 6f73cf81e6438c334ae03321c915e9d376501fd8 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 9 May 2022 18:20:50 -0700 -Subject: mm: avoid unnecessary flush on change_huge_pmd() - -From: Nadav Amit - -[ Upstream commit 4f83145721f362c2f4d312edc4755269a2069488 ] - -Calls to change_protection_range() on THP can trigger, at least on x86, -two TLB flushes for one page: one immediately, when pmdp_invalidate() is -called by change_huge_pmd(), and then another one later (that can be -batched) when change_protection_range() finishes. - -The first TLB flush is only necessary to prevent the dirty bit (and with a -lesser importance the access bit) from changing while the PTE is modified. -However, this is not necessary as the x86 CPUs set the dirty-bit -atomically with an additional check that the PTE is (still) present. One -caveat is Intel's Knights Landing that has a bug and does not do so. - -Leverage this behavior to eliminate the unnecessary TLB flush in -change_huge_pmd(). Introduce a new arch specific pmdp_invalidate_ad() -that only invalidates the access and dirty bit from further changes. - -Link: https://lkml.kernel.org/r/20220401180821.1986781-4-namit@vmware.com -Signed-off-by: Nadav Amit -Cc: Andrea Arcangeli -Cc: Andrew Cooper -Cc: Andy Lutomirski -Cc: Dave Hansen -Cc: Peter Xu -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Cc: Will Deacon -Cc: Yu Zhao -Cc: Nick Piggin -Signed-off-by: Andrew Morton -Stable-dep-of: 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and GUP-fast") -Signed-off-by: Sasha Levin ---- - arch/x86/include/asm/pgtable.h | 5 +++++ - arch/x86/mm/pgtable.c | 10 ++++++++++ - include/linux/pgtable.h | 20 ++++++++++++++++++++ - mm/huge_memory.c | 4 ++-- - mm/pgtable-generic.c | 8 ++++++++ - 5 files changed, 45 insertions(+), 2 deletions(-) - -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 448cd01eb3ecb..c04be133a6cd7 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -1146,6 +1146,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, - } - } - #endif -+ -+#define __HAVE_ARCH_PMDP_INVALIDATE_AD -+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, -+ unsigned long address, pmd_t *pmdp); -+ - /* - * Page table pages are page-aligned. The lower half of the top - * level is used for userspace and the top half for the kernel. -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index 3481b35cb4ec7..f16059e9a85e7 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -608,6 +608,16 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, - - return young; - } -+ -+pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, -+ pmd_t *pmdp) -+{ -+ /* -+ * No flush is necessary. Once an invalid PTE is established, the PTE's -+ * access and dirty bits cannot be updated. 
-+ */ -+ return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); -+} - #endif - - /** -diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h -index d468efcf48f45..952969aa19ec1 100644 ---- a/include/linux/pgtable.h -+++ b/include/linux/pgtable.h -@@ -562,6 +562,26 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); - #endif - -+#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD -+ -+/* -+ * pmdp_invalidate_ad() invalidates the PMD while changing a transparent -+ * hugepage mapping in the page tables. This function is similar to -+ * pmdp_invalidate(), but should only be used if the access and dirty bits would -+ * not be cleared by the software in the new PMD value. The function ensures -+ * that hardware changes of the access and dirty bits updates would not be lost. -+ * -+ * Doing so can allow in certain architectures to avoid a TLB flush in most -+ * cases. Yet, another TLB flush might be necessary later if the PMD update -+ * itself requires such flush (e.g., if protection was set to be stricter). Yet, -+ * even when a TLB flush is needed because of the update, the caller may be able -+ * to batch these TLB flushing operations, so fewer TLB flush operations are -+ * needed. -+ */ -+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, -+ unsigned long address, pmd_t *pmdp); -+#endif -+ - #ifndef __HAVE_ARCH_PTE_SAME - static inline int pte_same(pte_t pte_a, pte_t pte_b) - { -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 8ab6316d85391..265ef8d1393c5 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -1798,10 +1798,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - * The race makes MADV_DONTNEED miss the huge pmd and don't clear it - * which may break userspace. - * -- * pmdp_invalidate() is required to make sure we don't miss -+ * pmdp_invalidate_ad() is required to make sure we don't miss - * dirty/young flags set by hardware. - */ -- oldpmd = pmdp_invalidate(vma, addr, pmd); -+ oldpmd = pmdp_invalidate_ad(vma, addr, pmd); - - entry = pmd_modify(oldpmd, newprot); - if (preserve_write) -diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c -index 4e640baf97948..b0ce6c7391bf4 100644 ---- a/mm/pgtable-generic.c -+++ b/mm/pgtable-generic.c -@@ -200,6 +200,14 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - } - #endif - -+#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD -+pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, -+ pmd_t *pmdp) -+{ -+ return pmdp_invalidate(vma, address, pmdp); -+} -+#endif -+ - #ifndef pmdp_collapse_flush - pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) --- -2.43.0 - diff --git a/queue-5.15/mm-fix-race-between-__split_huge_pmd_locked-and-gup-.patch b/queue-5.15/mm-fix-race-between-__split_huge_pmd_locked-and-gup-.patch deleted file mode 100644 index 9523fa6f08e..00000000000 --- a/queue-5.15/mm-fix-race-between-__split_huge_pmd_locked-and-gup-.patch +++ /dev/null @@ -1,225 +0,0 @@ -From dca09ad288fc1dd6652c82f0aa90f993a357f4f8 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 1 May 2024 15:33:10 +0100 -Subject: mm: fix race between __split_huge_pmd_locked() and GUP-fast - -From: Ryan Roberts - -[ Upstream commit 3a5a8d343e1cf96eb9971b17cbd4b832ab19b8e7 ] - -__split_huge_pmd_locked() can be called for a present THP, devmap or -(non-present) migration entry. 
It calls pmdp_invalidate() unconditionally -on the pmdp and only determines if it is present or not based on the -returned old pmd. This is a problem for the migration entry case because -pmd_mkinvalid(), called by pmdp_invalidate() must only be called for a -present pmd. - -On arm64 at least, pmd_mkinvalid() will mark the pmd such that any future -call to pmd_present() will return true. And therefore any lockless -pgtable walker could see the migration entry pmd in this state and start -interpretting the fields as if it were present, leading to BadThings (TM). -GUP-fast appears to be one such lockless pgtable walker. - -x86 does not suffer the above problem, but instead pmd_mkinvalid() will -corrupt the offset field of the swap entry within the swap pte. See link -below for discussion of that problem. - -Fix all of this by only calling pmdp_invalidate() for a present pmd. And -for good measure let's add a warning to all implementations of -pmdp_invalidate[_ad](). I've manually reviewed all other -pmdp_invalidate[_ad]() call sites and believe all others to be conformant. - -This is a theoretical bug found during code review. I don't have any test -case to trigger it in practice. - -Link: https://lkml.kernel.org/r/20240501143310.1381675-1-ryan.roberts@arm.com -Link: https://lore.kernel.org/all/0dd7827a-6334-439a-8fd0-43c98e6af22b@arm.com/ -Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") -Signed-off-by: Ryan Roberts -Reviewed-by: Zi Yan -Reviewed-by: Anshuman Khandual -Acked-by: David Hildenbrand -Cc: Andreas Larsson -Cc: Andy Lutomirski -Cc: Aneesh Kumar K.V -Cc: Borislav Petkov (AMD) -Cc: Catalin Marinas -Cc: Christian Borntraeger -Cc: Christophe Leroy -Cc: Dave Hansen -Cc: "David S. Miller" -Cc: Ingo Molnar -Cc: Jonathan Corbet -Cc: Mark Rutland -Cc: Naveen N. 
Rao -Cc: Nicholas Piggin -Cc: Peter Zijlstra -Cc: Sven Schnelle -Cc: Thomas Gleixner -Cc: Will Deacon -Cc: -Signed-off-by: Andrew Morton -Signed-off-by: Sasha Levin ---- - Documentation/vm/arch_pgtable_helpers.rst | 6 ++- - arch/powerpc/mm/book3s64/pgtable.c | 1 - arch/s390/include/asm/pgtable.h | 4 +- - arch/sparc/mm/tlb.c | 1 - arch/x86/mm/pgtable.c | 2 + - mm/huge_memory.c | 49 +++++++++++++++--------------- - mm/pgtable-generic.c | 2 + - 7 files changed, 39 insertions(+), 26 deletions(-) - ---- a/Documentation/vm/arch_pgtable_helpers.rst -+++ b/Documentation/vm/arch_pgtable_helpers.rst -@@ -134,7 +134,8 @@ PMD Page Table Helpers - +---------------------------+--------------------------------------------------+ - | pmd_swp_clear_soft_dirty | Clears a soft dirty swapped PMD | - +---------------------------+--------------------------------------------------+ --| pmd_mkinvalid | Invalidates a mapped PMD [1] | -+| pmd_mkinvalid | Invalidates a present PMD; do not call for | -+| | non-present PMD [1] | - +---------------------------+--------------------------------------------------+ - | pmd_set_huge | Creates a PMD huge mapping | - +---------------------------+--------------------------------------------------+ -@@ -190,7 +191,8 @@ PUD Page Table Helpers - +---------------------------+--------------------------------------------------+ - | pud_mkdevmap | Creates a ZONE_DEVICE mapped PUD | - +---------------------------+--------------------------------------------------+ --| pud_mkinvalid | Invalidates a mapped PUD [1] | -+| pud_mkinvalid | Invalidates a present PUD; do not call for | -+| | non-present PUD [1] | - +---------------------------+--------------------------------------------------+ - | pud_set_huge | Creates a PUD huge mapping | - +---------------------------+--------------------------------------------------+ ---- a/arch/powerpc/mm/book3s64/pgtable.c -+++ b/arch/powerpc/mm/book3s64/pgtable.c -@@ -115,6 +115,7 @@ pmd_t pmdp_invalidate(struct vm_area_str - { - unsigned long old_pmd; - -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); - old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); - flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - return __pmd(old_pmd); ---- a/arch/s390/include/asm/pgtable.h -+++ b/arch/s390/include/asm/pgtable.h -@@ -1625,8 +1625,10 @@ static inline pmd_t pmdp_huge_clear_flus - static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp) - { -- pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); -+ pmd_t pmd; - -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); -+ pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); - return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); - } - ---- a/arch/sparc/mm/tlb.c -+++ b/arch/sparc/mm/tlb.c -@@ -245,6 +245,7 @@ pmd_t pmdp_invalidate(struct vm_area_str - { - pmd_t old, entry; - -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); - entry = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID); - old = pmdp_establish(vma, address, pmdp, entry); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -612,6 +612,8 @@ int pmdp_clear_flush_young(struct vm_are - pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) - { -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); -+ - /* - * No flush is necessary. Once an invalid PTE is established, the PTE's - * access and dirty bits cannot be updated. 
---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -2024,32 +2024,11 @@ static void __split_huge_pmd_locked(stru - return __split_huge_zero_page_pmd(vma, haddr, pmd); - } - -- /* -- * Up to this point the pmd is present and huge and userland has the -- * whole access to the hugepage during the split (which happens in -- * place). If we overwrite the pmd with the not-huge version pointing -- * to the pte here (which of course we could if all CPUs were bug -- * free), userland could trigger a small page size TLB miss on the -- * small sized TLB while the hugepage TLB entry is still established in -- * the huge TLB. Some CPU doesn't like that. -- * See http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum -- * 383 on page 105. Intel should be safe but is also warns that it's -- * only safe if the permission and cache attributes of the two entries -- * loaded in the two TLB is identical (which should be the case here). -- * But it is generally safer to never allow small and huge TLB entries -- * for the same virtual address to be loaded simultaneously. So instead -- * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the -- * current pmd notpresent (atomically because here the pmd_trans_huge -- * must remain set at all times on the pmd until the split is complete -- * for this pmd), then we flush the SMP TLB and finally we write the -- * non-huge version of the pmd entry with pmd_populate. -- */ -- old_pmd = pmdp_invalidate(vma, haddr, pmd); -- -- pmd_migration = is_pmd_migration_entry(old_pmd); -+ pmd_migration = is_pmd_migration_entry(*pmd); - if (unlikely(pmd_migration)) { - swp_entry_t entry; - -+ old_pmd = *pmd; - entry = pmd_to_swp_entry(old_pmd); - page = pfn_swap_entry_to_page(entry); - write = is_writable_migration_entry(entry); -@@ -2057,6 +2036,30 @@ static void __split_huge_pmd_locked(stru - soft_dirty = pmd_swp_soft_dirty(old_pmd); - uffd_wp = pmd_swp_uffd_wp(old_pmd); - } else { -+ /* -+ * Up to this point the pmd is present and huge and userland has -+ * the whole access to the hugepage during the split (which -+ * happens in place). If we overwrite the pmd with the not-huge -+ * version pointing to the pte here (which of course we could if -+ * all CPUs were bug free), userland could trigger a small page -+ * size TLB miss on the small sized TLB while the hugepage TLB -+ * entry is still established in the huge TLB. Some CPU doesn't -+ * like that. See -+ * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum -+ * 383 on page 105. Intel should be safe but is also warns that -+ * it's only safe if the permission and cache attributes of the -+ * two entries loaded in the two TLB is identical (which should -+ * be the case here). But it is generally safer to never allow -+ * small and huge TLB entries for the same virtual address to be -+ * loaded simultaneously. So instead of doing "pmd_populate(); -+ * flush_pmd_tlb_range();" we first mark the current pmd -+ * notpresent (atomically because here the pmd_trans_huge must -+ * remain set at all times on the pmd until the split is -+ * complete for this pmd), then we flush the SMP TLB and finally -+ * we write the non-huge version of the pmd entry with -+ * pmd_populate. 
-+ */ -+ old_pmd = pmdp_invalidate(vma, haddr, pmd); - page = pmd_page(old_pmd); - if (pmd_dirty(old_pmd)) - SetPageDirty(page); ---- a/mm/pgtable-generic.c -+++ b/mm/pgtable-generic.c -@@ -195,6 +195,7 @@ pmd_t pmdp_invalidate(struct vm_area_str - pmd_t *pmdp) - { - pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); - flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); - return old; - } -@@ -204,6 +205,7 @@ pmd_t pmdp_invalidate(struct vm_area_str - pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) - { -+ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); - return pmdp_invalidate(vma, address, pmdp); - } - #endif diff --git a/queue-5.15/mm-mprotect-do-not-flush-when-not-required-architect.patch b/queue-5.15/mm-mprotect-do-not-flush-when-not-required-architect.patch deleted file mode 100644 index dc8b5c585ae..00000000000 --- a/queue-5.15/mm-mprotect-do-not-flush-when-not-required-architect.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 2031c117202f5d2e11b95194e0012d36553e6e78 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 9 May 2022 18:20:50 -0700 -Subject: mm/mprotect: do not flush when not required architecturally - -From: Nadav Amit - -[ Upstream commit c9fe66560bf2dc7d109754414e309888cb8c9ba9 ] - -Currently, using mprotect() to unprotect a memory region or uffd to -unprotect a memory region causes a TLB flush. However, in such cases the -PTE is often not modified (i.e., remain RO) and therefore not TLB flush is -needed. - -Add an arch-specific pte_needs_flush() which tells whether a TLB flush is -needed based on the old PTE and the new one. Implement an x86 -pte_needs_flush(). - -Always flush the TLB when it is architecturally needed even when skipping -a TLB flush might only result in a spurious page-faults by skipping the -flush. - -Even with such conservative manner, we can in the future further refine -the checks to test whether a PTE is present by only considering the -architectural _PAGE_PRESENT flag instead of {pte|pmd}_preesnt(). For not -be careful and use the latter. 
- -Link: https://lkml.kernel.org/r/20220401180821.1986781-3-namit@vmware.com -Signed-off-by: Nadav Amit -Cc: Andrea Arcangeli -Cc: Andy Lutomirski -Cc: Dave Hansen -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Cc: Will Deacon -Cc: Yu Zhao -Cc: Nick Piggin -Cc: Andrew Cooper -Cc: Peter Xu -Signed-off-by: Andrew Morton -Stable-dep-of: 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and GUP-fast") -Signed-off-by: Sasha Levin ---- - arch/x86/include/asm/pgtable_types.h | 2 + - arch/x86/include/asm/tlbflush.h | 97 ++++++++++++++++++++++++++++ - include/asm-generic/tlb.h | 14 ++++ - mm/huge_memory.c | 9 +-- - mm/mprotect.c | 3 +- - 5 files changed, 120 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index 28e59576c75be..de9e3c635618e 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -110,9 +110,11 @@ - #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) - #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) -+#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) - #else - #define _PAGE_NX (_AT(pteval_t, 0)) - #define _PAGE_DEVMAP (_AT(pteval_t, 0)) -+#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) - #endif - - #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index b587a9ee9cb25..8be1ff9081728 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -259,6 +259,103 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, - - extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); - -+static inline bool pte_flags_need_flush(unsigned long oldflags, -+ unsigned long newflags, -+ bool ignore_access) -+{ -+ /* -+ * Flags that require a flush when cleared but not when they are set. -+ * Only include flags that would not trigger spurious page-faults. -+ * Non-present entries are not cached. Hardware would set the -+ * dirty/access bit if needed without a fault. -+ */ -+ const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | -+ _PAGE_ACCESSED; -+ const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | -+ _PAGE_SOFTW3 | _PAGE_SOFTW4; -+ const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | -+ _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | -+ _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | -+ _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX; -+ unsigned long diff = oldflags ^ newflags; -+ -+ BUILD_BUG_ON(flush_on_clear & software_flags); -+ BUILD_BUG_ON(flush_on_clear & flush_on_change); -+ BUILD_BUG_ON(flush_on_change & software_flags); -+ -+ /* Ignore software flags */ -+ diff &= ~software_flags; -+ -+ if (ignore_access) -+ diff &= ~_PAGE_ACCESSED; -+ -+ /* -+ * Did any of the 'flush_on_clear' flags was clleared set from between -+ * 'oldflags' and 'newflags'? -+ */ -+ if (diff & oldflags & flush_on_clear) -+ return true; -+ -+ /* Flush on modified flags. */ -+ if (diff & flush_on_change) -+ return true; -+ -+ /* Ensure there are no flags that were left behind */ -+ if (IS_ENABLED(CONFIG_DEBUG_VM) && -+ (diff & ~(flush_on_clear | software_flags | flush_on_change))) { -+ VM_WARN_ON_ONCE(1); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * pte_needs_flush() checks whether permissions were demoted and require a -+ * flush. It should only be used for userspace PTEs. 
-+ */ -+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) -+{ -+ /* !PRESENT -> * ; no need for flush */ -+ if (!(pte_flags(oldpte) & _PAGE_PRESENT)) -+ return false; -+ -+ /* PFN changed ; needs flush */ -+ if (pte_pfn(oldpte) != pte_pfn(newpte)) -+ return true; -+ -+ /* -+ * check PTE flags; ignore access-bit; see comment in -+ * ptep_clear_flush_young(). -+ */ -+ return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), -+ true); -+} -+#define pte_needs_flush pte_needs_flush -+ -+/* -+ * huge_pmd_needs_flush() checks whether permissions were demoted and require a -+ * flush. It should only be used for userspace huge PMDs. -+ */ -+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) -+{ -+ /* !PRESENT -> * ; no need for flush */ -+ if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) -+ return false; -+ -+ /* PFN changed ; needs flush */ -+ if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) -+ return true; -+ -+ /* -+ * check PMD flags; do not ignore access-bit; see -+ * pmdp_clear_flush_young(). -+ */ -+ return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), -+ false); -+} -+#define huge_pmd_needs_flush huge_pmd_needs_flush -+ - #endif /* !MODULE */ - - #endif /* _ASM_X86_TLBFLUSH_H */ -diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h -index c99710b3027a0..7afde1eff2398 100644 ---- a/include/asm-generic/tlb.h -+++ b/include/asm-generic/tlb.h -@@ -662,6 +662,20 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, - } while (0) - #endif - -+#ifndef pte_needs_flush -+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) -+{ -+ return true; -+} -+#endif -+ -+#ifndef huge_pmd_needs_flush -+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) -+{ -+ return true; -+} -+#endif -+ - #endif /* CONFIG_MMU */ - - #endif /* _ASM_GENERIC__TLB_H */ -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 661dd29642ebc..8ab6316d85391 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -1726,7 +1726,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - { - struct mm_struct *mm = vma->vm_mm; - spinlock_t *ptl; -- pmd_t entry; -+ pmd_t oldpmd, entry; - bool preserve_write; - int ret; - bool prot_numa = cp_flags & MM_CP_PROT_NUMA; -@@ -1801,9 +1801,9 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - * pmdp_invalidate() is required to make sure we don't miss - * dirty/young flags set by hardware. 
- */ -- entry = pmdp_invalidate(vma, addr, pmd); -+ oldpmd = pmdp_invalidate(vma, addr, pmd); - -- entry = pmd_modify(entry, newprot); -+ entry = pmd_modify(oldpmd, newprot); - if (preserve_write) - entry = pmd_mk_savedwrite(entry); - if (uffd_wp) { -@@ -1820,7 +1820,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - -- tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); -+ if (huge_pmd_needs_flush(oldpmd, entry)) -+ tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); - - BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); - unlock: -diff --git a/mm/mprotect.c b/mm/mprotect.c -index fe1196be9ca28..09c5c448b9e7c 100644 ---- a/mm/mprotect.c -+++ b/mm/mprotect.c -@@ -141,7 +141,8 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, - ptent = pte_mkwrite(ptent); - } - ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); -- tlb_flush_pte_range(tlb, addr, PAGE_SIZE); -+ if (pte_needs_flush(oldpte, ptent)) -+ tlb_flush_pte_range(tlb, addr, PAGE_SIZE); - pages++; - } else if (is_swap_pte(oldpte)) { - swp_entry_t entry = pte_to_swp_entry(oldpte); --- -2.43.0 - diff --git a/queue-5.15/mm-mprotect-use-mmu_gather.patch b/queue-5.15/mm-mprotect-use-mmu_gather.patch deleted file mode 100644 index fe42d2d0f2b..00000000000 --- a/queue-5.15/mm-mprotect-use-mmu_gather.patch +++ /dev/null @@ -1,537 +0,0 @@ -From 61cba6a6dc1cc6682b9aeff3aff3114f0ff30462 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 9 May 2022 18:20:50 -0700 -Subject: mm/mprotect: use mmu_gather - -From: Nadav Amit - -[ Upstream commit 4a18419f71cdf9155d2d2a6c79546f720978b990 ] - -Patch series "mm/mprotect: avoid unnecessary TLB flushes", v6. - -This patchset is intended to remove unnecessary TLB flushes during -mprotect() syscalls. Once this patch-set make it through, similar and -further optimizations for MADV_COLD and userfaultfd would be possible. - -Basically, there are 3 optimizations in this patch-set: - -1. Use TLB batching infrastructure to batch flushes across VMAs and do - better/fewer flushes. This would also be handy for later userfaultfd - enhancements. - -2. Avoid unnecessary TLB flushes. This optimization is the one that - provides most of the performance benefits. Unlike previous versions, - we now only avoid flushes that would not result in spurious - page-faults. - -3. Avoiding TLB flushes on change_huge_pmd() that are only needed to - prevent the A/D bits from changing. - -Andrew asked for some benchmark numbers. I do not have an easy -determinate macrobenchmark in which it is easy to show benefit. I -therefore ran a microbenchmark: a loop that does the following on -anonymous memory, just as a sanity check to see that time is saved by -avoiding TLB flushes. The loop goes: - - mprotect(p, PAGE_SIZE, PROT_READ) - mprotect(p, PAGE_SIZE, PROT_READ|PROT_WRITE) - *p = 0; // make the page writable - -The test was run in KVM guest with 1 or 2 threads (the second thread was -busy-looping). I measured the time (cycles) of each operation: - - 1 thread 2 threads - mmots +patch mmots +patch -PROT_READ 3494 2725 (-22%) 8630 7788 (-10%) -PROT_READ|WRITE 3952 2724 (-31%) 9075 2865 (-68%) - -[ mmots = v5.17-rc6-mmots-2022-03-06-20-38 ] - -The exact numbers are really meaningless, but the benefit is clear. There -are 2 interesting results though. - -(1) PROT_READ is cheaper, while one can expect it not to be affected. 
-This is presumably due to TLB miss that is saved - -(2) Without memory access (*p = 0), the speedup of the patch is even -greater. In that scenario mprotect(PROT_READ) also avoids the TLB flush. -As a result both operations on the patched kernel take roughly ~1500 -cycles (with either 1 or 2 threads), whereas on mmotm their cost is as -high as presented in the table. - -This patch (of 3): - -change_pXX_range() currently does not use mmu_gather, but instead -implements its own deferred TLB flushes scheme. This both complicates the -code, as developers need to be aware of different invalidation schemes, -and prevents opportunities to avoid TLB flushes or perform them in finer -granularity. - -The use of mmu_gather for modified PTEs has benefits in various scenarios -even if pages are not released. For instance, if only a single page needs -to be flushed out of a range of many pages, only that page would be -flushed. If a THP page is flushed, on x86 a single TLB invlpg instruction -can be used instead of 512 instructions (or a full TLB flush, which would -Linux would actually use by default). mprotect() over multiple VMAs -requires a single flush. - -Use mmu_gather in change_pXX_range(). As the pages are not released, only -record the flushed range using tlb_flush_pXX_range(). - -Handle THP similarly and get rid of flush_cache_range() which becomes -redundant since tlb_start_vma() calls it when needed. - -Link: https://lkml.kernel.org/r/20220401180821.1986781-1-namit@vmware.com -Link: https://lkml.kernel.org/r/20220401180821.1986781-2-namit@vmware.com -Signed-off-by: Nadav Amit -Acked-by: Peter Zijlstra (Intel) -Cc: Andrea Arcangeli -Cc: Andrew Cooper -Cc: Andy Lutomirski -Cc: Dave Hansen -Cc: Peter Xu -Cc: Thomas Gleixner -Cc: Will Deacon -Cc: Yu Zhao -Cc: Nick Piggin -Signed-off-by: Andrew Morton -Stable-dep-of: 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and GUP-fast") -Signed-off-by: Sasha Levin ---- - fs/exec.c | 6 ++- - include/linux/huge_mm.h | 5 ++- - include/linux/mm.h | 5 ++- - mm/huge_memory.c | 10 ++++- - mm/mempolicy.c | 9 +++- - mm/mprotect.c | 92 ++++++++++++++++++++++------------------- - mm/userfaultfd.c | 6 ++- - 7 files changed, 82 insertions(+), 51 deletions(-) - -diff --git a/fs/exec.c b/fs/exec.c -index 03516b704d8a4..3cf38e5e8b733 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -758,6 +758,7 @@ int setup_arg_pages(struct linux_binprm *bprm, - unsigned long stack_size; - unsigned long stack_expand; - unsigned long rlim_stack; -+ struct mmu_gather tlb; - - #ifdef CONFIG_STACK_GROWSUP - /* Limit stack size */ -@@ -812,8 +813,11 @@ int setup_arg_pages(struct linux_binprm *bprm, - vm_flags |= mm->def_flags; - vm_flags |= VM_STACK_INCOMPLETE_SETUP; - -- ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, -+ tlb_gather_mmu(&tlb, mm); -+ ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end, - vm_flags); -+ tlb_finish_mmu(&tlb); -+ - if (ret) - goto out_unlock; - BUG_ON(prev != vma); -diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h -index f123e15d966e8..6cb3e6fe11e7f 100644 ---- a/include/linux/huge_mm.h -+++ b/include/linux/huge_mm.h -@@ -36,8 +36,9 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, - unsigned long addr); - bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); --int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, -- pgprot_t newprot, unsigned long cp_flags); -+int 
change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, -+ pmd_t *pmd, unsigned long addr, pgprot_t newprot, -+ unsigned long cp_flags); - vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, - pgprot_t pgprot, bool write); - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 5692055f202cb..e05c91ea5735d 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -1899,10 +1899,11 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, - #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ - MM_CP_UFFD_WP_RESOLVE) - --extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, -+extern unsigned long change_protection(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgprot_t newprot, - unsigned long cp_flags); --extern int mprotect_fixup(struct vm_area_struct *vma, -+extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, - struct vm_area_struct **pprev, unsigned long start, - unsigned long end, unsigned long newflags); - -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 98ff57c8eda69..661dd29642ebc 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -1720,8 +1720,9 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, - * or if prot_numa but THP migration is not supported - * - HPAGE_PMD_NR if protections changed and TLB flush necessary - */ --int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, -- unsigned long addr, pgprot_t newprot, unsigned long cp_flags) -+int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, -+ pmd_t *pmd, unsigned long addr, pgprot_t newprot, -+ unsigned long cp_flags) - { - struct mm_struct *mm = vma->vm_mm; - spinlock_t *ptl; -@@ -1732,6 +1733,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - bool uffd_wp = cp_flags & MM_CP_UFFD_WP; - bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; - -+ tlb_change_page_size(tlb, HPAGE_PMD_SIZE); -+ - if (prot_numa && !thp_migration_supported()) - return 1; - -@@ -1816,6 +1819,9 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - } - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); -+ -+ tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE); -+ - BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); - unlock: - spin_unlock(ptl); -diff --git a/mm/mempolicy.c b/mm/mempolicy.c -index 818753635e427..c05e979fd8695 100644 ---- a/mm/mempolicy.c -+++ b/mm/mempolicy.c -@@ -104,6 +104,7 @@ - #include - - #include -+#include - #include - - #include "internal.h" -@@ -634,12 +635,18 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, - unsigned long change_prot_numa(struct vm_area_struct *vma, - unsigned long addr, unsigned long end) - { -+ struct mmu_gather tlb; - int nr_updated; - -- nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA); -+ tlb_gather_mmu(&tlb, vma->vm_mm); -+ -+ nr_updated = change_protection(&tlb, vma, addr, end, PAGE_NONE, -+ MM_CP_PROT_NUMA); - if (nr_updated) - count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); - -+ tlb_finish_mmu(&tlb); -+ - return nr_updated; - } - #else -diff --git a/mm/mprotect.c b/mm/mprotect.c -index ed18dc49533f6..fe1196be9ca28 100644 ---- a/mm/mprotect.c -+++ b/mm/mprotect.c -@@ -32,12 +32,13 @@ - #include - #include - #include -+#include - - #include "internal.h" - --static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, -- unsigned long addr, unsigned long end, pgprot_t newprot, -- 
unsigned long cp_flags) -+static unsigned long change_pte_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, -+ unsigned long end, pgprot_t newprot, unsigned long cp_flags) - { - pte_t *pte, oldpte; - spinlock_t *ptl; -@@ -48,6 +49,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - bool uffd_wp = cp_flags & MM_CP_UFFD_WP; - bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; - -+ tlb_change_page_size(tlb, PAGE_SIZE); -+ - /* - * Can be called with only the mmap_lock for reading by - * prot_numa so we must check the pmd isn't constantly -@@ -138,6 +141,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - ptent = pte_mkwrite(ptent); - } - ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); -+ tlb_flush_pte_range(tlb, addr, PAGE_SIZE); - pages++; - } else if (is_swap_pte(oldpte)) { - swp_entry_t entry = pte_to_swp_entry(oldpte); -@@ -219,9 +223,9 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) - return 0; - } - --static inline unsigned long change_pmd_range(struct vm_area_struct *vma, -- pud_t *pud, unsigned long addr, unsigned long end, -- pgprot_t newprot, unsigned long cp_flags) -+static inline unsigned long change_pmd_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, pud_t *pud, unsigned long addr, -+ unsigned long end, pgprot_t newprot, unsigned long cp_flags) - { - pmd_t *pmd; - unsigned long next; -@@ -261,8 +265,12 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, - if (next - addr != HPAGE_PMD_SIZE) { - __split_huge_pmd(vma, pmd, addr, false, NULL); - } else { -- int nr_ptes = change_huge_pmd(vma, pmd, addr, -- newprot, cp_flags); -+ /* -+ * change_huge_pmd() does not defer TLB flushes, -+ * so no need to propagate the tlb argument. 
-+ */ -+ int nr_ptes = change_huge_pmd(tlb, vma, pmd, -+ addr, newprot, cp_flags); - - if (nr_ptes) { - if (nr_ptes == HPAGE_PMD_NR) { -@@ -276,8 +284,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, - } - /* fall through, the trans huge pmd just split */ - } -- this_pages = change_pte_range(vma, pmd, addr, next, newprot, -- cp_flags); -+ this_pages = change_pte_range(tlb, vma, pmd, addr, next, -+ newprot, cp_flags); - pages += this_pages; - next: - cond_resched(); -@@ -291,9 +299,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, - return pages; - } - --static inline unsigned long change_pud_range(struct vm_area_struct *vma, -- p4d_t *p4d, unsigned long addr, unsigned long end, -- pgprot_t newprot, unsigned long cp_flags) -+static inline unsigned long change_pud_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, -+ unsigned long end, pgprot_t newprot, unsigned long cp_flags) - { - pud_t *pud; - unsigned long next; -@@ -304,16 +312,16 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; -- pages += change_pmd_range(vma, pud, addr, next, newprot, -+ pages += change_pmd_range(tlb, vma, pud, addr, next, newprot, - cp_flags); - } while (pud++, addr = next, addr != end); - - return pages; - } - --static inline unsigned long change_p4d_range(struct vm_area_struct *vma, -- pgd_t *pgd, unsigned long addr, unsigned long end, -- pgprot_t newprot, unsigned long cp_flags) -+static inline unsigned long change_p4d_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, -+ unsigned long end, pgprot_t newprot, unsigned long cp_flags) - { - p4d_t *p4d; - unsigned long next; -@@ -324,44 +332,40 @@ static inline unsigned long change_p4d_range(struct vm_area_struct *vma, - next = p4d_addr_end(addr, end); - if (p4d_none_or_clear_bad(p4d)) - continue; -- pages += change_pud_range(vma, p4d, addr, next, newprot, -+ pages += change_pud_range(tlb, vma, p4d, addr, next, newprot, - cp_flags); - } while (p4d++, addr = next, addr != end); - - return pages; - } - --static unsigned long change_protection_range(struct vm_area_struct *vma, -- unsigned long addr, unsigned long end, pgprot_t newprot, -- unsigned long cp_flags) -+static unsigned long change_protection_range(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, unsigned long addr, -+ unsigned long end, pgprot_t newprot, unsigned long cp_flags) - { - struct mm_struct *mm = vma->vm_mm; - pgd_t *pgd; - unsigned long next; -- unsigned long start = addr; - unsigned long pages = 0; - - BUG_ON(addr >= end); - pgd = pgd_offset(mm, addr); -- flush_cache_range(vma, addr, end); -- inc_tlb_flush_pending(mm); -+ tlb_start_vma(tlb, vma); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; -- pages += change_p4d_range(vma, pgd, addr, next, newprot, -+ pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot, - cp_flags); - } while (pgd++, addr = next, addr != end); - -- /* Only flush the TLB if we actually modified any entries: */ -- if (pages) -- flush_tlb_range(vma, start, end); -- dec_tlb_flush_pending(mm); -+ tlb_end_vma(tlb, vma); - - return pages; - } - --unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, -+unsigned long change_protection(struct mmu_gather *tlb, -+ struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgprot_t newprot, - unsigned long 
cp_flags) - { -@@ -372,7 +376,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, - if (is_vm_hugetlb_page(vma)) - pages = hugetlb_change_protection(vma, start, end, newprot); - else -- pages = change_protection_range(vma, start, end, newprot, -+ pages = change_protection_range(tlb, vma, start, end, newprot, - cp_flags); - - return pages; -@@ -406,8 +410,9 @@ static const struct mm_walk_ops prot_none_walk_ops = { - }; - - int --mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, -- unsigned long start, unsigned long end, unsigned long newflags) -+mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, -+ struct vm_area_struct **pprev, unsigned long start, -+ unsigned long end, unsigned long newflags) - { - struct mm_struct *mm = vma->vm_mm; - unsigned long oldflags = vma->vm_flags; -@@ -494,7 +499,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, - dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); - vma_set_page_prot(vma); - -- change_protection(vma, start, end, vma->vm_page_prot, -+ change_protection(tlb, vma, start, end, vma->vm_page_prot, - dirty_accountable ? MM_CP_DIRTY_ACCT : 0); - - /* -@@ -528,6 +533,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, - const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); - const bool rier = (current->personality & READ_IMPLIES_EXEC) && - (prot & PROT_READ); -+ struct mmu_gather tlb; - - start = untagged_addr(start); - -@@ -584,6 +590,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, - if (start > vma->vm_start) - prev = vma; - -+ tlb_gather_mmu(&tlb, current->mm); - for (nstart = start ; ; ) { - unsigned long mask_off_old_flags; - unsigned long newflags; -@@ -610,18 +617,18 @@ static int do_mprotect_pkey(unsigned long start, size_t len, - /* newflags >> 4 shift VM_MAY% in place of VM_% */ - if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { - error = -EACCES; -- goto out; -+ break; - } - - /* Allow architectures to sanity-check the new flags */ - if (!arch_validate_flags(newflags)) { - error = -EINVAL; -- goto out; -+ break; - } - - error = security_file_mprotect(vma, reqprot, prot); - if (error) -- goto out; -+ break; - - tmp = vma->vm_end; - if (tmp > end) -@@ -630,27 +637,28 @@ static int do_mprotect_pkey(unsigned long start, size_t len, - if (vma->vm_ops && vma->vm_ops->mprotect) { - error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags); - if (error) -- goto out; -+ break; - } - -- error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); -+ error = mprotect_fixup(&tlb, vma, &prev, nstart, tmp, newflags); - if (error) -- goto out; -+ break; - - nstart = tmp; - - if (nstart < prev->vm_end) - nstart = prev->vm_end; - if (nstart >= end) -- goto out; -+ break; - - vma = prev->vm_next; - if (!vma || vma->vm_start != nstart) { - error = -ENOMEM; -- goto out; -+ break; - } - prot = reqprot; - } -+ tlb_finish_mmu(&tlb); - out: - mmap_write_unlock(current->mm); - return error; -diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c -index 98a9d0ef2d917..eafdc112ac7aa 100644 ---- a/mm/userfaultfd.c -+++ b/mm/userfaultfd.c -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - #include "internal.h" - - static __always_inline -@@ -698,6 +699,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, - atomic_t *mmap_changing) - { - struct vm_area_struct *dst_vma; -+ struct mmu_gather tlb; - pgprot_t newprot; - int err; - -@@ -739,8 +741,10 @@ int mwriteprotect_range(struct mm_struct 
*dst_mm, unsigned long start, - else - newprot = vm_get_page_prot(dst_vma->vm_flags); - -- change_protection(dst_vma, start, start + len, newprot, -+ tlb_gather_mmu(&tlb, dst_mm); -+ change_protection(&tlb, dst_vma, start, start + len, newprot, - enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); -+ tlb_finish_mmu(&tlb); - - err = 0; - out_unlock: --- -2.43.0 - diff --git a/queue-5.15/mmc-sdhci-do-not-invert-write-protect-twice.patch b/queue-5.15/mmc-sdhci-do-not-invert-write-protect-twice.patch index f52cc93026b..a711933c08b 100644 --- a/queue-5.15/mmc-sdhci-do-not-invert-write-protect-twice.patch +++ b/queue-5.15/mmc-sdhci-do-not-invert-write-protect-twice.patch @@ -46,7 +46,7 @@ Signed-off-by: Greg Kroah-Hartman --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c -@@ -2491,26 +2491,34 @@ static int sdhci_get_cd(struct mmc_host +@@ -2487,26 +2487,34 @@ static int sdhci_get_cd(struct mmc_host static int sdhci_check_ro(struct sdhci_host *host) { diff --git a/queue-5.15/mmc-sdhci-do-not-lock-spinlock-around-mmc_gpio_get_ro.patch b/queue-5.15/mmc-sdhci-do-not-lock-spinlock-around-mmc_gpio_get_ro.patch index 2d361d3825a..d61710e87ef 100644 --- a/queue-5.15/mmc-sdhci-do-not-lock-spinlock-around-mmc_gpio_get_ro.patch +++ b/queue-5.15/mmc-sdhci-do-not-lock-spinlock-around-mmc_gpio_get_ro.patch @@ -30,7 +30,7 @@ Signed-off-by: Greg Kroah-Hartman --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c -@@ -2492,11 +2492,8 @@ static int sdhci_get_cd(struct mmc_host +@@ -2488,11 +2488,8 @@ static int sdhci_get_cd(struct mmc_host static int sdhci_check_ro(struct sdhci_host *host) { bool allow_invert = false; @@ -42,7 +42,7 @@ Signed-off-by: Greg Kroah-Hartman if (host->flags & SDHCI_DEVICE_DEAD) { is_readonly = 0; } else if (host->ops->get_ro) { -@@ -2511,8 +2508,6 @@ static int sdhci_check_ro(struct sdhci_h +@@ -2507,8 +2504,6 @@ static int sdhci_check_ro(struct sdhci_h allow_invert = true; } diff --git a/queue-5.15/series b/queue-5.15/series index aa1db12b7c8..cddb9195b9d 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -58,10 +58,6 @@ x86-ibt-ftrace-search-for-__fentry__-location.patch ftrace-fix-possible-use-after-free-issue-in-ftrace_l.patch mmc-davinci_mmc-convert-to-platform-remove-callback-.patch mmc-davinci-don-t-strip-remove-function-when-driver-.patch -mm-mprotect-use-mmu_gather.patch -mm-mprotect-do-not-flush-when-not-required-architect.patch -mm-avoid-unnecessary-flush-on-change_huge_pmd.patch -mm-fix-race-between-__split_huge_pmd_locked-and-gup-.patch i2c-add-fwnode-apis.patch i2c-acpi-unbind-mux-adapters-before-delete.patch cma-factor-out-minimum-alignment-requirement.patch
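
For reference, the fix being dropped here together with its dependencies, upstream commit 3a5a8d343e1c ("mm: fix race between __split_huge_pmd_locked() and GUP-fast"), rests on a single invariant: pmd_mkinvalid()/pmdp_invalidate() may only be applied to a present PMD, because invalidating a non-present (migration) entry can make it look present again to lockless walkers such as GUP-fast. Below is a minimal, userspace-only sketch of that invariant, assuming an invented 64-bit PMD layout and made-up helpers (toy_pmd_present(), toy_pmdp_invalidate(), toy_split()); it illustrates the check the fix adds, it is not kernel code.

/* toy_pmd.c - standalone model of the "only invalidate present PMDs" rule.
 * Build: cc -Wall -o toy_pmd toy_pmd.c
 * All names and bit layouts below are invented for illustration only.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t toy_pmd_t;

#define TOY_PMD_PRESENT (1ULL << 0)	/* entry is live and maps memory          */
#define TOY_PMD_INVALID (1ULL << 1)	/* temporarily unusable while being split */

static bool toy_pmd_present(toy_pmd_t pmd)
{
	return pmd & TOY_PMD_PRESENT;
}

/* Models pmdp_invalidate(): clear "present", set "invalid", return the old value. */
static toy_pmd_t toy_pmdp_invalidate(toy_pmd_t *pmdp)
{
	toy_pmd_t old = *pmdp;

	/* The invariant the fix enforces with VM_WARN_ON_ONCE(!pmd_present(*pmdp)). */
	assert(toy_pmd_present(old));

	*pmdp = (old & ~TOY_PMD_PRESENT) | TOY_PMD_INVALID;
	return old;
}

/* Models __split_huge_pmd_locked() after the fix: branch on presence first. */
static void toy_split(toy_pmd_t *pmdp)
{
	if (!toy_pmd_present(*pmdp)) {
		/* Migration entry: read it as-is, never "invalidate" it. */
		printf("non-present entry 0x%" PRIx64 ": left untouched\n", *pmdp);
		return;
	}

	toy_pmd_t old = toy_pmdp_invalidate(pmdp);

	printf("present entry 0x%" PRIx64 " invalidated -> 0x%" PRIx64 "\n",
	       old, *pmdp);
}

int main(void)
{
	toy_pmd_t present_pmd = TOY_PMD_PRESENT | 0x1000;
	toy_pmd_t migration_pmd = 0x2000;	/* present bit clear: swap/migration entry */

	toy_split(&present_pmd);
	toy_split(&migration_pmd);
	return 0;
}

In the real patch the same idea appears twice: each pmdp_invalidate[_ad]() implementation gains a VM_WARN_ON_ONCE(!pmd_present(*pmdp)), and __split_huge_pmd_locked() tests is_pmd_migration_entry(*pmd) before deciding whether to call pmdp_invalidate() at all, as in the hunks above.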