From: Greg Kroah-Hartman
Date: Mon, 29 Nov 2021 12:49:11 +0000 (+0100)
Subject: 4.19-stable patches
X-Git-Tag: v5.15.6~31
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0616015dac19a1917a34bc2cfff7097865b6bc34;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
s390-mm-validate-vma-in-pgste-manipulation-functions.patch
tracing-check-pid-filtering-when-creating-events.patch
---

diff --git a/queue-4.19/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch b/queue-4.19/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
new file mode 100644
index 00000000000..a1067e241e7
--- /dev/null
+++ b/queue-4.19/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
@@ -0,0 +1,229 @@
+From a4a118f2eead1d6c49e00765de89878288d4b890 Mon Sep 17 00:00:00 2001
+From: Nadav Amit
+Date: Sun, 21 Nov 2021 12:40:07 -0800
+Subject: hugetlbfs: flush TLBs correctly after huge_pmd_unshare
+
+From: Nadav Amit
+
+commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.
+
+When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB
+flush is missing. This TLB flush must be performed before releasing the
+i_mmap_rwsem, in order to prevent an unshared PMDs page from being
+released and reused before the TLB flush took place.
+
+Arguably, a comprehensive solution would use the mmu_gather interface to
+batch the TLB flushes and the PMDs page release, however it is not an
+easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call
+huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2)
+deferring the release of the page reference for the PMDs page until
+after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into
+thinking PMDs are shared when they are not.
+
+Fix __unmap_hugepage_range() by adding the missing TLB flush, and
+forcing a flush when unshare is successful.
+
+Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages") # 3.6
+Signed-off-by: Nadav Amit
+Reviewed-by: Mike Kravetz
+Cc: Aneesh Kumar K.V
+Cc: KAMEZAWA Hiroyuki
+Cc: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm/include/asm/tlb.h  |    8 ++++++++
+ arch/ia64/include/asm/tlb.h |   10 ++++++++++
+ arch/s390/include/asm/tlb.h |   16 ++++++++++++++++
+ arch/sh/include/asm/tlb.h   |   10 ++++++++++
+ arch/um/include/asm/tlb.h   |   12 ++++++++++++
+ include/asm-generic/tlb.h   |    2 ++
+ mm/hugetlb.c                |   23 +++++++++++++++++++----
+ mm/memory.c                 |   10 ++++++++++
+ 8 files changed, 87 insertions(+), 4 deletions(-)
+
+--- a/arch/arm/include/asm/tlb.h
++++ b/arch/arm/include/asm/tlb.h
+@@ -280,6 +280,14 @@ tlb_remove_pmd_tlb_entry(struct mmu_gath
+ 	tlb_add_flush(tlb, addr);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			unsigned long size)
++{
++	tlb_add_flush(tlb, address);
++	tlb_add_flush(tlb, address + size - PMD_SIZE);
++}
++
+ #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
+ #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
+ #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
+--- a/arch/ia64/include/asm/tlb.h
++++ b/arch/ia64/include/asm/tlb.h
+@@ -268,6 +268,16 @@ __tlb_remove_tlb_entry (struct mmu_gathe
+ 	tlb->end_addr = address + PAGE_SIZE;
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			unsigned long size)
++{
++	if (tlb->start_addr > address)
++		tlb->start_addr = address;
++	if (tlb->end_addr < address + size)
++		tlb->end_addr = address + size;
++}
++
+ #define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
+ 
+ #define tlb_start_vma(tlb, vma)			do { } while (0)
+--- a/arch/s390/include/asm/tlb.h
++++ b/arch/s390/include/asm/tlb.h
+@@ -116,6 +116,20 @@ static inline void tlb_remove_page_size(
+ 	return tlb_remove_page(tlb, page);
+ }
+ 
++static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
++				unsigned long address, unsigned long size)
++{
++	/*
++	 * the range might exceed the original range that was provided to
++	 * tlb_gather_mmu(), so we need to update it despite the fact it is
++	 * usually not updated.
++	 */
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ /*
+  * pte_free_tlb frees a pte table and clears the CRSTE for the
+  * page table from the tlb.
+@@ -177,6 +191,8 @@ static inline void pud_free_tlb(struct m
+ #define tlb_remove_tlb_entry(tlb, ptep, addr)	do { } while (0)
+ #define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr)	do { } while (0)
+ #define tlb_migrate_finish(mm)			do { } while (0)
++#define tlb_flush_pmd_range(tlb, addr, sz)	do { } while (0)
++
+ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
+ 	tlb_remove_tlb_entry(tlb, ptep, address)
+ 
+--- a/arch/sh/include/asm/tlb.h
++++ b/arch/sh/include/asm/tlb.h
+@@ -127,6 +127,16 @@ static inline void tlb_remove_page_size(
+ 	return tlb_remove_page(tlb, page);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			unsigned long size)
++{
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+ 						     unsigned int page_size)
+--- a/arch/um/include/asm/tlb.h
++++ b/arch/um/include/asm/tlb.h
+@@ -130,6 +130,18 @@ static inline void tlb_remove_page_size(
+ 	return tlb_remove_page(tlb, page);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			unsigned long size)
++{
++	tlb->need_flush = 1;
++
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ /**
+  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
+  *
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -118,6 +118,8 @@ void arch_tlb_gather_mmu(struct mmu_gath
+ void tlb_flush_mmu(struct mmu_gather *tlb);
+ void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ 			 unsigned long start, unsigned long end, bool force);
++void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			 unsigned long size);
+ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+ 				   int page_size);
+ 
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3425,6 +3425,7 @@ void __unmap_hugepage_range(struct mmu_g
+ 	unsigned long sz = huge_page_size(h);
+ 	unsigned long mmun_start = start;	/* For mmu_notifiers */
+ 	unsigned long mmun_end   = end;		/* For mmu_notifiers */
++	bool force_flush = false;
+ 
+ 	WARN_ON(!is_vm_hugetlb_page(vma));
+ 	BUG_ON(start & ~huge_page_mask(h));
+@@ -3451,10 +3452,8 @@ void __unmap_hugepage_range(struct mmu_g
+ 		ptl = huge_pte_lock(h, mm, ptep);
+ 		if (huge_pmd_unshare(mm, &address, ptep)) {
+ 			spin_unlock(ptl);
+-			/*
+-			 * We just unmapped a page of PMDs by clearing a PUD.
+-			 * The caller's TLB flush range should cover this area.
+-			 */
++			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
++			force_flush = true;
+ 			continue;
+ 		}
+ 
+@@ -3511,6 +3510,22 @@ void __unmap_hugepage_range(struct mmu_g
+ 	}
+ 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ 	tlb_end_vma(tlb, vma);
++
++	/*
++	 * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
++	 * could defer the flush until now, since by holding i_mmap_rwsem we
++	 * guaranteed that the last reference would not be dropped. But we must
++	 * do the flushing before we return, as otherwise i_mmap_rwsem will be
++	 * dropped and the last reference to the shared PMDs page might be
++	 * dropped as well.
++	 *
++	 * In theory we could defer the freeing of the PMD pages as well, but
++	 * huge_pmd_unshare() relies on the exact page_count for the PMD page to
++	 * detect sharing, so we cannot defer the release of the page either.
++	 * Instead, do flush now.
++	 */
++	if (force_flush)
++		tlb_flush_mmu_tlbonly(tlb);
+ }
+ 
+ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -324,6 +324,16 @@ bool __tlb_remove_page_size(struct mmu_g
+ 	return false;
+ }
+ 
++void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			 unsigned long size)
++{
++	if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE)
++		tlb_flush_mmu(tlb);
++
++	tlb->page_size = PMD_SIZE;
++	tlb->start = min(tlb->start, address);
++	tlb->end = max(tlb->end, address + size);
++}
+ #endif /* HAVE_GENERIC_MMU_GATHER */
+ 
+ #ifdef CONFIG_HAVE_RCU_TABLE_FREE
diff --git a/queue-4.19/s390-mm-validate-vma-in-pgste-manipulation-functions.patch b/queue-4.19/s390-mm-validate-vma-in-pgste-manipulation-functions.patch
new file mode 100644
index 00000000000..b851c0be849
--- /dev/null
+++ b/queue-4.19/s390-mm-validate-vma-in-pgste-manipulation-functions.patch
@@ -0,0 +1,86 @@
+From fe3d10024073f06f04c74b9674bd71ccc1d787cf Mon Sep 17 00:00:00 2001
+From: David Hildenbrand
+Date: Thu, 9 Sep 2021 18:22:42 +0200
+Subject: s390/mm: validate VMA in PGSTE manipulation functions
+
+From: David Hildenbrand
+
+commit fe3d10024073f06f04c74b9674bd71ccc1d787cf upstream.
+
+We should not walk/touch page tables outside of VMA boundaries when
+holding only the mmap sem in read mode. Evil user space can modify the
+VMA layout just before this function runs and e.g., trigger races with
+page table removal code since commit dd2283f2605e ("mm: mmap: zap pages
+with read mmap_sem in munmap"). gfn_to_hva() will only translate using
+KVM memory regions, but won't validate the VMA.
+
+Further, we should not allocate page tables outside of VMA boundaries: if
+evil user space decides to map hugetlbfs to these ranges, bad things will
+happen because we suddenly have PTE or PMD page tables where we
+shouldn't have them.
+
+Similarly, we have to check if we suddenly find a hugetlbfs VMA, before
+calling get_locked_pte().
+
+Fixes: 2d42f9477320 ("s390/kvm: Add PGSTE manipulation functions")
+Signed-off-by: David Hildenbrand
+Reviewed-by: Claudio Imbrenda
+Acked-by: Heiko Carstens
+Link: https://lore.kernel.org/r/20210909162248.14969-4-david@redhat.com
+Signed-off-by: Christian Borntraeger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/s390/mm/pgtable.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -970,6 +970,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
+ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ 			unsigned long *oldpte, unsigned long *oldpgste)
+ {
++	struct vm_area_struct *vma;
+ 	unsigned long pgstev;
+ 	spinlock_t *ptl;
+ 	pgste_t pgste;
+@@ -979,6 +980,10 @@ int pgste_perform_essa(struct mm_struct
+ 	WARN_ON_ONCE(orc > ESSA_MAX);
+ 	if (unlikely(orc > ESSA_MAX))
+ 		return -EINVAL;
++
++	vma = find_vma(mm, hva);
++	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
++		return -EFAULT;
+ 	ptep = get_locked_pte(mm, hva, &ptl);
+ 	if (unlikely(!ptep))
+ 		return -EFAULT;
+@@ -1071,10 +1076,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
+ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ 			unsigned long bits, unsigned long value)
+ {
++	struct vm_area_struct *vma;
+ 	spinlock_t *ptl;
+ 	pgste_t new;
+ 	pte_t *ptep;
+ 
++	vma = find_vma(mm, hva);
++	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
++		return -EFAULT;
+ 	ptep = get_locked_pte(mm, hva, &ptl);
+ 	if (unlikely(!ptep))
+ 		return -EFAULT;
+@@ -1099,9 +1108,13 @@ EXPORT_SYMBOL(set_pgste_bits);
+  */
+ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+ {
++	struct vm_area_struct *vma;
+ 	spinlock_t *ptl;
+ 	pte_t *ptep;
+ 
++	vma = find_vma(mm, hva);
++	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
++		return -EFAULT;
+ 	ptep = get_locked_pte(mm, hva, &ptl);
+ 	if (unlikely(!ptep))
+ 		return -EFAULT;
diff --git a/queue-4.19/series b/queue-4.19/series
index a6d89cbbcbd..e7906370e65 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -53,3 +53,6 @@ mips-use-3-level-pgtable-for-64kb-page-size-on-mips_.patch
 net-smc-don-t-call-clcsock-shutdown-twice-when-smc-s.patch
 net-hns3-fix-vf-rss-failed-problem-after-pf-enable-m.patch
 vhost-vsock-fix-incorrect-used-length-reported-to-the-guest.patch
+tracing-check-pid-filtering-when-creating-events.patch
+s390-mm-validate-vma-in-pgste-manipulation-functions.patch
+hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
diff --git a/queue-4.19/tracing-check-pid-filtering-when-creating-events.patch b/queue-4.19/tracing-check-pid-filtering-when-creating-events.patch
new file mode 100644
index 00000000000..c43ccd40931
--- /dev/null
+++ b/queue-4.19/tracing-check-pid-filtering-when-creating-events.patch
@@ -0,0 +1,48 @@
+From 6cb206508b621a9a0a2c35b60540e399225c8243 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)"
+Date: Fri, 26 Nov 2021 13:35:26 -0500
+Subject: tracing: Check pid filtering when creating events
+
+From: Steven Rostedt (VMware)
+
+commit 6cb206508b621a9a0a2c35b60540e399225c8243 upstream.
+
+When pid filtering is activated in an instance, all of the events trace
+files for that instance have the PID_FILTER flag set. This determines
+whether or not pid filtering needs to be done on the event, otherwise the
+event is executed as normal.
+
+If pid filtering is enabled when an event is created (via a dynamic event
+or modules), its flag is not updated to reflect the current state, and the
+events are not filtered properly.
+ +Cc: stable@vger.kernel.org +Fixes: 3fdaf80f4a836 ("tracing: Implement event pid filtering") +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -2255,12 +2255,19 @@ static struct trace_event_file * + trace_create_new_event(struct trace_event_call *call, + struct trace_array *tr) + { ++ struct trace_pid_list *pid_list; + struct trace_event_file *file; + + file = kmem_cache_alloc(file_cachep, GFP_TRACE); + if (!file) + return NULL; + ++ pid_list = rcu_dereference_protected(tr->filtered_pids, ++ lockdep_is_held(&event_mutex)); ++ ++ if (pid_list) ++ file->flags |= EVENT_FILE_FL_PID_FILTER; ++ + file->event_call = call; + file->tr = tr; + atomic_set(&file->sm_ref, 0);
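
As an illustration of the first fix queued above, here is a rough, self-contained user-space model of the range bookkeeping that the backported tlb_flush_pmd_range() performs: it only widens the pending flush window to cover the PUD-sized region that huge_pmd_unshare() unmapped, and __unmap_hugepage_range() then forces the flush before i_mmap_rwsem can be dropped. Everything below (struct mock_gather, mock_flush_pmd_range(), the 1 GiB PUD_SIZE value) is an illustrative stand-in, not kernel code.

#include <stdio.h>

#define PUD_SIZE (1UL << 30)            /* assumed 1 GiB PUD, as on x86-64 */
#define PUD_MASK (~(PUD_SIZE - 1))

struct mock_gather {
	unsigned long start;            /* lowest address pending a flush */
	unsigned long end;              /* one past the highest pending address */
};

/* Widen the pending flush range; mirrors the intent of tlb_flush_pmd_range(). */
static void mock_flush_pmd_range(struct mock_gather *tlb,
				 unsigned long address, unsigned long size)
{
	if (address < tlb->start)
		tlb->start = address;
	if (address + size > tlb->end)
		tlb->end = address + size;
}

int main(void)
{
	struct mock_gather tlb = { .start = ~0UL, .end = 0 };
	unsigned long addr = 0x7f0040200000UL;

	/* huge_pmd_unshare() succeeded: record the whole PUD-sized region. */
	mock_flush_pmd_range(&tlb, addr & PUD_MASK, PUD_SIZE);

	/* A forced flush would now cover [start, end) before the lock drops. */
	printf("pending flush: [%#lx, %#lx)\n", tlb.start, tlb.end);
	return 0;
}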
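Likewise, a minimal sketch of the guard the s390/mm patch places in front of get_locked_pte(): look up the VMA covering the host virtual address and fail with -EFAULT when there is no VMA, when the address lies below it, or when it is a hugetlbfs mapping. The mock_vma structure and mock_find_vma() lookup are simplified assumptions standing in for the kernel's find_vma() and is_vm_hugetlb_page().

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct mock_vma {
	unsigned long vm_start;
	unsigned long vm_end;
	bool vm_hugetlb;                /* stand-in for is_vm_hugetlb_page() */
};

/* Return the first VMA whose end lies above addr, or NULL; models find_vma(). */
static struct mock_vma *mock_find_vma(struct mock_vma *vmas, size_t n,
				      unsigned long addr)
{
	for (size_t i = 0; i < n; i++)
		if (addr < vmas[i].vm_end)
			return &vmas[i];
	return NULL;
}

/* Mirrors the added check: hva must fall inside a non-hugetlb VMA. */
static int validate_hva(struct mock_vma *vmas, size_t n, unsigned long hva)
{
	struct mock_vma *vma = mock_find_vma(vmas, n, hva);

	if (!vma || hva < vma->vm_start || vma->vm_hugetlb)
		return -EFAULT;
	return 0;
}

int main(void)
{
	struct mock_vma vmas[] = {
		{ .vm_start = 0x10000, .vm_end = 0x20000, .vm_hugetlb = false },
	};

	/* 0x18000 is covered, 0x30000 is not: expect 0 and -EFAULT. */
	return (validate_hva(vmas, 1, 0x18000) == 0 &&
		validate_hva(vmas, 1, 0x30000) == -EFAULT) ? 0 : 1;
}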
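Finally, a hedged stand-alone model of the tracing fix: the PID-filter flag has to be computed in the event-file creation path itself, otherwise event files created after filtering was enabled (for example by a module load or a dynamic event) run unfiltered. The names below are illustrative only and stand in for tr->filtered_pids, EVENT_FILE_FL_PID_FILTER and trace_create_new_event(); they are not the ftrace implementation.

#include <stdbool.h>
#include <stdio.h>

#define FL_PID_FILTER (1u << 0)         /* models EVENT_FILE_FL_PID_FILTER */

struct mock_trace_array {
	bool pid_filtering;             /* models tr->filtered_pids being set */
};

struct mock_event_file {
	unsigned int flags;
};

/* Models trace_create_new_event(): new files inherit the instance's state. */
static void mock_create_event_file(struct mock_event_file *file,
				   const struct mock_trace_array *tr)
{
	file->flags = 0;
	if (tr->pid_filtering)
		file->flags |= FL_PID_FILTER;
}

int main(void)
{
	struct mock_trace_array tr = { .pid_filtering = true };
	struct mock_event_file file;

	/* An event created after filtering was enabled now gets the flag. */
	mock_create_event_file(&file, &tr);
	printf("pid filtering %s for the new event file\n",
	       (file.flags & FL_PID_FILTER) ? "enabled" : "disabled");
	return 0;
}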