From 67be904782e5c8ee5bba3f6d66e42b5832936651 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 29 Nov 2021 13:48:30 +0100
Subject: [PATCH] 4.9-stable patches

added patches:
      hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
      tracing-check-pid-filtering-when-creating-events.patch

---
 ...lbs-correctly-after-huge_pmd_unshare.patch | 222 ++++++++++++++++++
 queue-4.9/series                              |   2 +
 ...k-pid-filtering-when-creating-events.patch |  48 ++++
 3 files changed, 272 insertions(+)
 create mode 100644 queue-4.9/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
 create mode 100644 queue-4.9/tracing-check-pid-filtering-when-creating-events.patch

diff --git a/queue-4.9/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch b/queue-4.9/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
new file mode 100644
index 00000000000..d729fbf6e4b
--- /dev/null
+++ b/queue-4.9/hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
@@ -0,0 +1,222 @@
+From a4a118f2eead1d6c49e00765de89878288d4b890 Mon Sep 17 00:00:00 2001
+From: Nadav Amit
+Date: Sun, 21 Nov 2021 12:40:07 -0800
+Subject: hugetlbfs: flush TLBs correctly after huge_pmd_unshare
+
+From: Nadav Amit
+
+commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.
+
+When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB
+flush is missing. This TLB flush must be performed before releasing the
+i_mmap_rwsem, in order to prevent an unshared PMDs page from being
+released and reused before the TLB flush took place.
+
+Arguably, a comprehensive solution would use mmu_gather interface to
+batch the TLB flushes and the PMDs page release, however it is not an
+easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call
+huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2)
+deferring the release of the page reference for the PMDs page until
+after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into
+thinking PMDs are shared when they are not.
+
+Fix __unmap_hugepage_range() by adding the missing TLB flush, and
+forcing a flush when unshare is successful.
+
+Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages") # 3.6
+Signed-off-by: Nadav Amit
+Reviewed-by: Mike Kravetz
+Cc: Aneesh Kumar K.V
+Cc: KAMEZAWA Hiroyuki
+Cc: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm/include/asm/tlb.h  |    8 ++++++++
+ arch/ia64/include/asm/tlb.h |   10 ++++++++++
+ arch/s390/include/asm/tlb.h |   14 ++++++++++++++
+ arch/sh/include/asm/tlb.h   |   10 ++++++++++
+ arch/um/include/asm/tlb.h   |   12 ++++++++++++
+ include/asm-generic/tlb.h   |    2 ++
+ mm/hugetlb.c                |   19 +++++++++++++++++++
+ mm/memory.c                 |   16 ++++++++++++++++
+ 8 files changed, 91 insertions(+)
+
+--- a/arch/arm/include/asm/tlb.h
++++ b/arch/arm/include/asm/tlb.h
+@@ -278,6 +278,14 @@ tlb_remove_pmd_tlb_entry(struct mmu_gath
+ 	tlb_add_flush(tlb, addr);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++		    unsigned long size)
++{
++	tlb_add_flush(tlb, address);
++	tlb_add_flush(tlb, address + size - PMD_SIZE);
++}
++
+ #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
+ #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
+ #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
+--- a/arch/ia64/include/asm/tlb.h
++++ b/arch/ia64/include/asm/tlb.h
+@@ -272,6 +272,16 @@ __tlb_remove_tlb_entry (struct mmu_gathe
+ 	tlb->end_addr = address + PAGE_SIZE;
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++		    unsigned long size)
++{
++	if (tlb->start_addr > address)
++		tlb->start_addr = address;
++	if (tlb->end_addr < address + size)
++		tlb->end_addr = address + size;
++}
++
+ #define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
+ 
+ #define tlb_start_vma(tlb, vma)			do { } while (0)
+--- a/arch/s390/include/asm/tlb.h
++++ b/arch/s390/include/asm/tlb.h
+@@ -116,6 +116,20 @@ static inline void tlb_remove_page_size(
+ 	return tlb_remove_page(tlb, page);
+ }
+ 
++static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
++				unsigned long address, unsigned long size)
++{
++	/*
++	 * the range might exceed the original range that was provided to
++	 * tlb_gather_mmu(), so we need to update it despite the fact it is
++	 * usually not updated.
++	 */
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ /*
+  * pte_free_tlb frees a pte table and clears the CRSTE for the
+  * page table from the tlb.
+--- a/arch/sh/include/asm/tlb.h
++++ b/arch/sh/include/asm/tlb.h
+@@ -115,6 +115,16 @@ static inline bool __tlb_remove_page_siz
+ 	return __tlb_remove_page(tlb, page);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++		    unsigned long size)
++{
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
+ 					  struct page *page)
+ {
+--- a/arch/um/include/asm/tlb.h
++++ b/arch/um/include/asm/tlb.h
+@@ -128,6 +128,18 @@ static inline void tlb_remove_page_size(
+ 	return tlb_remove_page(tlb, page);
+ }
+ 
++static inline void
++tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++		    unsigned long size)
++{
++	tlb->need_flush = 1;
++
++	if (tlb->start > address)
++		tlb->start = address;
++	if (tlb->end < address + size)
++		tlb->end = address + size;
++}
++
+ /**
+  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
+ *
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -123,6 +123,8 @@ void tlb_finish_mmu(struct mmu_gather *t
+ 			unsigned long end);
+ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+ 				   int page_size);
++void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			 unsigned long size);
+ 
+ static inline void __tlb_adjust_range(struct mmu_gather *tlb,
+ 				      unsigned long address)
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3395,6 +3395,7 @@ void __unmap_hugepage_range(struct mmu_g
+ 	unsigned long sz = huge_page_size(h);
+ 	const unsigned long mmun_start = start;	/* For mmu_notifiers */
+ 	const unsigned long mmun_end   = end;	/* For mmu_notifiers */
++	bool force_flush = false;
+ 
+ 	WARN_ON(!is_vm_hugetlb_page(vma));
+ 	BUG_ON(start & ~huge_page_mask(h));
+@@ -3411,6 +3412,8 @@ void __unmap_hugepage_range(struct mmu_g
+ 		ptl = huge_pte_lock(h, mm, ptep);
+ 		if (huge_pmd_unshare(mm, &address, ptep)) {
+ 			spin_unlock(ptl);
++			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
++			force_flush = true;
+ 			continue;
+ 		}
+ 
+@@ -3467,6 +3470,22 @@ void __unmap_hugepage_range(struct mmu_g
+ 	}
+ 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ 	tlb_end_vma(tlb, vma);
++
++	/*
++	 * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
++	 * could defer the flush until now, since by holding i_mmap_rwsem we
++	 * guaranteed that the last reference would not be dropped. But we must
++	 * do the flushing before we return, as otherwise i_mmap_rwsem will be
++	 * dropped and the last reference to the shared PMDs page might be
++	 * dropped as well.
++	 *
++	 * In theory we could defer the freeing of the PMD pages as well, but
++	 * huge_pmd_unshare() relies on the exact page_count for the PMD page to
++	 * detect sharing, so we cannot defer the release of the page either.
++	 * Instead, do flush now.
++	 */
++	if (force_flush)
++		tlb_flush_mmu(tlb);
+ }
+ 
+ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -320,6 +320,22 @@ bool __tlb_remove_page_size(struct mmu_g
+ 	return false;
+ }
+ 
++void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
++			 unsigned long size)
++{
++	if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE)
++		tlb_flush_mmu(tlb);
++
++	tlb->page_size = PMD_SIZE;
++	tlb->start = min(tlb->start, address);
++	tlb->end = max(tlb->end, address + size);
++	/*
++	 * Track the last address with which we adjusted the range. This
++	 * will be used later to adjust again after a mmu_flush due to
++	 * failed __tlb_remove_page
++	 */
++	tlb->addr = address + size - PMD_SIZE;
++}
+ #endif /* HAVE_GENERIC_MMU_GATHER */
+ 
+ #ifdef CONFIG_HAVE_RCU_TABLE_FREE
diff --git a/queue-4.9/series b/queue-4.9/series
index 4bcebd64968..8536922b00e 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -19,3 +19,5 @@ scsi-mpt3sas-fix-kernel-panic-during-drive-powercycl.patch
 drm-vc4-fix-error-code-in-vc4_create_object.patch
 pm-hibernate-use-correct-mode-for-swsusp_close.patch
 tcp_cubic-fix-spurious-hystart-ack-train-detections-.patch
+tracing-check-pid-filtering-when-creating-events.patch
+hugetlbfs-flush-tlbs-correctly-after-huge_pmd_unshare.patch
diff --git a/queue-4.9/tracing-check-pid-filtering-when-creating-events.patch b/queue-4.9/tracing-check-pid-filtering-when-creating-events.patch
new file mode 100644
index 00000000000..2fb2a80aa81
--- /dev/null
+++ b/queue-4.9/tracing-check-pid-filtering-when-creating-events.patch
@@ -0,0 +1,48 @@
+From 6cb206508b621a9a0a2c35b60540e399225c8243 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)"
+Date: Fri, 26 Nov 2021 13:35:26 -0500
+Subject: tracing: Check pid filtering when creating events
+
+From: Steven Rostedt (VMware)
+
+commit 6cb206508b621a9a0a2c35b60540e399225c8243 upstream.
+
+When pid filtering is activated in an instance, all of the events trace
+files for that instance have the PID_FILTER flag set. This determines
+whether or not pid filtering needs to be done on the event, otherwise the
+event is executed as normal.
+
+If pid filtering is enabled when an event is created (via a dynamic event
+or modules), its flag is not updated to reflect the current state, and the
+events are not filtered properly.
+
+Cc: stable@vger.kernel.org
+Fixes: 3fdaf80f4a836 ("tracing: Implement event pid filtering")
+Signed-off-by: Steven Rostedt (VMware)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/trace/trace_events.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -2241,12 +2241,19 @@ static struct trace_event_file *
+ trace_create_new_event(struct trace_event_call *call,
+ 		       struct trace_array *tr)
+ {
++	struct trace_pid_list *pid_list;
+ 	struct trace_event_file *file;
+ 
+ 	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+ 	if (!file)
+ 		return NULL;
+ 
++	pid_list = rcu_dereference_protected(tr->filtered_pids,
++					     lockdep_is_held(&event_mutex));
++
++	if (pid_list)
++		file->flags |= EVENT_FILE_FL_PID_FILTER;
++
+ 	file->event_call = call;
+ 	file->tr = tr;
+ 	atomic_set(&file->sm_ref, 0);
-- 
2.47.2
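
Illustrative aside, not part of the queued patches above: the sketch below is a minimal user-space model of the behaviour that the generic tlb_flush_pmd_range() helper adds in this backport. The names toy_gather, toy_flush_pmd_range, toy_flush_mmu and the 2 MiB TOY_PMD_SIZE are assumptions made for this example only, not kernel APIs. It shows the idea the commit message describes: each successful unshare widens a pending [start, end) window, and one forced flush (the kernel calls tlb_flush_mmu()) covers all of it before i_mmap_rwsem is released.

/*
 * Minimal user-space model (illustration only, not kernel code) of how the
 * backported tlb_flush_pmd_range() widens the pending flush range so that a
 * single deferred flush covers every region whose PMDs were unshared.
 */
#include <stdbool.h>
#include <stdio.h>

#define TOY_PMD_SIZE (2UL << 20)	/* assumed 2 MiB PMD, x86-64-style */

struct toy_gather {
	unsigned long start;		/* lowest address still to be flushed */
	unsigned long end;		/* one past the highest such address  */
	bool need_flush;
};

/* Mirrors the helper's core logic: only grow the tracked range, never shrink it. */
static void toy_flush_pmd_range(struct toy_gather *tlb,
				unsigned long address, unsigned long size)
{
	if (tlb->start > address)
		tlb->start = address;
	if (tlb->end < address + size)
		tlb->end = address + size;
	tlb->need_flush = true;
}

/* Stands in for tlb_flush_mmu(): emit one flush for the accumulated range. */
static void toy_flush_mmu(struct toy_gather *tlb)
{
	if (!tlb->need_flush)
		return;
	printf("flush [%#lx, %#lx)\n", tlb->start, tlb->end);
	tlb->need_flush = false;
}

int main(void)
{
	struct toy_gather tlb = { .start = ~0UL, .end = 0, .need_flush = false };

	/* Two regions whose page tables were "unshared" during the unmap walk. */
	toy_flush_pmd_range(&tlb, 0x40000000UL, TOY_PMD_SIZE);
	toy_flush_pmd_range(&tlb, 0x40a00000UL, TOY_PMD_SIZE);

	/* The forced flush that must happen before i_mmap_rwsem is dropped. */
	toy_flush_mmu(&tlb);
	return 0;
}

In the real call site in __unmap_hugepage_range() the recorded region is PUD-sized (address & PUD_MASK, PUD_SIZE), because unsharing detaches a whole page of PMDs; the toy addresses and sizes above are arbitrary.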