From: Greg Kroah-Hartman
Date: Tue, 7 Jan 2014 18:38:35 +0000 (-0800)
Subject: 3.12-stable patches
X-Git-Tag: v3.4.76~5
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=77131c42fbaa3b277296454896faaa73b1bb05d8;p=thirdparty%2Fkernel%2Fstable-queue.git

3.12-stable patches

added patches:
      mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
      mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
---

diff --git a/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
new file mode 100644
index 00000000000..42cbacf3fdb
--- /dev/null
+++ b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
@@ -0,0 +1,244 @@
+From mgorman@suse.de Tue Jan 7 10:24:43 2014
+From: Rik van Riel
+Date: Tue, 7 Jan 2014 14:00:46 +0000
+Subject: mm: fix TLB flush race between migration, and change_protection_range
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman, stable@vger.kernel.org
+Message-ID: <1389103248-17617-12-git-send-email-mgorman@suse.de>
+
+From: Rik van Riel
+
+commit 20841405940e7be0617612d521e206e4b6b325db upstream.
+
+There are a few subtle races, between change_protection_range (used by
+mprotect and change_prot_numa) on one side, and NUMA page migration and
+compaction on the other side.
+
+The basic race is that there is a time window between when the PTE gets
+made non-present (PROT_NONE or NUMA), and the TLB is flushed.
+
+During that time, a CPU may continue writing to the page.
+
+This is fine most of the time, however compaction or the NUMA migration
+code may come in, and migrate the page away.
+
+When that happens, the CPU may continue writing, through the cached
+translation, to what is no longer the current memory location of the
+process.
+
+This only affects x86, which has a somewhat optimistic pte_accessible.
+All other architectures appear to be safe, and will either always flush,
+or flush whenever there is a valid mapping, even with no permissions
+(SPARC).
+
+The basic race looks like this:
+
+CPU A                   CPU B                   CPU C
+
+                                                load TLB entry
+make entry PTE/PMD_NUMA
+                        fault on entry
+                                                read/write old page
+                        start migrating page
+                        change PTE/PMD to new page
+                                                read/write old page [*]
+flush TLB
+                                                reload TLB from new entry
+                                                read/write new page
+                                                lose data
+
+[*] the old page may belong to a new user at this point!
+
+The obvious fix is to flush remote TLB entries, by making sure that
+pte_accessible aware of the fact that PROT_NONE and PROT_NUMA memory may
+still be accessible if there is a TLB flush pending for the mm.
+
+This should fix both NUMA migration and compaction.
+
+[mgorman@suse.de: fix build]
+Signed-off-by: Rik van Riel
+Signed-off-by: Mel Gorman
+Cc: Alex Thorlton
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/pgtable_64.h |    4 +--
+ arch/x86/include/asm/pgtable.h      |   11 +++++++--
+ include/asm-generic/pgtable.h       |    2 -
+ include/linux/mm_types.h            |   44 ++++++++++++++++++++++++++++++++++++
+ kernel/fork.c                       |    1 
+ mm/huge_memory.c                    |    7 +++++
+ mm/mprotect.c                       |    2 +
+ mm/pgtable-generic.c                |    5 ++--
+ 8 files changed, 69 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -616,7 +616,7 @@ static inline unsigned long pte_present(
+ }
+ 
+ #define pte_accessible pte_accessible
+-static inline unsigned long pte_accessible(pte_t a)
++static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
+ {
+         return pte_val(a) & _PAGE_VALID;
+ }
+@@ -806,7 +806,7 @@ static inline void __set_pte_at(struct m
+          * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+          * and SUN4V pte layout, so this inline test is fine.
+          */
+-        if (likely(mm != &init_mm) && pte_accessible(orig))
++        if (likely(mm != &init_mm) && pte_accessible(mm, orig))
+                 tlb_batch_add(mm, addr, ptep, orig, fullmm);
+ }
+ 
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -452,9 +452,16 @@ static inline int pte_present(pte_t a)
+ }
+ 
+ #define pte_accessible pte_accessible
+-static inline int pte_accessible(pte_t a)
++static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
+ {
+-        return pte_flags(a) & _PAGE_PRESENT;
++        if (pte_flags(a) & _PAGE_PRESENT)
++                return true;
++
++        if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
++                        mm_tlb_flush_pending(mm))
++                return true;
++
++        return false;
+ }
+ 
+ static inline int pte_hidden(pte_t pte)
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a,
+ #endif
+ 
+ #ifndef pte_accessible
+-# define pte_accessible(pte)            ((void)(pte),1)
++# define pte_accessible(mm, pte)        ((void)(pte), 1)
+ #endif
+ 
+ #ifndef flush_tlb_fix_spurious_fault
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -435,6 +435,14 @@ struct mm_struct {
+          */
+         int first_nid;
+ #endif
++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
++        /*
++         * An operation with batched TLB flushing is going on. Anything that
++         * can move process memory needs to flush the TLB when moving a
++         * PROT_NONE or PROT_NUMA mapped page.
++         */
++        bool tlb_flush_pending;
++#endif
+         struct uprobes_state uprobes_state;
+ };
+ 
+@@ -455,4 +463,40 @@ static inline cpumask_t *mm_cpumask(stru
+         return mm->cpu_vm_mask_var;
+ }
+ 
++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
++/*
++ * Memory barriers to keep this state in sync are graciously provided by
++ * the page table locks, outside of which no page table modifications happen.
++ * The barriers below prevent the compiler from re-ordering the instructions
++ * around the memory barriers that are already present in the code.
++ */
++static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
++{
++        barrier();
++        return mm->tlb_flush_pending;
++}
++static inline void set_tlb_flush_pending(struct mm_struct *mm)
++{
++        mm->tlb_flush_pending = true;
++        barrier();
++}
++/* Clearing is done after a TLB flush, which also provides a barrier. */
++static inline void clear_tlb_flush_pending(struct mm_struct *mm)
++{
++        barrier();
++        mm->tlb_flush_pending = false;
++}
++#else
++static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
++{
++        return false;
++}
++static inline void set_tlb_flush_pending(struct mm_struct *mm)
++{
++}
++static inline void clear_tlb_flush_pending(struct mm_struct *mm)
++{
++}
++#endif
++
+ #endif /* _LINUX_MM_TYPES_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct
+         spin_lock_init(&mm->page_table_lock);
+         mm_init_aio(mm);
+         mm_init_owner(mm, p);
++        clear_tlb_flush_pending(mm);
+ 
+         if (likely(!mm_alloc_pgd(mm))) {
+                 mm->def_flags = 0;
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1360,6 +1360,13 @@ int do_huge_pmd_numa_page(struct mm_stru
+         }
+ 
+         /*
++         * The page_table_lock above provides a memory barrier
++         * with change_protection_range.
++         */
++        if (mm_tlb_flush_pending(mm))
++                flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
++
++        /*
+          * Migrate the THP to the requested node, returns with page unlocked
+          * and pmd_numa cleared.
+          */
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -216,6 +216,7 @@ static unsigned long change_protection_r
+         BUG_ON(addr >= end);
+         pgd = pgd_offset(mm, addr);
+         flush_cache_range(vma, addr, end);
++        set_tlb_flush_pending(mm);
+         do {
+                 next = pgd_addr_end(addr, end);
+                 if (pgd_none_or_clear_bad(pgd))
+@@ -227,6 +228,7 @@ static unsigned long change_protection_r
+         /* Only flush the TLB if we actually modified any entries: */
+         if (pages)
+                 flush_tlb_range(vma, start, end);
++        clear_tlb_flush_pending(mm);
+ 
+         return pages;
+ }
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_are
+ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
+                        pte_t *ptep)
+ {
++        struct mm_struct *mm = (vma)->vm_mm;
+         pte_t pte;
+-        pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
+-        if (pte_accessible(pte))
++        pte = ptep_get_and_clear(mm, address, ptep);
++        if (pte_accessible(mm, pte))
+                 flush_tlb_page(vma, address);
+         return pte;
+ }
diff --git a/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
new file mode 100644
index 00000000000..01cc24cfff0
--- /dev/null
+++ b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
@@ -0,0 +1,43 @@
+From mgorman@suse.de Tue Jan 7 10:38:11 2014
+From: Mel Gorman
+Date: Tue, 7 Jan 2014 14:00:47 +0000
+Subject: mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman, stable@vger.kernel.org
+Message-ID: <1389103248-17617-13-git-send-email-mgorman@suse.de>
+
+From: Mel Gorman
+
+commit af2c1401e6f9177483be4fad876d0073669df9df upstream.
+
+According to documentation on barriers, stores issued before a LOCK can
+complete after the lock implying that it's possible tlb_flush_pending
+can be visible after a page table update. As per revised documentation,
+this patch adds a smp_mb__before_spinlock to guarantee the correct
+ordering.
+
+Signed-off-by: Mel Gorman
+Acked-by: Paul E. McKenney
+Reviewed-by: Rik van Riel
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+---
+ include/linux/mm_types.h |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -478,7 +478,12 @@ static inline bool mm_tlb_flush_pending(
+ static inline void set_tlb_flush_pending(struct mm_struct *mm)
+ {
+         mm->tlb_flush_pending = true;
+-        barrier();
++
++        /*
++         * Guarantee that the tlb_flush_pending store does not leak into the
++         * critical section updating the page tables
++         */
++        smp_mb__before_spinlock();
+ }
+ /* Clearing is done after a TLB flush, which also provides a barrier. */
+ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
diff --git a/queue-3.12/series b/queue-3.12/series
index da5613e4753..7e6a1a7663a 100644
--- a/queue-3.12/series
+++ b/queue-3.12/series
@@ -116,6 +116,8 @@ mm-numa-avoid-unnecessary-work-on-the-failure-path.patch
 sched-numa-skip-inaccessible-vmas.patch
 mm-numa-clear-numa-hinting-information-on-mprotect.patch
 mm-numa-avoid-unnecessary-disruption-of-numa-hinting-during-migration.patch
+mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
+mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
 mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch
 mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch
 mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch
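
Editorial aside, not part of the queue above: the two patches cooperate through a single per-mm flag, and the intended ordering is easy to lose in the diff noise. The sketch below is a minimal user-space model of that handshake, assuming a simplified single-flag view of the mm; fake_mm, fake_flush_tlb(), change_protection() and migrate_page() are hypothetical stand-ins for the kernel code, and C11 seq_cst atomics stand in for barrier()/smp_mb__before_spinlock() plus the page table lock.

/*
 * Editorial sketch, not part of the stable queue: a user-space model of the
 * tlb_flush_pending handshake added by the two patches above.  All names here
 * (fake_mm, fake_flush_tlb, change_protection, migrate_page) are hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_mm {
        atomic_bool tlb_flush_pending;          /* models mm->tlb_flush_pending */
};

static void fake_flush_tlb(const char *who)
{
        /* Stand-in for flush_tlb_range(): throw away stale translations. */
        printf("%s: TLB flushed\n", who);
}

/* change_protection_range() side: flag first, update PTEs, flush, then clear. */
static void change_protection(struct fake_mm *mm)
{
        /* The pending store must be visible before any page table update. */
        atomic_store_explicit(&mm->tlb_flush_pending, true, memory_order_seq_cst);

        /* ... make the PTEs PROT_NONE/NUMA here ... */

        fake_flush_tlb("change_protection");

        /* Clear only after the flush has completed. */
        atomic_store_explicit(&mm->tlb_flush_pending, false, memory_order_seq_cst);
}

/* Migration/compaction side: if a flush is still pending, flush before moving. */
static void migrate_page(struct fake_mm *mm)
{
        if (atomic_load_explicit(&mm->tlb_flush_pending, memory_order_seq_cst))
                fake_flush_tlb("migration");

        /* ... now it is safe to copy the page and repoint the PTE ... */
}

int main(void)
{
        struct fake_mm mm;

        atomic_init(&mm.tlb_flush_pending, false);
        change_protection(&mm);
        migrate_page(&mm);
        return 0;
}

The ordering mirrors the second patch: the pending store becomes visible before the page tables change, and the flag is cleared only after the flush, so a racing migration either sees the flag and flushes on its own or finds the stale translations already gone.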