3.12-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)
diff --git a/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch

new file mode 100644 (file)

index 0000000..42cbacf
--- /dev/null
+++ b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
@@ -0,0 +1,244 @@
+From mgorman@suse.de  Tue Jan  7 10:24:43 2014
+From: Rik van Riel <riel@redhat.com>
+Date: Tue,  7 Jan 2014 14:00:46 +0000
+Subject: mm: fix TLB flush race between migration, and change_protection_range
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman <mgorman@suse.de>, stable@vger.kernel.org
+Message-ID: <1389103248-17617-12-git-send-email-mgorman@suse.de>
+
+From: Rik van Riel <riel@redhat.com>
+
+commit 20841405940e7be0617612d521e206e4b6b325db upstream.
+
+There are a few subtle races, between change_protection_range (used by
+mprotect and change_prot_numa) on one side, and NUMA page migration and
+compaction on the other side.
+
+The basic race is that there is a time window between when the PTE gets
+made non-present (PROT_NONE or NUMA), and the TLB is flushed.
+
+During that time, a CPU may continue writing to the page.
+
+This is fine most of the time; however, compaction or the NUMA migration
+code may come in, and migrate the page away.
+
+When that happens, the CPU may continue writing, through the cached
+translation, to what is no longer the current memory location of the
+process.
+
+This only affects x86, which has a somewhat optimistic pte_accessible.
+All other architectures appear to be safe, and will either always flush,
+or flush whenever there is a valid mapping, even with no permissions
+(SPARC).
+
+The basic race looks like this:
+
+CPU A                  CPU B                   CPU C
+
+                                               load TLB entry
+make entry PTE/PMD_NUMA
+                       fault on entry
+                                               read/write old page
+                       start migrating page
+                       change PTE/PMD to new page
+                                               read/write old page [*]
+flush TLB
+                                               reload TLB from new entry
+                                               read/write new page
+                                               lose data
+
+[*] the old page may belong to a new user at this point!
+
+The obvious fix is to flush remote TLB entries, by making sure that
+pte_accessible is aware of the fact that PROT_NONE and PROT_NUMA memory may
+still be accessible if there is a TLB flush pending for the mm.
+
+This should fix both NUMA migration and compaction.
+
+[mgorman@suse.de: fix build]
+Signed-off-by: Rik van Riel <riel@redhat.com>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Cc: Alex Thorlton <athorlton@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/pgtable_64.h |    4 +--
+ arch/x86/include/asm/pgtable.h      |   11 +++++++--
+ include/asm-generic/pgtable.h       |    2 -
+ include/linux/mm_types.h            |   44 ++++++++++++++++++++++++++++++++++++
+ kernel/fork.c                       |    1 
+ mm/huge_memory.c                    |    7 +++++
+ mm/mprotect.c                       |    2 +
+ mm/pgtable-generic.c                |    5 ++--
+ 8 files changed, 69 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -616,7 +616,7 @@ static inline unsigned long pte_present(
+ }
+ 
+ #define pte_accessible pte_accessible
+-static inline unsigned long pte_accessible(pte_t a)
++static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
+ {
+       return pte_val(a) & _PAGE_VALID;
+ }
+@@ -806,7 +806,7 @@ static inline void __set_pte_at(struct m
+        * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+        *             and SUN4V pte layout, so this inline test is fine.
+        */
+-      if (likely(mm != &init_mm) && pte_accessible(orig))
++      if (likely(mm != &init_mm) && pte_accessible(mm, orig))
+               tlb_batch_add(mm, addr, ptep, orig, fullmm);
+ }
+ 
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -452,9 +452,16 @@ static inline int pte_present(pte_t a)
+ }
+ 
+ #define pte_accessible pte_accessible
+-static inline int pte_accessible(pte_t a)
++static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
+ {
+-      return pte_flags(a) & _PAGE_PRESENT;
++      if (pte_flags(a) & _PAGE_PRESENT)
++              return true;
++
++      if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
++                      mm_tlb_flush_pending(mm))
++              return true;
++
++      return false;
+ }
+ 
+ static inline int pte_hidden(pte_t pte)
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a,
+ #endif
+ 
+ #ifndef pte_accessible
+-# define pte_accessible(pte)          ((void)(pte),1)
++# define pte_accessible(mm, pte)      ((void)(pte), 1)
+ #endif
+ 
+ #ifndef flush_tlb_fix_spurious_fault
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -435,6 +435,14 @@ struct mm_struct {
+        */
+       int first_nid;
+ #endif
++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
++      /*
++       * An operation with batched TLB flushing is going on. Anything that
++       * can move process memory needs to flush the TLB when moving a
++       * PROT_NONE or PROT_NUMA mapped page.
++       */
++      bool tlb_flush_pending;
++#endif
+       struct uprobes_state uprobes_state;
+ };
+ 
+@@ -455,4 +463,40 @@ static inline cpumask_t *mm_cpumask(stru
+       return mm->cpu_vm_mask_var;
+ }
+ 
++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
++/*
++ * Memory barriers to keep this state in sync are graciously provided by
++ * the page table locks, outside of which no page table modifications happen.
++ * The barriers below prevent the compiler from re-ordering the instructions
++ * around the memory barriers that are already present in the code.
++ */
++static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
++{
++      barrier();
++      return mm->tlb_flush_pending;
++}
++static inline void set_tlb_flush_pending(struct mm_struct *mm)
++{
++      mm->tlb_flush_pending = true;
++      barrier();
++}
++/* Clearing is done after a TLB flush, which also provides a barrier. */
++static inline void clear_tlb_flush_pending(struct mm_struct *mm)
++{
++      barrier();
++      mm->tlb_flush_pending = false;
++}
++#else
++static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
++{
++      return false;
++}
++static inline void set_tlb_flush_pending(struct mm_struct *mm)
++{
++}
++static inline void clear_tlb_flush_pending(struct mm_struct *mm)
++{
++}
++#endif
++
+ #endif /* _LINUX_MM_TYPES_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct
+       spin_lock_init(&mm->page_table_lock);
+       mm_init_aio(mm);
+       mm_init_owner(mm, p);
++      clear_tlb_flush_pending(mm);
+ 
+       if (likely(!mm_alloc_pgd(mm))) {
+               mm->def_flags = 0;
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1360,6 +1360,13 @@ int do_huge_pmd_numa_page(struct mm_stru
+       }
+ 
+       /*
++       * The page_table_lock above provides a memory barrier
++       * with change_protection_range.
++       */
++      if (mm_tlb_flush_pending(mm))
++              flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
++
++      /*
+        * Migrate the THP to the requested node, returns with page unlocked
+        * and pmd_numa cleared.
+        */
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -216,6 +216,7 @@ static unsigned long change_protection_r
+       BUG_ON(addr >= end);
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
++      set_tlb_flush_pending(mm);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+@@ -227,6 +228,7 @@ static unsigned long change_protection_r
+       /* Only flush the TLB if we actually modified any entries: */
+       if (pages)
+               flush_tlb_range(vma, start, end);
++      clear_tlb_flush_pending(mm);
+ 
+       return pages;
+ }
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_are
+ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
+                      pte_t *ptep)
+ {
++      struct mm_struct *mm = (vma)->vm_mm;
+       pte_t pte;
+-      pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
+-      if (pte_accessible(pte))
++      pte = ptep_get_and_clear(mm, address, ptep);
++      if (pte_accessible(mm, pte))
+               flush_tlb_page(vma, address);
+       return pte;
+ }
diff --git a/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch

new file mode 100644 (file)

index 0000000..01cc24c
--- /dev/null
+++ b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
@@ -0,0 +1,43 @@
+From mgorman@suse.de  Tue Jan  7 10:38:11 2014
+From: Mel Gorman <mgorman@suse.de>
+Date: Tue,  7 Jan 2014 14:00:47 +0000
+Subject: mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman <mgorman@suse.de>, stable@vger.kernel.org
+Message-ID: <1389103248-17617-13-git-send-email-mgorman@suse.de>
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit af2c1401e6f9177483be4fad876d0073669df9df upstream.
+
+According to documentation on barriers, stores issued before a LOCK can
+complete after the lock, implying that it's possible tlb_flush_pending
+can be visible after a page table update.  As per revised documentation,
+this patch adds a smp_mb__before_spinlock to guarantee the correct
+ordering.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+---
+ include/linux/mm_types.h |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -478,7 +478,12 @@ static inline bool mm_tlb_flush_pending(
+ static inline void set_tlb_flush_pending(struct mm_struct *mm)
+ {
+       mm->tlb_flush_pending = true;
+-      barrier();
++
++      /*
++       * Guarantee that the tlb_flush_pending store does not leak into the
++       * critical section updating the page tables
++       */
++      smp_mb__before_spinlock();
+ }
+ /* Clearing is done after a TLB flush, which also provides a barrier. */
+ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
diff --git a/queue-3.12/series b/queue-3.12/series

index da5613e4753cb525d5ce74aeb914ec43e0b46457..7e6a1a7663a963c50647180cb8d748d3c68ae4f2 100644 (file)
--- a/queue-3.12/series
+++ b/queue-3.12/series
@@ -116,6 +116,8 @@ mm-numa-avoid-unnecessary-work-on-the-failure-path.patch
  sched-numa-skip-inaccessible-vmas.patch
  mm-numa-clear-numa-hinting-information-on-mprotect.patch
  mm-numa-avoid-unnecessary-disruption-of-numa-hinting-during-migration.patch
+mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch
+mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch
  mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch
  mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch
  mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 7 Jan 2014 18:38:35 +0000 (10:38 -0800)
queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch	[new file with mode: 0644]	patch \| blob
queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch	[new file with mode: 0644]	patch \| blob
queue-3.12/series		patch \| blob \| blame \| history