3.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Jan 2014 17:47:30 +0000 (09:47 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 7 Jan 2014 17:47:30 +0000 (09:47 -0800)
added patches:
mm-numa-call-mmu-notifiers-on-thp-migration.patch
mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch

queue-3.12/mm-clear-pmd_numa-before-invalidating.patch [deleted file]
queue-3.12/mm-numa-call-mmu-notifiers-on-thp-migration.patch [new file with mode: 0644]
queue-3.12/mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch [new file with mode: 0644]
queue-3.12/series

diff --git a/queue-3.12/mm-clear-pmd_numa-before-invalidating.patch b/queue-3.12/mm-clear-pmd_numa-before-invalidating.patch
deleted file mode 100644 (file)
index 0b6bd84..0000000
--- a/queue-3.12/mm-clear-pmd_numa-before-invalidating.patch
+++ /dev/null
@@ -1,46 +0,0 @@
-From 67f87463d3a3362424efcbe8b40e4772fd34fc61 Mon Sep 17 00:00:00 2001
-From: Mel Gorman <mgorman@suse.de>
-Date: Wed, 18 Dec 2013 17:08:34 -0800
-Subject: mm: clear pmd_numa before invalidating
-
-From: Mel Gorman <mgorman@suse.de>
-
-commit 67f87463d3a3362424efcbe8b40e4772fd34fc61 upstream.
-
-On x86, PMD entries are similar to _PAGE_PROTNONE protection and are
-handled as NUMA hinting faults.  The following two page table protection
-bits are what defines them
-
-       _PAGE_NUMA:set  _PAGE_PRESENT:clear
-
-A PMD is considered present if any of the _PAGE_PRESENT, _PAGE_PROTNONE,
-_PAGE_PSE or _PAGE_NUMA bits are set.  If pmdp_invalidate encounters a
-pmd_numa, it clears the present bit leaving _PAGE_NUMA which will be
-considered not present by the CPU but present by pmd_present.  The
-existing caller of pmdp_invalidate should handle it but it's an
-inconsistent state for a PMD.  This patch keeps the state consistent
-when calling pmdp_invalidate.
-
-Signed-off-by: Mel Gorman <mgorman@suse.de>
-Reviewed-by: Rik van Riel <riel@redhat.com>
-Cc: Alex Thorlton <athorlton@sgi.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- mm/pgtable-generic.c |    3 +++
- 1 file changed, 3 insertions(+)
-
---- a/mm/pgtable-generic.c
-+++ b/mm/pgtable-generic.c
-@@ -191,6 +191,9 @@ pgtable_t pgtable_trans_huge_withdraw(st
- void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-                    pmd_t *pmdp)
- {
-+      pmd_t entry = *pmdp;
-+      if (pmd_numa(entry))
-+              entry = pmd_mknonnuma(entry);
-       set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));
-       flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
- }
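
The changelog of the dropped patch above describes a transient inconsistency: on x86 a NUMA-hinting PMD is encoded as _PAGE_NUMA set with _PAGE_PRESENT clear, so pmd_mknotpresent() on such an entry leaves a PMD the hardware treats as not present while pmd_numa() (and pmd_present(), whose mask includes _PAGE_NUMA) still report a live hinting entry. Below is a minimal standalone sketch of that state; the bit positions, the pmd_t typedef and the helper bodies are simplified stand-ins for illustration, not the kernel's arch/x86 definitions. The two patches added below replace this one in the series.

/* pmd_numa_sketch.c - model of the transient state described in the
 * removed changelog above.  Illustrative bits only, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT (1u << 0)
#define _PAGE_PSE     (1u << 7)   /* transparent huge page */
#define _PAGE_NUMA    (1u << 8)   /* NUMA hint: set while _PAGE_PRESENT is clear */

typedef uint32_t pmd_t;

/* A NUMA-hinting entry has the hint bit set and the present bit clear. */
static int pmd_numa(pmd_t pmd)
{
	return (pmd & (_PAGE_NUMA | _PAGE_PRESENT)) == _PAGE_NUMA;
}

static int cpu_present(pmd_t pmd)        { return !!(pmd & _PAGE_PRESENT); }
static pmd_t pmd_mknotpresent(pmd_t pmd) { return pmd & ~_PAGE_PRESENT; }
static pmd_t pmd_mknonnuma(pmd_t pmd)    { return (pmd & ~_PAGE_NUMA) | _PAGE_PRESENT; }

int main(void)
{
	pmd_t pmd = _PAGE_NUMA | _PAGE_PSE;   /* THP carrying a NUMA hint */

	/* Old pmdp_invalidate(): clear present only, the hint survives. */
	pmd_t naive = pmd_mknotpresent(pmd);
	/* The removed patch: clear the hint first, then invalidate. */
	pmd_t fixed = pmd_mknotpresent(pmd_mknonnuma(pmd));

	printf("naive: cpu_present=%d pmd_numa=%d\n", cpu_present(naive), pmd_numa(naive));
	printf("fixed: cpu_present=%d pmd_numa=%d\n", cpu_present(fixed), pmd_numa(fixed));
	return 0;
}

Compiled and run, the first line shows the inconsistent view (not present to the CPU, still a hinting entry to the software checks); the second shows the consistent one.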
diff --git a/queue-3.12/mm-numa-call-mmu-notifiers-on-thp-migration.patch b/queue-3.12/mm-numa-call-mmu-notifiers-on-thp-migration.patch
new file mode 100644 (file)
index 0000000..708630c
--- /dev/null
+++ b/queue-3.12/mm-numa-call-mmu-notifiers-on-thp-migration.patch
@@ -0,0 +1,103 @@
+From mgorman@suse.de  Tue Jan  7 09:45:19 2014
+From: Mel Gorman <mgorman@suse.de>
+Date: Tue,  7 Jan 2014 14:00:37 +0000
+Subject: mm: numa: call MMU notifiers on THP migration
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman <mgorman@suse.de>, stable@vger.kernel.org
+Message-ID: <1389103248-17617-3-git-send-email-mgorman@suse.de>
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit f714f4f20e59ea6eea264a86b9a51fd51b88fc54 upstream.
+
+MMU notifiers must be called on THP page migration or secondary MMUs
+will get very confused.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Cc: Alex Thorlton <athorlton@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c |   22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -36,6 +36,7 @@
+ #include <linux/hugetlb_cgroup.h>
+ #include <linux/gfp.h>
+ #include <linux/balloon_compaction.h>
++#include <linux/mmu_notifier.h>
+ #include <asm/tlbflush.h>
+@@ -1655,12 +1656,13 @@ int migrate_misplaced_transhuge_page(str
+                               unsigned long address,
+                               struct page *page, int node)
+ {
+-      unsigned long haddr = address & HPAGE_PMD_MASK;
+       pg_data_t *pgdat = NODE_DATA(node);
+       int isolated = 0;
+       struct page *new_page = NULL;
+       struct mem_cgroup *memcg = NULL;
+       int page_lru = page_is_file_cache(page);
++      unsigned long mmun_start = address & HPAGE_PMD_MASK;
++      unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+       pmd_t orig_entry;
+       /*
+@@ -1702,10 +1704,12 @@ int migrate_misplaced_transhuge_page(str
+       WARN_ON(PageLRU(new_page));
+       /* Recheck the target PMD */
++      mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+ fail_putback:
+               spin_unlock(&mm->page_table_lock);
++              mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+               /* Reverse changes made by migrate_page_copy() */
+               if (TestClearPageActive(new_page))
+@@ -1746,15 +1750,16 @@ fail_putback:
+        * The SetPageUptodate on the new page and page_add_new_anon_rmap
+        * guarantee the copy is visible before the pagetable update.
+        */
+-      flush_cache_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+-      page_add_new_anon_rmap(new_page, vma, haddr);
+-      pmdp_clear_flush(vma, haddr, pmd);
+-      set_pmd_at(mm, haddr, pmd, entry);
++      flush_cache_range(vma, mmun_start, mmun_end);
++      page_add_new_anon_rmap(new_page, vma, mmun_start);
++      pmdp_clear_flush(vma, mmun_start, pmd);
++      set_pmd_at(mm, mmun_start, pmd, entry);
++      flush_tlb_range(vma, mmun_start, mmun_end);
+       update_mmu_cache_pmd(vma, address, &entry);
+       if (page_count(page) != 2) {
+-              set_pmd_at(mm, haddr, pmd, orig_entry);
+-              flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
++              set_pmd_at(mm, mmun_start, pmd, orig_entry);
++              flush_tlb_range(vma, mmun_start, mmun_end);
+               update_mmu_cache_pmd(vma, address, &entry);
+               page_remove_rmap(new_page);
+               goto fail_putback;
+@@ -1769,6 +1774,7 @@ fail_putback:
+        */
+       mem_cgroup_end_migration(memcg, page, new_page, true);
+       spin_unlock(&mm->page_table_lock);
++      mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+       unlock_page(new_page);
+       unlock_page(page);
+@@ -1789,7 +1795,7 @@ out_dropref:
+       spin_lock(&mm->page_table_lock);
+       if (pmd_same(*pmd, entry)) {
+               entry = pmd_mknonnuma(entry);
+-              set_pmd_at(mm, haddr, pmd, entry);
++              set_pmd_at(mm, mmun_start, pmd, entry);
+               update_mmu_cache_pmd(vma, address, &entry);
+       }
+       spin_unlock(&mm->page_table_lock);
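
The hunks above bracket the PMD switch in migrate_misplaced_transhuge_page() with mmu_notifier_invalidate_range_start()/end(), so that a secondary MMU (a KVM guest, for example) drops its cached translation of the old huge page before the new mapping becomes visible. Below is a toy user-space model of why that ordering matters; every name in it is invented for illustration and none of it is kernel code.

/* notifier_sketch.c - toy model of the invalidate_range_start/end bracketing. */
#include <stdio.h>

#define NPAGES 4

static int primary_map[NPAGES];      /* the process page tables */
static int secondary_tlb[NPAGES];    /* a secondary MMU's cached translations */
static int secondary_valid[NPAGES];

/* The secondary MMU must drop cached translations for the range *before*
 * the primary mapping is rewritten; that is what the notifier guarantees. */
static void invalidate_range_start(int start, int end)
{
	for (int i = start; i < end; i++)
		secondary_valid[i] = 0;
}

static void invalidate_range_end(int start, int end)
{
	(void)start; (void)end;      /* the secondary re-faults lazily afterwards */
}

static int secondary_read(int page)
{
	if (!secondary_valid[page]) {            /* "fault": reload from primary */
		secondary_tlb[page] = primary_map[page];
		secondary_valid[page] = 1;
	}
	return secondary_tlb[page];
}

int main(void)
{
	for (int i = 0; i < NPAGES; i++) {
		primary_map[i] = i;
		(void)secondary_read(i);         /* warm the secondary's cache */
	}

	/* Page 0: rewrite the mapping without telling the secondary MMU. */
	primary_map[0] = 100;

	/* Page 1: the same rewrite, bracketed by the notifier calls. */
	invalidate_range_start(1, 2);
	primary_map[1] = 101;
	invalidate_range_end(1, 2);

	printf("page 0: primary=%d secondary=%d (stale)\n",
	       primary_map[0], secondary_read(0));
	printf("page 1: primary=%d secondary=%d (coherent)\n",
	       primary_map[1], secondary_read(1));
	return 0;
}

Without the bracketing the secondary keeps serving the old translation; with it, the stale entry is gone before the new PMD is written, which is exactly the property the notifier calls restore for THP migration.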
diff --git a/queue-3.12/mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch b/queue-3.12/mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch
new file mode 100644 (file)
index 0000000..a6723c7
--- /dev/null
+++ b/queue-3.12/mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch
@@ -0,0 +1,193 @@
+From mgorman@suse.de  Tue Jan  7 09:44:16 2014
+From: Mel Gorman <mgorman@suse.de>
+Date: Tue,  7 Jan 2014 14:00:36 +0000
+Subject: mm: numa: serialise parallel get_user_page against THP migration
+To: gregkh@linuxfoundation.org
+Cc: athorlton@sgi.com, riel@redhat.com, chegu_vinod@hp.com, Mel Gorman <mgorman@suse.de>, stable@vger.kernel.org
+Message-ID: <1389103248-17617-2-git-send-email-mgorman@suse.de>
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 2b4847e73004c10ae6666c2e27b5c5430aed8698 upstream.
+
+Base pages are unmapped and flushed from cache and TLB during normal
+page migration and replaced with a migration entry that causes any
+parallel NUMA hinting fault or gup to block until migration completes.
+
+THP does not unmap pages due to a lack of support for migration entries
+at a PMD level.  This allows races with get_user_pages and
+get_user_pages_fast which commit 3f926ab945b6 ("mm: Close races between
+THP migration and PMD numa clearing") made worse by introducing a
+pmd_clear_flush().
+
+This patch forces get_user_page (fast and normal) on a pmd_numa page to
+go through the slow get_user_page path where it will serialise against
+THP migration and properly account for the NUMA hinting fault.  On the
+migration side the page table lock is taken for each PTE update.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Cc: Alex Thorlton <athorlton@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/gup.c |   13 +++++++++++++
+ mm/huge_memory.c  |   24 ++++++++++++++++--------
+ mm/migrate.c      |   38 +++++++++++++++++++++++++++++++-------
+ 3 files changed, 60 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/mm/gup.c
++++ b/arch/x86/mm/gup.c
+@@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t
+               pte_t pte = gup_get_pte(ptep);
+               struct page *page;
++              /* Similar to the PMD case, NUMA hinting must take slow path */
++              if (pte_numa(pte)) {
++                      pte_unmap(ptep);
++                      return 0;
++              }
++
+               if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+                       pte_unmap(ptep);
+                       return 0;
+@@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsi
+               if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+                       return 0;
+               if (unlikely(pmd_large(pmd))) {
++                      /*
++                       * NUMA hinting faults need to be handled in the GUP
++                       * slowpath for accounting purposes and so that they
++                       * can be serialised against THP migration.
++                       */
++                      if (pmd_numa(pmd))
++                              return 0;
+                       if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
+                               return 0;
+               } else {
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1240,6 +1240,10 @@ struct page *follow_trans_huge_pmd(struc
+       if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
+               return ERR_PTR(-EFAULT);
++      /* Full NUMA hinting faults to serialise migration in fault paths */
++      if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
++              goto out;
++
+       page = pmd_page(*pmd);
+       VM_BUG_ON(!PageHead(page));
+       if (flags & FOLL_TOUCH) {
+@@ -1306,23 +1310,27 @@ int do_huge_pmd_numa_page(struct mm_stru
+               /* If the page was locked, there are no parallel migrations */
+               if (page_locked)
+                       goto clear_pmdnuma;
++      }
+-              /*
+-               * Otherwise wait for potential migrations and retry. We do
+-               * relock and check_same as the page may no longer be mapped.
+-               * As the fault is being retried, do not account for it.
+-               */
++      /*
++       * If there are potential migrations, wait for completion and retry. We
++       * do not relock and check_same as the page may no longer be mapped.
++       * Furthermore, even if the page is currently misplaced, there is no
++       * guarantee it is still misplaced after the migration completes.
++       */
++      if (!page_locked) {
+               spin_unlock(&mm->page_table_lock);
+               wait_on_page_locked(page);
+               page_nid = -1;
+               goto out;
+       }
+-      /* Page is misplaced, serialise migrations and parallel THP splits */
++      /*
++       * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
++       * to serialise splits
++       */
+       get_page(page);
+       spin_unlock(&mm->page_table_lock);
+-      if (!page_locked)
+-              lock_page(page);
+       anon_vma = page_lock_anon_vma_read(page);
+       /* Confirm the PTE did not while locked */
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1661,6 +1661,7 @@ int migrate_misplaced_transhuge_page(str
+       struct page *new_page = NULL;
+       struct mem_cgroup *memcg = NULL;
+       int page_lru = page_is_file_cache(page);
++      pmd_t orig_entry;
+       /*
+        * Don't migrate pages that are mapped in multiple processes.
+@@ -1702,7 +1703,8 @@ int migrate_misplaced_transhuge_page(str
+       /* Recheck the target PMD */
+       spin_lock(&mm->page_table_lock);
+-      if (unlikely(!pmd_same(*pmd, entry))) {
++      if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
++fail_putback:
+               spin_unlock(&mm->page_table_lock);
+               /* Reverse changes made by migrate_page_copy() */
+@@ -1732,16 +1734,34 @@ int migrate_misplaced_transhuge_page(str
+        */
+       mem_cgroup_prepare_migration(page, new_page, &memcg);
++      orig_entry = *pmd;
+       entry = mk_pmd(new_page, vma->vm_page_prot);
+-      entry = pmd_mknonnuma(entry);
+-      entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+       entry = pmd_mkhuge(entry);
++      entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
++      /*
++       * Clear the old entry under pagetable lock and establish the new PTE.
++       * Any parallel GUP will either observe the old page blocking on the
++       * page lock, block on the page table lock or observe the new page.
++       * The SetPageUptodate on the new page and page_add_new_anon_rmap
++       * guarantee the copy is visible before the pagetable update.
++       */
++      flush_cache_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
++      page_add_new_anon_rmap(new_page, vma, haddr);
+       pmdp_clear_flush(vma, haddr, pmd);
+       set_pmd_at(mm, haddr, pmd, entry);
+-      page_add_new_anon_rmap(new_page, vma, haddr);
+       update_mmu_cache_pmd(vma, address, &entry);
++
++      if (page_count(page) != 2) {
++              set_pmd_at(mm, haddr, pmd, orig_entry);
++              flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
++              update_mmu_cache_pmd(vma, address, &entry);
++              page_remove_rmap(new_page);
++              goto fail_putback;
++      }
++
+       page_remove_rmap(page);
++
+       /*
+        * Finish the charge transaction under the page table lock to
+        * prevent split_huge_page() from dividing up the charge
+@@ -1766,9 +1786,13 @@ int migrate_misplaced_transhuge_page(str
+ out_fail:
+       count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+ out_dropref:
+-      entry = pmd_mknonnuma(entry);
+-      set_pmd_at(mm, haddr, pmd, entry);
+-      update_mmu_cache_pmd(vma, address, &entry);
++      spin_lock(&mm->page_table_lock);
++      if (pmd_same(*pmd, entry)) {
++              entry = pmd_mknonnuma(entry);
++              set_pmd_at(mm, haddr, pmd, entry);
++              update_mmu_cache_pmd(vma, address, &entry);
++      }
++      spin_unlock(&mm->page_table_lock);
+       unlock_page(page);
+       put_page(page);
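
The hunks above make both GUP flavours treat a NUMA-hinting entry as a fast-path miss: gup_pte_range()/gup_pmd_range() return 0, get_user_pages_fast() then falls back to the regular get_user_pages() path, and that slow path goes through the fault and locking paths where it waits for any in-flight THP migration and accounts the hinting fault. Below is a toy sketch of that fallback structure; the type, function names and the single mutex are invented stand-ins for the real page and page-table locking, not the kernel's GUP implementation.

/* gup_fallback_sketch.c - toy model of the fast-GUP bail-out added above. */
#include <pthread.h>
#include <stdio.h>

struct toy_pmd {
	pthread_mutex_t lock;   /* stands in for the page / page-table locks */
	int numa_hint;          /* "pmd_numa()": a hinting fault, and possibly a
	                           THP migration, is pending for this mapping */
	int frame;              /* where the huge page currently lives */
};

/* Fast path: lockless, so it must refuse anything a parallel THP migration
 * could be rewriting.  Returning 0 mirrors gup_pmd_range() failing. */
static int gup_fast(struct toy_pmd *pmd, int *frame)
{
	if (pmd->numa_hint)
		return 0;
	*frame = pmd->frame;
	return 1;
}

/* Slow path: takes the lock, so it waits for any in-flight migration and
 * handles (accounts) the NUMA hinting fault before returning the page. */
static int gup_slow(struct toy_pmd *pmd, int *frame)
{
	pthread_mutex_lock(&pmd->lock);
	pmd->numa_hint = 0;
	*frame = pmd->frame;
	pthread_mutex_unlock(&pmd->lock);
	return 1;
}

static int get_user_page(struct toy_pmd *pmd, int *frame)
{
	if (gup_fast(pmd, frame))
		return 1;
	return gup_slow(pmd, frame);    /* serialised against the migrator */
}

int main(void)
{
	static struct toy_pmd pmd = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.numa_hint = 1,         /* hint set: the fast path must bail */
		.frame = 7,
	};
	int frame;

	get_user_page(&pmd, &frame);
	printf("pinned frame %d via the slow path\n", frame);
	return 0;
}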
diff --git a/queue-3.12/series b/queue-3.12/series
index 0b9f3b1922b60f0ee8e5dd2eaf71a3e36316d560..d7408c6dcf39d4adb430e53d0a13b4ae11b3a747 100644 (file)
--- a/queue-3.12/series
+++ b/queue-3.12/series
@@ -107,7 +107,8 @@ ext2-fix-oops-in-ext2_get_block-called-from-ext2_quota_write.patch
 acpi-pci-hotplug-avoid-warning-when-_adr-not-present.patch
 intel_pstate-fail-initialization-if-p-state-information-is-missing.patch
 revert-of-address-handle-address-cells-2-specially.patch
-mm-clear-pmd_numa-before-invalidating.patch
+mm-numa-serialise-parallel-get_user_page-against-thp-migration.patch
+mm-numa-call-mmu-notifiers-on-thp-migration.patch
 mm-numa-ensure-anon_vma-is-locked-to-prevent-parallel-thp-splits.patch
 mm-numa-avoid-unnecessary-work-on-the-failure-path.patch
 mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch