From 78e1b45d12d9ceedb576f81e3ca99e8aa13e7e14 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 8 Nov 2013 21:55:19 -0800
Subject: [PATCH] 3.10-stable patches

added patches:
	mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch
	mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch
	mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch
	mm-numa-sanitize-task_numa_fault-callsites.patch
	mm-prevent-parallel-splits-during-thp-migration.patch
	mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch
---
 ...uma-hinting-update-as-one-pte-update.patch |  39 +++
 ...-thp-migration-and-pmd-numa-clearing.patch | 173 +++++++++++++
 ...ount-for-a-hinting-fault-if-we-raced.patch |  41 ++++
 ...a-sanitize-task_numa_fault-callsites.patch | 230 ++++++++++++++++++
 ...parallel-splits-during-thp-migration.patch | 122 ++++++++++
 queue-3.10/series                             |   6 +
 ...-complete-during-numa-hinting-faults.patch |  81 ++++++
 7 files changed, 692 insertions(+)
 create mode 100644 queue-3.10/mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch
 create mode 100644 queue-3.10/mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch
 create mode 100644 queue-3.10/mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch
 create mode 100644 queue-3.10/mm-numa-sanitize-task_numa_fault-callsites.patch
 create mode 100644 queue-3.10/mm-prevent-parallel-splits-during-thp-migration.patch
 create mode 100644 queue-3.10/mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch

diff --git a/queue-3.10/mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch b/queue-3.10/mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch
new file mode 100644
index 00000000000..2619c0fdf9b
--- /dev/null
+++ b/queue-3.10/mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch
@@ -0,0 +1,39 @@
+From 0255d491848032f6c601b6410c3b8ebded3a37b1 Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Mon, 7 Oct 2013 11:28:47 +0100
+Subject: mm: Account for a THP NUMA hinting update as one PTE update
+
+From: Mel Gorman
+
+commit 0255d491848032f6c601b6410c3b8ebded3a37b1 upstream.
+
+A THP PMD update is accounted for as 512 pages updated in vmstat. This is
+large difference when estimating the cost of automatic NUMA balancing and
+can be misleading when comparing results that had collapsed versus split
+THP. This patch addresses the accounting issue.
+ +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-10-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mprotect.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -145,7 +145,7 @@ static inline unsigned long change_pmd_r + split_huge_page_pmd(vma, addr, pmd); + else if (change_huge_pmd(vma, pmd, addr, newprot, + prot_numa)) { +- pages += HPAGE_PMD_NR; ++ pages++; + continue; + } + /* fall through */ diff --git a/queue-3.10/mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch b/queue-3.10/mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch new file mode 100644 index 00000000000..6077e011e2c --- /dev/null +++ b/queue-3.10/mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch @@ -0,0 +1,173 @@ +From 3f926ab945b60a5824369d21add7710622a2eac0 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 7 Oct 2013 11:28:46 +0100 +Subject: mm: Close races between THP migration and PMD numa clearing + +From: Mel Gorman + +commit 3f926ab945b60a5824369d21add7710622a2eac0 upstream. + +THP migration uses the page lock to guard against parallel allocations +but there are cases like this still open + + Task A Task B + --------------------- --------------------- + do_huge_pmd_numa_page do_huge_pmd_numa_page + lock_page + mpol_misplaced == -1 + unlock_page + goto clear_pmdnuma + lock_page + mpol_misplaced == 2 + migrate_misplaced_transhuge + pmd = pmd_mknonnuma + set_pmd_at + +During hours of testing, one crashed with weird errors and while I have +no direct evidence, I suspect something like the race above happened. +This patch extends the page lock to being held until the pmd_numa is +cleared to prevent migration starting in parallel while the pmd_numa is +being cleared. It also flushes the old pmd entry and orders pagetable +insertion before rmap insertion. + +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 33 +++++++++++++++------------------ + mm/migrate.c | 19 +++++++++++-------- + 2 files changed, 26 insertions(+), 26 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1314,24 +1314,25 @@ int do_huge_pmd_numa_page(struct mm_stru + target_nid = mpol_misplaced(page, vma, haddr); + if (target_nid == -1) { + /* If the page was locked, there are no parallel migrations */ +- if (page_locked) { +- unlock_page(page); ++ if (page_locked) + goto clear_pmdnuma; +- } + +- /* Otherwise wait for potential migrations and retry fault */ ++ /* ++ * Otherwise wait for potential migrations and retry. We do ++ * relock and check_same as the page may no longer be mapped. ++ * As the fault is being retried, do not account for it. 
++ */ + spin_unlock(&mm->page_table_lock); + wait_on_page_locked(page); ++ page_nid = -1; + goto out; + } + + /* Page is misplaced, serialise migrations and parallel THP splits */ + get_page(page); + spin_unlock(&mm->page_table_lock); +- if (!page_locked) { ++ if (!page_locked) + lock_page(page); +- page_locked = true; +- } + anon_vma = page_lock_anon_vma_read(page); + + /* Confirm the PTE did not while locked */ +@@ -1339,32 +1340,28 @@ int do_huge_pmd_numa_page(struct mm_stru + if (unlikely(!pmd_same(pmd, *pmdp))) { + unlock_page(page); + put_page(page); ++ page_nid = -1; + goto out_unlock; + } + +- /* Migrate the THP to the requested node */ ++ /* ++ * Migrate the THP to the requested node, returns with page unlocked ++ * and pmd_numa cleared. ++ */ + spin_unlock(&mm->page_table_lock); + migrated = migrate_misplaced_transhuge_page(mm, vma, + pmdp, pmd, addr, page, target_nid); + if (migrated) + page_nid = target_nid; +- else +- goto check_same; + + goto out; +- +-check_same: +- spin_lock(&mm->page_table_lock); +- if (unlikely(!pmd_same(pmd, *pmdp))) { +- /* Someone else took our fault */ +- page_nid = -1; +- goto out_unlock; +- } + clear_pmdnuma: ++ BUG_ON(!PageLocked(page)); + pmd = pmd_mknonnuma(pmd); + set_pmd_at(mm, haddr, pmdp, pmd); + VM_BUG_ON(pmd_numa(*pmdp)); + update_mmu_cache_pmd(vma, addr, pmdp); ++ unlock_page(page); + out_unlock: + spin_unlock(&mm->page_table_lock); + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -1710,12 +1710,12 @@ int migrate_misplaced_transhuge_page(str + unlock_page(new_page); + put_page(new_page); /* Free it */ + +- unlock_page(page); ++ /* Retake the callers reference and putback on LRU */ ++ get_page(page); + putback_lru_page(page); +- +- count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); +- isolated = 0; +- goto out; ++ mod_zone_page_state(page_zone(page), ++ NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); ++ goto out_fail; + } + + /* +@@ -1732,9 +1732,9 @@ int migrate_misplaced_transhuge_page(str + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = pmd_mkhuge(entry); + +- page_add_new_anon_rmap(new_page, vma, haddr); +- ++ pmdp_clear_flush(vma, haddr, pmd); + set_pmd_at(mm, haddr, pmd, entry); ++ page_add_new_anon_rmap(new_page, vma, haddr); + update_mmu_cache_pmd(vma, address, &entry); + page_remove_rmap(page); + /* +@@ -1753,7 +1753,6 @@ int migrate_misplaced_transhuge_page(str + count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); + count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); + +-out: + mod_zone_page_state(page_zone(page), + NR_ISOLATED_ANON + page_lru, + -HPAGE_PMD_NR); +@@ -1762,6 +1761,10 @@ out: + out_fail: + count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); + out_dropref: ++ entry = pmd_mknonnuma(entry); ++ set_pmd_at(mm, haddr, pmd, entry); ++ update_mmu_cache_pmd(vma, address, &entry); ++ + unlock_page(page); + put_page(page); + return 0; diff --git a/queue-3.10/mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch b/queue-3.10/mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch new file mode 100644 index 00000000000..774753d20c3 --- /dev/null +++ b/queue-3.10/mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch @@ -0,0 +1,41 @@ +From 1dd49bfa3465756b3ce72214b58a33e4afb67aa3 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 7 Oct 2013 11:28:42 +0100 +Subject: mm: numa: Do not account for a hinting fault if we raced + +From: Mel Gorman + +commit 1dd49bfa3465756b3ce72214b58a33e4afb67aa3 upstream. + +If another task handled a hinting fault in parallel then do not double +account for it. 
+ +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-5-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1335,8 +1335,11 @@ int do_huge_pmd_numa_page(struct mm_stru + + check_same: + spin_lock(&mm->page_table_lock); +- if (unlikely(!pmd_same(pmd, *pmdp))) ++ if (unlikely(!pmd_same(pmd, *pmdp))) { ++ /* Someone else took our fault */ ++ current_nid = -1; + goto out_unlock; ++ } + clear_pmdnuma: + pmd = pmd_mknonnuma(pmd); + set_pmd_at(mm, haddr, pmdp, pmd); diff --git a/queue-3.10/mm-numa-sanitize-task_numa_fault-callsites.patch b/queue-3.10/mm-numa-sanitize-task_numa_fault-callsites.patch new file mode 100644 index 00000000000..93bc3c2be67 --- /dev/null +++ b/queue-3.10/mm-numa-sanitize-task_numa_fault-callsites.patch @@ -0,0 +1,230 @@ +From c61109e34f60f6e85bb43c5a1cd51c0e3db40847 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 7 Oct 2013 11:28:45 +0100 +Subject: mm: numa: Sanitize task_numa_fault() callsites + +From: Mel Gorman + +commit c61109e34f60f6e85bb43c5a1cd51c0e3db40847 upstream. + +There are three callers of task_numa_fault(): + + - do_huge_pmd_numa_page(): + Accounts against the current node, not the node where the + page resides, unless we migrated, in which case it accounts + against the node we migrated to. + + - do_numa_page(): + Accounts against the current node, not the node where the + page resides, unless we migrated, in which case it accounts + against the node we migrated to. + + - do_pmd_numa_page(): + Accounts not at all when the page isn't migrated, otherwise + accounts against the node we migrated towards. + +This seems wrong to me; all three sites should have the same +sementaics, furthermore we should accounts against where the page +really is, we already know where the task is. + +So modify all three sites to always account; we did after all receive +the fault; and always account to where the page is after migration, +regardless of success. + +They all still differ on when they clear the PTE/PMD; ideally that +would get sorted too. 
+ +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 25 +++++++++++++------------ + mm/memory.c | 53 +++++++++++++++++++++-------------------------------- + 2 files changed, 34 insertions(+), 44 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1291,18 +1291,19 @@ int do_huge_pmd_numa_page(struct mm_stru + struct anon_vma *anon_vma = NULL; + struct page *page; + unsigned long haddr = addr & HPAGE_PMD_MASK; ++ int page_nid = -1, this_nid = numa_node_id(); + int target_nid; +- int current_nid = -1; +- bool migrated, page_locked; ++ bool page_locked; ++ bool migrated = false; + + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) + goto out_unlock; + + page = pmd_page(pmd); +- current_nid = page_to_nid(page); ++ page_nid = page_to_nid(page); + count_vm_numa_event(NUMA_HINT_FAULTS); +- if (current_nid == numa_node_id()) ++ if (page_nid == this_nid) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + + /* +@@ -1345,19 +1346,18 @@ int do_huge_pmd_numa_page(struct mm_stru + spin_unlock(&mm->page_table_lock); + migrated = migrate_misplaced_transhuge_page(mm, vma, + pmdp, pmd, addr, page, target_nid); +- if (!migrated) ++ if (migrated) ++ page_nid = target_nid; ++ else + goto check_same; + +- task_numa_fault(target_nid, HPAGE_PMD_NR, true); +- if (anon_vma) +- page_unlock_anon_vma_read(anon_vma); +- return 0; ++ goto out; + + check_same: + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) { + /* Someone else took our fault */ +- current_nid = -1; ++ page_nid = -1; + goto out_unlock; + } + clear_pmdnuma: +@@ -1372,8 +1372,9 @@ out: + if (anon_vma) + page_unlock_anon_vma_read(anon_vma); + +- if (current_nid != -1) +- task_numa_fault(current_nid, HPAGE_PMD_NR, false); ++ if (page_nid != -1) ++ task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); ++ + return 0; + } + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3525,12 +3525,12 @@ static int do_nonlinear_fault(struct mm_ + } + + int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, +- unsigned long addr, int current_nid) ++ unsigned long addr, int page_nid) + { + get_page(page); + + count_vm_numa_event(NUMA_HINT_FAULTS); +- if (current_nid == numa_node_id()) ++ if (page_nid == numa_node_id()) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + + return mpol_misplaced(page, vma, addr); +@@ -3541,7 +3541,7 @@ int do_numa_page(struct mm_struct *mm, s + { + struct page *page = NULL; + spinlock_t *ptl; +- int current_nid = -1; ++ int page_nid = -1; + int target_nid; + bool migrated = false; + +@@ -3571,15 +3571,10 @@ int do_numa_page(struct mm_struct *mm, s + return 0; + } + +- current_nid = page_to_nid(page); +- target_nid = numa_migrate_prep(page, vma, addr, current_nid); ++ page_nid = page_to_nid(page); ++ target_nid = numa_migrate_prep(page, vma, addr, page_nid); + pte_unmap_unlock(ptep, ptl); + if (target_nid == -1) { +- /* +- * Account for the fault against the current node if it not +- * being replaced regardless of where the page is located. 
+- */ +- current_nid = numa_node_id(); + put_page(page); + goto out; + } +@@ -3587,11 +3582,11 @@ int do_numa_page(struct mm_struct *mm, s + /* Migrate to the requested node */ + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) +- current_nid = target_nid; ++ page_nid = target_nid; + + out: +- if (current_nid != -1) +- task_numa_fault(current_nid, 1, migrated); ++ if (page_nid != -1) ++ task_numa_fault(page_nid, 1, migrated); + return 0; + } + +@@ -3606,7 +3601,6 @@ static int do_pmd_numa_page(struct mm_st + unsigned long offset; + spinlock_t *ptl; + bool numa = false; +- int local_nid = numa_node_id(); + + spin_lock(&mm->page_table_lock); + pmd = *pmdp; +@@ -3629,9 +3623,10 @@ static int do_pmd_numa_page(struct mm_st + for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { + pte_t pteval = *pte; + struct page *page; +- int curr_nid = local_nid; ++ int page_nid = -1; + int target_nid; +- bool migrated; ++ bool migrated = false; ++ + if (!pte_present(pteval)) + continue; + if (!pte_numa(pteval)) +@@ -3653,25 +3648,19 @@ static int do_pmd_numa_page(struct mm_st + if (unlikely(page_mapcount(page) != 1)) + continue; + +- /* +- * Note that the NUMA fault is later accounted to either +- * the node that is currently running or where the page is +- * migrated to. +- */ +- curr_nid = local_nid; +- target_nid = numa_migrate_prep(page, vma, addr, +- page_to_nid(page)); +- if (target_nid == -1) { ++ page_nid = page_to_nid(page); ++ target_nid = numa_migrate_prep(page, vma, addr, page_nid); ++ pte_unmap_unlock(pte, ptl); ++ if (target_nid != -1) { ++ migrated = migrate_misplaced_page(page, target_nid); ++ if (migrated) ++ page_nid = target_nid; ++ } else { + put_page(page); +- continue; + } + +- /* Migrate to the requested node */ +- pte_unmap_unlock(pte, ptl); +- migrated = migrate_misplaced_page(page, target_nid); +- if (migrated) +- curr_nid = target_nid; +- task_numa_fault(curr_nid, 1, migrated); ++ if (page_nid != -1) ++ task_numa_fault(page_nid, 1, migrated); + + pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); + } diff --git a/queue-3.10/mm-prevent-parallel-splits-during-thp-migration.patch b/queue-3.10/mm-prevent-parallel-splits-during-thp-migration.patch new file mode 100644 index 00000000000..01dd846f21b --- /dev/null +++ b/queue-3.10/mm-prevent-parallel-splits-during-thp-migration.patch @@ -0,0 +1,122 @@ +From 587fe586f44a48f9691001ba6c45b86c8e4ba21f Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 7 Oct 2013 11:28:44 +0100 +Subject: mm: Prevent parallel splits during THP migration + +From: Mel Gorman + +commit 587fe586f44a48f9691001ba6c45b86c8e4ba21f upstream. + +THP migrations are serialised by the page lock but on its own that does +not prevent THP splits. If the page is split during THP migration then +the pmd_same checks will prevent page table corruption but the unlock page +and other fix-ups potentially will cause corruption. This patch takes the +anon_vma lock to prevent parallel splits during migration. 
+ +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-7-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 44 ++++++++++++++++++++++++++++++-------------- + 1 file changed, 30 insertions(+), 14 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1288,18 +1288,18 @@ out: + int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd, pmd_t *pmdp) + { ++ struct anon_vma *anon_vma = NULL; + struct page *page; + unsigned long haddr = addr & HPAGE_PMD_MASK; + int target_nid; + int current_nid = -1; +- bool migrated; ++ bool migrated, page_locked; + + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) + goto out_unlock; + + page = pmd_page(pmd); +- get_page(page); + current_nid = page_to_nid(page); + count_vm_numa_event(NUMA_HINT_FAULTS); + if (current_nid == numa_node_id()) +@@ -1309,12 +1309,29 @@ int do_huge_pmd_numa_page(struct mm_stru + * Acquire the page lock to serialise THP migrations but avoid dropping + * page_table_lock if at all possible + */ +- if (trylock_page(page)) +- goto got_lock; ++ page_locked = trylock_page(page); ++ target_nid = mpol_misplaced(page, vma, haddr); ++ if (target_nid == -1) { ++ /* If the page was locked, there are no parallel migrations */ ++ if (page_locked) { ++ unlock_page(page); ++ goto clear_pmdnuma; ++ } ++ ++ /* Otherwise wait for potential migrations and retry fault */ ++ spin_unlock(&mm->page_table_lock); ++ wait_on_page_locked(page); ++ goto out; ++ } + +- /* Serialise against migrationa and check placement check placement */ ++ /* Page is misplaced, serialise migrations and parallel THP splits */ ++ get_page(page); + spin_unlock(&mm->page_table_lock); +- lock_page(page); ++ if (!page_locked) { ++ lock_page(page); ++ page_locked = true; ++ } ++ anon_vma = page_lock_anon_vma_read(page); + + /* Confirm the PTE did not while locked */ + spin_lock(&mm->page_table_lock); +@@ -1324,14 +1341,6 @@ int do_huge_pmd_numa_page(struct mm_stru + goto out_unlock; + } + +-got_lock: +- target_nid = mpol_misplaced(page, vma, haddr); +- if (target_nid == -1) { +- unlock_page(page); +- put_page(page); +- goto clear_pmdnuma; +- } +- + /* Migrate the THP to the requested node */ + spin_unlock(&mm->page_table_lock); + migrated = migrate_misplaced_transhuge_page(mm, vma, +@@ -1340,6 +1349,8 @@ got_lock: + goto check_same; + + task_numa_fault(target_nid, HPAGE_PMD_NR, true); ++ if (anon_vma) ++ page_unlock_anon_vma_read(anon_vma); + return 0; + + check_same: +@@ -1356,6 +1367,11 @@ clear_pmdnuma: + update_mmu_cache_pmd(vma, addr, pmdp); + out_unlock: + spin_unlock(&mm->page_table_lock); ++ ++out: ++ if (anon_vma) ++ page_unlock_anon_vma_read(anon_vma); ++ + if (current_nid != -1) + task_numa_fault(current_nid, HPAGE_PMD_NR, false); + return 0; diff --git a/queue-3.10/mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch b/queue-3.10/mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch new file mode 100644 index 00000000000..05f1a00b6a0 --- /dev/null +++ b/queue-3.10/mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch @@ -0,0 +1,81 @@ +From 42836f5f8baa33085f547098b74aa98991ee9216 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 7 Oct 2013 11:28:43 +0100 +Subject: mm: Wait for THP migrations to complete during NUMA 
hinting faults + +From: Mel Gorman + +commit 42836f5f8baa33085f547098b74aa98991ee9216 upstream. + +The locking for migrating THP is unusual. While normal page migration +prevents parallel accesses using a migration PTE, THP migration relies on +a combination of the page_table_lock, the page lock and the existance of +the NUMA hinting PTE to guarantee safety but there is a bug in the scheme. + +If a THP page is currently being migrated and another thread traps a +fault on the same page it checks if the page is misplaced. If it is not, +then pmd_numa is cleared. The problem is that it checks if the page is +misplaced without holding the page lock meaning that the racing thread +can be migrating the THP when the second thread clears the NUMA bit +and faults a stale page. + +This patch checks if the page is potentially being migrated and stalls +using the lock_page if it is potentially being migrated before checking +if the page is misplaced or not. + +Signed-off-by: Mel Gorman +Reviewed-by: Rik van Riel +Cc: Andrea Arcangeli +Cc: Johannes Weiner +Cc: Srikar Dronamraju +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/1381141781-10992-6-git-send-email-mgorman@suse.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1305,13 +1305,14 @@ int do_huge_pmd_numa_page(struct mm_stru + if (current_nid == numa_node_id()) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + +- target_nid = mpol_misplaced(page, vma, haddr); +- if (target_nid == -1) { +- put_page(page); +- goto clear_pmdnuma; +- } ++ /* ++ * Acquire the page lock to serialise THP migrations but avoid dropping ++ * page_table_lock if at all possible ++ */ ++ if (trylock_page(page)) ++ goto got_lock; + +- /* Acquire the page lock to serialise THP migrations */ ++ /* Serialise against migrationa and check placement check placement */ + spin_unlock(&mm->page_table_lock); + lock_page(page); + +@@ -1322,9 +1323,17 @@ int do_huge_pmd_numa_page(struct mm_stru + put_page(page); + goto out_unlock; + } +- spin_unlock(&mm->page_table_lock); ++ ++got_lock: ++ target_nid = mpol_misplaced(page, vma, haddr); ++ if (target_nid == -1) { ++ unlock_page(page); ++ put_page(page); ++ goto clear_pmdnuma; ++ } + + /* Migrate the THP to the requested node */ ++ spin_unlock(&mm->page_table_lock); + migrated = migrate_misplaced_transhuge_page(mm, vma, + pmdp, pmd, addr, page, target_nid); + if (!migrated) diff --git a/queue-3.10/series b/queue-3.10/series index d9a89241889..b9767842747 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -54,3 +54,9 @@ fix-a-few-incorrectly-checked-remap_pfn_range-calls.patch lib-scatterlist.c-don-t-flush_kernel_dcache_page-on-slab-page.patch aacraid-missing-capable-check-in-compat-ioctl.patch clk-fixup-argument-order-when-setting-vco-parameters.patch +mm-numa-do-not-account-for-a-hinting-fault-if-we-raced.patch +mm-wait-for-thp-migrations-to-complete-during-numa-hinting-faults.patch +mm-prevent-parallel-splits-during-thp-migration.patch +mm-numa-sanitize-task_numa_fault-callsites.patch +mm-close-races-between-thp-migration-and-pmd-numa-clearing.patch +mm-account-for-a-thp-numa-hinting-update-as-one-pte-update.patch -- 2.47.3