From 9fae0da3adc78c25cbd2b566594661637c405143 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 3 Jun 2014 13:07:58 -0700
Subject: [PATCH] 3.10-stable patches

added patches:
      hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch
      mm-thp-close-race-between-mremap-and-split_huge_page.patch
      x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch
---
 ...t-match-for-handling-a-free-hugepage.patch | 54 ++++++++++++++
 ...e-between-mremap-and-split_huge_page.patch | 71 +++++++++++++++++++
 queue-3.10/series                             |  3 +
 ...lb-page-invalidation-for-hugetlb_cow.patch | 42 +++++++++++
 4 files changed, 170 insertions(+)
 create mode 100644 queue-3.10/hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch
 create mode 100644 queue-3.10/mm-thp-close-race-between-mremap-and-split_huge_page.patch
 create mode 100644 queue-3.10/x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch

diff --git a/queue-3.10/hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch b/queue-3.10/hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch
new file mode 100644
index 00000000000..ef1d6d4e505
--- /dev/null
+++ b/queue-3.10/hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch
@@ -0,0 +1,54 @@
+From b985194c8c0a130ed155b71662e39f7eaea4876f Mon Sep 17 00:00:00 2001
+From: Chen Yucong
+Date: Thu, 22 May 2014 11:54:15 -0700
+Subject: hwpoison, hugetlb: lock_page/unlock_page does not match for handling a free hugepage
+
+From: Chen Yucong
+
+commit b985194c8c0a130ed155b71662e39f7eaea4876f upstream.
+
+For handling a free hugepage in memory failure, the race will happen if
+another thread hwpoisoned this hugepage concurrently.  So we need to
+check PageHWPoison instead of !PageHWPoison.
+
+If hwpoison_filter(p) returns true or a race happens, then we need to
+unlock_page(hpage).
+
+Signed-off-by: Chen Yucong
+Reviewed-by: Naoya Horiguchi
+Tested-by: Naoya Horiguchi
+Reviewed-by: Andi Kleen
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory-failure.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1083,15 +1083,16 @@ int memory_failure(unsigned long pfn, in
+ 		return 0;
+ 	} else if (PageHuge(hpage)) {
+ 		/*
+-		 * Check "just unpoisoned", "filter hit", and
+-		 * "race with other subpage."
++		 * Check "filter hit" and "race with other subpage."
+ 		 */
+ 		lock_page(hpage);
+-		if (!PageHWPoison(hpage)
+-		    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
+-		    || (p != hpage && TestSetPageHWPoison(hpage))) {
+-			atomic_long_sub(nr_pages, &num_poisoned_pages);
+-			return 0;
++		if (PageHWPoison(hpage)) {
++			if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
++			    || (p != hpage && TestSetPageHWPoison(hpage))) {
++				atomic_long_sub(nr_pages, &num_poisoned_pages);
++				unlock_page(hpage);
++				return 0;
++			}
+ 		}
+ 		set_page_hwpoison_huge_page(hpage);
+ 		res = dequeue_hwpoisoned_huge_page(hpage);
diff --git a/queue-3.10/mm-thp-close-race-between-mremap-and-split_huge_page.patch b/queue-3.10/mm-thp-close-race-between-mremap-and-split_huge_page.patch
new file mode 100644
index 00000000000..2ec6b70cd0b
--- /dev/null
+++ b/queue-3.10/mm-thp-close-race-between-mremap-and-split_huge_page.patch
@@ -0,0 +1,71 @@
+From dd18dbc2d42af75fffa60c77e0f02220bc329829 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov"
+Date: Fri, 9 May 2014 15:37:00 -0700
+Subject: mm, thp: close race between mremap() and split_huge_page()
+
+From: "Kirill A. Shutemov"
+
+commit dd18dbc2d42af75fffa60c77e0f02220bc329829 upstream.
+
+It's critical for split_huge_page() (and migration) to catch and freeze
+all PMDs on rmap walk.  It gets tricky if there's concurrent fork() or
+mremap() since usually we copy/move page table entries on dup_mm() or
+move_page_tables() without rmap lock taken.  To get it work we rely on
+rmap walk order to not miss any entry.  We expect to see destination VMA
+after source one to work correctly.
+
+But after switching rmap implementation to interval tree it's not always
+possible to preserve expected walk order.
+
+It works fine for dup_mm() since new VMA has the same vma_start_pgoff()
+/ vma_last_pgoff() and explicitly insert dst VMA after src one with
+vma_interval_tree_insert_after().
+
+But on move_vma() destination VMA can be merged into adjacent one and as
+result shifted left in interval tree.  Fortunately, we can detect the
+situation and prevent race with rmap walk by moving page table entries
+under rmap lock.  See commit 38a76013ad80.
+
+Problem is that we miss the lock when we move transhuge PMD.  Most
+likely this bug caused the crash[1].
+
+[1] http://thread.gmane.org/gmane.linux.kernel.mm/96473
+
+Fixes: 108d6642ad81 ("mm anon rmap: remove anon_vma_moveto_tail")
+
+Signed-off-by: Kirill A. Shutemov
+Reviewed-by: Andrea Arcangeli
+Cc: Rik van Riel
+Acked-by: Michel Lespinasse
+Cc: Dave Jones
+Cc: David Miller
+Acked-by: Johannes Weiner
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/mremap.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -175,10 +175,17 @@ unsigned long move_page_tables(struct vm
+ 			break;
+ 		if (pmd_trans_huge(*old_pmd)) {
+ 			int err = 0;
+-			if (extent == HPAGE_PMD_SIZE)
++			if (extent == HPAGE_PMD_SIZE) {
++				VM_BUG_ON(vma->vm_file || !vma->anon_vma);
++				/* See comment in move_ptes() */
++				if (need_rmap_locks)
++					anon_vma_lock_write(vma->anon_vma);
+ 				err = move_huge_pmd(vma, new_vma, old_addr,
+ 						    new_addr, old_end,
+ 						    old_pmd, new_pmd);
++				if (need_rmap_locks)
++					anon_vma_unlock_write(vma->anon_vma);
++			}
+ 			if (err > 0) {
+ 				need_flush = true;
+ 				continue;
diff --git a/queue-3.10/series b/queue-3.10/series
index 95a83a122cb..64ba869e4e3
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -16,3 +16,6 @@ timer-prevent-overflow-in-apply_slack.patch
 ipmi-fix-a-race-restarting-the-timer.patch
 ipmi-reset-the-kcs-timeout-when-starting-error-recovery.patch
 mac80211-fix-suspend-vs.-authentication-race.patch
+mm-thp-close-race-between-mremap-and-split_huge_page.patch
+x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch
+hwpoison-hugetlb-lock_page-unlock_page-does-not-match-for-handling-a-free-hugepage.patch
diff --git a/queue-3.10/x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch b/queue-3.10/x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch
new file mode 100644
index 00000000000..30f8cbb6a58
--- /dev/null
+++ b/queue-3.10/x86-mm-hugetlb-add-missing-tlb-page-invalidation-for-hugetlb_cow.patch
@@ -0,0 +1,42 @@
+From 9844f5462392b53824e8b86726e7c33b5ecbb676 Mon Sep 17 00:00:00 2001
+From: Anthony Iliopoulos
+Date: Wed, 14 May 2014 11:29:48 +0200
+Subject: x86, mm, hugetlb: Add missing TLB page invalidation for hugetlb_cow()
+
+From: Anthony Iliopoulos
+
+commit 9844f5462392b53824e8b86726e7c33b5ecbb676 upstream.
+
+The invalidation is required in order to maintain proper semantics
+under CoW conditions.  In scenarios where a process clones several
+threads, a thread operating on a core whose DTLB entry for a
+particular hugepage has not been invalidated, will be reading from
+the hugepage that belongs to the forked child process, even after
+hugetlb_cow().
+
+The thread will not see the updated page as long as the stale DTLB
+entry remains cached, the thread attempts to write into the page,
+the child process exits, or the thread gets migrated to a different
+processor.
+
+Signed-off-by: Anthony Iliopoulos
+Link: http://lkml.kernel.org/r/20140514092948.GA17391@server-36.huawei.corp
+Suggested-by: Shay Goikhman
+Acked-by: Dave Hansen
+Signed-off-by: H. Peter Anvin
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/hugetlb.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/include/asm/hugetlb.h
++++ b/arch/x86/include/asm/hugetlb.h
+@@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_cl
+ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ 					 unsigned long addr, pte_t *ptep)
+ {
++	ptep_clear_flush(vma, addr, ptep);
+ }
+ 
+ static inline int huge_pte_none(pte_t pte)
-- 
2.47.3