From: Greg Kroah-Hartman
Date: Fri, 7 Feb 2014 16:21:18 +0000 (-0800)
Subject: removed mm-munlock-fix-potential-race-with-thp-page-split.patch
X-Git-Tag: v3.4.80~72
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8695e51cba52709c56524df36528a8b2d61b52f1;p=thirdparty%2Fkernel%2Fstable-queue.git

removed mm-munlock-fix-potential-race-with-thp-page-split.patch

From 3.12 and 3.13 queues
---

diff --git a/queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch b/queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch
deleted file mode 100644
index 040801de0ed..00000000000
--- a/queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch
+++ /dev/null
@@ -1,207 +0,0 @@
-From 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f Mon Sep 17 00:00:00 2001
-From: Vlastimil Babka
-Date: Thu, 23 Jan 2014 15:52:50 -0800
-Subject: mm: munlock: fix potential race with THP page split
-
-From: Vlastimil Babka
-
-commit 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f upstream.
-
-Since commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP
-pages") munlock skips tail pages of a munlocked THP page. There is some
-attempt to prevent bad consequences of racing with a THP page split, but
-code inspection indicates that there are two problems that may lead to a
-non-fatal, yet wrong outcome.
-
-First, __split_huge_page_refcount() copies flags including PageMlocked
-from the head page to the tail pages. Clearing PageMlocked by
-munlock_vma_page() in the middle of this operation might result in part
-of the tail pages being left with the PageMlocked flag. As the head page
-still appears to be a THP page until all tail pages are processed,
-munlock_vma_page() might think it munlocked the whole THP page and skip
-all the former tail pages. Before ff6a6da60, those pages would be
-cleared in further iterations of munlock_vma_pages_range(), but NR_MLOCK
-would still become undercounted (related to the next point).
-
-Second, NR_MLOCK accounting is based on a call to hpage_nr_pages() after
-PageMlocked is cleared. The accounting might also become inconsistent
-due to a race with __split_huge_page_refcount():
-
-- undercount when HUGE_PMD_NR is subtracted, but some tail pages are
-  left with PageMlocked set and counted again (only possible before
-  ff6a6da60)
-
-- overcount when hpage_nr_pages() sees a normal page (split has already
-  finished), but the parallel split has meanwhile cleared PageMlocked from
-  additional tail pages
-
-This patch prevents both problems by extending the scope of lru_lock in
-munlock_vma_page(). This is convenient because:
-
-- __split_huge_page_refcount() takes lru_lock for its whole operation
-
-- munlock_vma_page() typically takes lru_lock anyway for page isolation
-
-As this becomes a second function where page isolation is done with
-lru_lock already held, factor this out to a new
-__munlock_isolate_lru_page() function and clean up the code around it.
-
-[akpm@linux-foundation.org: avoid a coding-style ugly]
-Signed-off-by: Vlastimil Babka
-Cc: Sasha Levin
-Cc: Michel Lespinasse
-Cc: Andrea Arcangeli
-Cc: Rik van Riel
-Cc: Mel Gorman
-Cc: Hugh Dickins
-Signed-off-by: Andrew Morton
-Signed-off-by: Linus Torvalds
-Signed-off-by: Greg Kroah-Hartman
-
----
- mm/mlock.c | 104 +++++++++++++++++++++++++++++++++++--------------------------
- 1 file changed, 60 insertions(+), 44 deletions(-)
-
---- a/mm/mlock.c
-+++ b/mm/mlock.c
-@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
- }
- 
- /*
-+ * Isolate a page from LRU with optional get_page() pin.
-+ * Assumes lru_lock already held and page already pinned.
-+ */
-+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
-+{
-+        if (PageLRU(page)) {
-+                struct lruvec *lruvec;
-+
-+                lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
-+                if (getpage)
-+                        get_page(page);
-+                ClearPageLRU(page);
-+                del_page_from_lru_list(page, lruvec, page_lru(page));
-+                return true;
-+        }
-+
-+        return false;
-+}
-+
-+/*
-  * Finish munlock after successful page isolation
-  *
-  * Page must be locked. This is a wrapper for try_to_munlock()
-@@ -126,9 +146,9 @@ static void __munlock_isolated_page(stru
- static void __munlock_isolation_failed(struct page *page)
- {
-         if (PageUnevictable(page))
--                count_vm_event(UNEVICTABLE_PGSTRANDED);
-+                __count_vm_event(UNEVICTABLE_PGSTRANDED);
-         else
--                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-+                __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
- }
- 
- /**
-@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(s
- unsigned int munlock_vma_page(struct page *page)
- {
-         unsigned int nr_pages;
-+        struct zone *zone = page_zone(page);
- 
-         BUG_ON(!PageLocked(page));
- 
--        if (TestClearPageMlocked(page)) {
--                nr_pages = hpage_nr_pages(page);
--                mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
--                if (!isolate_lru_page(page))
--                        __munlock_isolated_page(page);
--                else
--                        __munlock_isolation_failed(page);
--        } else {
--                nr_pages = hpage_nr_pages(page);
--        }
--
-         /*
--         * Regardless of the original PageMlocked flag, we determine nr_pages
--         * after touching the flag. This leaves a possible race with a THP page
--         * split, such that a whole THP page was munlocked, but nr_pages == 1.
--         * Returning a smaller mask due to that is OK, the worst that can
--         * happen is subsequent useless scanning of the former tail pages.
--         * The NR_MLOCK accounting can however become broken.
-+         * Serialize with any parallel __split_huge_page_refcount() which
-+         * might otherwise copy PageMlocked to part of the tail pages before
-+         * we clear it in the head page. It also stabilizes hpage_nr_pages().
-          */
-+        spin_lock_irq(&zone->lru_lock);
-+
-+        nr_pages = hpage_nr_pages(page);
-+        if (!TestClearPageMlocked(page))
-+                goto unlock_out;
-+
-+        __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
-+
-+        if (__munlock_isolate_lru_page(page, true)) {
-+                spin_unlock_irq(&zone->lru_lock);
-+                __munlock_isolated_page(page);
-+                goto out;
-+        }
-+        __munlock_isolation_failed(page);
-+
-+unlock_out:
-+        spin_unlock_irq(&zone->lru_lock);
-+
-+out:
-         return nr_pages - 1;
- }
- 
-@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pag
-                 struct page *page = pvec->pages[i];
- 
-                 if (TestClearPageMlocked(page)) {
--                        struct lruvec *lruvec;
--                        int lru;
--
--                        if (PageLRU(page)) {
--                                lruvec = mem_cgroup_page_lruvec(page, zone);
--                                lru = page_lru(page);
--                                /*
--                                 * We already have pin from follow_page_mask()
--                                 * so we can spare the get_page() here.
--                                 */
--                                ClearPageLRU(page);
--                                del_page_from_lru_list(page, lruvec, lru);
--                        } else {
--                                __munlock_isolation_failed(page);
--                                goto skip_munlock;
--                        }
--
--                } else {
--skip_munlock:
-                         /*
--                         * We won't be munlocking this page in the next phase
--                         * but we still need to release the follow_page_mask()
--                         * pin. We cannot do it under lru_lock however. If it's
--                         * the last pin, __page_cache_release would deadlock.
-+                         * We already have pin from follow_page_mask()
-+                         * so we can spare the get_page() here.
-                          */
--                        pagevec_add(&pvec_putback, pvec->pages[i]);
--                        pvec->pages[i] = NULL;
-+                        if (__munlock_isolate_lru_page(page, false))
-+                                continue;
-+                        else
-+                                __munlock_isolation_failed(page);
-                 }
-+
-+                /*
-+                 * We won't be munlocking this page in the next phase
-+                 * but we still need to release the follow_page_mask()
-+                 * pin. We cannot do it under lru_lock however. If it's
-+                 * the last pin, __page_cache_release() would deadlock.
-+                 */
-+                pagevec_add(&pvec_putback, pvec->pages[i]);
-+                pvec->pages[i] = NULL;
-         }
-         delta_munlocked = -nr + pagevec_count(&pvec_putback);
-         __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
diff --git a/queue-3.13/mm-munlock-fix-potential-race-with-thp-page-split.patch b/queue-3.13/mm-munlock-fix-potential-race-with-thp-page-split.patch
deleted file mode 100644
index 040801de0ed..00000000000
--- a/queue-3.13/mm-munlock-fix-potential-race-with-thp-page-split.patch
+++ /dev/null
@@ -1,207 +0,0 @@
-From 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f Mon Sep 17 00:00:00 2001
-From: Vlastimil Babka
-Date: Thu, 23 Jan 2014 15:52:50 -0800
-Subject: mm: munlock: fix potential race with THP page split
-
-From: Vlastimil Babka
-
-commit 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f upstream.
-
-Since commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP
-pages") munlock skips tail pages of a munlocked THP page. There is some
-attempt to prevent bad consequences of racing with a THP page split, but
-code inspection indicates that there are two problems that may lead to a
-non-fatal, yet wrong outcome.
-
-First, __split_huge_page_refcount() copies flags including PageMlocked
-from the head page to the tail pages. Clearing PageMlocked by
-munlock_vma_page() in the middle of this operation might result in part
-of the tail pages being left with the PageMlocked flag. As the head page
-still appears to be a THP page until all tail pages are processed,
-munlock_vma_page() might think it munlocked the whole THP page and skip
-all the former tail pages. Before ff6a6da60, those pages would be
-cleared in further iterations of munlock_vma_pages_range(), but NR_MLOCK
-would still become undercounted (related to the next point).
-
-Second, NR_MLOCK accounting is based on a call to hpage_nr_pages() after
-PageMlocked is cleared. The accounting might also become inconsistent
-due to a race with __split_huge_page_refcount():
-
-- undercount when HUGE_PMD_NR is subtracted, but some tail pages are
-  left with PageMlocked set and counted again (only possible before
-  ff6a6da60)
-
-- overcount when hpage_nr_pages() sees a normal page (split has already
-  finished), but the parallel split has meanwhile cleared PageMlocked from
-  additional tail pages
-
-This patch prevents both problems by extending the scope of lru_lock in
-munlock_vma_page(). This is convenient because:
-
-- __split_huge_page_refcount() takes lru_lock for its whole operation
-
-- munlock_vma_page() typically takes lru_lock anyway for page isolation
-
-As this becomes a second function where page isolation is done with
-lru_lock already held, factor this out to a new
-__munlock_isolate_lru_page() function and clean up the code around it.
-
-[akpm@linux-foundation.org: avoid a coding-style ugly]
-Signed-off-by: Vlastimil Babka
-Cc: Sasha Levin
-Cc: Michel Lespinasse
-Cc: Andrea Arcangeli
-Cc: Rik van Riel
-Cc: Mel Gorman
-Cc: Hugh Dickins
-Signed-off-by: Andrew Morton
-Signed-off-by: Linus Torvalds
-Signed-off-by: Greg Kroah-Hartman
-
----
- mm/mlock.c | 104 +++++++++++++++++++++++++++++++++++--------------------------
- 1 file changed, 60 insertions(+), 44 deletions(-)
-
---- a/mm/mlock.c
-+++ b/mm/mlock.c
-@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
- }
- 
- /*
-+ * Isolate a page from LRU with optional get_page() pin.
-+ * Assumes lru_lock already held and page already pinned.
-+ */
-+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
-+{
-+        if (PageLRU(page)) {
-+                struct lruvec *lruvec;
-+
-+                lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
-+                if (getpage)
-+                        get_page(page);
-+                ClearPageLRU(page);
-+                del_page_from_lru_list(page, lruvec, page_lru(page));
-+                return true;
-+        }
-+
-+        return false;
-+}
-+
-+/*
-  * Finish munlock after successful page isolation
-  *
-  * Page must be locked. This is a wrapper for try_to_munlock()
-@@ -126,9 +146,9 @@ static void __munlock_isolated_page(stru
- static void __munlock_isolation_failed(struct page *page)
- {
-         if (PageUnevictable(page))
--                count_vm_event(UNEVICTABLE_PGSTRANDED);
-+                __count_vm_event(UNEVICTABLE_PGSTRANDED);
-         else
--                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-+                __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
- }
- 
- /**
-@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(s
- unsigned int munlock_vma_page(struct page *page)
- {
-         unsigned int nr_pages;
-+        struct zone *zone = page_zone(page);
- 
-         BUG_ON(!PageLocked(page));
- 
--        if (TestClearPageMlocked(page)) {
--                nr_pages = hpage_nr_pages(page);
--                mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
--                if (!isolate_lru_page(page))
--                        __munlock_isolated_page(page);
--                else
--                        __munlock_isolation_failed(page);
--        } else {
--                nr_pages = hpage_nr_pages(page);
--        }
--
-         /*
--         * Regardless of the original PageMlocked flag, we determine nr_pages
--         * after touching the flag. This leaves a possible race with a THP page
--         * split, such that a whole THP page was munlocked, but nr_pages == 1.
--         * Returning a smaller mask due to that is OK, the worst that can
--         * happen is subsequent useless scanning of the former tail pages.
--         * The NR_MLOCK accounting can however become broken.
-+         * Serialize with any parallel __split_huge_page_refcount() which
-+         * might otherwise copy PageMlocked to part of the tail pages before
-+         * we clear it in the head page. It also stabilizes hpage_nr_pages().
-          */
-+        spin_lock_irq(&zone->lru_lock);
-+
-+        nr_pages = hpage_nr_pages(page);
-+        if (!TestClearPageMlocked(page))
-+                goto unlock_out;
-+
-+        __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
-+
-+        if (__munlock_isolate_lru_page(page, true)) {
-+                spin_unlock_irq(&zone->lru_lock);
-+                __munlock_isolated_page(page);
-+                goto out;
-+        }
-+        __munlock_isolation_failed(page);
-+
-+unlock_out:
-+        spin_unlock_irq(&zone->lru_lock);
-+
-+out:
-         return nr_pages - 1;
- }
- 
-@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pag
-                 struct page *page = pvec->pages[i];
- 
-                 if (TestClearPageMlocked(page)) {
--                        struct lruvec *lruvec;
--                        int lru;
--
--                        if (PageLRU(page)) {
--                                lruvec = mem_cgroup_page_lruvec(page, zone);
--                                lru = page_lru(page);
--                                /*
--                                 * We already have pin from follow_page_mask()
--                                 * so we can spare the get_page() here.
--                                 */
--                                ClearPageLRU(page);
--                                del_page_from_lru_list(page, lruvec, lru);
--                        } else {
--                                __munlock_isolation_failed(page);
--                                goto skip_munlock;
--                        }
--
--                } else {
--skip_munlock:
-                         /*
--                         * We won't be munlocking this page in the next phase
--                         * but we still need to release the follow_page_mask()
--                         * pin. We cannot do it under lru_lock however. If it's
--                         * the last pin, __page_cache_release would deadlock.
-+                         * We already have pin from follow_page_mask()
-+                         * so we can spare the get_page() here.
-                          */
--                        pagevec_add(&pvec_putback, pvec->pages[i]);
--                        pvec->pages[i] = NULL;
-+                        if (__munlock_isolate_lru_page(page, false))
-+                                continue;
-+                        else
-+                                __munlock_isolation_failed(page);
-                 }
-+
-+                /*
-+                 * We won't be munlocking this page in the next phase
-+                 * but we still need to release the follow_page_mask()
-+                 * pin. We cannot do it under lru_lock however. If it's
-+                 * the last pin, __page_cache_release() would deadlock.
-+                 */
-+                pagevec_add(&pvec_putback, pvec->pages[i]);
-+                pvec->pages[i] = NULL;
-         }
-         delta_munlocked = -nr + pagevec_count(&pvec_putback);
-         __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
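The invariant the removed patch enforces can be seen in isolation. Below is a
minimal userspace sketch of the same locking discipline (pthreads; HPAGE_NR,
page_mlocked, split_thread and munlock_thread are illustrative stand-ins for
the kernel objects, not kernel code): the "split" copies the mlocked flag from
head to tails under a lock, and the "munlock" clears the flag and samples the
page count under the same lock, mirroring how munlock_vma_page() takes
zone->lru_lock around TestClearPageMlocked() and hpage_nr_pages().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define HPAGE_NR 512                        /* pages backing one huge page */

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static bool page_mlocked[HPAGE_NR];         /* [0] is the head page */
static bool split_done;                     /* head is no longer huge */
static long nr_mlock = HPAGE_NR;            /* NR_MLOCK analogue */

/* __split_huge_page_refcount() analogue: copy head flag to the tails */
static void *split_thread(void *unused)
{
        pthread_mutex_lock(&lru_lock);
        for (int i = 1; i < HPAGE_NR; i++)
                page_mlocked[i] = page_mlocked[0];
        split_done = true;
        pthread_mutex_unlock(&lru_lock);
        return NULL;
}

/* munlock_vma_page() analogue: flag clear and accounting as one atomic step */
static void *munlock_thread(void *unused)
{
        pthread_mutex_lock(&lru_lock);
        int nr_pages = split_done ? 1 : HPAGE_NR;   /* hpage_nr_pages() */
        if (page_mlocked[0]) {                      /* TestClearPageMlocked() */
                page_mlocked[0] = false;
                nr_mlock -= nr_pages;
        }
        pthread_mutex_unlock(&lru_lock);
        return NULL;
}

int main(void)
{
        pthread_t split, munlock;
        long flagged = 0;

        page_mlocked[0] = true;             /* whole THP starts mlocked */
        pthread_create(&split, NULL, split_thread, NULL);
        pthread_create(&munlock, NULL, munlock_thread, NULL);
        pthread_join(split, NULL);
        pthread_join(munlock, NULL);

        for (int i = 0; i < HPAGE_NR; i++)
                flagged += page_mlocked[i];
        /* Holds for either thread order; moving the flag accesses outside
           the lock reproduces the under/overcount described above. */
        printf("nr_mlock=%ld flagged=%ld consistent=%d\n",
               nr_mlock, flagged, nr_mlock == flagged);
        return 0;
}

Either interleaving is consistent: if the split runs first, the munlock side
sees a small page (nr_pages == 1) and the 511 still-flagged tail pages remain
counted; if the munlock runs first, the split copies an already-cleared flag
and the counter drops by the full 512.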