git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Mar 2019 11:15:55 +0000 (12:15 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Mar 2019 11:15:55 +0000 (12:15 +0100)
added patches:
hugetlbfs-fix-races-and-page-leaks-during-migration.patch
xtensa-fix-get_wchan.patch

queue-3.18/hugetlbfs-fix-races-and-page-leaks-during-migration.patch [new file with mode: 0644]
queue-3.18/series
queue-3.18/xtensa-fix-get_wchan.patch [new file with mode: 0644]

diff --git a/queue-3.18/hugetlbfs-fix-races-and-page-leaks-during-migration.patch b/queue-3.18/hugetlbfs-fix-races-and-page-leaks-during-migration.patch
new file mode 100644 (file)
index 0000000..cb8de0d
--- /dev/null
@@ -0,0 +1,201 @@
+From cb6acd01e2e43fd8bad11155752b7699c3d0fb76 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Thu, 28 Feb 2019 16:22:02 -0800
+Subject: hugetlbfs: fix races and page leaks during migration
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit cb6acd01e2e43fd8bad11155752b7699c3d0fb76 upstream.
+
+hugetlb pages should only be migrated if they are 'active'.  The
+routines set/clear_page_huge_active() modify the active state of hugetlb
+pages.
+
+When a new hugetlb page is allocated at fault time, set_page_huge_active
+is called before the page is locked.  Therefore, another thread could
+race and migrate the page while it is being added to page table by the
+fault code.  This race is somewhat hard to trigger, but can be seen by
+strategically adding udelay to simulate worst case scheduling behavior.
+Depending on 'how' the code races, various BUG()s could be triggered.
+
+To address this issue, simply delay the set_page_huge_active call until
+after the page is successfully added to the page table.
+
+Hugetlb pages can also be leaked at migration time if the pages are
+associated with a file in an explicitly mounted hugetlbfs filesystem.
+For example, consider a two node system with 4GB worth of huge pages
+available.  A program mmaps a 2G file in a hugetlbfs filesystem.  It
+then migrates the pages associated with the file from one node to
+another.  When the program exits, huge page counts are as follows:
+
+  node0
+  1024    free_hugepages
+  1024    nr_hugepages
+
+  node1
+  0       free_hugepages
+  1024    nr_hugepages
+
+  Filesystem                         Size  Used Avail Use% Mounted on
+  nodev                              4.0G  2.0G  2.0G  50% /var/opt/hugepool
+
+That is as expected.  2G of huge pages are taken from the free_hugepages
+counts, and 2G is the size of the file in the explicitly mounted
+filesystem.  If the file is then removed, the counts become:
+
+  node0
+  1024    free_hugepages
+  1024    nr_hugepages
+
+  node1
+  1024    free_hugepages
+  1024    nr_hugepages
+
+  Filesystem                         Size  Used Avail Use% Mounted on
+  nodev                              4.0G  2.0G  2.0G  50% /var/opt/hugepool
+
+Note that the filesystem still shows 2G of pages used, while there
+actually are no huge pages in use.  The only way to 'fix' the filesystem
+accounting is to unmount the filesystem.
+
+If a hugetlb page is associated with an explicitly mounted filesystem,
+this information is contained in the page_private field.  At migration
+time, this information is not preserved.  To fix, simply transfer
+page_private from old to new page at migration time if necessary.
+
+There is a related race with removing a huge page from a file and
+migration.  When a huge page is removed from the pagecache, the
+page_mapping() field is cleared, yet page_private remains set until the
+page is actually freed by free_huge_page().  A page could be migrated
+while in this state.  However, since page_mapping() is not set the
+hugetlbfs specific routine to transfer page_private is not called and we
+leak the page count in the filesystem.
+
+To fix that, check for this condition before migrating a huge page.  If
+the condition is detected, return EBUSY for the page.
+
+Link: http://lkml.kernel.org/r/74510272-7319-7372-9ea6-ec914734c179@oracle.com
+Link: http://lkml.kernel.org/r/20190212221400.3512-1-mike.kravetz@oracle.com
+Fixes: bcc54222309c ("mm: hugetlb: introduce page_huge_active")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: <stable@vger.kernel.org>
+[mike.kravetz@oracle.com: v2]
+  Link: http://lkml.kernel.org/r/7534d322-d782-8ac6-1c8d-a8dc380eb3ab@oracle.com
+[mike.kravetz@oracle.com: update comment and changelog]
+  Link: http://lkml.kernel.org/r/420bcfd6-158b-38e4-98da-26d0cd85bd01@oracle.com
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ fs/hugetlbfs/inode.c |   12 ++++++++++++
+ mm/hugetlb.c         |   14 ++++++++++++--
+ mm/migrate.c         |   11 +++++++++++
+ 3 files changed, 35 insertions(+), 2 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -609,6 +609,18 @@ static int hugetlbfs_migrate_page(struct
+       rc = migrate_huge_page_move_mapping(mapping, newpage, page);
+       if (rc != MIGRATEPAGE_SUCCESS)
+               return rc;
++
++      /*
++       * page_private is subpool pointer in hugetlb pages.  Transfer to
++       * new page.  PagePrivate is not associated with page_private for
++       * hugetlb pages and can not be set here as only page_huge_active
++       * pages can be migrated.
++       */
++      if (page_private(page)) {
++              set_page_private(newpage, page_private(page));
++              set_page_private(page, 0);
++      }
++
+       migrate_page_copy(newpage, page);
+       return MIGRATEPAGE_SUCCESS;
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2938,7 +2938,6 @@ retry_avoidcopy:
+       copy_user_huge_page(new_page, old_page, address, vma,
+                           pages_per_huge_page(h));
+       __SetPageUptodate(new_page);
+-      set_page_huge_active(new_page);
+       mmun_start = address & huge_page_mask(h);
+       mmun_end = mmun_start + huge_page_size(h);
+@@ -2959,6 +2958,7 @@ retry_avoidcopy:
+                               make_huge_pte(vma, new_page, 1));
+               page_remove_rmap(old_page);
+               hugepage_add_new_anon_rmap(new_page, vma, address);
++              set_page_huge_active(new_page);
+               /* Make the old page be freed below */
+               new_page = old_page;
+       }
+@@ -3017,6 +3017,7 @@ static int hugetlb_no_page(struct mm_str
+       struct page *page;
+       pte_t new_pte;
+       spinlock_t *ptl;
++      bool new_page = false;
+       /*
+        * Currently, we are forced to kill the process in the event the
+@@ -3050,7 +3051,7 @@ retry:
+               }
+               clear_huge_page(page, address, pages_per_huge_page(h));
+               __SetPageUptodate(page);
+-              set_page_huge_active(page);
++              new_page = true;
+               if (vma->vm_flags & VM_MAYSHARE) {
+                       int err;
+@@ -3126,6 +3127,15 @@ retry:
+       }
+       spin_unlock(ptl);
++
++      /*
++       * Only make newly allocated pages active.  Existing pages found
++       * in the pagecache could be !page_huge_active() if they have been
++       * isolated for migration.
++       */
++      if (new_page)
++              set_page_huge_active(page);
++
+       unlock_page(page);
+ out:
+       return ret;
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1050,6 +1050,16 @@ static int unmap_and_move_huge_page(new_
+               lock_page(hpage);
+       }
++      /*
++       * Check for pages which are in the process of being freed.  Without
++       * page_mapping() set, hugetlbfs specific move page routine will not
++       * be called and we could leak usage counts for subpools.
++       */
++      if (page_private(hpage) && !page_mapping(hpage)) {
++              rc = -EBUSY;
++              goto out_unlock;
++      }
++
+       if (PageAnon(hpage))
+               anon_vma = page_get_anon_vma(hpage);
+@@ -1067,6 +1077,7 @@ static int unmap_and_move_huge_page(new_
+       if (rc == MIGRATEPAGE_SUCCESS)
+               hugetlb_cgroup_migrate(hpage, new_hpage);
++out_unlock:
+       unlock_page(hpage);
+ out:
+       if (rc != -EAGAIN)
index 96d1adfce56584c7de2d54df11d4dd21b692679f..2d9771878eeda1458610dce44171b36807da839a 100644 (file)
@@ -51,3 +51,5 @@ net-avoid-use-ipcb-in-cipso_v4_error.patch
 net-phy-micrel-ksz8061-link-failure-after-cable-connect.patch
 netlabel-fix-out-of-bounds-memory-accesses.patch
 ip6mr-do-not-call-__ip6_inc_stats-from-preemptible-context.patch
+hugetlbfs-fix-races-and-page-leaks-during-migration.patch
+xtensa-fix-get_wchan.patch
diff --git a/queue-3.18/xtensa-fix-get_wchan.patch b/queue-3.18/xtensa-fix-get_wchan.patch
new file mode 100644 (file)
index 0000000..240495f
--- /dev/null
@@ -0,0 +1,35 @@
+From d90b88fd3653f1fb66ecc6571b860d5a5749fa56 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Wed, 2 Jan 2019 01:08:32 -0800
+Subject: xtensa: fix get_wchan
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit d90b88fd3653f1fb66ecc6571b860d5a5749fa56 upstream.
+
+Stack unwinding is implemented incorrectly in xtensa get_wchan: instead
+of extracting a0 and a1 registers from the spill location under the
+stack pointer it extracts a word pointed to by the stack pointer and
+subtracts 4 or 3 from it.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/kernel/process.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/xtensa/kernel/process.c
++++ b/arch/xtensa/kernel/process.c
+@@ -303,8 +303,8 @@ unsigned long get_wchan(struct task_stru
+               /* Stack layout: sp-4: ra, sp-3: sp' */
+-              pc = MAKE_PC_FROM_RA(*(unsigned long*)sp - 4, sp);
+-              sp = *(unsigned long *)sp - 3;
++              pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp);
++              sp = SPILL_SLOT(sp, 1);
+       } while (count++ < 16);
+       return 0;
+ }