git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.19-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 26 Apr 2015 09:56:59 +0000 (11:56 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 26 Apr 2015 09:56:59 +0000 (11:56 +0200)
added patches:
mm-hugetlb-reduce-arch-dependent-code-around-follow_huge_.patch
mm-hugetlb-take-page-table-lock-in-follow_huge_pmd.patch

queue-3.19/mm-hugetlb-reduce-arch-dependent-code-around-follow_huge_.patch [new file with mode: 0644]
queue-3.19/mm-hugetlb-take-page-table-lock-in-follow_huge_pmd.patch [new file with mode: 0644]
queue-3.19/series

diff --git a/queue-3.19/mm-hugetlb-reduce-arch-dependent-code-around-follow_huge_.patch b/queue-3.19/mm-hugetlb-reduce-arch-dependent-code-around-follow_huge_.patch
new file mode 100644 (file)
index 0000000..ade0c32
--- /dev/null
@@ -0,0 +1,393 @@
+From 61f77eda9bbf0d2e922197ed2dcf88638a639ce5 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:25:15 -0800
+Subject: mm/hugetlb: reduce arch dependent code around follow_huge_*
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 61f77eda9bbf0d2e922197ed2dcf88638a639ce5 upstream.
+
+Currently we have many duplicates in definitions around
+follow_huge_addr(), follow_huge_pmd(), and follow_huge_pud(), so this
+patch tries to remove them.  The basic idea is to put the default
+implementation for these functions in mm/hugetlb.c as weak symbols
+(regardless of CONFIG_ARCH_WANT_GENERAL_HUGETLB), and to implement
+arch-specific code only when the arch needs it.
+
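+To illustrate the mechanism: a __weak default in common code is
+silently replaced at link time by any strong definition an architecture
+provides (a sketch for illustration, not code from this patch; the
+arch helper name below is made up):
+
+  /* mm/hugetlb.c: overridable default */
+  struct page * __weak
+  follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+  {
+          return ERR_PTR(-EINVAL);
+  }
+
+  /* arch/<arch>/mm/hugetlbpage.c: a strong definition with the same
+   * signature overrides the weak default when this object is linked in */
+  struct page *
+  follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+  {
+          /* hypothetical arch-specific lookup */
+          return arch_follow_huge_addr(mm, address, write);
+  }
+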
+For follow_huge_addr(), only powerpc and ia64 have their own
+implementation, and in all other architectures this function just returns
+ERR_PTR(-EINVAL).  So this patch sets returning ERR_PTR(-EINVAL) as
+default.
+
+As for follow_huge_(pmd|pud)(), if (pmd|pud)_huge() is implemented to
+always return 0 in your architecture (like in ia64 or sparc), it's never
+called (the callsite is optimized away) no matter how it is implemented.
+So in such architectures, we don't need an arch-specific implementation.
+
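+Concretely (an illustrative sketch, assuming pmd_huge() is visible to
+the compiler as a constant 0, e.g. as an inline):
+
+  /* caller side, follow_page_mask()-style */
+  if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB)
+          /* dead code when pmd_huge() is constantly 0; the weak
+           * follow_huge_pmd() default is then never called */
+          return follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
+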
+In some architectures (like mips, s390 and tile), the current
+arch-specific follow_huge_(pmd|pud)() is effectively identical to the
+common code, so this patch lets these architectures use the common code.
+
+One exception is metag, where pmd_huge() could return non-zero but it
+expects follow_huge_pmd() to always return NULL.  This means that we need
+an arch-specific implementation which returns NULL.  This behavior looks
+strange to me (because non-zero pmd_huge() implies that the architecture
+supports PMD-based hugepages, so follow_huge_pmd() can/should return some
+relevant value), but that's beyond this cleanup patch, so let's keep it.
+
+Justification of non-trivial changes:
+- in s390, follow_huge_pmd() checks !MACHINE_HAS_HPAGE at first, and this
+  patch removes the check. This is OK because we can assume MACHINE_HAS_HPAGE
+  is true when follow_huge_pmd() can be called (note that pmd_huge() has
+  the same check and always returns 0 for !MACHINE_HAS_HPAGE).
+- in s390 and mips, we use HPAGE_MASK instead of PMD_MASK as done in common
+  code. This patch forces these archs to use PMD_MASK, but it's OK because
+  the two masks are identical in both archs.
+  In s390, both HPAGE_SHIFT and PMD_SHIFT are 20.
+  In mips, HPAGE_SHIFT is defined as (PAGE_SHIFT + PAGE_SHIFT - 3) and
+  PMD_SHIFT is defined as (PAGE_SHIFT + PAGE_SHIFT + PTE_ORDER - 3), but
+  PTE_ORDER is always 0, so these are identical.
+
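+  (One could confirm the mips identity at build time with something like
+   BUILD_BUG_ON(HPAGE_SHIFT != PMD_SHIFT); inside any function where both
+   macros are in scope -- illustrative only, not part of this patch.)
+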
+[n-horiguchi@ah.jp.nec.com: resolve conflict to apply to v3.19.1]
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: James Hogan <james.hogan@imgtec.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
+Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
+Cc: Steve Capper <steve.capper@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mm/hugetlbpage.c     |    6 ------
+ arch/arm64/mm/hugetlbpage.c   |    6 ------
+ arch/ia64/mm/hugetlbpage.c    |    6 ------
+ arch/metag/mm/hugetlbpage.c   |    6 ------
+ arch/mips/mm/hugetlbpage.c    |   18 ------------------
+ arch/powerpc/mm/hugetlbpage.c |    8 ++++++++
+ arch/s390/mm/hugetlbpage.c    |   20 --------------------
+ arch/sh/mm/hugetlbpage.c      |   12 ------------
+ arch/sparc/mm/hugetlbpage.c   |   12 ------------
+ arch/tile/mm/hugetlbpage.c    |   28 ----------------------------
+ arch/x86/mm/hugetlbpage.c     |   12 ------------
+ mm/hugetlb.c                  |   30 +++++++++++++++---------------
+ 12 files changed, 23 insertions(+), 141 deletions(-)
+
+--- a/arch/arm/mm/hugetlbpage.c
++++ b/arch/arm/mm/hugetlbpage.c
+@@ -36,12 +36,6 @@
+  * of type casting from pmd_t * to pte_t *.
+  */
+-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+-                            int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pud_huge(pud_t pud)
+ {
+       return 0;
+--- a/arch/arm64/mm/hugetlbpage.c
++++ b/arch/arm64/mm/hugetlbpage.c
+@@ -38,12 +38,6 @@ int huge_pmd_unshare(struct mm_struct *m
+ }
+ #endif
+-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+-                            int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return !(pmd_val(pmd) & PMD_TABLE_BIT);
+--- a/arch/ia64/mm/hugetlbpage.c
++++ b/arch/ia64/mm/hugetlbpage.c
+@@ -114,12 +114,6 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-struct page *
+-follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
+-{
+-      return NULL;
+-}
+-
+ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
+--- a/arch/metag/mm/hugetlbpage.c
++++ b/arch/metag/mm/hugetlbpage.c
+@@ -94,12 +94,6 @@ int huge_pmd_unshare(struct mm_struct *m
+       return 0;
+ }
+-struct page *follow_huge_addr(struct mm_struct *mm,
+-                            unsigned long address, int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return pmd_page_shift(pmd) > PAGE_SHIFT;
+--- a/arch/mips/mm/hugetlbpage.c
++++ b/arch/mips/mm/hugetlbpage.c
+@@ -68,12 +68,6 @@ int is_aligned_hugepage_range(unsigned l
+       return 0;
+ }
+-struct page *
+-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return (pmd_val(pmd) & _PAGE_HUGE) != 0;
+@@ -83,15 +77,3 @@ int pud_huge(pud_t pud)
+ {
+       return (pud_val(pud) & _PAGE_HUGE) != 0;
+ }
+-
+-struct page *
+-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-              pmd_t *pmd, int write)
+-{
+-      struct page *page;
+-
+-      page = pte_page(*(pte_t *)pmd);
+-      if (page)
+-              page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+-      return page;
+-}
+--- a/arch/powerpc/mm/hugetlbpage.c
++++ b/arch/powerpc/mm/hugetlbpage.c
+@@ -714,6 +714,14 @@ follow_huge_pmd(struct mm_struct *mm, un
+       return NULL;
+ }
++struct page *
++follow_huge_pud(struct mm_struct *mm, unsigned long address,
++              pud_t *pud, int write)
++{
++      BUG();
++      return NULL;
++}
++
+ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
+                                     unsigned long sz)
+ {
+--- a/arch/s390/mm/hugetlbpage.c
++++ b/arch/s390/mm/hugetlbpage.c
+@@ -192,12 +192,6 @@ int huge_pmd_unshare(struct mm_struct *m
+       return 0;
+ }
+-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+-                            int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       if (!MACHINE_HAS_HPAGE)
+@@ -210,17 +204,3 @@ int pud_huge(pud_t pud)
+ {
+       return 0;
+ }
+-
+-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-                           pmd_t *pmdp, int write)
+-{
+-      struct page *page;
+-
+-      if (!MACHINE_HAS_HPAGE)
+-              return NULL;
+-
+-      page = pmd_page(*pmdp);
+-      if (page)
+-              page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+-      return page;
+-}
+--- a/arch/sh/mm/hugetlbpage.c
++++ b/arch/sh/mm/hugetlbpage.c
+@@ -67,12 +67,6 @@ int huge_pmd_unshare(struct mm_struct *m
+       return 0;
+ }
+-struct page *follow_huge_addr(struct mm_struct *mm,
+-                            unsigned long address, int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return 0;
+@@ -82,9 +76,3 @@ int pud_huge(pud_t pud)
+ {
+       return 0;
+ }
+-
+-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-                           pmd_t *pmd, int write)
+-{
+-      return NULL;
+-}
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -215,12 +215,6 @@ pte_t huge_ptep_get_and_clear(struct mm_
+       return entry;
+ }
+-struct page *follow_huge_addr(struct mm_struct *mm,
+-                            unsigned long address, int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return 0;
+@@ -230,9 +224,3 @@ int pud_huge(pud_t pud)
+ {
+       return 0;
+ }
+-
+-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-                           pmd_t *pmd, int write)
+-{
+-      return NULL;
+-}
+--- a/arch/tile/mm/hugetlbpage.c
++++ b/arch/tile/mm/hugetlbpage.c
+@@ -150,12 +150,6 @@ pte_t *huge_pte_offset(struct mm_struct
+       return NULL;
+ }
+-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+-                            int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ int pmd_huge(pmd_t pmd)
+ {
+       return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
+@@ -166,28 +160,6 @@ int pud_huge(pud_t pud)
+       return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
+ }
+-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-                           pmd_t *pmd, int write)
+-{
+-      struct page *page;
+-
+-      page = pte_page(*(pte_t *)pmd);
+-      if (page)
+-              page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+-      return page;
+-}
+-
+-struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+-                           pud_t *pud, int write)
+-{
+-      struct page *page;
+-
+-      page = pte_page(*(pte_t *)pud);
+-      if (page)
+-              page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+-      return page;
+-}
+-
+ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+ {
+       return 0;
+--- a/arch/x86/mm/hugetlbpage.c
++++ b/arch/x86/mm/hugetlbpage.c
+@@ -52,20 +52,8 @@ int pud_huge(pud_t pud)
+       return 0;
+ }
+-struct page *
+-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-              pmd_t *pmd, int write)
+-{
+-      return NULL;
+-}
+ #else
+-struct page *
+-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+-{
+-      return ERR_PTR(-EINVAL);
+-}
+-
+ /*
+  * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
+  * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3700,7 +3700,20 @@ pte_t *huge_pte_offset(struct mm_struct
+       return (pte_t *) pmd;
+ }
+-struct page *
++#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
++
++/*
++ * These functions are overwritable if your architecture needs its own
++ * behavior.
++ */
++struct page * __weak
++follow_huge_addr(struct mm_struct *mm, unsigned long address,
++                            int write)
++{
++      return ERR_PTR(-EINVAL);
++}
++
++struct page * __weak
+ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+               pmd_t *pmd, int write)
+ {
+@@ -3714,7 +3727,7 @@ follow_huge_pmd(struct mm_struct *mm, un
+       return page;
+ }
+-struct page *
++struct page * __weak
+ follow_huge_pud(struct mm_struct *mm, unsigned long address,
+               pud_t *pud, int write)
+ {
+@@ -3726,19 +3739,6 @@ follow_huge_pud(struct mm_struct *mm, un
+       return page;
+ }
+-#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+-
+-/* Can be overriden by architectures */
+-struct page * __weak
+-follow_huge_pud(struct mm_struct *mm, unsigned long address,
+-             pud_t *pud, int write)
+-{
+-      BUG();
+-      return NULL;
+-}
+-
+-#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+-
+ #ifdef CONFIG_MEMORY_FAILURE
+ /* Should be called in hugetlb_lock */
diff --git a/queue-3.19/mm-hugetlb-take-page-table-lock-in-follow_huge_pmd.patch b/queue-3.19/mm-hugetlb-take-page-table-lock-in-follow_huge_pmd.patch
new file mode 100644 (file)
index 0000000..60b6017
--- /dev/null
@@ -0,0 +1,305 @@
+From e66f17ff71772b209eed39de35aaa99ba819c93d Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 11 Feb 2015 15:25:22 -0800
+Subject: mm/hugetlb: take page table lock in follow_huge_pmd()
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit e66f17ff71772b209eed39de35aaa99ba819c93d upstream.
+
+We have a race condition between move_pages() and freeing hugepages, where
+move_pages() calls follow_page(FOLL_GET) for hugepages internally and
+tries to get its refcount without preventing concurrent freeing.  This
+race crashes the kernel, so this patch fixes it by moving the FOLL_GET code
+for hugepages into follow_huge_pmd(), taking the page table lock there.
+
+This patch intentionally removes the page==NULL check after pte_page().
+This is justified because pte_page() never returns NULL on any
+architecture or configuration.
+
+This patch changes the behavior of follow_huge_pmd() for tail pages, so
+tail pages can now be pinned and returned.  The caller must therefore be
+changed to properly handle the returned tail pages.
+
+We could add similar locking to follow_huge_(addr|pud) for consistency,
+but it's not necessary because these functions currently don't support
+the FOLL_GET flag, so let's leave that for future development.
+
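+The essence of the fix is a lock/recheck/retry pattern, distilled here
+for illustration (the real code is in the mm/hugetlb.c hunk below):
+
+  retry:
+          ptl = pmd_lockptr(mm, pmd);
+          spin_lock(ptl);
+          if (!pmd_huge(*pmd))            /* recheck under the lock */
+                  goto out;               /* lost a race with unmap/free */
+          if (pmd_present(*pmd)) {
+                  page = pte_page(*(pte_t *)pmd) +
+                          ((address & ~PMD_MASK) >> PAGE_SHIFT);
+                  if (flags & FOLL_GET)
+                          get_page(page); /* refcount taken while locked */
+          } else if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+                  spin_unlock(ptl);
+                  __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+                  goto retry;             /* the entry may be huge again */
+          }
+  out:
+          spin_unlock(ptl);
+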
+Here is the reproducer:
+
+  $ cat movepages.c
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <sys/types.h>  /* pid_t */
+  #include <err.h>        /* err() */
+  #include <numa.h>       /* numa_move_pages() */
+  #include <numaif.h>     /* MPOL_MF_MOVE_ALL */
+
+  #define ADDR_INPUT      0x700000000000UL
+  #define HPS             0x200000
+  #define PS              0x1000
+
+  int main(int argc, char *argv[]) {
+          int i;
+          int nr_hp = strtol(argv[1], NULL, 0);
+          int nr_p  = nr_hp * HPS / PS;
+          int ret;
+          void **addrs;
+          int *status;
+          int *nodes;
+          pid_t pid;
+
+          pid = strtol(argv[2], NULL, 0);
+          addrs  = malloc(sizeof(void *) * (nr_p + 1));
+          status = malloc(sizeof(int) * (nr_p + 1));
+          nodes  = malloc(sizeof(int) * (nr_p + 1));
+
+          while (1) {
+                  for (i = 0; i < nr_p; i++) {
+                          addrs[i] = (void *)ADDR_INPUT + i * PS;
+                          nodes[i] = 1;
+                          status[i] = 0;
+                  }
+                  ret = numa_move_pages(pid, nr_p, addrs, nodes, status,
+                                        MPOL_MF_MOVE_ALL);
+                  if (ret == -1)
+                          err(1, "move_pages");
+
+                  for (i = 0; i < nr_p; i++) {
+                          addrs[i] = (void *)ADDR_INPUT + i * PS;
+                          nodes[i] = 0;
+                          status[i] = 0;
+                  }
+                  ret = numa_move_pages(pid, nr_p, addrs, nodes, status,
+                                        MPOL_MF_MOVE_ALL);
+                  if (ret == -1)
+                          err(1, "move_pages");
+          }
+          return 0;
+  }
+
+  $ cat hugepage.c
+  #include <stdio.h>
+  #include <stdlib.h>     /* strtol() */
+  #include <sys/mman.h>
+  #include <string.h>
+
+  #define ADDR_INPUT      0x700000000000UL
+  #define HPS             0x200000
+
+  int main(int argc, char *argv[]) {
+          int nr_hp = strtol(argv[1], NULL, 0);
+          char *p;
+
+          while (1) {
+                  p = mmap((void *)ADDR_INPUT, nr_hp * HPS, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+                  if (p != (void *)ADDR_INPUT) {
+                          perror("mmap");
+                          break;
+                  }
+                  memset(p, 0, nr_hp * HPS);
+                  munmap(p, nr_hp * HPS);
+          }
+  }
+
+  $ sysctl vm.nr_hugepages=40
+  $ ./hugepage 10 &
+  $ ./movepages 10 $(pgrep -f hugepage)
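+
+  (The reproducer uses libnuma; an assumed build line would be
+   "gcc movepages.c -o movepages -lnuma" and plain "gcc hugepage.c -o hugepage".)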
+
+
+[n-horiguchi@ah.jp.nec.com: resolve conflict to apply to v3.19.1]
+Fixes: e632a938d914 ("mm: migrate: add hugepage migration code to move_pages()")
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reported-by: Hugh Dickins <hughd@google.com>
+Cc: James Hogan <james.hogan@imgtec.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
+Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
+Cc: Steve Capper <steve.capper@linaro.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h |    8 ++++----
+ include/linux/swapops.h |    4 ++++
+ mm/gup.c                |   25 ++++++++-----------------
+ mm/hugetlb.c            |   48 ++++++++++++++++++++++++++++++++++--------------
+ mm/migrate.c            |    5 +++--
+ 5 files changed, 53 insertions(+), 37 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *m
+ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+                             int write);
+ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-                              pmd_t *pmd, int write);
++                              pmd_t *pmd, int flags);
+ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+-                              pud_t *pud, int write);
++                              pud_t *pud, int flags);
+ int pmd_huge(pmd_t pmd);
+ int pud_huge(pud_t pmd);
+ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+@@ -133,8 +133,8 @@ static inline void hugetlb_report_meminf
+ static inline void hugetlb_show_meminfo(void)
+ {
+ }
+-#define follow_huge_pmd(mm, addr, pmd, write) NULL
+-#define follow_huge_pud(mm, addr, pud, write) NULL
++#define follow_huge_pmd(mm, addr, pmd, flags) NULL
++#define follow_huge_pud(mm, addr, pud, flags) NULL
+ #define prepare_hugepage_range(file, addr, len)       (-EINVAL)
+ #define pmd_huge(x)   0
+ #define pud_huge(x)   0
+--- a/include/linux/swapops.h
++++ b/include/linux/swapops.h
+@@ -137,6 +137,8 @@ static inline void make_migration_entry_
+       *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
+ }
++extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
++                                      spinlock_t *ptl);
+ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+                                       unsigned long address);
+ extern void migration_entry_wait_huge(struct vm_area_struct *vma,
+@@ -150,6 +152,8 @@ static inline int is_migration_entry(swp
+ }
+ #define migration_entry_to_page(swp) NULL
+ static inline void make_migration_entry_read(swp_entry_t *entryp) { }
++static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
++                                      spinlock_t *ptl) { }
+ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+                                        unsigned long address) { }
+ static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -167,10 +167,10 @@ struct page *follow_page_mask(struct vm_
+       if (pud_none(*pud))
+               return no_page_table(vma, flags);
+       if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
+-              if (flags & FOLL_GET)
+-                      return NULL;
+-              page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+-              return page;
++              page = follow_huge_pud(mm, address, pud, flags);
++              if (page)
++                      return page;
++              return no_page_table(vma, flags);
+       }
+       if (unlikely(pud_bad(*pud)))
+               return no_page_table(vma, flags);
+@@ -179,19 +179,10 @@ struct page *follow_page_mask(struct vm_
+       if (pmd_none(*pmd))
+               return no_page_table(vma, flags);
+       if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
+-              page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
+-              if (flags & FOLL_GET) {
+-                      /*
+-                       * Refcount on tail pages are not well-defined and
+-                       * shouldn't be taken. The caller should handle a NULL
+-                       * return when trying to follow tail pages.
+-                       */
+-                      if (PageHead(page))
+-                              get_page(page);
+-                      else
+-                              page = NULL;
+-              }
+-              return page;
++              page = follow_huge_pmd(mm, address, pmd, flags);
++              if (page)
++                      return page;
++              return no_page_table(vma, flags);
+       }
+       if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+               return no_page_table(vma, flags);
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3715,28 +3715,48 @@ follow_huge_addr(struct mm_struct *mm, u
+ struct page * __weak
+ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+-              pmd_t *pmd, int write)
++              pmd_t *pmd, int flags)
+ {
+-      struct page *page;
+-
+-      if (!pmd_present(*pmd))
+-              return NULL;
+-      page = pte_page(*(pte_t *)pmd);
+-      if (page)
+-              page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
++      struct page *page = NULL;
++      spinlock_t *ptl;
++retry:
++      ptl = pmd_lockptr(mm, pmd);
++      spin_lock(ptl);
++      /*
++       * make sure that the address range covered by this pmd is not
++       * unmapped from other threads.
++       */
++      if (!pmd_huge(*pmd))
++              goto out;
++      if (pmd_present(*pmd)) {
++              page = pte_page(*(pte_t *)pmd) +
++                      ((address & ~PMD_MASK) >> PAGE_SHIFT);
++              if (flags & FOLL_GET)
++                      get_page(page);
++      } else {
++              if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
++                      spin_unlock(ptl);
++                      __migration_entry_wait(mm, (pte_t *)pmd, ptl);
++                      goto retry;
++              }
++              /*
++               * hwpoisoned entry is treated as no_page_table in
++               * follow_page_mask().
++               */
++      }
++out:
++      spin_unlock(ptl);
+       return page;
+ }
+ struct page * __weak
+ follow_huge_pud(struct mm_struct *mm, unsigned long address,
+-              pud_t *pud, int write)
++              pud_t *pud, int flags)
+ {
+-      struct page *page;
++      if (flags & FOLL_GET)
++              return NULL;
+-      page = pte_page(*(pte_t *)pud);
+-      if (page)
+-              page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+-      return page;
++      return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+ }
+ #ifdef CONFIG_MEMORY_FAILURE
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -229,7 +229,7 @@ static void remove_migration_ptes(struct
+  * get to the page and wait until migration is finished.
+  * When we return from this function the fault will be retried.
+  */
+-static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
++void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+                               spinlock_t *ptl)
+ {
+       pte_t pte;
+@@ -1268,7 +1268,8 @@ static int do_move_page_to_node_array(st
+                       goto put_and_set;
+               if (PageHuge(page)) {
+-                      isolate_huge_page(page, &pagelist);
++                      if (PageHead(page))
++                              isolate_huge_page(page, &pagelist);
+                       goto put_and_set;
+               }
index 3506fcc1b320c6b7d301fc7f4d2464237f5c1390..8204dc048ee1597045e1c92d95a9ec5993a0e49d 100644 (file)
@@ -21,3 +21,5 @@ usbnet-fix-tx_bytes-statistic-running-backward-in-cdc_ncm.patch
 tg3-hold-tp-lock-before-calling-tg3_halt-from-tg3_init_one.patch
 kvm-nvmx-mask-unrestricted_guest-if-disabled-on-l0.patch
 staging-comedi-adv_pci1710-fix-ai-insn_read-for-non-zero-channel.patch
+mm-hugetlb-reduce-arch-dependent-code-around-follow_huge_.patch
+mm-hugetlb-take-page-table-lock-in-follow_huge_pmd.patch