git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Dec 2017 08:49:34 +0000 (09:49 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Dec 2017 08:49:34 +0000 (09:49 +0100)
added patches:
mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch
mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch
mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch
mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch

queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch [new file with mode: 0644]
queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch [new file with mode: 0644]
queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch [new file with mode: 0644]
queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch b/queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch
new file mode 100644 (file)
index 0000000..9a95a65
--- /dev/null
@@ -0,0 +1,63 @@
+From 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Wed, 29 Nov 2017 16:10:01 -0800
+Subject: mm/cma: fix alloc_contig_range ret code/potential leak
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a upstream.
+
+If the call __alloc_contig_migrate_range() in alloc_contig_range returns
+-EBUSY, processing continues so that test_pages_isolated() is called
+where there is a tracepoint to identify the busy pages.  However, it is
+possible for busy pages to become available between the calls to these
+two routines.  In this case, the range of pages may be allocated.
+Unfortunately, the original return code (ret == -EBUSY) is still set and
+returned to the caller.  Therefore, the caller believes the pages were
+not allocated and they are leaked.
+
+Update the comment to indicate that allocation is still possible even if
+__alloc_contig_migrate_range returns -EBUSY.  Also, clear return code in
+this case so that it is not accidentally used or returned to caller.
+
+Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com
+Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Laura Abbott <labbott@redhat.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -7309,11 +7309,18 @@ int alloc_contig_range(unsigned long sta
+       /*
+        * In case of -EBUSY, we'd like to know which page causes problem.
+-       * So, just fall through. We will check it in test_pages_isolated().
++       * So, just fall through. test_pages_isolated() has a tracepoint
++       * which will report the busy page.
++       *
++       * It is possible that busy pages could become available before
++       * the call to test_pages_isolated, and the range will actually be
++       * allocated.  So, if we fall through be sure to clear ret so that
++       * -EBUSY is not accidentally used or returned to caller.
+        */
+       ret = __alloc_contig_migrate_range(&cc, start, end);
+       if (ret && ret != -EBUSY)
+               goto done;
+      ret = 0;
+       /*
+        * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
diff --git a/queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch b/queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch
new file mode 100644 (file)
index 0000000..568fc1b
--- /dev/null
@@ -0,0 +1,90 @@
+From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Wed, 29 Nov 2017 16:10:28 -0800
+Subject: mm, hugetlbfs: introduce ->split() to vm_operations_struct
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 31383c6865a578834dd953d9dbc88e6b19fe3997 upstream.
+
+Patch series "device-dax: fix unaligned munmap handling"
+
+When device-dax is operating in huge-page mode we want it to behave like
+hugetlbfs and fail attempts to split vmas into unaligned ranges.  It
+would be messy to teach the munmap path about device-dax alignment
+constraints in the same (hstate) way that hugetlbfs communicates this
+constraint.  Instead, these patches introduce a new ->split() vm
+operation.
+
+This patch (of 2):
+
+The device-dax interface has similar constraints as hugetlbfs in that it
+requires the munmap path to unmap in huge page aligned units.  Rather
+than add more custom vma handling code in __split_vma() introduce a new
+vm operation to perform this vma specific check.
+
+Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com
+Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Cc: Jeff Moyer <jmoyer@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm.h |    1 +
+ mm/hugetlb.c       |    8 ++++++++
+ mm/mmap.c          |    8 +++++---
+ 3 files changed, 14 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -347,6 +347,7 @@ struct fault_env {
+ struct vm_operations_struct {
+       void (*open)(struct vm_area_struct * area);
+       void (*close)(struct vm_area_struct * area);
++      int (*split)(struct vm_area_struct * area, unsigned long addr);
+       int (*mremap)(struct vm_area_struct * area);
+       int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+       int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3135,6 +3135,13 @@ static void hugetlb_vm_op_close(struct v
+       }
+ }
++static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
++{
++      if (addr & ~(huge_page_mask(hstate_vma(vma))))
++              return -EINVAL;
++      return 0;
++}
++
+ /*
+  * We cannot handle pagefaults against hugetlb pages at all.  They cause
+  * handle_mm_fault() to try to instantiate regular-sized pages in the
+@@ -3151,6 +3158,7 @@ const struct vm_operations_struct hugetl
+       .fault = hugetlb_vm_op_fault,
+       .open = hugetlb_vm_op_open,
+       .close = hugetlb_vm_op_close,
++      .split = hugetlb_vm_op_split,
+ };
+ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2538,9 +2538,11 @@ static int __split_vma(struct mm_struct
+       struct vm_area_struct *new;
+       int err;
+-      if (is_vm_hugetlb_page(vma) && (addr &
+-                                      ~(huge_page_mask(hstate_vma(vma)))))
+-              return -EINVAL;
++      if (vma->vm_ops && vma->vm_ops->split) {
++              err = vma->vm_ops->split(vma, addr);
++              if (err)
++                      return err;
++      }
+       new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+       if (!new)
diff --git a/queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch b/queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch
new file mode 100644 (file)
index 0000000..e59a1d5
--- /dev/null
@@ -0,0 +1,74 @@
+From 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 Mon Sep 17 00:00:00 2001
+From: chenjie <chenjie6@huawei.com>
+Date: Wed, 29 Nov 2017 16:10:54 -0800
+Subject: mm/madvise.c: fix madvise() infinite loop under special circumstances
+
+From: chenjie <chenjie6@huawei.com>
+
+commit 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 upstream.
+
+MADVISE_WILLNEED has always been a noop for DAX (formerly XIP) mappings.
+Unfortunately madvise_willneed() doesn't communicate this information
+properly to the generic madvise syscall implementation.  The calling
+convention is quite subtle there.  madvise_vma() is supposed to either
+return an error or update &prev otherwise the main loop will never
+advance to the next vma and it will keep looping for ever without a way
+to get out of the kernel.
+
+It seems this has been broken since introduction.  Nobody has noticed
+because nobody seems to be using MADVISE_WILLNEED on these DAX mappings.
+
+[mhocko@suse.com: rewrite changelog]
+Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com
+Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place")
+Signed-off-by: chenjie <chenjie6@huawei.com>
+Signed-off-by: guoxuenan <guoxuenan@huawei.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: zhangyi (F) <yi.zhang@huawei.com>
+Cc: Miao Xie <miaoxie@huawei.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Shaohua Li <shli@fb.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Carsten Otte <cotte@de.ibm.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -228,15 +228,14 @@ static long madvise_willneed(struct vm_a
+ {
+       struct file *file = vma->vm_file;
++      *prev = vma;
+ #ifdef CONFIG_SWAP
+       if (!file) {
+-              *prev = vma;
+               force_swapin_readahead(vma, start, end);
+               return 0;
+       }
+       if (shmem_mapping(file->f_mapping)) {
+-              *prev = vma;
+               force_shm_swapin_readahead(vma, start, end,
+                                       file->f_mapping);
+               return 0;
+@@ -251,7 +250,6 @@ static long madvise_willneed(struct vm_a
+               return 0;
+       }
+-      *prev = vma;
+       start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+       if (end > vma->vm_end)
+               end = vma->vm_end;
diff --git a/queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch b/queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch
new file mode 100644 (file)
index 0000000..94f65f5
--- /dev/null
@@ -0,0 +1,78 @@
+From a8f97366452ed491d13cf1e44241bc0b5740b1f0 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Mon, 27 Nov 2017 06:21:25 +0300
+Subject: mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit a8f97366452ed491d13cf1e44241bc0b5740b1f0 upstream.
+
+Currently, we unconditionally make page table dirty in touch_pmd().
+It may result in false-positive can_follow_write_pmd().
+
+We may avoid the situation, if we would only make the page table entry
+dirty if caller asks for write access -- FOLL_WRITE.
+
+The patch also changes touch_pud() in the same way.
+
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[Salvatore Bonaccorso: backport for 4.9:
+ - Adjust context
+ - Drop specific part for PUD-sized transparent hugepages. Support
+   for PUD-sized transparent hugepages was added in v4.11-rc1
+]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -745,20 +745,15 @@ int vmf_insert_pfn_pmd(struct vm_area_st
+ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
+ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+-              pmd_t *pmd)
++              pmd_t *pmd, int flags)
+ {
+       pmd_t _pmd;
+-      /*
+-       * We should set the dirty bit only for FOLL_WRITE but for now
+-       * the dirty bit in the pmd is meaningless.  And if the dirty
+-       * bit will become meaningful and we'll only set it with
+-       * FOLL_WRITE, an atomic set_bit will be required on the pmd to
+-       * set the young bit, instead of the current set_pmd_at.
+-       */
+-      _pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
++      _pmd = pmd_mkyoung(*pmd);
++      if (flags & FOLL_WRITE)
++              _pmd = pmd_mkdirty(_pmd);
+       if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
+-                              pmd, _pmd,  1))
++                              pmd, _pmd, flags & FOLL_WRITE))
+               update_mmu_cache_pmd(vma, addr, pmd);
+ }
+@@ -787,7 +782,7 @@ struct page *follow_devmap_pmd(struct vm
+               return NULL;
+       if (flags & FOLL_TOUCH)
+-              touch_pmd(vma, addr, pmd);
++              touch_pmd(vma, addr, pmd, flags);
+       /*
+        * device mapped pages can only be returned if the
+@@ -1158,7 +1153,7 @@ struct page *follow_trans_huge_pmd(struc
+       page = pmd_page(*pmd);
+       VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+       if (flags & FOLL_TOUCH)
+-              touch_pmd(vma, addr, pmd);
++              touch_pmd(vma, addr, pmd, flags);
+       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+               /*
+                * We don't mlock() pte-mapped THPs. This way we can avoid
diff --git a/queue-4.9/series b/queue-4.9/series
index 7066743a9541a18d5758d1b60e2e5b6d58e71096..36acdd03a64c8772404c23dcbaac1370661616c1 100644 (file)
@@ -1,2 +1,6 @@
 arm-dts-logicpd-torpedo-fix-camera-pin-mux.patch
 arm-dts-omap3-logicpd-torpedo-37xx-devkit-fix-mmc1-cd-gpio.patch
+mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch
+mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch
+mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch
+mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch