From: Greg Kroah-Hartman Date: Mon, 4 Dec 2017 08:49:34 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v3.18.86~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9d2de3146d8610ea488479b184892a6d2b25ef13;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch --- diff --git a/queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch b/queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch new file mode 100644 index 00000000000..9a95a65080f --- /dev/null +++ b/queue-4.9/mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch @@ -0,0 +1,63 @@ +From 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a Mon Sep 17 00:00:00 2001 +From: Mike Kravetz +Date: Wed, 29 Nov 2017 16:10:01 -0800 +Subject: mm/cma: fix alloc_contig_range ret code/potential leak + +From: Mike Kravetz + +commit 63cd448908b5eb51d84c52f02b31b9b4ccd1cb5a upstream. + +If the call __alloc_contig_migrate_range() in alloc_contig_range returns +-EBUSY, processing continues so that test_pages_isolated() is called +where there is a tracepoint to identify the busy pages. However, it is +possible for busy pages to become available between the calls to these +two routines. In this case, the range of pages may be allocated. +Unfortunately, the original return code (ret == -EBUSY) is still set and +returned to the caller. Therefore, the caller believes the pages were +not allocated and they are leaked. + +Update the comment to indicate that allocation is still possible even if +__alloc_contig_migrate_range returns -EBUSY. Also, clear return code in +this case so that it is not accidentally used or returned to caller. 
+ +Link: http://lkml.kernel.org/r/20171122185214.25285-1-mike.kravetz@oracle.com +Fixes: 8ef5849fa8a2 ("mm/cma: always check which page caused allocation failure") +Signed-off-by: Mike Kravetz +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Acked-by: Johannes Weiner +Acked-by: Joonsoo Kim +Cc: Michal Nazarewicz +Cc: Laura Abbott +Cc: Michal Hocko +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -7309,11 +7309,18 @@ int alloc_contig_range(unsigned long sta + + /* + * In case of -EBUSY, we'd like to know which page causes problem. +- * So, just fall through. We will check it in test_pages_isolated(). ++ * So, just fall through. test_pages_isolated() has a tracepoint ++ * which will report the busy page. ++ * ++ * It is possible that busy pages could become available before ++ * the call to test_pages_isolated, and the range will actually be ++ * allocated. So, if we fall through be sure to clear ret so that ++ * -EBUSY is not accidentally used or returned to caller. + */ + ret = __alloc_contig_migrate_range(&cc, start, end); + if (ret && ret != -EBUSY) + goto done; ++ ret =0; + + /* + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES diff --git a/queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch b/queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch new file mode 100644 index 00000000000..568fc1b6e41 --- /dev/null +++ b/queue-4.9/mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch @@ -0,0 +1,90 @@ +From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Wed, 29 Nov 2017 16:10:28 -0800 +Subject: mm, hugetlbfs: introduce ->split() to vm_operations_struct + +From: Dan Williams + +commit 31383c6865a578834dd953d9dbc88e6b19fe3997 upstream. 
+ +Patch series "device-dax: fix unaligned munmap handling" + +When device-dax is operating in huge-page mode we want it to behave like +hugetlbfs and fail attempts to split vmas into unaligned ranges. It +would be messy to teach the munmap path about device-dax alignment +constraints in the same (hstate) way that hugetlbfs communicates this +constraint. Instead, these patches introduce a new ->split() vm +operation. + +This patch (of 2): + +The device-dax interface has similar constraints as hugetlbfs in that it +requires the munmap path to unmap in huge page aligned units. Rather +than add more custom vma handling code in __split_vma() introduce a new +vm operation to perform this vma specific check. + +Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com +Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") +Signed-off-by: Dan Williams +Cc: Jeff Moyer +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm.h | 1 + + mm/hugetlb.c | 8 ++++++++ + mm/mmap.c | 8 +++++--- + 3 files changed, 14 insertions(+), 3 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -347,6 +347,7 @@ struct fault_env { + struct vm_operations_struct { + void (*open)(struct vm_area_struct * area); + void (*close)(struct vm_area_struct * area); ++ int (*split)(struct vm_area_struct * area, unsigned long addr); + int (*mremap)(struct vm_area_struct * area); + int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pmd_fault)(struct vm_area_struct *, unsigned long address, +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -3135,6 +3135,13 @@ static void hugetlb_vm_op_close(struct v + } + } + ++static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) ++{ ++ if (addr & ~(huge_page_mask(hstate_vma(vma)))) ++ return -EINVAL; ++ return 0; ++} ++ + /* + * We cannot handle pagefaults against hugetlb pages at all. 
They cause + * handle_mm_fault() to try to instantiate regular-sized pages in the +@@ -3151,6 +3158,7 @@ const struct vm_operations_struct hugetl + .fault = hugetlb_vm_op_fault, + .open = hugetlb_vm_op_open, + .close = hugetlb_vm_op_close, ++ .split = hugetlb_vm_op_split, + }; + + static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2538,9 +2538,11 @@ static int __split_vma(struct mm_struct + struct vm_area_struct *new; + int err; + +- if (is_vm_hugetlb_page(vma) && (addr & +- ~(huge_page_mask(hstate_vma(vma))))) +- return -EINVAL; ++ if (vma->vm_ops && vma->vm_ops->split) { ++ err = vma->vm_ops->split(vma, addr); ++ if (err) ++ return err; ++ } + + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + if (!new) diff --git a/queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch b/queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch new file mode 100644 index 00000000000..e59a1d53897 --- /dev/null +++ b/queue-4.9/mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch @@ -0,0 +1,74 @@ +From 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 Mon Sep 17 00:00:00 2001 +From: chenjie +Date: Wed, 29 Nov 2017 16:10:54 -0800 +Subject: mm/madvise.c: fix madvise() infinite loop under special circumstances + +From: chenjie + +commit 6ea8d958a2c95a1d514015d4e29ba21a8c0a1a91 upstream. + +MADV_WILLNEED has always been a noop for DAX (formerly XIP) mappings. +Unfortunately madvise_willneed() doesn't communicate this information +properly to the generic madvise syscall implementation. The calling +convention is quite subtle there. madvise_vma() is supposed to either +return an error or update &prev otherwise the main loop will never +advance to the next vma and it will keep looping for ever without a way +to get out of the kernel. + +It seems this has been broken since introduction. 
Nobody has noticed +because nobody seems to be using MADV_WILLNEED on these DAX mappings. + +[mhocko@suse.com: rewrite changelog] +Link: http://lkml.kernel.org/r/20171127115318.911-1-guoxuenan@huawei.com +Fixes: fe77ba6f4f97 ("[PATCH] xip: madvice/fadvice: execute in place") +Signed-off-by: chenjie +Signed-off-by: guoxuenan +Acked-by: Michal Hocko +Cc: Minchan Kim +Cc: zhangyi (F) +Cc: Miao Xie +Cc: Mike Rapoport +Cc: Shaohua Li +Cc: Andrea Arcangeli +Cc: Mel Gorman +Cc: Kirill A. Shutemov +Cc: David Rientjes +Cc: Anshuman Khandual +Cc: Rik van Riel +Cc: Carsten Otte +Cc: Dan Williams +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -228,15 +228,14 @@ static long madvise_willneed(struct vm_a + { + struct file *file = vma->vm_file; + ++ *prev = vma; + #ifdef CONFIG_SWAP + if (!file) { +- *prev = vma; + force_swapin_readahead(vma, start, end); + return 0; + } + + if (shmem_mapping(file->f_mapping)) { +- *prev = vma; + force_shm_swapin_readahead(vma, start, end, + file->f_mapping); + return 0; +@@ -251,7 +250,6 @@ static long madvise_willneed(struct vm_a + return 0; + } + +- *prev = vma; + start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (end > vma->vm_end) + end = vma->vm_end; diff --git a/queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch b/queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch new file mode 100644 index 00000000000..94f65f5fd96 --- /dev/null +++ b/queue-4.9/mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch @@ -0,0 +1,78 @@ +From a8f97366452ed491d13cf1e44241bc0b5740b1f0 Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Mon, 27 Nov 2017 06:21:25 +0300 +Subject: mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d() + +From: Kirill A. 
Shutemov + +commit a8f97366452ed491d13cf1e44241bc0b5740b1f0 upstream. + +Currently, we unconditionally make page table dirty in touch_pmd(). +It may result in false-positive can_follow_write_pmd(). + +We may avoid the situation, if we would only make the page table entry +dirty if caller asks for write access -- FOLL_WRITE. + +The patch also changes touch_pud() in the same way. + +Signed-off-by: Kirill A. Shutemov +Cc: Michal Hocko +Cc: Hugh Dickins +Signed-off-by: Linus Torvalds +[Salvatore Bonaccorso: backport for 4.9: + - Adjust context + - Drop specific part for PUD-sized transparent hugepages. Support + for PUD-sized transparent hugepages was added in v4.11-rc1 +] +Signed-off-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -745,20 +745,15 @@ int vmf_insert_pfn_pmd(struct vm_area_st + EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); + + static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, +- pmd_t *pmd) ++ pmd_t *pmd, int flags) + { + pmd_t _pmd; + +- /* +- * We should set the dirty bit only for FOLL_WRITE but for now +- * the dirty bit in the pmd is meaningless. And if the dirty +- * bit will become meaningful and we'll only set it with +- * FOLL_WRITE, an atomic set_bit will be required on the pmd to +- * set the young bit, instead of the current set_pmd_at. 
+- */ +- _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); ++ _pmd = pmd_mkyoung(*pmd); ++ if (flags & FOLL_WRITE) ++ _pmd = pmd_mkdirty(_pmd); + if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, +- pmd, _pmd, 1)) ++ pmd, _pmd, flags & FOLL_WRITE)) + update_mmu_cache_pmd(vma, addr, pmd); + } + +@@ -787,7 +782,7 @@ struct page *follow_devmap_pmd(struct vm + return NULL; + + if (flags & FOLL_TOUCH) +- touch_pmd(vma, addr, pmd); ++ touch_pmd(vma, addr, pmd, flags); + + /* + * device mapped pages can only be returned if the +@@ -1158,7 +1153,7 @@ struct page *follow_trans_huge_pmd(struc + page = pmd_page(*pmd); + VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + if (flags & FOLL_TOUCH) +- touch_pmd(vma, addr, pmd); ++ touch_pmd(vma, addr, pmd, flags); + if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { + /* + * We don't mlock() pte-mapped THPs. This way we can avoid diff --git a/queue-4.9/series b/queue-4.9/series index 7066743a954..36acdd03a64 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -1,2 +1,6 @@ arm-dts-logicpd-torpedo-fix-camera-pin-mux.patch arm-dts-omap3-logicpd-torpedo-37xx-devkit-fix-mmc1-cd-gpio.patch +mm-thp-do-not-make-page-table-dirty-unconditionally-in-touch_pd.patch +mm-cma-fix-alloc_contig_range-ret-code-potential-leak.patch +mm-hugetlbfs-introduce-split-to-vm_operations_struct.patch +mm-madvise.c-fix-madvise-infinite-loop-under-special-circumstances.patch