From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 Feb 2020 16:50:23 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v4.19.103~123
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e45b454874cd295679e11407a9d3c5a12b15d82c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	powerpc-pseries-advance-pfn-if-section-is-not-present-in-lmb_is_removable.patch
	s390-mm-fix-dynamic-pagetable-upgrade-for-hugetlbfs.patch
---

diff --git a/queue-4.9/powerpc-pseries-advance-pfn-if-section-is-not-present-in-lmb_is_removable.patch b/queue-4.9/powerpc-pseries-advance-pfn-if-section-is-not-present-in-lmb_is_removable.patch
new file mode 100644
index 00000000000..43b417f2b33
--- /dev/null
+++ b/queue-4.9/powerpc-pseries-advance-pfn-if-section-is-not-present-in-lmb_is_removable.patch
@@ -0,0 +1,38 @@
+From fbee6ba2dca30d302efe6bddb3a886f5e964a257 Mon Sep 17 00:00:00 2001
+From: Pingfan Liu <kernelfans@gmail.com>
+Date: Fri, 10 Jan 2020 12:54:02 +0800
+Subject: powerpc/pseries: Advance pfn if section is not present in lmb_is_removable()
+
+From: Pingfan Liu <kernelfans@gmail.com>
+
+commit fbee6ba2dca30d302efe6bddb3a886f5e964a257 upstream.
+
+In lmb_is_removable(), if a section is not present, it should continue
+to test the rest of the sections in the block, but the current code fails
+to do so because it never advances phys_addr past the absent section.
+
+Fixes: 51925fb3c5c9 ("powerpc/pseries: Implement memory hotplug remove in the kernel")
+Cc: stable@vger.kernel.org # v4.1+
+Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/1578632042-12415-1-git-send-email-kernelfans@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/platforms/pseries/hotplug-memory.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
+@@ -398,8 +398,10 @@ static bool lmb_is_removable(struct of_d
+ 
+ 	for (i = 0; i < scns_per_block; i++) {
+ 		pfn = PFN_DOWN(phys_addr);
+-		if (!pfn_present(pfn))
++		if (!pfn_present(pfn)) {
++			phys_addr += MIN_MEMORY_BLOCK_SIZE;
+ 			continue;
++		}
+ 
+ 		rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ 		phys_addr += MIN_MEMORY_BLOCK_SIZE;
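To make the effect of that one-line advance concrete, here is a minimal userspace model of the section scan. The presence map and the examined-counter are invented for illustration; in the kernel the loop instead accumulates rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION):

/* scan.c - toy model of the lmb_is_removable() scan, not kernel code.
 * Build with: cc -Wall -o scan scan.c */
#include <stdbool.h>
#include <stdio.h>

#define SCNS_PER_BLOCK 16

static bool pfn_present(unsigned long pfn)
{
	return pfn != 1;	/* pretend section 1 is not present */
}

static int sections_examined(bool advance_on_absent)
{
	unsigned long phys_addr = 0;	/* one "section" per step here */
	int examined = 0;

	for (int i = 0; i < SCNS_PER_BLOCK; i++) {
		unsigned long pfn = phys_addr;	/* PFN_DOWN() in the kernel */

		if (!pfn_present(pfn)) {
			if (advance_on_absent)
				phys_addr++;	/* the fix */
			continue;
		}
		examined++;	/* kernel: rc &= is_mem_section_removable(...) */
		phys_addr++;
	}
	return examined;
}

int main(void)
{
	printf("without fix: %d of %d sections examined\n",
	       sections_examined(false), SCNS_PER_BLOCK);
	printf("with fix:    %d of %d sections examined\n",
	       sections_examined(true), SCNS_PER_BLOCK);
	return 0;
}

Without the advance, the first absent section pins phys_addr in place, so every later iteration re-derives the same pfn and the rest of the block is never actually examined: the model prints 1 of 16 sections examined instead of 15 of 16.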
diff --git a/queue-4.9/s390-mm-fix-dynamic-pagetable-upgrade-for-hugetlbfs.patch b/queue-4.9/s390-mm-fix-dynamic-pagetable-upgrade-for-hugetlbfs.patch
new file mode 100644
index 00000000000..1ca52bf14a1
--- /dev/null
+++ b/queue-4.9/s390-mm-fix-dynamic-pagetable-upgrade-for-hugetlbfs.patch
@@ -0,0 +1,181 @@
+From 5f490a520bcb393389a4d44bec90afcb332eb112 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Thu, 16 Jan 2020 19:59:04 +0100
+Subject: s390/mm: fix dynamic pagetable upgrade for hugetlbfs
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit 5f490a520bcb393389a4d44bec90afcb332eb112 upstream.
+
+Commit ee71d16d22bb ("s390/mm: make TASK_SIZE independent from the number
+of page table levels") changed the logic of TASK_SIZE and also removed the
+arch_mmap_check() implementation for s390. This combination has a subtle
+effect on how get_unmapped_area() for hugetlbfs pages works. It is now
+possible that a user process establishes a hugetlbfs mapping at an address
+above 4 TB, without triggering a dynamic pagetable upgrade from 3 to 4
+levels.
+
+This is because hugetlbfs mappings do not use mm->get_unmapped_area, but
+rather file->f_op->get_unmapped_area, which currently is the generic
+implementation of hugetlb_get_unmapped_area(). That implementation does
+not know about s390 dynamic pagetable upgrades, and with the new
+definition of TASK_SIZE it will now allow mappings above 4 TB.
+
+Subsequent access to such a mapped address above 4 TB will result in a page
+fault loop, because the CPU cannot translate such a large address with 3
+pagetable levels. The fault handler will try to map in a hugepage at the
+address, but due to the folded pagetable logic it will end up creating
+entries in the 3-level pagetable, possibly overwriting existing mappings,
+and then it all repeats when the access is retried.
+
+Apart from the page fault loop, this can have various nasty effects, e.g.
+a kernel panic from one of the BUG_ON() checks in memory management code,
+or even data loss if an existing mapping gets overwritten.
+
+Fix this by implementing HAVE_ARCH_HUGETLB_UNMAPPED_AREA support for s390,
+providing an s390 version of hugetlb_get_unmapped_area() with pagetable
+upgrade support similar to arch_get_unmapped_area(), which will then be
+used instead of the generic version.
+
+Fixes: ee71d16d22bb ("s390/mm: make TASK_SIZE independent from the number of page table levels")
+Cc: <stable@vger.kernel.org> # 4.12+
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/page.h |    2 
+ arch/s390/mm/hugetlbpage.c   |  100 ++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 101 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/include/asm/page.h
++++ b/arch/s390/include/asm/page.h
+@@ -28,6 +28,8 @@
+ #define ARCH_HAS_PREPARE_HUGEPAGE
+ #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+ 
++#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
++
+ #include <asm/setup.h>
+ #ifndef __ASSEMBLY__
+ 
+--- a/arch/s390/mm/hugetlbpage.c
++++ b/arch/s390/mm/hugetlbpage.c
+@@ -1,7 +1,7 @@
+ /*
+  *  IBM System z Huge TLB Page Support for Kernel.
+  *
+- *    Copyright IBM Corp. 2007,2016
++ *    Copyright IBM Corp. 2007,2020
+  *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+  */
+ 
+@@ -10,6 +10,9 @@
+ 
+ #include <linux/mm.h>
+ #include <linux/hugetlb.h>
++#include <linux/mman.h>
++#include <linux/sched.h>
++#include <linux/security.h>
+ 
+ /*
+  * If the bit selected by single-bit bitmask "a" is set within "x", move
+@@ -225,3 +228,98 @@ static __init int setup_hugepagesz(char
+ 	return 1;
+ }
+ __setup("hugepagesz=", setup_hugepagesz);
++
++static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
++		unsigned long addr, unsigned long len,
++		unsigned long pgoff, unsigned long flags)
++{
++	struct hstate *h = hstate_file(file);
++	struct vm_unmapped_area_info info;
++
++	info.flags = 0;
++	info.length = len;
++	info.low_limit = current->mm->mmap_base;
++	info.high_limit = TASK_SIZE;
++	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
++	info.align_offset = 0;
++	return vm_unmapped_area(&info);
++}
++
++static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
++		unsigned long addr0, unsigned long len,
++		unsigned long pgoff, unsigned long flags)
++{
++	struct hstate *h = hstate_file(file);
++	struct vm_unmapped_area_info info;
++	unsigned long addr;
++
++	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
++	info.length = len;
++	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
++	info.high_limit = current->mm->mmap_base;
++	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
++	info.align_offset = 0;
++	addr = vm_unmapped_area(&info);
++
++	/*
++	 * A failed mmap() very likely causes application failure,
++	 * so fall back to the bottom-up function here. This scenario
++	 * can happen with large stack limits and large mmap()
++	 * allocations.
++	 */
++	if (addr & ~PAGE_MASK) {
++		VM_BUG_ON(addr != -ENOMEM);
++		info.flags = 0;
++		info.low_limit = TASK_UNMAPPED_BASE;
++		info.high_limit = TASK_SIZE;
++		addr = vm_unmapped_area(&info);
++	}
++
++	return addr;
++}
++
++unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
++		unsigned long len, unsigned long pgoff, unsigned long flags)
++{
++	struct hstate *h = hstate_file(file);
++	struct mm_struct *mm = current->mm;
++	struct vm_area_struct *vma;
++	int rc;
++
++	if (len & ~huge_page_mask(h))
++		return -EINVAL;
++	if (len > TASK_SIZE - mmap_min_addr)
++		return -ENOMEM;
++
++	if (flags & MAP_FIXED) {
++		if (prepare_hugepage_range(file, addr, len))
++			return -EINVAL;
++		goto check_asce_limit;
++	}
++
++	if (addr) {
++		addr = ALIGN(addr, huge_page_size(h));
++		vma = find_vma(mm, addr);
++		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
++		    (!vma || addr + len <= vm_start_gap(vma)))
++			goto check_asce_limit;
++	}
++
++	if (mm->get_unmapped_area == arch_get_unmapped_area)
++		addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
++				pgoff, flags);
++	else
++		addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
++				pgoff, flags);
++	if (addr & ~PAGE_MASK)
++		return addr;
++
++check_asce_limit:
++	if (addr + len > current->mm->context.asce_limit &&
++	    addr + len <= TASK_SIZE) {
++		rc = crst_table_upgrade(mm, addr + len);
++		if (rc)
++			return (unsigned long) rc;
++	}
++	return addr;
++}
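To see the failure mode the commit message describes, here is a rough reproducer sketch, an assumption-laden illustration and not a supported test case: it presumes an s390 system with 1 MB hugepages reserved (vm.nr_hugepages > 0), MAP_HUGETLB support, and a kernel without this patch. On such a kernel, touching the mapping can spin in a page-fault loop because it lies above the 4 TB reach of a 3-level page table; on a patched kernel, the new check_asce_limit path upgrades to 4 levels first and the access succeeds.

/* huge_above_4tb.c - rough reproducer sketch, assumptions noted above. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1UL << 20;			/* one 1 MB huge page */
	void *hint = (void *)(8UL << 40);	/* 8 TB, well above the 4 TB boundary */
	void *p;

	/* the hint is only honored if that range is free and suitably aligned */
	p = mmap(hint, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("hugepage mapped at %p\n", p);
	memset(p, 0x55, len);	/* faults the hugepage in; can hang on unpatched kernels */
	return 0;
}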
diff --git a/queue-4.9/series b/queue-4.9/series
index 05cac6bcda1..5efd83ba5a6 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -22,3 +22,5 @@ alsa-dummy-fix-pcm-format-loop-in-proc-output.patch
 media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers.patch
 media-v4l2-rect.h-fix-v4l2_rect_map_inside-top-left-adjustments.patch
 lib-test_kasan.c-fix-memory-leak-in-kmalloc_oob_krealloc_more.patch
+s390-mm-fix-dynamic-pagetable-upgrade-for-hugetlbfs.patch
+powerpc-pseries-advance-pfn-if-section-is-not-present-in-lmb_is_removable.patch
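One detail of the new s390 helpers above that is easy to read past: both the bottom-up and the top-down variant ask vm_unmapped_area() for hugepage-aligned candidates via info.align_mask = PAGE_MASK & ~huge_page_mask(h). A standalone sketch of that bit arithmetic, assuming 4 KB base pages and 1 MB hugepages (the s390 defaults):

/* align_mask.c - illustration of the alignment arithmetic, not kernel code. */
#include <stdio.h>

int main(void)
{
	unsigned long page_mask = ~0xfffUL;		/* PAGE_MASK for 4 KB pages */
	unsigned long huge_page_mask = ~0xfffffUL;	/* mask for 1 MB hugepages */
	unsigned long align_mask = page_mask & ~huge_page_mask;

	/* vm_unmapped_area() returns only addresses with these bits clear,
	 * i.e. 1 MB-aligned candidates: prints align_mask = 0xff000 */
	printf("align_mask = 0x%lx\n", align_mask);
	return 0;
}

Only the bits between the base page size and the hugepage size are constrained, which is exactly the alignment a 1 MB segment mapping needs.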