--- /dev/null
+From 885902531586d5a20a74099c1357bfdc982befe3 Mon Sep 17 00:00:00 2001
+From: Shijie Hu <hushijie3@huawei.com>
+Date: Wed, 3 Jun 2020 16:03:34 -0700
+Subject: hugetlbfs: get unmapped area below TASK_UNMAPPED_BASE for hugetlbfs
+
+From: Shijie Hu <hushijie3@huawei.com>
+
+commit 885902531586d5a20a74099c1357bfdc982befe3 upstream.
+
+In a 32-bit program running on the arm64 architecture, when the address
+space below the mmap base is completely exhausted, shmat() for huge
+pages returns ENOMEM, but shmat() for normal pages can still succeed in
+non-legacy (top-down) mode. This seems unfair.
+
+For normal pages, the calling trace of get_unmapped_area() is:
+
+ => mm->get_unmapped_area()
+ if on legacy mode,
+ => arch_get_unmapped_area()
+ => vm_unmapped_area()
+ if on no-legacy mode,
+ => arch_get_unmapped_area_topdown()
+ => vm_unmapped_area()
+
+For huge pages, the calling trace of get_unmapped_area() is:
+
+ => file->f_op->get_unmapped_area()
+ => hugetlb_get_unmapped_area()
+ => vm_unmapped_area()
+
+To solve this issue, make hugetlb_get_unmapped_area() take the same
+path as mm->get_unmapped_area(). Add *bottomup() and *topdown()
+variants for hugetlbfs, and check the current mm->get_unmapped_area()
+to decide which one to use: if mm->get_unmapped_area is equal to
+arch_get_unmapped_area_topdown(), hugetlb_get_unmapped_area() calls the
+topdown routine; otherwise it calls the bottomup routine.
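+
+In outline, the resulting dispatch (simplified from the diff below,
+with the surrounding argument checks omitted) is:
+
+  static unsigned long
+  hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+                            unsigned long len, unsigned long pgoff,
+                            unsigned long flags)
+  {
+          ...
+          if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
+                  return hugetlb_get_unmapped_area_topdown(file, addr,
+                                                           len, pgoff, flags);
+          return hugetlb_get_unmapped_area_bottomup(file, addr,
+                                                    len, pgoff, flags);
+  }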
+
+Reported-by: kbuild test robot <lkp@intel.com>
+Signed-off-by: Shijie Hu <hushijie3@huawei.com>
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: Xiaoming Ni <nixiaoming@huawei.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: yangerkun <yangerkun@huawei.com>
+Cc: ChenGang <cg.chen@huawei.com>
+Cc: Chen Jie <chenjie6@huawei.com>
+Link: http://lkml.kernel.org/r/20200518065338.113664-1-hushijie3@huawei.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c | 67 ++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 59 insertions(+), 8 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -38,6 +38,7 @@
+ #include <linux/uio.h>
+
+ #include <linux/uaccess.h>
++#include <linux/sched/mm.h>
+
+ static const struct super_operations hugetlbfs_ops;
+ static const struct address_space_operations hugetlbfs_aops;
+@@ -201,13 +202,60 @@ out:
+
+ #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+ static unsigned long
++hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
++ unsigned long len, unsigned long pgoff, unsigned long flags)
++{
++ struct hstate *h = hstate_file(file);
++ struct vm_unmapped_area_info info;
++
++ info.flags = 0;
++ info.length = len;
++ info.low_limit = current->mm->mmap_base;
++ info.high_limit = TASK_SIZE;
++ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
++ info.align_offset = 0;
++ return vm_unmapped_area(&info);
++}
++
++static unsigned long
++hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
++ unsigned long len, unsigned long pgoff, unsigned long flags)
++{
++ struct hstate *h = hstate_file(file);
++ struct vm_unmapped_area_info info;
++
++ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
++ info.length = len;
++ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
++ info.high_limit = current->mm->mmap_base;
++ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
++ info.align_offset = 0;
++ addr = vm_unmapped_area(&info);
++
++ /*
++ * A failed mmap() very likely causes application failure,
++ * so fall back to the bottom-up function here. This scenario
++ * can happen with large stack limits and large mmap()
++ * allocations.
++ */
++ if (unlikely(offset_in_page(addr))) {
++ VM_BUG_ON(addr != -ENOMEM);
++ info.flags = 0;
++ info.low_limit = current->mm->mmap_base;
++ info.high_limit = TASK_SIZE;
++ addr = vm_unmapped_area(&info);
++ }
++
++ return addr;
++}
++
++static unsigned long
+ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct hstate *h = hstate_file(file);
+- struct vm_unmapped_area_info info;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+@@ -228,13 +276,16 @@ hugetlb_get_unmapped_area(struct file *f
+ return addr;
+ }
+
+- info.flags = 0;
+- info.length = len;
+- info.low_limit = TASK_UNMAPPED_BASE;
+- info.high_limit = TASK_SIZE;
+- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+- info.align_offset = 0;
+- return vm_unmapped_area(&info);
++ /*
++ * Use mm->get_unmapped_area value as a hint to use topdown routine.
++ * If architectures have special needs, they should define their own
++ * version of hugetlb_get_unmapped_area.
++ */
++ if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
++ return hugetlb_get_unmapped_area_topdown(file, addr, len,
++ pgoff, flags);
++ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
++ pgoff, flags);
+ }
+ #endif
+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- fs/hugetlbfs/inode.c | 5 +++--
+ fs/hugetlbfs/inode.c | 9 +++++----
include/linux/sched/mm.h | 8 ++++++++
mm/mmap.c | 8 --------
- 3 files changed, 11 insertions(+), 10 deletions(-)
+ 3 files changed, 13 insertions(+), 12 deletions(-)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
-@@ -208,6 +208,7 @@ hugetlb_get_unmapped_area(struct file *f
+@@ -211,7 +211,7 @@ hugetlb_get_unmapped_area_bottomup(struc
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+- info.high_limit = TASK_SIZE;
++ info.high_limit = arch_get_mmap_end(addr);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+@@ -227,7 +227,7 @@ hugetlb_get_unmapped_area_topdown(struct
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+- info.high_limit = current->mm->mmap_base;
++ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+@@ -242,7 +242,7 @@ hugetlb_get_unmapped_area_topdown(struct
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = current->mm->mmap_base;
+- info.high_limit = TASK_SIZE;
++ info.high_limit = arch_get_mmap_end(addr);
+ addr = vm_unmapped_area(&info);
+ }
+
+@@ -256,6 +256,7 @@ hugetlb_get_unmapped_area(struct file *f
+ struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
if (len & ~huge_page_mask(h))
return -EINVAL;
-@@ -223,7 +224,7 @@ hugetlb_get_unmapped_area(struct file *f
+@@ -271,7 +272,7 @@ hugetlb_get_unmapped_area(struct file *f
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
 vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
+ if (mmap_end - len >= addr &&
 (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
-@@ -231,7 +232,7 @@ hugetlb_get_unmapped_area(struct file *f
- info.flags = 0;
- info.length = len;
- info.low_limit = TASK_UNMAPPED_BASE;
-- info.high_limit = TASK_SIZE;
-+ info.high_limit = arch_get_mmap_end(addr);
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
- return vm_unmapped_area(&info);
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -133,6 +133,14 @@ static inline void mm_update_next_owner(