From 1b426bac66e6cc83c9f2d92b96e4e72acf43419a Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Mon, 13 May 2019 17:19:41 -0700
Subject: hugetlb: use same fault hash key for shared and private mappings

From: Mike Kravetz <mike.kravetz@oracle.com>

commit 1b426bac66e6cc83c9f2d92b96e4e72acf43419a upstream.

hugetlb uses a fault mutex hash table to prevent concurrent page faults
on the same page. The key is computed differently for shared and
private mappings: shared mappings key off the address_space and file
index, while private mappings key off the mm and virtual address.
Consider a private mapping of a populated hugetlbfs file. A fault will
map the page from the file and, if needed, do a COW to map a writable
page.

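For reference, the old keying scheme boils down to the following
(condensed from the mm/hugetlb.c hunk below):

  if (vma->vm_flags & VM_SHARED) {
          key[0] = (unsigned long) mapping;       /* address_space */
          key[1] = idx;                           /* file index */
  } else {
          key[0] = (unsigned long) mm;            /* mm_struct */
          key[1] = address >> huge_page_shift(h); /* virtual address */
  }
  hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
  mutex_lock(&hugetlb_fault_mutex_table[hash]);
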
Hugetlbfs hole punch uses the fault mutex to prevent new mappings of
file pages, taking the address_space/file index key. However, private
mappings use a different key and can race with this code to map the
file page. This triggers a BUG in the page cache removal code, which
expects the page to be unmapped. A sample stack is:

page dumped because: VM_BUG_ON_PAGE(page_mapped(page))
kernel BUG at mm/filemap.c:169!
...
RIP: 0010:unaccount_page_cache_page+0x1b8/0x200
...
Call Trace:
 __delete_from_page_cache+0x39/0x220
 delete_from_page_cache+0x45/0x70
 remove_inode_hugepages+0x13c/0x380
 ? __add_to_page_cache_locked+0x162/0x380
 hugetlbfs_fallocate+0x403/0x540
 ? _cond_resched+0x15/0x30
 ? __inode_security_revalidate+0x5d/0x70
 ? selinux_file_permission+0x100/0x130
 vfs_fallocate+0x13f/0x270
 ksys_fallocate+0x3c/0x80
 __x64_sys_fallocate+0x1a/0x20
 do_syscall_64+0x5b/0x180
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

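Schematically, the race before this patch (hypothetical interleaving;
the two hash slots differ, so the paths do not exclude each other):

  hole punch                           private fault
  (remove_inode_hugepages)             (hugetlb_no_page)
  key = {mapping, idx}                 key = {mm, address}
  mutex_lock(table[hash]);             mutex_lock(table[other_hash]);
  remove page from page cache  <-->    map the file page
  VM_BUG_ON_PAGE(page_mapped(page))
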
There seems to be another potential COW issue/race with this approach
of different private and shared keys as noted in commit 8382d914ebf7
("mm, hugetlb: improve page-fault scalability").

Since every hugetlb mapping (even anon and private) is actually a file
mapping, just use the address_space index key for all mappings. This
potentially results in more hash collisions, but that should not be the
common case.

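With this change the key computation is uniform for all hugetlb
mappings (condensed from the mm/hugetlb.c hunk below):

  key[0] = (unsigned long) mapping;
  key[1] = idx;
  hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);

so hole punch and faults on any mapping of the same file page now
serialize on the same mutex.
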
Link: http://lkml.kernel.org/r/20190328234704.27083-3-mike.kravetz@oracle.com
Link: http://lkml.kernel.org/r/20190412165235.t4sscoujczfhuiyt@linux-r8p5
Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 fs/hugetlbfs/inode.c    |    7 ++-----
 include/linux/hugetlb.h |    4 +---
 mm/hugetlb.c            |   22 ++++++----------------
 mm/userfaultfd.c        |    3 +--
 4 files changed, 10 insertions(+), 26 deletions(-)

--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -426,9 +426,7 @@ static void remove_inode_hugepages(struc
 		u32 hash;
 
 		index = page->index;
-		hash = hugetlb_fault_mutex_hash(h, current->mm,
-						&pseudo_vma,
-						mapping, index, 0);
+		hash = hugetlb_fault_mutex_hash(h, mapping, index, 0);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		/*
@@ -625,8 +623,7 @@ static long hugetlbfs_fallocate(struct f
 		addr = index * hpage_size;
 
 		/* mutex taken here, fault path and hole punch */
-		hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
-						index, addr);
+		hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		/* See if already present in mapping to avoid alloc/free */
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -123,9 +123,7 @@ void move_hugetlb_state(struct page *old
 void free_huge_page(struct page *page);
 void hugetlb_fix_reserve_counts(struct inode *inode);
 extern struct mutex *hugetlb_fault_mutex_table;
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-				struct vm_area_struct *vma,
-				struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 				pgoff_t idx, unsigned long address);
 
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3777,8 +3777,7 @@ retry:
 			 * handling userfault. Reacquire after handling
 			 * fault to make calling code simpler.
 			 */
-			hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
-							idx, haddr);
+			hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			ret = handle_userfault(&vmf, VM_UFFD_MISSING);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -3886,21 +3885,14 @@ backout_unlocked:
 }
 
 #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-			    struct vm_area_struct *vma,
-			    struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 			    pgoff_t idx, unsigned long address)
 {
 	unsigned long key[2];
 	u32 hash;
 
-	if (vma->vm_flags & VM_SHARED) {
-		key[0] = (unsigned long) mapping;
-		key[1] = idx;
-	} else {
-		key[0] = (unsigned long) mm;
-		key[1] = address >> huge_page_shift(h);
-	}
+	key[0] = (unsigned long) mapping;
+	key[1] = idx;
 
 	hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
 
@@ -3911,9 +3903,7 @@ u32 hugetlb_fault_mutex_hash(struct hsta
  * For uniprocesor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-			    struct vm_area_struct *vma,
-			    struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
 			    pgoff_t idx, unsigned long address)
 {
 	return 0;
@@ -3958,7 +3948,7 @@ vm_fault_t hugetlb_fault(struct mm_struc
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr);
+	hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	entry = huge_ptep_get(ptep);
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -271,8 +271,7 @@ retry:
 		 */
 		idx = linear_page_index(dst_vma, dst_addr);
 		mapping = dst_vma->vm_file->f_mapping;
-		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
-						idx, dst_addr);
+		hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		err = -ENOMEM;