// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>

#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>

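/*
 * Re-including <asm/vvar.h> with EMIT_VVAR redefined emits a
 * "const size_t <name>_offset" for every vvar symbol declared there;
 * arch_get_vdso_data() uses the generated _vdso_data_offset to locate
 * the generic vdso_data within a vvar page.
 */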
#undef _ASM_X86_VVAR_H
#define EMIT_VVAR(name, offset)	\
	const size_t name ## _offset = offset;
#include <asm/vvar.h>

struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page + _vdso_data_offset);
}
#undef EMIT_VVAR

unsigned int vclocks_used __read_mostly;

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

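/*
 * Likely called once per vDSO image during early boot (the function is
 * __init): it sanity-checks the image size and patches the image's
 * alternative instructions in place, so every process maps the already
 * patched text.
 */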
int __init init_vdso_image(const struct vdso_image *image)
{
	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));

	return 0;
}

static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;

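/*
 * The vDSO text is installed as a special mapping backed by the
 * kernel-resident image blob: on a fault we hand back the page of
 * image->data that corresponds to the faulting offset.
 */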
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}

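/*
 * If userspace moves the vDSO mapping (e.g. checkpoint/restore tools
 * mremap() it), keep the kernel's view consistent: update context.vdso
 * and, for a 32-bit task currently parked on the int80 landing pad,
 * rewrite the saved return IP to point into the new location.
 */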
static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing - look at do_fast_syscall_32 */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}

static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	const struct vdso_image *image = current->mm->context.vdso_image;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

#ifdef CONFIG_TIME_NS
/*
 * The vvar page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the
 * VVAR page tables are cleared and then re-faulted with the
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vvar_mapping))
			zap_vma_pages(vma);
	}
	mmap_read_unlock(mm);

	return 0;
}
#endif

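/*
 * Fault handler for the vvar area. sym_offset identifies which of the
 * special pages (plain vvar data, pvclock, hvclock or the timens copy)
 * is being faulted, and the matching physical page is inserted as a
 * PFN mapping.
 */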
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	unsigned long pfn;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping. This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;

		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the sym_vvar_page offset and
		 * the real VVAR page is mapped with the sym_timens_page
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (timens_page) {
			unsigned long addr;
			vm_fault_t err;

			/*
			 * Optimization: inside the time namespace, pre-fault
			 * the real VVAR page too. The timens page only holds
			 * the clock offsets relative to VVAR, so the VVAR
			 * page will be faulted shortly by vDSO code anyway.
			 */
			addr = vmf->address + (image->sym_timens_page - sym_offset);
			err = vmf_insert_pfn(vma, addr, pfn);
			if (unlikely(err & VM_FAULT_ERROR))
				return err;

			pfn = page_to_pfn(timens_page);
		}

		return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_pvclock_page) {
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();
		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		}
	} else if (sym_offset == image->sym_hvclock_page) {
		pfn = hv_get_tsc_pfn();

		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address, pfn);
	} else if (sym_offset == image->sym_timens_page) {
		struct page *timens_page = find_timens_vvar_page(vma);

		if (!timens_page)
			return VM_FAULT_SIGBUS;

		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}

	return VM_FAULT_SIGBUS;
}

static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

/*
 * Add vdso and vvar mappings to the current process.
 * @image - blob to map
 * @addr  - request a specific address (zero to map at a free address)
 */
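/*
 * Note on the resulting layout: sym_vvar_start is negative, so a single
 * area of image->size - sym_vvar_start bytes is reserved, the vvar pages
 * occupy its beginning, and the vdso text is placed immediately after
 * them at text_start = addr - sym_vvar_start.
 */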
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
	} else {
		current->mm->context.vdso = (void __user *)text_start;
		current->mm->context.vdso_image = image;
	}

up_fail:
	mmap_write_unlock(mm);
	return ret;
}

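/*
 * Map the vDSO at a caller-chosen address, but only if no vdso/vvar
 * mapping exists yet; callers such as the ARCH_MAP_VDSO_* arch_prctl()
 * requests rely on this to avoid installing a second copy.
 */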
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	/*
	 * Check whether the vdso blob is already mapped and fail if so,
	 * to prevent abuse from userspace: install_special_mapping() may
	 * not get accounting and rlimits right.
	 * We could search for a vma near context.vdso, but this is a
	 * slowpath, so explicitly check all VMAs to be completely sure.
	 */
	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
				vma_is_special_mapping(vma, &vvar_mapping)) {
			mmap_write_unlock(mm);
			return -EEXIST;
		}
	}
	mmap_write_unlock(mm);

	return map_vdso(image, addr);
}

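/*
 * arch_setup_additional_pages() is invoked from the ELF loader at exec
 * time to map the vDSO into the new process; the compat variant selects
 * the x32 or ia32 image as appropriate.
 */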
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, 0);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
#ifdef CONFIG_X86_X32_ABI
	if (x32) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso(&vdso_image_x32, 0);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

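/*
 * Report whether a 32-bit task's saved IP sits on one of the vDSO
 * sigreturn landing pads; callers (for example the syscall user
 * dispatch code) can use this to treat vDSO-based sigreturn specially.
 */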
bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	const struct vdso_image *image = current->mm->context.vdso_image;
	unsigned long vdso = (unsigned long) current->mm->context.vdso;

	if (in_ia32_syscall() && image == &vdso_image_32) {
		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
			return true;
	}
#endif
	return false;
}

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);
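/*
 * Example: booting with "vdso=0" parses to vdso64_enabled == 0, so
 * arch_setup_additional_pages() skips mapping the 64-bit vDSO for new
 * processes; any non-zero value leaves it enabled.
 */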
#endif /* CONFIG_X86_64 */