/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <asm/mshyperv.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

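/*
 * Editor's note: run once at boot for each vdso blob built into the
 * kernel. It sanity-checks that the image is a whole number of pages
 * and patches the image's alternatives for the running CPU, using the
 * same apply_alternatives() mechanism as kernel text.
 */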
void __init init_vdso_image(const struct vdso_image *image)
{
        BUG_ON(image->size % PAGE_SIZE != 0);

        apply_alternatives((struct alt_instr *)(image->data + image->alt),
                           (struct alt_instr *)(image->data + image->alt +
                                                image->alt_len));
}

struct linux_binprm;

static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;

        if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
                return VM_FAULT_SIGBUS;

        vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
        get_page(vmf->page);
        return 0;
}

static void vdso_fix_landing(const struct vdso_image *image,
                             struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
        if (in_ia32_syscall() && image == &vdso_image_32) {
                struct pt_regs *regs = current_pt_regs();
                unsigned long vdso_land = image->sym_int80_landing_pad;
                unsigned long old_land_addr = vdso_land +
                        (unsigned long)current->mm->context.vdso;

                /* Fix the userspace landing address; see do_fast_syscall_32 */
                if (regs->ip == old_land_addr)
                        regs->ip = new_vma->vm_start + vdso_land;
        }
#endif
}
71 | ||
72 | static int vdso_mremap(const struct vm_special_mapping *sm, | |
73 | struct vm_area_struct *new_vma) | |
74 | { | |
75 | unsigned long new_size = new_vma->vm_end - new_vma->vm_start; | |
76 | const struct vdso_image *image = current->mm->context.vdso_image; | |
77 | ||
78 | if (image->size != new_size) | |
79 | return -EINVAL; | |
80 | ||
b059a453 DS |
81 | vdso_fix_landing(image, new_vma); |
82 | current->mm->context.vdso = (void __user *)new_vma->vm_start; | |
83 | ||
84 | return 0; | |
85 | } | |
05ef76b2 | 86 | |
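/*
 * Editor's note: the vvar area holds the kernel-shared data pages.
 * Their locations are recorded in the image as negative offsets from
 * the vdso text (sym_vvar_page, sym_pvclock_page, sym_hvclock_page).
 * None of them is an ordinary page-cache page, so the handler below
 * installs each one by PFN instead of returning a struct page.
 */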
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
{
        const struct vdso_image *image = vma->vm_mm->context.vdso_image;
        long sym_offset;

        if (!image)
                return VM_FAULT_SIGBUS;

        sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
                image->sym_vvar_start;

        /*
         * Sanity check: a symbol offset of zero means that the page
         * does not exist for this vdso image, not that the page is at
         * offset zero relative to the text mapping. This should be
         * impossible here, because sym_offset should only be zero for
         * the page past the end of the vvar mapping.
         */
        if (sym_offset == 0)
                return VM_FAULT_SIGBUS;

        if (sym_offset == image->sym_vvar_page) {
                return vmf_insert_pfn(vma, vmf->address,
                                __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
        } else if (sym_offset == image->sym_pvclock_page) {
                struct pvclock_vsyscall_time_info *pvti =
                        pvclock_get_pvti_cpu0_va();
                if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
                        return vmf_insert_pfn_prot(vma, vmf->address,
                                        __pa(pvti) >> PAGE_SHIFT,
                                        pgprot_decrypted(vma->vm_page_prot));
                }
        } else if (sym_offset == image->sym_hvclock_page) {
                struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();

                if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
                        return vmf_insert_pfn(vma, vmf->address,
                                        vmalloc_to_pfn(tsc_pg));
        }

        return VM_FAULT_SIGBUS;
}
130 | ||
2eefd878 DS |
131 | static const struct vm_special_mapping vdso_mapping = { |
132 | .name = "[vdso]", | |
133 | .fault = vdso_fault, | |
134 | .mremap = vdso_mremap, | |
135 | }; | |
136 | static const struct vm_special_mapping vvar_mapping = { | |
137 | .name = "[vvar]", | |
138 | .fault = vvar_fault, | |
139 | }; | |
140 | ||
576ebfef DS |
141 | /* |
142 | * Add vdso and vvar mappings to current process. | |
143 | * @image - blob to map | |
144 | * @addr - request a specific address (zero to map at free addr) | |
145 | */ | |
146 | static int map_vdso(const struct vdso_image *image, unsigned long addr) | |
2aae950b AK |
147 | { |
148 | struct mm_struct *mm = current->mm; | |
18d0a6fd | 149 | struct vm_area_struct *vma; |
576ebfef | 150 | unsigned long text_start; |
18d0a6fd | 151 | int ret = 0; |
b059a453 | 152 | |
69048176 MH |
153 | if (down_write_killable(&mm->mmap_sem)) |
154 | return -EINTR; | |
18d0a6fd | 155 | |
e6577a7c AL |
156 | addr = get_unmapped_area(NULL, addr, |
157 | image->size - image->sym_vvar_start, 0, 0); | |
2aae950b AK |
158 | if (IS_ERR_VALUE(addr)) { |
159 | ret = addr; | |
160 | goto up_fail; | |
161 | } | |
162 | ||
e6577a7c | 163 | text_start = addr - image->sym_vvar_start; |
f7b6eb3f | 164 | |
18d0a6fd AL |
165 | /* |
166 | * MAYWRITE to allow gdb to COW and set breakpoints | |
167 | */ | |
a62c34bd | 168 | vma = _install_special_mapping(mm, |
e6577a7c | 169 | text_start, |
a62c34bd AL |
170 | image->size, |
171 | VM_READ|VM_EXEC| | |
172 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | |
b059a453 | 173 | &vdso_mapping); |
18d0a6fd | 174 | |
a62c34bd AL |
175 | if (IS_ERR(vma)) { |
176 | ret = PTR_ERR(vma); | |
18d0a6fd | 177 | goto up_fail; |
a62c34bd | 178 | } |
18d0a6fd AL |
179 | |
180 | vma = _install_special_mapping(mm, | |
e6577a7c AL |
181 | addr, |
182 | -image->sym_vvar_start, | |
a48a7042 AL |
183 | VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| |
184 | VM_PFNMAP, | |
a62c34bd | 185 | &vvar_mapping); |
18d0a6fd AL |
186 | |
187 | if (IS_ERR(vma)) { | |
188 | ret = PTR_ERR(vma); | |
897ab3e0 | 189 | do_munmap(mm, text_start, image->size, NULL); |
67dece7d DS |
190 | } else { |
191 | current->mm->context.vdso = (void __user *)text_start; | |
192 | current->mm->context.vdso_image = image; | |
f7b6eb3f | 193 | } |
2aae950b | 194 | |
2aae950b AK |
195 | up_fail: |
196 | up_write(&mm->mmap_sem); | |
197 | return ret; | |
198 | } | |
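
/*
 * Editor's note on the resulting layout (illustrative; assumes a vvar
 * area of 3 pages, i.e. sym_vvar_start == -3 * PAGE_SIZE):
 *
 *   addr                   start of the vvar pages (PFN-mapped)
 *   addr + 3 * PAGE_SIZE   text_start: the vdso text, image->size bytes
 *
 * Because sym_vvar_start is negative, the single get_unmapped_area()
 * call above reserves image->size - sym_vvar_start bytes, keeping the
 * vvar area immediately below the vdso text.
 */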
199 | ||
3947f493 IM |
200 | #ifdef CONFIG_X86_64 |
201 | /* | |
202 | * Put the vdso above the (randomized) stack with another randomized | |
203 | * offset. This way there is no hole in the middle of address space. | |
204 | * To save memory make sure it is still in the same PTE as the stack | |
205 | * top. This doesn't give that many random bits. | |
206 | * | |
207 | * Note that this algorithm is imperfect: the distribution of the vdso | |
208 | * start address within a PMD is biased toward the end. | |
209 | * | |
210 | * Only used for the 64-bit and x32 vdsos. | |
211 | */ | |
212 | static unsigned long vdso_addr(unsigned long start, unsigned len) | |
213 | { | |
214 | unsigned long addr, end; | |
215 | unsigned offset; | |
216 | ||
217 | /* | |
218 | * Round up the start address. It can start out unaligned as a result | |
219 | * of stack start randomization. | |
220 | */ | |
221 | start = PAGE_ALIGN(start); | |
222 | ||
223 | /* Round the lowest possible end address up to a PMD boundary. */ | |
224 | end = (start + len + PMD_SIZE - 1) & PMD_MASK; | |
225 | if (end >= TASK_SIZE_MAX) | |
226 | end = TASK_SIZE_MAX; | |
227 | end -= len; | |
228 | ||
229 | if (end > start) { | |
230 | offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); | |
231 | addr = start + (offset << PAGE_SHIFT); | |
232 | } else { | |
233 | addr = start; | |
234 | } | |
235 | ||
236 | /* | |
237 | * Forcibly align the final address in case we have a hardware | |
238 | * issue that requires alignment for performance reasons. | |
239 | */ | |
240 | addr = align_vdso_addr(addr); | |
241 | ||
242 | return addr; | |
243 | } | |
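
/*
 * Editor's note, a worked example with illustrative numbers (assuming
 * 4-level paging, where TASK_SIZE_MAX == 0x7ffffffff000): for
 * start = 0x7ffffffdf000 after PAGE_ALIGN() and len = 0x5000,
 * start + len rounds up to the next 2 MiB boundary, which is clamped
 * to TASK_SIZE_MAX; after subtracting len, end = 0x7fffffffa000.
 * That leaves (end - start) / PAGE_SIZE + 1 = 28 candidate pages, so
 * the vdso lands at most ~108 KiB above the page-aligned stack top.
 */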
244 | ||
576ebfef DS |
245 | static int map_vdso_randomized(const struct vdso_image *image) |
246 | { | |
3947f493 IM |
247 | unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); |
248 | ||
576ebfef DS |
249 | return map_vdso(image, addr); |
250 | } | |
3947f493 | 251 | #endif |
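
/*
 * Editor's note: map a vdso blob at a caller-supplied address, refusing
 * if one is already mapped. This is the worker for the ARCH_MAP_VDSO_*
 * arch_prctl() requests; the prctl plumbing lives in the arch_prctl
 * code, not in this file.
 */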
576ebfef | 252 | |
2eefd878 DS |
253 | int map_vdso_once(const struct vdso_image *image, unsigned long addr) |
254 | { | |
255 | struct mm_struct *mm = current->mm; | |
256 | struct vm_area_struct *vma; | |
257 | ||
258 | down_write(&mm->mmap_sem); | |
259 | /* | |
260 | * Check if we have already mapped vdso blob - fail to prevent | |
261 | * abusing from userspace install_speciall_mapping, which may | |
262 | * not do accounting and rlimit right. | |
263 | * We could search vma near context.vdso, but it's a slowpath, | |
264 | * so let's explicitely check all VMAs to be completely sure. | |
265 | */ | |
266 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
267 | if (vma_is_special_mapping(vma, &vdso_mapping) || | |
268 | vma_is_special_mapping(vma, &vvar_mapping)) { | |
269 | up_write(&mm->mmap_sem); | |
270 | return -EEXIST; | |
271 | } | |
272 | } | |
273 | up_write(&mm->mmap_sem); | |
274 | ||
275 | return map_vdso(image, addr); | |
276 | } | |
277 | ||
ab8b82ee | 278 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
18d0a6fd AL |
279 | static int load_vdso32(void) |
280 | { | |
18d0a6fd AL |
281 | if (vdso32_enabled != 1) /* Other values all mean "disabled" */ |
282 | return 0; | |
283 | ||
576ebfef | 284 | return map_vdso(&vdso_image_32, 0); |
18d0a6fd AL |
285 | } |
286 | #endif | |

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        if (!vdso64_enabled)
                return 0;

        return map_vdso_randomized(&vdso_image_64);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
        if (test_thread_flag(TIF_X32)) {
                if (!vdso64_enabled)
                        return 0;
                return map_vdso_randomized(&vdso_image_x32);
        }
#endif
#ifdef CONFIG_IA32_EMULATION
        return load_vdso32();
#else
        return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        return load_vdso32();
}
#endif
321 | ||
18d0a6fd | 322 | #ifdef CONFIG_X86_64 |
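/*
 * Editor's note: "vdso=" boot parameter - vdso=0 disables the 64-bit
 * (and x32) vdso, any nonzero value leaves it enabled. The 32-bit
 * "vdso32=" parameter is handled separately, in the vdso32 setup code.
 */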
static __init int vdso_setup(char *s)
{
        vdso64_enabled = simple_strtoul(s, NULL, 0);
        return 0;
}
__setup("vdso=", vdso_setup);

static int __init init_vdso(void)
{
        init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
        init_vdso_image(&vdso_image_x32);
#endif

        return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */