1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * linux/fs/binfmt_elf.c
4 *
5 * These are the functions used to load ELF format executables as used
6 * on SVr4 machines. Information on the format may be found in the book
7 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8 * Tools".
9 *
10 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/fs.h>
16#include <linux/mm.h>
17#include <linux/mman.h>
18#include <linux/errno.h>
19#include <linux/signal.h>
20#include <linux/binfmts.h>
21#include <linux/string.h>
22#include <linux/file.h>
23#include <linux/slab.h>
24#include <linux/personality.h>
25#include <linux/elfcore.h>
26#include <linux/init.h>
27#include <linux/highuid.h>
28#include <linux/compiler.h>
29#include <linux/highmem.h>
30#include <linux/hugetlb.h>
31#include <linux/pagemap.h>
32#include <linux/vmalloc.h>
33#include <linux/security.h>
34#include <linux/random.h>
35#include <linux/elf.h>
36#include <linux/elf-randomize.h>
37#include <linux/utsname.h>
38#include <linux/coredump.h>
39#include <linux/sched.h>
40#include <linux/sched/coredump.h>
41#include <linux/sched/task_stack.h>
42#include <linux/sched/cputime.h>
43#include <linux/sizes.h>
44#include <linux/types.h>
45#include <linux/cred.h>
46#include <linux/dax.h>
47#include <linux/uaccess.h>
48#include <asm/param.h>
49#include <asm/page.h>
50
51#ifndef ELF_COMPAT
52#define ELF_COMPAT 0
53#endif
54
55#ifndef user_long_t
56#define user_long_t long
57#endif
58#ifndef user_siginfo_t
59#define user_siginfo_t siginfo_t
60#endif
61
62/* That's for binfmt_elf_fdpic to deal with */
63#ifndef elf_check_fdpic
64#define elf_check_fdpic(ex) false
65#endif
66
67static int load_elf_binary(struct linux_binprm *bprm);
68
69#ifdef CONFIG_USELIB
70static int load_elf_library(struct file *);
71#else
72#define load_elf_library NULL
73#endif
74
75/*
76 * If we don't support core dumping, then supply a NULL so we
77 * don't even try.
78 */
79#ifdef CONFIG_ELF_CORE
80static int elf_core_dump(struct coredump_params *cprm);
81#else
82#define elf_core_dump NULL
83#endif
84
85#if ELF_EXEC_PAGESIZE > PAGE_SIZE
86#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
87#else
88#define ELF_MIN_ALIGN PAGE_SIZE
89#endif
90
91#ifndef ELF_CORE_EFLAGS
92#define ELF_CORE_EFLAGS 0
93#endif
94
95#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
96#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
97#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
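/*
 * Worked example (illustrative, assuming the common ELF_MIN_ALIGN of 4096):
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next page boundary)
 */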
98
99static struct linux_binfmt elf_format = {
100 .module = THIS_MODULE,
101 .load_binary = load_elf_binary,
102 .load_shlib = load_elf_library,
103 .core_dump = elf_core_dump,
104 .min_coredump = ELF_EXEC_PAGESIZE,
105};
106
107#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
108
109static int set_brk(unsigned long start, unsigned long end, int prot)
110{
111 start = ELF_PAGEALIGN(start);
112 end = ELF_PAGEALIGN(end);
113 if (end > start) {
114 /*
115 * Map the last of the bss segment.
116 * If the header is requesting these pages to be
117 * executable, honour that (ppc32 needs this).
118 */
119 int error = vm_brk_flags(start, end - start,
120 prot & PROT_EXEC ? VM_EXEC : 0);
121 if (error)
122 return error;
123 }
124 current->mm->start_brk = current->mm->brk = end;
125 return 0;
126}
127
128/* We need to explicitly zero any fractional pages
129 after the data section (i.e. bss). These would
130 otherwise contain junk from the file that should
131 not be in memory.
132 */
133static int padzero(unsigned long elf_bss)
134{
135 unsigned long nbyte;
136
137 nbyte = ELF_PAGEOFFSET(elf_bss);
138 if (nbyte) {
139 nbyte = ELF_MIN_ALIGN - nbyte;
140 if (clear_user((void __user *) elf_bss, nbyte))
141 return -EFAULT;
142 }
143 return 0;
144}
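/*
 * For illustration (again assuming ELF_MIN_ALIGN == 4096): if elf_bss ends
 * at 0x0804a123, padzero() clears the 0xedd bytes from 0x0804a123 up to the
 * page boundary at 0x0804b000, so the tail of the last file-backed page is
 * read back as zeroes rather than leftover file contents.
 */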
145
146/* Let's use some macros to make this stack manipulation a little clearer */
147#ifdef CONFIG_STACK_GROWSUP
148#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
149#define STACK_ROUND(sp, items) \
150 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
151#define STACK_ALLOC(sp, len) ({ \
152 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
153 old_sp; })
154#else
155#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
156#define STACK_ROUND(sp, items) \
157 (((unsigned long) (sp - items)) &~ 15UL)
158#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
159#endif
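/*
 * Sketch of the downward-growing case above: STACK_ALLOC(sp, len) moves sp
 * down by len bytes and yields the new value (e.g. a hypothetical sp of
 * 0x7ffff000 becomes 0x7fffeff0 after STACK_ALLOC(sp, 16)), while
 * STACK_ROUND(sp, items) reserves room for 'items' elf_addr_t slots below
 * sp and rounds the result down to a 16-byte boundary.
 */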
160
161#ifndef ELF_BASE_PLATFORM
162/*
163 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
164 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
165 * will be copied to the user stack in the same manner as AT_PLATFORM.
166 */
167#define ELF_BASE_PLATFORM NULL
168#endif
169
170static int
171create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
172 unsigned long load_addr, unsigned long interp_load_addr,
173 unsigned long e_entry)
174{
175 struct mm_struct *mm = current->mm;
176 unsigned long p = bprm->p;
177 int argc = bprm->argc;
178 int envc = bprm->envc;
179 elf_addr_t __user *sp;
180 elf_addr_t __user *u_platform;
181 elf_addr_t __user *u_base_platform;
182 elf_addr_t __user *u_rand_bytes;
183 const char *k_platform = ELF_PLATFORM;
184 const char *k_base_platform = ELF_BASE_PLATFORM;
185 unsigned char k_rand_bytes[16];
186 int items;
187 elf_addr_t *elf_info;
188 int ei_index;
189 const struct cred *cred = current_cred();
190 struct vm_area_struct *vma;
191
192 /*
193 * In some cases (e.g. Hyper-Threading), we want to avoid L1
194 * evictions by the processes running on the same package. One
195 * thing we can do is to shuffle the initial stack for them.
196 */
197
198 p = arch_align_stack(p);
199
200 /*
201 * If this architecture has a platform capability string, copy it
202 * to userspace. In some cases (Sparc), this info is impossible
203 * for userspace to get any other way, in others (i386) it is
204 * merely difficult.
205 */
206 u_platform = NULL;
207 if (k_platform) {
208 size_t len = strlen(k_platform) + 1;
209
210 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
211 if (__copy_to_user(u_platform, k_platform, len))
212 return -EFAULT;
213 }
214
215 /*
216 * If this architecture has a "base" platform capability
217 * string, copy it to userspace.
218 */
219 u_base_platform = NULL;
220 if (k_base_platform) {
221 size_t len = strlen(k_base_platform) + 1;
222
223 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
224 if (__copy_to_user(u_base_platform, k_base_platform, len))
225 return -EFAULT;
226 }
227
228 /*
229 * Generate 16 random bytes for userspace PRNG seeding.
230 */
231 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
232 u_rand_bytes = (elf_addr_t __user *)
233 STACK_ALLOC(p, sizeof(k_rand_bytes));
234 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
235 return -EFAULT;
236
237 /* Create the ELF interpreter info */
238 elf_info = (elf_addr_t *)mm->saved_auxv;
239 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
240#define NEW_AUX_ENT(id, val) \
241 do { \
242 *elf_info++ = id; \
243 *elf_info++ = val; \
244 } while (0)
245
246#ifdef ARCH_DLINFO
247 /*
248 * ARCH_DLINFO must come first so PPC can do its special alignment of
249 * AUXV.
250 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
251 * ARCH_DLINFO changes
252 */
253 ARCH_DLINFO;
254#endif
255 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
256 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
257 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
258 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
259 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
260 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
261 NEW_AUX_ENT(AT_BASE, interp_load_addr);
262 NEW_AUX_ENT(AT_FLAGS, 0);
263 NEW_AUX_ENT(AT_ENTRY, e_entry);
264 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
265 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
266 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
267 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
268 NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
269 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
270#ifdef ELF_HWCAP2
271 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
272#endif
273 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
274 if (k_platform) {
275 NEW_AUX_ENT(AT_PLATFORM,
276 (elf_addr_t)(unsigned long)u_platform);
277 }
278 if (k_base_platform) {
279 NEW_AUX_ENT(AT_BASE_PLATFORM,
280 (elf_addr_t)(unsigned long)u_base_platform);
281 }
282 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
283 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
284 }
285#undef NEW_AUX_ENT
286 /* AT_NULL is zero; clear the rest too */
287 memset(elf_info, 0, (char *)mm->saved_auxv +
288 sizeof(mm->saved_auxv) - (char *)elf_info);
289
290 /* And advance past the AT_NULL entry. */
291 elf_info += 2;
292
293 ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
294 sp = STACK_ADD(p, ei_index);
295
296 items = (argc + 1) + (envc + 1) + 1;
297 bprm->p = STACK_ROUND(sp, items);
298
299 /* Point sp at the lowest address on the stack */
300#ifdef CONFIG_STACK_GROWSUP
301 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
302 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
303#else
304 sp = (elf_addr_t __user *)bprm->p;
305#endif
306
307
308 /*
309 * Grow the stack manually; some architectures have a limit on how
310 * far ahead a user-space access may be in order to grow the stack.
311 */
312 vma = find_extend_vma(mm, bprm->p);
313 if (!vma)
314 return -EFAULT;
315
316 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
317 if (__put_user(argc, sp++))
318 return -EFAULT;
319
320 /* Populate list of argv pointers back to argv strings. */
321 p = mm->arg_end = mm->arg_start;
322 while (argc-- > 0) {
323 size_t len;
324 if (__put_user((elf_addr_t)p, sp++))
325 return -EFAULT;
326 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
327 if (!len || len > MAX_ARG_STRLEN)
328 return -EINVAL;
329 p += len;
330 }
331 if (__put_user(0, sp++))
332 return -EFAULT;
333 mm->arg_end = p;
334
335 /* Populate list of envp pointers back to envp strings. */
336 mm->env_end = mm->env_start = p;
337 while (envc-- > 0) {
338 size_t len;
339 if (__put_user((elf_addr_t)p, sp++))
340 return -EFAULT;
341 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
342 if (!len || len > MAX_ARG_STRLEN)
343 return -EINVAL;
344 p += len;
345 }
346 if (__put_user(0, sp++))
347 return -EFAULT;
348 mm->env_end = p;
349
350 /* Put the elf_info on the stack in the right place. */
351 if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
352 return -EFAULT;
353 return 0;
354}
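/*
 * For reference, the initial stack built above looks roughly like this,
 * from the lowest address upwards (exact padding and alignment are
 * architecture dependent, see arch_align_stack() and STACK_ROUND()):
 *
 *	argc
 *	argv[0] ... argv[argc - 1]	pointers into the argument strings
 *	NULL
 *	envp[0] ... envp[envc - 1]	pointers into the environment strings
 *	NULL
 *	auxv id/value pairs, terminated by AT_NULL
 *	...
 *	argument/environment strings, platform strings, AT_RANDOM bytes
 */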
355
356#ifndef elf_map
357
358static unsigned long elf_map(struct file *filep, unsigned long addr,
359 const struct elf_phdr *eppnt, int prot, int type,
360 unsigned long total_size)
361{
362 unsigned long map_addr;
363 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
364 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
365 addr = ELF_PAGESTART(addr);
366 size = ELF_PAGEALIGN(size);
367
368 /* mmap() will return -EINVAL if given a zero size, but a
369 * segment with zero filesize is perfectly valid */
370 if (!size)
371 return addr;
372
373 /*
374 * total_size is the size of the ELF (interpreter) image.
375 * The _first_ mmap needs to know the full size, otherwise
376 * randomization might put this image into an overlapping
377 * position with the ELF binary image (since size < total_size).
378 * So we first map the 'big' image - and unmap the remainder at
379 * the end (this unmap is needed for ELF images with holes).
380 */
381 if (total_size) {
382 total_size = ELF_PAGEALIGN(total_size);
383 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
384 if (!BAD_ADDR(map_addr))
385 vm_munmap(map_addr+size, total_size-size);
386 } else
387 map_addr = vm_mmap(filep, addr, size, prot, type, off);
388
389 if ((type & MAP_FIXED_NOREPLACE) &&
390 PTR_ERR((void *)map_addr) == -EEXIST)
391 pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
392 task_pid_nr(current), current->comm, (void *)addr);
393
394 return(map_addr);
395}
396
397#endif /* !elf_map */
398
399static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
400{
401 int i, first_idx = -1, last_idx = -1;
402
403 for (i = 0; i < nr; i++) {
404 if (cmds[i].p_type == PT_LOAD) {
405 last_idx = i;
406 if (first_idx == -1)
407 first_idx = i;
408 }
409 }
410 if (first_idx == -1)
411 return 0;
412
413 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
414 ELF_PAGESTART(cmds[first_idx].p_vaddr);
415}
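/*
 * Example with hypothetical program headers: a first PT_LOAD at
 * p_vaddr 0x400000 and a last PT_LOAD at p_vaddr 0x600e10 with
 * p_memsz 0x2f0 give 0x600e10 + 0x2f0 - ELF_PAGESTART(0x400000) ==
 * 0x201100 bytes, i.e. the span from the first segment's page start to
 * the end of the last segment's memory image.
 */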
416
417static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
418{
419 ssize_t rv;
420
421 rv = kernel_read(file, buf, len, &pos);
422 if (unlikely(rv != len)) {
423 return (rv < 0) ? rv : -EIO;
424 }
425 return 0;
426}
427
428/**
429 * load_elf_phdrs() - load ELF program headers
430 * @elf_ex: ELF header of the binary whose program headers should be loaded
431 * @elf_file: the opened ELF binary file
432 *
433 * Loads ELF program headers from the binary file elf_file, which has the ELF
434 * header pointed to by elf_ex, into a newly allocated array. The caller is
435 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
436 */
437static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
438 struct file *elf_file)
439{
440 struct elf_phdr *elf_phdata = NULL;
441 int retval, err = -1;
442 unsigned int size;
443
444 /*
445 * If the size of this structure has changed, then punt, since
446 * we will be doing the wrong thing.
447 */
448 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
449 goto out;
450
451 /* Sanity check the number of program headers... */
452 /* ...and their total size. */
453 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
454 if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
455 goto out;
456
457 elf_phdata = kmalloc(size, GFP_KERNEL);
458 if (!elf_phdata)
459 goto out;
460
461 /* Read in the program headers */
462 retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
463 if (retval < 0) {
464 err = retval;
465 goto out;
466 }
467
468 /* Success! */
469 err = 0;
470out:
471 if (err) {
472 kfree(elf_phdata);
473 elf_phdata = NULL;
474 }
475 return elf_phdata;
476}
477
478#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
479
480/**
481 * struct arch_elf_state - arch-specific ELF loading state
482 *
483 * This structure is used to preserve architecture specific data during
484 * the loading of an ELF file, throughout the checking of architecture
485 * specific ELF headers & through to the point where the ELF load is
486 * known to be proceeding (ie. SET_PERSONALITY).
487 *
488 * This implementation is a dummy for architectures which require no
489 * specific state.
490 */
491struct arch_elf_state {
492};
493
494#define INIT_ARCH_ELF_STATE {}
495
496/**
497 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
498 * @ehdr: The main ELF header
499 * @phdr: The program header to check
500 * @elf: The open ELF file
501 * @is_interp: True if the phdr is from the interpreter of the ELF being
502 * loaded, else false.
503 * @state: Architecture-specific state preserved throughout the process
504 * of loading the ELF.
505 *
506 * Inspects the program header phdr to validate its correctness and/or
507 * suitability for the system. Called once per ELF program header in the
508 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
509 * interpreter.
510 *
511 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
512 * with that return code.
513 */
514static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
515 struct elf_phdr *phdr,
516 struct file *elf, bool is_interp,
517 struct arch_elf_state *state)
518{
519 /* Dummy implementation, always proceed */
520 return 0;
521}
522
523/**
524 * arch_check_elf() - check an ELF executable
525 * @ehdr: The main ELF header
526 * @has_interp: True if the ELF has an interpreter, else false.
527 * @interp_ehdr: The interpreter's ELF header
528 * @state: Architecture-specific state preserved throughout the process
529 * of loading the ELF.
530 *
531 * Provides a final opportunity for architecture code to reject the loading
532 * of the ELF & cause an exec syscall to return an error. This is called after
533 * all program headers to be checked by arch_elf_pt_proc() have been checked.
534 *
535 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
536 * with that return code.
537 */
538static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
539 struct elfhdr *interp_ehdr,
540 struct arch_elf_state *state)
541{
542 /* Dummy implementation, always proceed */
543 return 0;
544}
545
546#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
547
548static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
549 bool has_interp, bool is_interp)
550{
551 int prot = 0;
552
553 if (p_flags & PF_R)
554 prot |= PROT_READ;
555 if (p_flags & PF_W)
556 prot |= PROT_WRITE;
557 if (p_flags & PF_X)
558 prot |= PROT_EXEC;
559
560 return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
561}
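/*
 * For example, a segment with p_flags == (PF_R | PF_X) yields
 * PROT_READ | PROT_EXEC here; arch_elf_adjust_prot() may then modify the
 * result based on the architecture-specific state collected earlier.
 */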
562
563/* This is much more generalized than the library routine read function,
564 so we keep this separate. Technically the library read function
565 is only provided so that we can read a.out libraries that have
566 an ELF header */
567
568static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
569 struct file *interpreter,
570 unsigned long no_base, struct elf_phdr *interp_elf_phdata,
571 struct arch_elf_state *arch_state)
572{
573 struct elf_phdr *eppnt;
574 unsigned long load_addr = 0;
575 int load_addr_set = 0;
576 unsigned long last_bss = 0, elf_bss = 0;
577 int bss_prot = 0;
578 unsigned long error = ~0UL;
579 unsigned long total_size;
580 int i;
581
582 /* First of all, some simple consistency checks */
583 if (interp_elf_ex->e_type != ET_EXEC &&
584 interp_elf_ex->e_type != ET_DYN)
585 goto out;
586 if (!elf_check_arch(interp_elf_ex) ||
587 elf_check_fdpic(interp_elf_ex))
588 goto out;
589 if (!interpreter->f_op->mmap)
590 goto out;
591
592 total_size = total_mapping_size(interp_elf_phdata,
593 interp_elf_ex->e_phnum);
594 if (!total_size) {
595 error = -EINVAL;
596 goto out;
597 }
598
599 eppnt = interp_elf_phdata;
600 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
601 if (eppnt->p_type == PT_LOAD) {
602 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
603 int elf_prot = make_prot(eppnt->p_flags, arch_state,
604 true, true);
605 unsigned long vaddr = 0;
606 unsigned long k, map_addr;
607
608 vaddr = eppnt->p_vaddr;
609 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
610 elf_type |= MAP_FIXED_NOREPLACE;
611 else if (no_base && interp_elf_ex->e_type == ET_DYN)
612 load_addr = -vaddr;
613
614 map_addr = elf_map(interpreter, load_addr + vaddr,
615 eppnt, elf_prot, elf_type, total_size);
616 total_size = 0;
617 error = map_addr;
618 if (BAD_ADDR(map_addr))
619 goto out;
620
621 if (!load_addr_set &&
622 interp_elf_ex->e_type == ET_DYN) {
623 load_addr = map_addr - ELF_PAGESTART(vaddr);
624 load_addr_set = 1;
625 }
626
627 /*
628 * Check to see if the section's size will overflow the
629 * allowed task size. Note that p_filesz must always be
630 * <= p_memsz so it's only necessary to check p_memsz.
631 */
632 k = load_addr + eppnt->p_vaddr;
633 if (BAD_ADDR(k) ||
634 eppnt->p_filesz > eppnt->p_memsz ||
635 eppnt->p_memsz > TASK_SIZE ||
636 TASK_SIZE - eppnt->p_memsz < k) {
637 error = -ENOMEM;
638 goto out;
639 }
640
641 /*
642 * Find the end of the file mapping for this phdr, and
643 * keep track of the largest address we see for this.
644 */
645 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
646 if (k > elf_bss)
647 elf_bss = k;
648
649 /*
650 * Do the same thing for the memory mapping - between
651 * elf_bss and last_bss is the bss section.
652 */
653 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
654 if (k > last_bss) {
655 last_bss = k;
656 bss_prot = elf_prot;
657 }
658 }
659 }
660
661 /*
662 * Now fill out the bss section: first pad the last page from
663 * the file up to the page boundary, and zero it from elf_bss
664 * up to the end of the page.
665 */
666 if (padzero(elf_bss)) {
667 error = -EFAULT;
668 goto out;
669 }
670 /*
671 * Next, align both the file and mem bss up to the page size,
672 * since this is where elf_bss was just zeroed up to, and where
673 * last_bss will end after the vm_brk_flags() below.
674 */
675 elf_bss = ELF_PAGEALIGN(elf_bss);
676 last_bss = ELF_PAGEALIGN(last_bss);
677 /* Finally, if there is still more bss to allocate, do it. */
678 if (last_bss > elf_bss) {
679 error = vm_brk_flags(elf_bss, last_bss - elf_bss,
680 bss_prot & PROT_EXEC ? VM_EXEC : 0);
681 if (error)
682 goto out;
683 }
684
685 error = load_addr;
686out:
687 return error;
688}
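/*
 * Note: on success the value returned above is the interpreter's relocation
 * adjustment (its load bias, which stays 0 for an ET_EXEC interpreter); the
 * caller adds interp_elf_ex->e_entry to it to obtain the address at which
 * to start the interpreter.
 */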
689
690/*
691 * These are the functions used to load ELF style executables and shared
692 * libraries. There is no binary dependent code anywhere else.
693 */
694
695static int parse_elf_property(const char *data, size_t *off, size_t datasz,
696 struct arch_elf_state *arch,
697 bool have_prev_type, u32 *prev_type)
698{
699 size_t o, step;
700 const struct gnu_property *pr;
701 int ret;
702
703 if (*off == datasz)
704 return -ENOENT;
705
706 if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
707 return -EIO;
708 o = *off;
709 datasz -= *off;
710
711 if (datasz < sizeof(*pr))
712 return -ENOEXEC;
713 pr = (const struct gnu_property *)(data + o);
714 o += sizeof(*pr);
715 datasz -= sizeof(*pr);
716
717 if (pr->pr_datasz > datasz)
718 return -ENOEXEC;
719
720 WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
721 step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
722 if (step > datasz)
723 return -ENOEXEC;
724
725 /* Properties are supposed to be unique and sorted on pr_type: */
726 if (have_prev_type && pr->pr_type <= *prev_type)
727 return -ENOEXEC;
728 *prev_type = pr->pr_type;
729
730 ret = arch_parse_elf_property(pr->pr_type, data + o,
731 pr->pr_datasz, ELF_COMPAT, arch);
732 if (ret)
733 return ret;
734
735 *off = o + step;
736 return 0;
737}
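/*
 * Layout sketch (assuming ELF_GNU_PROPERTY_ALIGN == 8, as on 64-bit
 * targets): each property is a struct gnu_property { u32 pr_type;
 * u32 pr_datasz; } immediately followed by pr_datasz bytes of data, padded
 * up to the alignment. A property with pr_datasz == 4 therefore occupies
 * 8 + round_up(4, 8) == 16 bytes of the note descriptor.
 */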
738
739#define NOTE_DATA_SZ SZ_1K
740#define GNU_PROPERTY_TYPE_0_NAME "GNU"
741#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
742
743static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
744 struct arch_elf_state *arch)
745{
746 union {
747 struct elf_note nhdr;
748 char data[NOTE_DATA_SZ];
749 } note;
750 loff_t pos;
751 ssize_t n;
752 size_t off, datasz;
753 int ret;
754 bool have_prev_type;
755 u32 prev_type;
756
757 if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
758 return 0;
759
760 /* load_elf_binary() shouldn't call us unless this is true... */
761 if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
762 return -ENOEXEC;
763
764 /* If the properties are crazy large, that's too bad (for now): */
765 if (phdr->p_filesz > sizeof(note))
766 return -ENOEXEC;
767
768 pos = phdr->p_offset;
769 n = kernel_read(f, &note, phdr->p_filesz, &pos);
770
771 BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
772 if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
773 return -EIO;
774
775 if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
776 note.nhdr.n_namesz != NOTE_NAME_SZ ||
777 strncmp(note.data + sizeof(note.nhdr),
778 GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
779 return -ENOEXEC;
780
781 off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
782 ELF_GNU_PROPERTY_ALIGN);
783 if (off > n)
784 return -ENOEXEC;
785
786 if (note.nhdr.n_descsz > n - off)
787 return -ENOEXEC;
788 datasz = off + note.nhdr.n_descsz;
789
790 have_prev_type = false;
791 do {
792 ret = parse_elf_property(note.data, &off, datasz, arch,
793 have_prev_type, &prev_type);
794 have_prev_type = true;
795 } while (!ret);
796
797 return ret == -ENOENT ? 0 : ret;
798}
799
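/*
 * Rough outline of load_elf_binary() below: validate the ELF header, read
 * the program headers, pick up PT_INTERP/PT_GNU_PROPERTY/PT_GNU_STACK, let
 * the architecture veto the binary, flush the old executable, mmap each
 * PT_LOAD segment (choosing a load bias for ET_DYN), set up the bss/brk,
 * load the interpreter if there is one, build the argv/envp/auxv tables,
 * and finally start_thread() at the chosen entry point.
 */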
800static int load_elf_binary(struct linux_binprm *bprm)
801{
802 struct file *interpreter = NULL; /* to shut gcc up */
803 unsigned long load_addr = 0, load_bias = 0;
804 int load_addr_set = 0;
805 unsigned long error;
806 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
807 struct elf_phdr *elf_property_phdata = NULL;
808 unsigned long elf_bss, elf_brk;
809 int bss_prot = 0;
810 int retval, i;
811 unsigned long elf_entry;
812 unsigned long e_entry;
813 unsigned long interp_load_addr = 0;
814 unsigned long start_code, end_code, start_data, end_data;
815 unsigned long reloc_func_desc __maybe_unused = 0;
816 int executable_stack = EXSTACK_DEFAULT;
817 struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
818 struct elfhdr *interp_elf_ex = NULL;
819 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
820 struct mm_struct *mm;
821 struct pt_regs *regs;
822
823 retval = -ENOEXEC;
824 /* First of all, some simple consistency checks */
825 if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
826 goto out;
827
828 if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
829 goto out;
830 if (!elf_check_arch(elf_ex))
831 goto out;
832 if (elf_check_fdpic(elf_ex))
833 goto out;
834 if (!bprm->file->f_op->mmap)
835 goto out;
836
837 elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
838 if (!elf_phdata)
839 goto out;
840
841 elf_ppnt = elf_phdata;
842 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
843 char *elf_interpreter;
844
845 if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
846 elf_property_phdata = elf_ppnt;
847 continue;
848 }
849
850 if (elf_ppnt->p_type != PT_INTERP)
851 continue;
852
853 /*
854 * This is the program interpreter used for shared libraries -
855 * its path is read here and the interpreter itself is opened below.
856 */
857 retval = -ENOEXEC;
858 if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
859 goto out_free_ph;
860
861 retval = -ENOMEM;
862 elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
863 if (!elf_interpreter)
864 goto out_free_ph;
865
866 retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
867 elf_ppnt->p_offset);
868 if (retval < 0)
869 goto out_free_interp;
870 /* make sure path is NULL terminated */
871 retval = -ENOEXEC;
872 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
873 goto out_free_interp;
874
875 interpreter = open_exec(elf_interpreter);
876 kfree(elf_interpreter);
877 retval = PTR_ERR(interpreter);
878 if (IS_ERR(interpreter))
879 goto out_free_ph;
880
881 /*
882 * If the binary is not readable then enforce mm->dumpable = 0
883 * regardless of the interpreter's permissions.
884 */
885 would_dump(bprm, interpreter);
886
887 interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
888 if (!interp_elf_ex) {
889 retval = -ENOMEM;
890 goto out_free_ph;
891 }
892
893 /* Get the exec headers */
894 retval = elf_read(interpreter, interp_elf_ex,
895 sizeof(*interp_elf_ex), 0);
896 if (retval < 0)
897 goto out_free_dentry;
898
899 break;
900
901out_free_interp:
902 kfree(elf_interpreter);
903 goto out_free_ph;
904 }
905
906 elf_ppnt = elf_phdata;
907 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
908 switch (elf_ppnt->p_type) {
909 case PT_GNU_STACK:
910 if (elf_ppnt->p_flags & PF_X)
911 executable_stack = EXSTACK_ENABLE_X;
912 else
913 executable_stack = EXSTACK_DISABLE_X;
914 break;
915
916 case PT_LOPROC ... PT_HIPROC:
917 retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
918 bprm->file, false,
919 &arch_state);
920 if (retval)
921 goto out_free_dentry;
922 break;
923 }
924
925 /* Some simple consistency checks for the interpreter */
926 if (interpreter) {
927 retval = -ELIBBAD;
928 /* Not an ELF interpreter */
929 if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
930 goto out_free_dentry;
931 /* Verify the interpreter has a valid arch */
932 if (!elf_check_arch(interp_elf_ex) ||
933 elf_check_fdpic(interp_elf_ex))
934 goto out_free_dentry;
935
936 /* Load the interpreter program headers */
937 interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
938 interpreter);
939 if (!interp_elf_phdata)
940 goto out_free_dentry;
941
942 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
943 elf_property_phdata = NULL;
944 elf_ppnt = interp_elf_phdata;
945 for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
946 switch (elf_ppnt->p_type) {
947 case PT_GNU_PROPERTY:
948 elf_property_phdata = elf_ppnt;
949 break;
950
951 case PT_LOPROC ... PT_HIPROC:
952 retval = arch_elf_pt_proc(interp_elf_ex,
953 elf_ppnt, interpreter,
954 true, &arch_state);
955 if (retval)
956 goto out_free_dentry;
957 break;
958 }
959 }
960
961 retval = parse_elf_properties(interpreter ?: bprm->file,
962 elf_property_phdata, &arch_state);
963 if (retval)
964 goto out_free_dentry;
965
966 /*
967 * Allow arch code to reject the ELF at this point, whilst it's
968 * still possible to return an error to the code that invoked
969 * the exec syscall.
970 */
971 retval = arch_check_elf(elf_ex,
972 !!interpreter, interp_elf_ex,
973 &arch_state);
974 if (retval)
975 goto out_free_dentry;
976
977 /* Flush all traces of the currently running executable */
978 retval = flush_old_exec(bprm);
979 if (retval)
980 goto out_free_dentry;
981
982 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
983 may depend on the personality. */
984 SET_PERSONALITY2(*elf_ex, &arch_state);
985 if (elf_read_implies_exec(*elf_ex, executable_stack))
986 current->personality |= READ_IMPLIES_EXEC;
987
988 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
989 current->flags |= PF_RANDOMIZE;
990
991 setup_new_exec(bprm);
992 install_exec_creds(bprm);
993
994 /* Do this so that we can load the interpreter, if need be. We will
995 change some of these later */
996 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
997 executable_stack);
998 if (retval < 0)
999 goto out_free_dentry;
1000
1001 elf_bss = 0;
1002 elf_brk = 0;
1003
1004 start_code = ~0UL;
1005 end_code = 0;
1006 start_data = 0;
1007 end_data = 0;
1008
1009 /* Now we do a little grungy work by mmapping the ELF image into
1010 the correct location in memory. */
1011 for(i = 0, elf_ppnt = elf_phdata;
1012 i < elf_ex->e_phnum; i++, elf_ppnt++) {
1013 int elf_prot, elf_flags;
1014 unsigned long k, vaddr;
1015 unsigned long total_size = 0;
1016
1017 if (elf_ppnt->p_type != PT_LOAD)
1018 continue;
1019
1020 if (unlikely (elf_brk > elf_bss)) {
1021 unsigned long nbyte;
1022
1023 /* There was a PT_LOAD segment with p_memsz > p_filesz
1024 before this one. Map anonymous pages, if needed,
1025 and clear the area. */
1026 retval = set_brk(elf_bss + load_bias,
1027 elf_brk + load_bias,
1028 bss_prot);
1029 if (retval)
1030 goto out_free_dentry;
1031 nbyte = ELF_PAGEOFFSET(elf_bss);
1032 if (nbyte) {
1033 nbyte = ELF_MIN_ALIGN - nbyte;
1034 if (nbyte > elf_brk - elf_bss)
1035 nbyte = elf_brk - elf_bss;
1036 if (clear_user((void __user *)elf_bss +
1037 load_bias, nbyte)) {
1038 /*
1039 * This bss-zeroing can fail if the ELF
1040 * file specifies odd protections. So
1041 * we don't check the return value
1042 */
1043 }
1044 }
1045 }
1046
1047 elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1048 !!interpreter, false);
1049
1050 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
1051
1052 vaddr = elf_ppnt->p_vaddr;
1053 /*
1054 * If we are loading ET_EXEC or we have already performed
1055 * the ET_DYN load_addr calculations, proceed normally.
1056 */
1057 if (elf_ex->e_type == ET_EXEC || load_addr_set) {
1058 elf_flags |= MAP_FIXED;
1059 } else if (elf_ex->e_type == ET_DYN) {
1060 /*
1061 * This logic is run once for the first LOAD Program
1062 * Header for ET_DYN binaries to calculate the
1063 * randomization (load_bias) for all the LOAD
1064 * Program Headers, and to calculate the entire
1065 * size of the ELF mapping (total_size). (Note that
1066 * load_addr_set is set to true later once the
1067 * initial mapping is performed.)
1068 *
1069 * There are effectively two types of ET_DYN
1070 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
1071 * and loaders (ET_DYN without INTERP, since they
1072 * _are_ the ELF interpreter). The loaders must
1073 * be loaded away from programs since the program
1074 * may otherwise collide with the loader (especially
1075 * for ET_EXEC which does not have a randomized
1076 * position). For example to handle invocations of
1077 * "./ld.so someprog" to test out a new version of
1078 * the loader, the subsequent program that the
1079 * loader loads must avoid the loader itself, so
1080 * they cannot share the same load range. Sufficient
1081 * room for the brk must be allocated with the
1082 * loader as well, since brk must be available with
1083 * the loader.
1084 *
1085 * Therefore, programs are loaded offset from
1086 * ELF_ET_DYN_BASE and loaders are loaded into the
1087 * independently randomized mmap region (0 load_bias
1088 * without MAP_FIXED).
1089 */
1090 if (interpreter) {
1091 load_bias = ELF_ET_DYN_BASE;
1092 if (current->flags & PF_RANDOMIZE)
1093 load_bias += arch_mmap_rnd();
1094 elf_flags |= MAP_FIXED;
1095 } else
1096 load_bias = 0;
1097
1098 /*
1099 * Since load_bias is used for all subsequent loading
1100 * calculations, we must lower it by the first vaddr
1101 * so that the remaining calculations based on the
1102 * ELF vaddrs will be correctly offset. The result
1103 * is then page aligned.
1104 */
1105 load_bias = ELF_PAGESTART(load_bias - vaddr);
1106
1107 total_size = total_mapping_size(elf_phdata,
1108 elf_ex->e_phnum);
1109 if (!total_size) {
1110 retval = -EINVAL;
1111 goto out_free_dentry;
1112 }
1113 }
1114
1115 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
1116 elf_prot, elf_flags, total_size);
1117 if (BAD_ADDR(error)) {
1118 retval = IS_ERR((void *)error) ?
1119 PTR_ERR((void*)error) : -EINVAL;
1120 goto out_free_dentry;
1121 }
1122
1123 if (!load_addr_set) {
1124 load_addr_set = 1;
1125 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1126 if (elf_ex->e_type == ET_DYN) {
1127 load_bias += error -
1128 ELF_PAGESTART(load_bias + vaddr);
1129 load_addr += load_bias;
1130 reloc_func_desc = load_bias;
1131 }
1132 }
1133 k = elf_ppnt->p_vaddr;
1134 if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1135 start_code = k;
1136 if (start_data < k)
1137 start_data = k;
1138
1139 /*
1140 * Check to see if the section's size will overflow the
1141 * allowed task size. Note that p_filesz must always be
1142 * <= p_memsz so it is only necessary to check p_memsz.
1143 */
1144 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1145 elf_ppnt->p_memsz > TASK_SIZE ||
1146 TASK_SIZE - elf_ppnt->p_memsz < k) {
1147 /* set_brk can never work. Avoid overflows. */
1148 retval = -EINVAL;
1149 goto out_free_dentry;
1150 }
1151
1152 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1153
1154 if (k > elf_bss)
1155 elf_bss = k;
1156 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1157 end_code = k;
1158 if (end_data < k)
1159 end_data = k;
1160 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1161 if (k > elf_brk) {
1162 bss_prot = elf_prot;
1163 elf_brk = k;
1164 }
1165 }
1166
1167 e_entry = elf_ex->e_entry + load_bias;
1168 elf_bss += load_bias;
1169 elf_brk += load_bias;
1170 start_code += load_bias;
1171 end_code += load_bias;
1172 start_data += load_bias;
1173 end_data += load_bias;
1174
1175 /* Calling set_brk effectively mmaps the pages that we need
1176 * for the bss and break sections. We must do this before
1177 * mapping in the interpreter, to make sure it doesn't wind
1178 * up getting placed where the bss needs to go.
1179 */
1180 retval = set_brk(elf_bss, elf_brk, bss_prot);
1181 if (retval)
1182 goto out_free_dentry;
1183 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1184 retval = -EFAULT; /* Nobody gets to see this, but.. */
1185 goto out_free_dentry;
1186 }
1187
1188 if (interpreter) {
1189 elf_entry = load_elf_interp(interp_elf_ex,
1190 interpreter,
1191 load_bias, interp_elf_phdata,
1192 &arch_state);
1193 if (!IS_ERR((void *)elf_entry)) {
1194 /*
1195 * load_elf_interp() returns relocation
1196 * adjustment
1197 */
1198 interp_load_addr = elf_entry;
1199 elf_entry += interp_elf_ex->e_entry;
1200 }
1201 if (BAD_ADDR(elf_entry)) {
1202 retval = IS_ERR((void *)elf_entry) ?
1203 (int)elf_entry : -EINVAL;
1204 goto out_free_dentry;
1205 }
1206 reloc_func_desc = interp_load_addr;
1207
1208 allow_write_access(interpreter);
1209 fput(interpreter);
1210
1211 kfree(interp_elf_ex);
1212 kfree(interp_elf_phdata);
1213 } else {
1214 elf_entry = e_entry;
1215 if (BAD_ADDR(elf_entry)) {
1216 retval = -EINVAL;
1217 goto out_free_dentry;
1218 }
1219 }
1220
1221 kfree(elf_phdata);
1222
1223 set_binfmt(&elf_format);
1224
1225#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1226 retval = arch_setup_additional_pages(bprm, !!interpreter);
1227 if (retval < 0)
1228 goto out;
1229#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1230
1231 retval = create_elf_tables(bprm, elf_ex,
1232 load_addr, interp_load_addr, e_entry);
1233 if (retval < 0)
1234 goto out;
1235
1236 mm = current->mm;
1237 mm->end_code = end_code;
1238 mm->start_code = start_code;
1239 mm->start_data = start_data;
1240 mm->end_data = end_data;
1241 mm->start_stack = bprm->p;
1242
1243 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1244 /*
1245 * For architectures with ELF randomization, when executing
1246 * a loader directly (i.e. no interpreter listed in ELF
1247 * headers), move the brk area out of the mmap region
1248 * (since it grows up, and may collide early with the stack
1249 * growing down), and into the unused ELF_ET_DYN_BASE region.
1250 */
1251 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1252 elf_ex->e_type == ET_DYN && !interpreter) {
1253 mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1254 }
1255
1256 mm->brk = mm->start_brk = arch_randomize_brk(mm);
1257#ifdef compat_brk_randomized
1258 current->brk_randomized = 1;
1259#endif
1260 }
1261
1262 if (current->personality & MMAP_PAGE_ZERO) {
1263 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1264 and some applications "depend" upon this behavior.
1265 Since we do not have the power to recompile these, we
1266 emulate the SVr4 behavior. Sigh. */
1267 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1268 MAP_FIXED | MAP_PRIVATE, 0);
1269 }
1270
1271 regs = current_pt_regs();
1272#ifdef ELF_PLAT_INIT
1273 /*
1274 * The ABI may specify that certain registers be set up in special
1275 * ways (on i386 %edx is the address of a DT_FINI function, for
1276 * example). In addition, it may also specify (e.g., PowerPC64 ELF)
1277 * that the e_entry field is the address of the function descriptor
1278 * for the startup routine, rather than the address of the startup
1279 * routine itself. This macro performs whatever initialization to
1280 * the regs structure is required as well as any relocations to the
1281 * function descriptor entries when executing dynamically linked apps.
1282 */
1283 ELF_PLAT_INIT(regs, reloc_func_desc);
1284#endif
1285
1286 finalize_exec(bprm);
1287 start_thread(regs, elf_entry, bprm->p);
1288 retval = 0;
1289out:
1290 return retval;
1291
1292 /* error cleanup */
1293out_free_dentry:
1294 kfree(interp_elf_ex);
1295 kfree(interp_elf_phdata);
1296 allow_write_access(interpreter);
1297 if (interpreter)
1298 fput(interpreter);
1299out_free_ph:
1300 kfree(elf_phdata);
1301 goto out;
1302}
1303
1304#ifdef CONFIG_USELIB
1305/* This is really simpleminded and specialized - we are loading an
1306 a.out library that is given an ELF header. */
1307static int load_elf_library(struct file *file)
1308{
1309 struct elf_phdr *elf_phdata;
1310 struct elf_phdr *eppnt;
1311 unsigned long elf_bss, bss, len;
1312 int retval, error, i, j;
1313 struct elfhdr elf_ex;
1314
1315 error = -ENOEXEC;
1316 retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1317 if (retval < 0)
1318 goto out;
1319
1320 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1321 goto out;
1322
1323 /* First of all, some simple consistency checks */
1324 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1325 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1326 goto out;
1327 if (elf_check_fdpic(&elf_ex))
1328 goto out;
1329
1330 /* Now read in all of the header information */
1331
1332 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1333 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1334
1335 error = -ENOMEM;
1336 elf_phdata = kmalloc(j, GFP_KERNEL);
1337 if (!elf_phdata)
1338 goto out;
1339
1340 eppnt = elf_phdata;
1341 error = -ENOEXEC;
1342 retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1343 if (retval < 0)
1344 goto out_free_ph;
1345
1346 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1347 if ((eppnt + i)->p_type == PT_LOAD)
1348 j++;
1349 if (j != 1)
1350 goto out_free_ph;
1351
1352 while (eppnt->p_type != PT_LOAD)
1353 eppnt++;
1354
1355 /* Now use mmap to map the library into memory. */
1356 error = vm_mmap(file,
1357 ELF_PAGESTART(eppnt->p_vaddr),
1358 (eppnt->p_filesz +
1359 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1360 PROT_READ | PROT_WRITE | PROT_EXEC,
1361 MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1362 (eppnt->p_offset -
1363 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1364 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1365 goto out_free_ph;
1366
1367 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1368 if (padzero(elf_bss)) {
1369 error = -EFAULT;
1370 goto out_free_ph;
1371 }
1372
1373 len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1374 bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1375 if (bss > len) {
1376 error = vm_brk(len, bss - len);
1377 if (error)
1378 goto out_free_ph;
1379 }
1380 error = 0;
1381
1382out_free_ph:
1383 kfree(elf_phdata);
1384out:
1385 return error;
1386}
1387#endif /* #ifdef CONFIG_USELIB */
1388
1389#ifdef CONFIG_ELF_CORE
1390/*
1391 * ELF core dumper
1392 *
1393 * Modelled on fs/exec.c:aout_core_dump()
1394 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1395 */
1396
1397/*
1398 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1399 * that are useful for post-mortem analysis are included in every core dump.
1400 * In that way we ensure that the core dump is fully interpretable later
1401 * without matching up the same kernel and hardware config to see what PC values
1402 * meant. These special mappings include the vDSO, vsyscall, and other
1403 * architecture-specific mappings.
1404 */
1405static bool always_dump_vma(struct vm_area_struct *vma)
1406{
1407 /* Any vsyscall mappings? */
1408 if (vma == get_gate_vma(vma->vm_mm))
1409 return true;
1410
1411 /*
1412 * Assume that all vmas with a .name op should always be dumped.
1413 * If this changes, a new vm_ops field can easily be added.
1414 */
1415 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1416 return true;
1417
1418 /*
1419 * arch_vma_name() returns non-NULL for special architecture mappings,
1420 * such as vDSO sections.
1421 */
1422 if (arch_vma_name(vma))
1423 return true;
1424
1425 return false;
1426}
1427
1428/*
1429 * Decide how much of a segment to dump: all of it, part of it, or none.
1430 */
1431static unsigned long vma_dump_size(struct vm_area_struct *vma,
1432 unsigned long mm_flags)
1433{
1434#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1435
1436 /* always dump the vdso and vsyscall sections */
1437 if (always_dump_vma(vma))
1438 goto whole;
1439
1440 if (vma->vm_flags & VM_DONTDUMP)
1441 return 0;
1442
1443 /* support for DAX */
1444 if (vma_is_dax(vma)) {
1445 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1446 goto whole;
1447 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1448 goto whole;
1449 return 0;
1450 }
1451
1452 /* Hugetlb memory check */
1453 if (is_vm_hugetlb_page(vma)) {
1454 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1455 goto whole;
1456 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1457 goto whole;
1458 return 0;
1459 }
1460
1461 /* Do not dump I/O mapped devices or special mappings */
1462 if (vma->vm_flags & VM_IO)
1463 return 0;
1464
1465 /* By default, dump shared memory if mapped from an anonymous file. */
1466 if (vma->vm_flags & VM_SHARED) {
1467 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1468 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1469 goto whole;
1470 return 0;
1471 }
1472
1473 /* Dump segments that have been written to. */
1474 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1475 goto whole;
1476 if (vma->vm_file == NULL)
1477 return 0;
1478
1479 if (FILTER(MAPPED_PRIVATE))
1480 goto whole;
1481
1482 /*
1483 * If this looks like the beginning of a DSO or executable mapping,
1484 * check for an ELF header. If we find one, dump the first page to
1485 * aid in determining what was mapped here.
1486 */
1487 if (FILTER(ELF_HEADERS) &&
1488 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1489 u32 __user *header = (u32 __user *) vma->vm_start;
1490 u32 word;
1491 /*
1492 * Doing it this way gets the constant folded by GCC.
1493 */
1494 union {
1495 u32 cmp;
1496 char elfmag[SELFMAG];
1497 } magic;
1498 BUILD_BUG_ON(SELFMAG != sizeof word);
1499 magic.elfmag[EI_MAG0] = ELFMAG0;
1500 magic.elfmag[EI_MAG1] = ELFMAG1;
1501 magic.elfmag[EI_MAG2] = ELFMAG2;
1502 magic.elfmag[EI_MAG3] = ELFMAG3;
1503 if (unlikely(get_user(word, header)))
1504 word = 0;
1505 if (word == magic.cmp)
1506 return PAGE_SIZE;
1507 }
1508
1509#undef FILTER
1510
1511 return 0;
1512
1513whole:
1514 return vma->vm_end - vma->vm_start;
1515}
1516
1517/* An ELF note in memory */
1518struct memelfnote
1519{
1520 const char *name;
1521 int type;
1522 unsigned int datasz;
1523 void *data;
1524};
1525
1526static int notesize(struct memelfnote *en)
1527{
1528 int sz;
1529
1530 sz = sizeof(struct elf_note);
1531 sz += roundup(strlen(en->name) + 1, 4);
1532 sz += roundup(en->datasz, 4);
1533
1534 return sz;
1535}
1536
1537static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1538{
1539 struct elf_note en;
1540 en.n_namesz = strlen(men->name) + 1;
1541 en.n_descsz = men->datasz;
1542 en.n_type = men->type;
1543
1544 return dump_emit(cprm, &en, sizeof(en)) &&
1545 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1546 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1547}
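/*
 * On-disk layout: each note is an Elf_Nhdr followed by the name and then
 * the descriptor, each padded to a 4-byte boundary. For a hypothetical
 * "CORE"/NT_PRSTATUS note with a 336-byte descriptor, notesize() above
 * gives 12 + roundup(5, 4) + roundup(336, 4) == 356 bytes.
 */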
1548
1549static void fill_elf_header(struct elfhdr *elf, int segs,
1550 u16 machine, u32 flags)
1551{
1552 memset(elf, 0, sizeof(*elf));
1553
1554 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1555 elf->e_ident[EI_CLASS] = ELF_CLASS;
1556 elf->e_ident[EI_DATA] = ELF_DATA;
1557 elf->e_ident[EI_VERSION] = EV_CURRENT;
1558 elf->e_ident[EI_OSABI] = ELF_OSABI;
1559
1560 elf->e_type = ET_CORE;
1561 elf->e_machine = machine;
1562 elf->e_version = EV_CURRENT;
1563 elf->e_phoff = sizeof(struct elfhdr);
1564 elf->e_flags = flags;
1565 elf->e_ehsize = sizeof(struct elfhdr);
1566 elf->e_phentsize = sizeof(struct elf_phdr);
1567 elf->e_phnum = segs;
1568}
1569
1570static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1571{
1572 phdr->p_type = PT_NOTE;
1573 phdr->p_offset = offset;
1574 phdr->p_vaddr = 0;
1575 phdr->p_paddr = 0;
1576 phdr->p_filesz = sz;
1577 phdr->p_memsz = 0;
1578 phdr->p_flags = 0;
1579 phdr->p_align = 0;
1580}
1581
1582static void fill_note(struct memelfnote *note, const char *name, int type,
1583 unsigned int sz, void *data)
1584{
1585 note->name = name;
1586 note->type = type;
1587 note->datasz = sz;
1588 note->data = data;
1589}
1590
1591/*
1592 * fill up all the fields in prstatus from the given task struct, except
1593 * registers which need to be filled up separately.
1594 */
1595static void fill_prstatus(struct elf_prstatus *prstatus,
1596 struct task_struct *p, long signr)
1597{
1598 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1599 prstatus->pr_sigpend = p->pending.signal.sig[0];
1600 prstatus->pr_sighold = p->blocked.sig[0];
1601 rcu_read_lock();
1602 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1603 rcu_read_unlock();
1604 prstatus->pr_pid = task_pid_vnr(p);
1605 prstatus->pr_pgrp = task_pgrp_vnr(p);
1606 prstatus->pr_sid = task_session_vnr(p);
1607 if (thread_group_leader(p)) {
1608 struct task_cputime cputime;
1609
1610 /*
1611 * This is the record for the group leader. It shows the
1612 * group-wide total, not its individual thread total.
1613 */
1614 thread_group_cputime(p, &cputime);
1615 prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1616 prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1617 } else {
1618 u64 utime, stime;
1619
1620 task_cputime(p, &utime, &stime);
1621 prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1622 prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1623 }
1624
1625 prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1626 prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1627}
1628
1629static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1630 struct mm_struct *mm)
1631{
1632 const struct cred *cred;
1633 unsigned int i, len;
1634
1635 /* first copy the parameters from user space */
1636 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1637
1638 len = mm->arg_end - mm->arg_start;
1639 if (len >= ELF_PRARGSZ)
1640 len = ELF_PRARGSZ-1;
1641 if (copy_from_user(&psinfo->pr_psargs,
1642 (const char __user *)mm->arg_start, len))
1643 return -EFAULT;
1644 for(i = 0; i < len; i++)
1645 if (psinfo->pr_psargs[i] == 0)
1646 psinfo->pr_psargs[i] = ' ';
1647 psinfo->pr_psargs[len] = 0;
1648
1649 rcu_read_lock();
1650 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1651 rcu_read_unlock();
1652 psinfo->pr_pid = task_pid_vnr(p);
1653 psinfo->pr_pgrp = task_pgrp_vnr(p);
1654 psinfo->pr_sid = task_session_vnr(p);
1655
1656 i = p->state ? ffz(~p->state) + 1 : 0;
1657 psinfo->pr_state = i;
1658 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1659 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1660 psinfo->pr_nice = task_nice(p);
1661 psinfo->pr_flag = p->flags;
1662 rcu_read_lock();
1663 cred = __task_cred(p);
1664 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1665 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1666 rcu_read_unlock();
1667 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1668
1669 return 0;
1670}
1671
1672static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1673{
1674 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1675 int i = 0;
1676 do
1677 i += 2;
1678 while (auxv[i - 2] != AT_NULL);
1679 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1680}
1681
1682static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1683 const kernel_siginfo_t *siginfo)
1684{
1685 copy_siginfo_to_external(csigdata, siginfo);
1686 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1687}
1688
1689#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1690/*
1691 * Format of NT_FILE note:
1692 *
1693 * long count -- how many files are mapped
1694 * long page_size -- units for file_ofs
1695 * array of [COUNT] elements of
1696 * long start
1697 * long end
1698 * long file_ofs
1699 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1700 */
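/*
 * Example (hypothetical, two mapped files, page_size == 4096):
 *
 *	data[] = { 2, 4096,
 *	           0x400000, 0x401000, 0,	start/end/file_ofs, file 1
 *	           0x7f0000, 0x7f2000, 2 }	start/end/file_ofs, file 2
 *	followed by "/bin/true\0/lib/ld.so\0"
 *
 * file_ofs is expressed in page_size units, so the value 2 above means a
 * file offset of 8192 bytes.
 */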
1701static int fill_files_note(struct memelfnote *note)
1702{
1703 struct mm_struct *mm = current->mm;
1704 struct vm_area_struct *vma;
1705 unsigned count, size, names_ofs, remaining, n;
1706 user_long_t *data;
1707 user_long_t *start_end_ofs;
1708 char *name_base, *name_curpos;
1709
1710 /* *Estimated* file count and total data size needed */
1711 count = mm->map_count;
1712 if (count > UINT_MAX / 64)
1713 return -EINVAL;
1714 size = count * 64;
1715
1716 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1717 alloc:
1718 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1719 return -EINVAL;
1720 size = round_up(size, PAGE_SIZE);
1721 /*
1722 * "size" can be 0 here legitimately.
1723 * Let it fail with ENOMEM and omit the NT_FILE section, which would be empty anyway.
1724 */
1725 data = kvmalloc(size, GFP_KERNEL);
1726 if (ZERO_OR_NULL_PTR(data))
1727 return -ENOMEM;
1728
1729 start_end_ofs = data + 2;
1730 name_base = name_curpos = ((char *)data) + names_ofs;
1731 remaining = size - names_ofs;
1732 count = 0;
1733 for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1734 struct file *file;
1735 const char *filename;
1736
1737 file = vma->vm_file;
1738 if (!file)
1739 continue;
1740 filename = file_path(file, name_curpos, remaining);
1741 if (IS_ERR(filename)) {
1742 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1743 kvfree(data);
1744 size = size * 5 / 4;
1745 goto alloc;
1746 }
1747 continue;
1748 }
1749
1750 /* file_path() fills at the end, move name down */
1751 /* n = strlen(filename) + 1: */
1752 n = (name_curpos + remaining) - filename;
1753 remaining = filename - name_curpos;
1754 memmove(name_curpos, filename, n);
1755 name_curpos += n;
1756
1757 *start_end_ofs++ = vma->vm_start;
1758 *start_end_ofs++ = vma->vm_end;
1759 *start_end_ofs++ = vma->vm_pgoff;
1760 count++;
1761 }
1762
1763 /* Now we know the exact count of files, so store it */
1764 data[0] = count;
1765 data[1] = PAGE_SIZE;
1766 /*
1767 * The count is usually less than mm->map_count,
1768 * so we need to move the filenames down.
1769 */
1770 n = mm->map_count - count;
1771 if (n != 0) {
1772 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1773 memmove(name_base - shift_bytes, name_base,
1774 name_curpos - name_base);
1775 name_curpos -= shift_bytes;
1776 }
1777
1778 size = name_curpos - (char *)data;
1779 fill_note(note, "CORE", NT_FILE, size, data);
1780 return 0;
1781}
1782
1783#ifdef CORE_DUMP_USE_REGSET
1784#include <linux/regset.h>
1785
1786struct elf_thread_core_info {
1787 struct elf_thread_core_info *next;
1788 struct task_struct *task;
1789 struct elf_prstatus prstatus;
1790 struct memelfnote notes[0];
1791};
1792
1793struct elf_note_info {
1794 struct elf_thread_core_info *thread;
1795 struct memelfnote psinfo;
1796 struct memelfnote signote;
1797 struct memelfnote auxv;
1798 struct memelfnote files;
1799 user_siginfo_t csigdata;
1800 size_t size;
1801 int thread_notes;
1802};
1803
1804/*
1805 * When a regset has a writeback hook, we call it on each thread before
1806 * dumping user memory. On register window machines, this makes sure the
1807 * user memory backing the register data is up to date before we read it.
1808 */
1809static void do_thread_regset_writeback(struct task_struct *task,
1810 const struct user_regset *regset)
1811{
1812 if (regset->writeback)
1813 regset->writeback(task, regset, 1);
1814}
1815
1816#ifndef PRSTATUS_SIZE
1817#define PRSTATUS_SIZE(S, R) sizeof(S)
1818#endif
1819
1820#ifndef SET_PR_FPVALID
1821#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1822#endif
1823
1824static int fill_thread_core_info(struct elf_thread_core_info *t,
1825 const struct user_regset_view *view,
1826 long signr, size_t *total)
1827{
1828 unsigned int i;
1829 unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1830
1831 /*
1832 * NT_PRSTATUS is the one special case, because the regset data
1833 * goes into the pr_reg field inside the note contents, rather
1834 * than being the whole note contents. We fill the rest in here.
1835 * We assume that regset 0 is NT_PRSTATUS.
1836 */
1837 fill_prstatus(&t->prstatus, t->task, signr);
1838 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1839 &t->prstatus.pr_reg, NULL);
1840
1841 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1842 PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1843 *total += notesize(&t->notes[0]);
1844
1845 do_thread_regset_writeback(t->task, &view->regsets[0]);
1846
1847 /*
1848 * Each other regset might generate a note too. For each regset
1849 * that has no core_note_type or is inactive, we leave t->notes[i]
1850 * all zero and we'll know to skip writing it later.
1851 */
1852 for (i = 1; i < view->n; ++i) {
1853 const struct user_regset *regset = &view->regsets[i];
1854 do_thread_regset_writeback(t->task, regset);
1855 if (regset->core_note_type && regset->get &&
1856 (!regset->active || regset->active(t->task, regset) > 0)) {
1857 int ret;
1858 size_t size = regset_size(t->task, regset);
1859 void *data = kzalloc(size, GFP_KERNEL);
1860 if (unlikely(!data))
1861 return 0;
1862 ret = regset->get(t->task, regset,
1863 0, size, data, NULL);
1864 if (unlikely(ret))
1865 kfree(data);
1866 else {
1867 if (regset->core_note_type != NT_PRFPREG)
1868 fill_note(&t->notes[i], "LINUX",
1869 regset->core_note_type,
1870 size, data);
1871 else {
1872 SET_PR_FPVALID(&t->prstatus,
1873 1, regset0_size);
1874 fill_note(&t->notes[i], "CORE",
1875 NT_PRFPREG, size, data);
1876 }
1877 *total += notesize(&t->notes[i]);
1878 }
1879 }
1880 }
1881
1882 return 1;
1883}
1884
1885static int fill_note_info(struct elfhdr *elf, int phdrs,
1886 struct elf_note_info *info,
1887 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1888{
1889 struct task_struct *dump_task = current;
1890 const struct user_regset_view *view = task_user_regset_view(dump_task);
1891 struct elf_thread_core_info *t;
1892 struct elf_prpsinfo *psinfo;
1893 struct core_thread *ct;
1894 unsigned int i;
1895
1896 info->size = 0;
1897 info->thread = NULL;
1898
1899 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1900 if (psinfo == NULL) {
1901 info->psinfo.data = NULL; /* So we don't free this wrongly */
1902 return 0;
1903 }
1904
1905 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1906
1907 /*
1908 * Figure out how many notes we're going to need for each thread.
1909 */
1910 info->thread_notes = 0;
1911 for (i = 0; i < view->n; ++i)
1912 if (view->regsets[i].core_note_type != 0)
1913 ++info->thread_notes;
1914
1915 /*
1916 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
1917 * since it is our one special case.
1918 */
1919 if (unlikely(info->thread_notes == 0) ||
1920 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1921 WARN_ON(1);
1922 return 0;
1923 }
1924
1925 /*
1926 * Initialize the ELF file header.
1927 */
1928 fill_elf_header(elf, phdrs,
1929 view->e_machine, view->e_flags);
1930
1931 /*
1932 * Allocate a structure for each thread.
1933 */
1934 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1935 t = kzalloc(offsetof(struct elf_thread_core_info,
1936 notes[info->thread_notes]),
1937 GFP_KERNEL);
1938 if (unlikely(!t))
1939 return 0;
1940
1941 t->task = ct->task;
1942 if (ct->task == dump_task || !info->thread) {
1943 t->next = info->thread;
1944 info->thread = t;
1945 } else {
1946 /*
1947 * Make sure to keep the original task at
1948 * the head of the list.
1949 */
1950 t->next = info->thread->next;
1951 info->thread->next = t;
1952 }
1953 }
1954
1955 /*
1956 * Now fill in each thread's information.
1957 */
1958 for (t = info->thread; t != NULL; t = t->next)
1959 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1960 return 0;
1961
1962 /*
1963 * Fill in the two process-wide notes.
1964 */
1965 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1966 info->size += notesize(&info->psinfo);
1967
1968 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1969 info->size += notesize(&info->signote);
1970
1971 fill_auxv_note(&info->auxv, current->mm);
1972 info->size += notesize(&info->auxv);
1973
1974 if (fill_files_note(&info->files) == 0)
1975 info->size += notesize(&info->files);
1976
1977 return 1;
1978}
1979
1980static size_t get_note_info_size(struct elf_note_info *info)
1981{
1982 return info->size;
1983}
1984
1985/*
1986 * Write all the notes for each thread. When writing the first thread, the
1987 * process-wide notes are interleaved after the first thread-specific note.
1988 */
1989static int write_note_info(struct elf_note_info *info,
1990 struct coredump_params *cprm)
1991{
1992 bool first = true;
1993 struct elf_thread_core_info *t = info->thread;
1994
1995 do {
1996 int i;
1997
1998 if (!writenote(&t->notes[0], cprm))
1999 return 0;
2000
2001 if (first && !writenote(&info->psinfo, cprm))
2002 return 0;
2003 if (first && !writenote(&info->signote, cprm))
2004 return 0;
2005 if (first && !writenote(&info->auxv, cprm))
2006 return 0;
2007 if (first && info->files.data &&
2008 !writenote(&info->files, cprm))
2009 return 0;
2010
2011 for (i = 1; i < info->thread_notes; ++i)
2012 if (t->notes[i].data &&
2013 !writenote(&t->notes[i], cprm))
2014 return 0;
2015
2016 first = false;
2017 t = t->next;
2018 } while (t);
2019
2020 return 1;
2021}
2022
2023static void free_note_info(struct elf_note_info *info)
2024{
2025 struct elf_thread_core_info *threads = info->thread;
2026 while (threads) {
2027 unsigned int i;
2028 struct elf_thread_core_info *t = threads;
2029 threads = t->next;
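 /* notes[0] points at the prstatus embedded in *t, so only notes[1..] own separately allocated data */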
2030 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
2031 for (i = 1; i < info->thread_notes; ++i)
2032 kfree(t->notes[i].data);
2033 kfree(t);
2034 }
2035 kfree(info->psinfo.data);
2036 kvfree(info->files.data);
2037}
2038
2039#else
2040
2041/* Here is the structure in which the status of each thread is captured. */
2042struct elf_thread_status
2043{
2044 struct list_head list;
2045 struct elf_prstatus prstatus; /* NT_PRSTATUS */
2046 elf_fpregset_t fpu; /* NT_PRFPREG */
2047 struct task_struct *thread;
2048#ifdef ELF_CORE_COPY_XFPREGS
2049 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
2050#endif
2051 struct memelfnote notes[3];
2052 int num_notes;
2053};
2054
2055/*
2056 * In order to add the specific thread information for the ELF file format,
2057 * we need to keep a linked list of every thread's pr_status and then create
2058 * a single section for them in the final core file.
2059 */
2060static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
2061{
2062 int sz = 0;
2063 struct task_struct *p = t->thread;
2064 t->num_notes = 0;
2065
2066 fill_prstatus(&t->prstatus, p, signr);
2067 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
2068
2069 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
2070 &(t->prstatus));
2071 t->num_notes++;
2072 sz += notesize(&t->notes[0]);
2073
2074 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
2075 &t->fpu))) {
2076 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
2077 &(t->fpu));
2078 t->num_notes++;
2079 sz += notesize(&t->notes[1]);
2080 }
2081
2082#ifdef ELF_CORE_COPY_XFPREGS
2083 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
2084 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
2085 sizeof(t->xfpu), &t->xfpu);
2086 t->num_notes++;
2087 sz += notesize(&t->notes[2]);
2088 }
2089#endif
2090 return sz;
2091}
2092
2093struct elf_note_info {
2094 struct memelfnote *notes;
2095 struct memelfnote *notes_files;
2096 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
2097 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
2098 struct list_head thread_list;
2099 elf_fpregset_t *fpu;
2100#ifdef ELF_CORE_COPY_XFPREGS
2101 elf_fpxregset_t *xfpu;
2102#endif
2103 user_siginfo_t csigdata;
2104 int thread_status_size;
2105 int numnote;
2106};
2107
2108static int elf_note_info_init(struct elf_note_info *info)
2109{
2110 memset(info, 0, sizeof(*info));
2111 INIT_LIST_HEAD(&info->thread_list);
2112
2113 /* Allocate space for ELF notes */
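 /* 8 slots cover prstatus, psinfo, siginfo and auxv plus the optional files, fpregs and xfpregs notes */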
2114 info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2115 if (!info->notes)
2116 return 0;
2117 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2118 if (!info->psinfo)
2119 return 0;
2120 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2121 if (!info->prstatus)
2122 return 0;
2123 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2124 if (!info->fpu)
2125 return 0;
2126#ifdef ELF_CORE_COPY_XFPREGS
2127 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2128 if (!info->xfpu)
2129 return 0;
2130#endif
2131 return 1;
2132}
2133
2134static int fill_note_info(struct elfhdr *elf, int phdrs,
2135 struct elf_note_info *info,
2136 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2137{
2138 struct core_thread *ct;
2139 struct elf_thread_status *ets;
2140
2141 if (!elf_note_info_init(info))
2142 return 0;
2143
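 /* current itself is the dumper; dumper.next lists only the other threads, so current is filled in separately below */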
2144 for (ct = current->mm->core_state->dumper.next;
2145 ct; ct = ct->next) {
2146 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2147 if (!ets)
2148 return 0;
2149
2150 ets->thread = ct->task;
2151 list_add(&ets->list, &info->thread_list);
2152 }
2153
2154 list_for_each_entry(ets, &info->thread_list, list) {
2155 int sz;
2156
2157 sz = elf_dump_thread_status(siginfo->si_signo, ets);
2158 info->thread_status_size += sz;
2159 }
2160 /* now collect the dump for the current task */
2161 memset(info->prstatus, 0, sizeof(*info->prstatus));
2162 fill_prstatus(info->prstatus, current, siginfo->si_signo);
2163 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2164
2165 /* Set up header */
2166 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2167
2168 /*
2169 * Set up the notes in similar form to SVR4 core dumps made
2170 * with info from their /proc.
2171 */
2172
2173 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2174 sizeof(*info->prstatus), info->prstatus);
2175 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2176 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2177 sizeof(*info->psinfo), info->psinfo);
2178
2179 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2180 fill_auxv_note(info->notes + 3, current->mm);
2181 info->numnote = 4;
2182
2183 if (fill_files_note(info->notes + info->numnote) == 0) {
2184 info->notes_files = info->notes + info->numnote;
2185 info->numnote++;
2186 }
2187
2188 /* Try to dump the FPU. */
2189 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2190 info->fpu);
2191 if (info->prstatus->pr_fpvalid)
2192 fill_note(info->notes + info->numnote++,
2193 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2194#ifdef ELF_CORE_COPY_XFPREGS
2195 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2196 fill_note(info->notes + info->numnote++,
2197 "LINUX", ELF_CORE_XFPREG_TYPE,
2198 sizeof(*info->xfpu), info->xfpu);
2199#endif
2200
2201 return 1;
2202}
2203
2204static size_t get_note_info_size(struct elf_note_info *info)
2205{
2206 int sz = 0;
2207 int i;
2208
2209 for (i = 0; i < info->numnote; i++)
2210 sz += notesize(info->notes + i);
2211
2212 sz += info->thread_status_size;
2213
2214 return sz;
2215}
2216
2217static int write_note_info(struct elf_note_info *info,
2218 struct coredump_params *cprm)
2219{
2220 struct elf_thread_status *ets;
2221 int i;
2222
2223 for (i = 0; i < info->numnote; i++)
2224 if (!writenote(info->notes + i, cprm))
2225 return 0;
2226
2227 /* write out the thread status notes section */
2228 list_for_each_entry(ets, &info->thread_list, list) {
2229 for (i = 0; i < ets->num_notes; i++)
2230 if (!writenote(&ets->notes[i], cprm))
2231 return 0;
2232 }
2233
2234 return 1;
2235}
2236
2237static void free_note_info(struct elf_note_info *info)
2238{
2239 while (!list_empty(&info->thread_list)) {
2240 struct list_head *tmp = info->thread_list.next;
2241 list_del(tmp);
2242 kfree(list_entry(tmp, struct elf_thread_status, list));
2243 }
2244
2245 /* Free data possibly allocated by fill_files_note(): */
2246 if (info->notes_files)
2247 kvfree(info->notes_files->data);
2248
2249 kfree(info->prstatus);
2250 kfree(info->psinfo);
2251 kfree(info->notes);
2252 kfree(info->fpu);
2253#ifdef ELF_CORE_COPY_XFPREGS
2254 kfree(info->xfpu);
2255#endif
2256}
2257
2258#endif
2259
2260static struct vm_area_struct *first_vma(struct task_struct *tsk,
2261 struct vm_area_struct *gate_vma)
2262{
2263 struct vm_area_struct *ret = tsk->mm->mmap;
2264
2265 if (ret)
2266 return ret;
2267 return gate_vma;
2268}
2269/*
2270 * Helper function for iterating across a vma list. It ensures that the caller
2271 * will visit `gate_vma' prior to terminating the search.
2272 */
2273static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2274 struct vm_area_struct *gate_vma)
2275{
2276 struct vm_area_struct *ret;
2277
2278 ret = this_vma->vm_next;
2279 if (ret)
2280 return ret;
2281 if (this_vma == gate_vma)
2282 return NULL;
2283 return gate_vma;
2284}
2285
2286static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2287 elf_addr_t e_shoff, int segs)
2288{
2289 elf->e_shoff = e_shoff;
2290 elf->e_shentsize = sizeof(*shdr4extnum);
2291 elf->e_shnum = 1;
2292 elf->e_shstrndx = SHN_UNDEF;
2293
2294 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2295
2296 shdr4extnum->sh_type = SHT_NULL;
2297 shdr4extnum->sh_size = elf->e_shnum;
2298 shdr4extnum->sh_link = elf->e_shstrndx;
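 /* with extended numbering, the real program header count lives in sh_info of section header 0 */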
2299 shdr4extnum->sh_info = segs;
2300}
2301
2302/*
2303 * Actual dumper
2304 *
2305 * This is a two-pass process; first we find the offsets of the bits,
2306 * and then they are actually written out. If we run out of core limit
2307 * we just truncate.
2308 */
2309static int elf_core_dump(struct coredump_params *cprm)
2310{
2311 int has_dumped = 0;
2312 int segs, i;
2313 size_t vma_data_size = 0;
2314 struct vm_area_struct *vma, *gate_vma;
2315 struct elfhdr elf;
2316 loff_t offset = 0, dataoff;
2317 struct elf_note_info info = { };
2318 struct elf_phdr *phdr4note = NULL;
2319 struct elf_shdr *shdr4extnum = NULL;
2320 Elf_Half e_phnum;
2321 elf_addr_t e_shoff;
2322 elf_addr_t *vma_filesz = NULL;
2323
2324 /*
2325 * We no longer stop all VM operations.
2326 *
2327 * This is because those processes that could possibly change map_count
2328 * or the mmap / vma pages are now blocked in do_exit on current
2329 * finishing this core dump.
2330 *
2331 * Only ptrace can touch these memory addresses, but it doesn't change
2332 * the map_count or the pages allocated. So no possibility of crashing
2333 * exists while dumping the mm->vm_next areas to the core file.
2334 */
2335
2336 /*
2337 * The number of segs is recorded in the ELF header as a 16-bit value.
2338 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this code.
2339 */
2340 segs = current->mm->map_count;
2341 segs += elf_core_extra_phdrs();
2342
2343 gate_vma = get_gate_vma(current->mm);
2344 if (gate_vma != NULL)
2345 segs++;
2346
2347 /* for notes section */
2348 segs++;
2349
2350 /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2351 * this, the kernel supports extended numbering. Have a look at
2352 * include/linux/elf.h for further information. */
2353 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2354
2355 /*
2356 * Collect all the non-memory information about the process for the
2357 * notes. This also sets up the file header.
2358 */
2359 if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2360 goto end_coredump;
2361
2362 has_dumped = 1;
2363
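 /*
 * First pass: compute file offsets. The layout is the ELF header,
 * the program headers, the note segment, the page-aligned VMA data,
 * then any extra data and, if needed, the extended-numbering section header.
 */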
2364 offset += sizeof(elf); /* Elf header */
2365 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2366
2367 /* Write notes phdr entry */
2368 {
2369 size_t sz = get_note_info_size(&info);
2370
2371 sz += elf_coredump_extra_notes_size();
2372
2373 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2374 if (!phdr4note)
2375 goto end_coredump;
2376
2377 fill_elf_note_phdr(phdr4note, sz, offset);
2378 offset += sz;
2379 }
2380
2381 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2382
2383 /*
2384 * A process with zero VMAs will get ZERO_SIZE_PTR here.
2385 * Let the coredump continue so that at least the register state is saved.
2386 */
2387 vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2388 GFP_KERNEL);
2389 if (!vma_filesz)
2390 goto end_coredump;
2391
2392 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2393 vma = next_vma(vma, gate_vma)) {
2394 unsigned long dump_size;
2395
2396 dump_size = vma_dump_size(vma, cprm->mm_flags);
2397 vma_filesz[i++] = dump_size;
2398 vma_data_size += dump_size;
2399 }
2400
2401 offset += vma_data_size;
2402 offset += elf_core_extra_data_size();
2403 e_shoff = offset;
2404
2405 if (e_phnum == PN_XNUM) {
2406 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2407 if (!shdr4extnum)
2408 goto end_coredump;
2409 fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2410 }
2411
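 /* Second pass: rewind to dataoff so each PT_LOAD phdr's p_offset matches where its data will land. */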
2412 offset = dataoff;
2413
2414 if (!dump_emit(cprm, &elf, sizeof(elf)))
2415 goto end_coredump;
2416
2417 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2418 goto end_coredump;
2419
2420 /* Write program headers for segments dump */
2421 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2422 vma = next_vma(vma, gate_vma)) {
2423 struct elf_phdr phdr;
2424
2425 phdr.p_type = PT_LOAD;
2426 phdr.p_offset = offset;
2427 phdr.p_vaddr = vma->vm_start;
2428 phdr.p_paddr = 0;
2429 phdr.p_filesz = vma_filesz[i++];
2430 phdr.p_memsz = vma->vm_end - vma->vm_start;
2431 offset += phdr.p_filesz;
2432 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2433 if (vma->vm_flags & VM_WRITE)
2434 phdr.p_flags |= PF_W;
2435 if (vma->vm_flags & VM_EXEC)
2436 phdr.p_flags |= PF_X;
2437 phdr.p_align = ELF_EXEC_PAGESIZE;
2438
2439 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2440 goto end_coredump;
2441 }
2442
2443 if (!elf_core_write_extra_phdrs(cprm, offset))
2444 goto end_coredump;
2445
2446 /* write out the notes section */
2447 if (!write_note_info(&info, cprm))
2448 goto end_coredump;
2449
2450 if (elf_coredump_extra_notes_write(cprm))
2451 goto end_coredump;
2452
2453 /* Pad out to the page-aligned offset where the segment data starts. */
2454 if (!dump_skip(cprm, dataoff - cprm->pos))
2455 goto end_coredump;
2456
2457 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2458 vma = next_vma(vma, gate_vma)) {
2459 unsigned long addr;
2460 unsigned long end;
2461
2462 end = vma->vm_start + vma_filesz[i++];
2463
2464 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2465 struct page *page;
2466 int stop;
2467
2468 page = get_dump_page(addr);
2469 if (page) {
2470 void *kaddr = kmap(page);
2471 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2472 kunmap(page);
2473 put_page(page);
2474 } else
2475 stop = !dump_skip(cprm, PAGE_SIZE);
2476 if (stop)
2477 goto end_coredump;
2478 }
2479 }
2480 dump_truncate(cprm);
2481
2482 if (!elf_core_write_extra_data(cprm))
2483 goto end_coredump;
2484
2485 if (e_phnum == PN_XNUM) {
2486 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2487 goto end_coredump;
2488 }
2489
2490end_coredump:
2491 free_note_info(&info);
2492 kfree(shdr4extnum);
2493 kvfree(vma_filesz);
2494 kfree(phdr4note);
2495 return has_dumped;
2496}
2497
2498#endif /* CONFIG_ELF_CORE */
2499
2500static int __init init_elf_binfmt(void)
2501{
2502 register_binfmt(&elf_format);
2503 return 0;
2504}
2505
2506static void __exit exit_elf_binfmt(void)
2507{
2508 /* Remove the ELF loader. */
2509 unregister_binfmt(&elf_format);
2510}
2511
2512core_initcall(init_elf_binfmt);
2513module_exit(exit_elf_binfmt);
2514MODULE_LICENSE("GPL");