]> git.ipfire.org Git - thirdparty/kernel/stable.git/blob - arch/x86_64/kernel/entry.S
Linux-2.6.12-rc2
[thirdparty/kernel/stable.git] / arch / x86_64 / kernel / entry.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 */
10
11 /*
12 * entry.S contains the system-call and fault low-level handling routines.
13 *
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
16 *
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals or fork/exec et.al.
19 *
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all registers saved.
25 *
26 * TODO:
27 * - schedule it carefully for the final hardware.
28 */
29
30 #define ASSEMBLY 1
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/segment.h>
34 #include <asm/smp.h>
35 #include <asm/cache.h>
36 #include <asm/errno.h>
37 #include <asm/dwarf2.h>
38 #include <asm/calling.h>
39 #include <asm/offset.h>
40 #include <asm/msr.h>
41 #include <asm/unistd.h>
42 #include <asm/thread_info.h>
43 #include <asm/hw_irq.h>
44
45 .code64
46
47 #ifdef CONFIG_PREEMPT
48 #define preempt_stop cli /* preemptible kernel: preempt_stop expands to cli */
49 #else
50 #define preempt_stop /* non-preemptible kernel: preempt_stop expands to nothing */
51 #define retint_kernel retint_restore_args /* the retint_kernel label only exists under CONFIG_PREEMPT; otherwise alias it to the plain register-restore path */
52 #endif
53
54 /*
55 * C code is not supposed to know about undefined top of stack. Every time
56 * a C function with a pt_regs argument is called from the SYSCALL based
57 * fast path FIXUP_TOP_OF_STACK is needed.
58 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
59 * manipulation.
60 */
61
62 /* Fill in the RSP, SS, CS, RCX and EFLAGS slots of the frame at %rsp; tmp is a scratch register. %rsp:at FRAMEEND */
63 .macro FIXUP_TOP_OF_STACK tmp
64 movq %gs:pda_oldrsp,\tmp /* stack pointer that system_call stored in the PDA on entry */
65 movq \tmp,RSP(%rsp)
66 movq $__USER_DS,SS(%rsp) /* SS and CS slots receive the user selectors */
67 movq $__USER_CS,CS(%rsp)
68 movq $-1,RCX(%rsp) /* RCX slot is overwritten with -1; on SYSCALL entry rcx carried the return address, not a user value */
69 movq R11(%rsp),\tmp /* get eflags */
70 movq \tmp,EFLAGS(%rsp)
71 .endm
72
73 .macro RESTORE_TOP_OF_STACK tmp,offset=0 /* inverse of FIXUP_TOP_OF_STACK: copy frame slots back to the SYSCALL state; tmp is scratch, offset is subtracted from every slot offset */
74 movq RSP-\offset(%rsp),\tmp /* RSP slot -> PDA saved stack pointer */
75 movq \tmp,%gs:pda_oldrsp
76 movq EFLAGS-\offset(%rsp),\tmp /* EFLAGS slot -> R11 slot */
77 movq \tmp,R11-\offset(%rsp)
78 .endm
79
80 .macro FAKE_STACK_FRAME child_rip /* push six quadwords shaped like an interrupt frame plus orig rax; child_rip is pushed as the rip value; clobbers rax */
81 /* push in order ss, rsp, eflags, cs, rip */
82 xorq %rax, %rax
83 pushq %rax /* ss (zero) */
84 CFI_ADJUST_CFA_OFFSET 8
85 pushq %rax /* rsp (zero) */
86 CFI_ADJUST_CFA_OFFSET 8
87 CFI_OFFSET rip,0 /* NOTE(review): this annotates rip although the value just pushed is rsp -- looks misplaced, confirm */
88 pushq $(1<<9) /* eflags - interrupts on */
89 CFI_ADJUST_CFA_OFFSET 8
90 pushq $__KERNEL_CS /* cs */
91 CFI_ADJUST_CFA_OFFSET 8
92 pushq \child_rip /* rip */
93 CFI_ADJUST_CFA_OFFSET 8
94 CFI_OFFSET rip,0
95 pushq %rax /* orig rax (zero) */
96 CFI_ADJUST_CFA_OFFSET 8
97 .endm
98
99 .macro UNFAKE_STACK_FRAME /* drop the six quadwords pushed by FAKE_STACK_FRAME */
100 addq $8*6, %rsp
101 CFI_ADJUST_CFA_OFFSET -(6*8)
102 .endm
103
104 .macro CFI_DEFAULT_STACK /* unwind annotations only: adjust the CFA by SS and record each register slot at its frame offset minus SS */
105 CFI_ADJUST_CFA_OFFSET (SS)
106 CFI_OFFSET r15,R15-SS
107 CFI_OFFSET r14,R14-SS
108 CFI_OFFSET r13,R13-SS
109 CFI_OFFSET r12,R12-SS
110 CFI_OFFSET rbp,RBP-SS
111 CFI_OFFSET rbx,RBX-SS
112 CFI_OFFSET r11,R11-SS
113 CFI_OFFSET r10,R10-SS
114 CFI_OFFSET r9,R9-SS
115 CFI_OFFSET r8,R8-SS
116 CFI_OFFSET rax,RAX-SS
117 CFI_OFFSET rcx,RCX-SS
118 CFI_OFFSET rdx,RDX-SS
119 CFI_OFFSET rsi,RSI-SS
120 CFI_OFFSET rdi,RDI-SS
121 CFI_OFFSET rsp,RSP-SS
122 CFI_OFFSET rip,RIP-SS
123 .endm
124 /*
125 * A newly forked process directly context switches into this. It calls schedule_tail, runs the syscall-exit trace hook when trace/audit flags are set, and then leaves through the IRET path or the SYSRET path.
126 */
127 /* rdi: prev */
128 ENTRY(ret_from_fork)
129 CFI_STARTPROC
130 CFI_DEFAULT_STACK
131 call schedule_tail /* rdi is not modified before this call, so prev is the argument */
132 GET_THREAD_INFO(%rcx)
133 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
134 jnz rff_trace
135 rff_action:
136 RESTORE_REST
137 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
138 je int_ret_from_sys_call /* low two bits of saved CS are zero: take the IRET path */
139 testl $_TIF_IA32,threadinfo_flags(%rcx)
140 jnz int_ret_from_sys_call /* tasks flagged TIF_IA32 also take the IRET path */
141 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET /* sync pda_oldrsp and the R11 slot from the frame before the SYSRET path; rdi is scratch */
142 jmp ret_from_sys_call
143 rff_trace:
144 movq %rsp,%rdi /* argument for syscall_trace_leave: current stack pointer */
145 call syscall_trace_leave
146 GET_THREAD_INFO(%rcx) /* reload thread info into rcx after the call */
147 jmp rff_action
148 CFI_ENDPROC
149
150 /*
151 * System call entry. Up to 6 arguments in registers are supported.
152 *
153 * SYSCALL does not save anything on the stack and does not change the
154 * stack pointer.
155 */
156
157 /*
158 * Register setup:
159 * rax system call number
160 * rdi arg0
161 * rcx return address for syscall/sysret, C arg3
162 * rsi arg1
163 * rdx arg2
164 * r10 arg3 (--> moved to rcx for C)
165 * r8 arg4
166 * r9 arg5
167 * r11 eflags for syscall/sysret, temporary for C
168 * r12-r15,rbp,rbx saved by C code, not touched.
169 *
170 * Interrupts are off on entry.
171 * Only called from user space.
172 *
173 * XXX if we had a free scratch register we could save the RSP into the stack frame
174 * and report it properly in ps. Unfortunately we haven't.
175 */
176
177 ENTRY(system_call) /* SYSCALL entry: switch stacks, dispatch through sys_call_table, return with SYSRET when no work is pending */
178 CFI_STARTPROC
179 swapgs /* the PDA fields below are addressed through %gs */
180 movq %rsp,%gs:pda_oldrsp /* remember the user stack pointer in the PDA */
181 movq %gs:pda_kernelstack,%rsp /* switch to the kernel stack taken from the PDA */
182 sti /* entry ran with interrupts off; enable them */
183 SAVE_ARGS 8,1
184 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) /* syscall number -> ORIG_RAX slot */
185 movq %rcx,RIP-ARGOFFSET(%rsp) /* rcx holds the user return address -> RIP slot */
186 GET_THREAD_INFO(%rcx)
187 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
188 jnz tracesys /* trace, audit or seccomp requested: slow path */
189 cmpq $__NR_syscall_max,%rax
190 ja badsys /* unsigned compare: numbers above the maximum are rejected */
191 movq %r10,%rcx /* arg3 arrives in r10; C expects it in rcx */
192 call *sys_call_table(,%rax,8) # XXX: rip relative
193 movq %rax,RAX-ARGOFFSET(%rsp) /* return value -> RAX slot */
194 /*
195 * Syscall return path ending with SYSRET (fast path)
196 * Has incomplete stack frame and undefined top of stack.
197 */
198 .globl ret_from_sys_call
199 ret_from_sys_call:
200 movl $_TIF_WORK_MASK,%edi
201 /* edi: flagmask */
202 sysret_check:
203 GET_THREAD_INFO(%rcx)
204 cli /* interrupts stay off from the flag check to sysretq */
205 movl threadinfo_flags(%rcx),%edx
206 andl %edi,%edx
207 jnz sysret_careful /* some flag in the mask is set: handle the work first */
208 movq RIP-ARGOFFSET(%rsp),%rcx /* return address back into rcx for sysret */
209 RESTORE_ARGS 0,-ARG_SKIP,1
210 movq %gs:pda_oldrsp,%rsp /* back to the saved user stack pointer */
211 swapgs /* undo the swapgs done at entry */
212 sysretq
213
214 /* Handle reschedules */
215 /* edx: work, edi: workmask */
216 sysret_careful:
217 bt $TIF_NEED_RESCHED,%edx
218 jnc sysret_signal /* bit clear: the pending work is not a reschedule */
219 sti
220 pushq %rdi /* keep the work mask across the call */
221 call schedule
222 popq %rdi
223 jmp sysret_check /* re-read the flags */
224
225 /* Handle a signal */
226 sysret_signal:
227 sti
228 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
229 jz 1f /* none of these flags set: skip the notify call */
230
231 /* Really a signal */
232 /* edx: work flags (arg3) */
233 leaq do_notify_resume(%rip),%rax /* ptregscall_common calls the function whose address is in rax */
234 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
235 xorl %esi,%esi # oldset -> arg2
236 call ptregscall_common /* runs the handler between SAVE_REST and RESTORE_REST */
237 1: movl $_TIF_NEED_RESCHED,%edi /* from here on only the reschedule flag is rechecked */
238 jmp sysret_check
239
240 /* Do syscall tracing: call syscall_trace_enter, run the syscall, call syscall_trace_leave */
241 tracesys:
242 SAVE_REST
243 movq $-ENOSYS,RAX(%rsp) /* preset the RAX slot to -ENOSYS; it is overwritten only if a syscall is dispatched below */
244 FIXUP_TOP_OF_STACK %rdi /* fill in the top-of-stack slots before handing the frame to C; rdi is scratch */
245 movq %rsp,%rdi /* argument: frame pointer */
246 call syscall_trace_enter
247 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
248 RESTORE_REST
249 cmpq $__NR_syscall_max,%rax
250 ja 1f /* number out of range: skip the call, RAX slot keeps -ENOSYS */
251 movq %r10,%rcx /* fixup for C */
252 call *sys_call_table(,%rax,8)
253 movq %rax,RAX-ARGOFFSET(%rsp)
254 1: SAVE_REST
255 movq %rsp,%rdi /* argument: frame pointer */
256 call syscall_trace_leave
257 RESTORE_TOP_OF_STACK %rbx /* rbx is only scratch here; presumably reloaded by the RESTORE_REST that follows -- confirm in the macro */
258 RESTORE_REST
259 jmp ret_from_sys_call
260
261 badsys: /* reached from system_call when the syscall number is above __NR_syscall_max */
262 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) /* result is -ENOSYS */
263 jmp ret_from_sys_call
264
265 /*
266 * Syscall return path ending with IRET.
267 * Has correct top of stack, but partial stack frame.
268 */
269 ENTRY(int_ret_from_sys_call)
270 cli
271 testl $3,CS-ARGOFFSET(%rsp)
272 je retint_restore_args /* low two bits of saved CS are zero: restore and iret without swapgs or work checks */
273 movl $_TIF_ALLWORK_MASK,%edi
274 /* edi: mask to check */
275 int_with_check:
276 GET_THREAD_INFO(%rcx)
277 movl threadinfo_flags(%rcx),%edx
278 andl %edi,%edx
279 jnz int_careful /* pending work selected by the mask */
280 jmp retint_swapgs /* nothing pending: swapgs, restore, iretq */
281
282 /* Either reschedule or signal or syscall exit tracking needed. */
283 /* First do a reschedule test. */
284 /* edx: work, edi: workmask */
285 int_careful:
286 bt $TIF_NEED_RESCHED,%edx
287 jnc int_very_careful /* bit clear: not a reschedule request */
288 sti
289 pushq %rdi /* keep the work mask across the call */
290 call schedule
291 popq %rdi
292 jmp int_with_check /* re-read the flags */
293
294 /* handle signals and tracing -- both require a full stack frame */
295 int_very_careful:
296 sti
297 SAVE_REST
298 /* Check for syscall exit trace */
299 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
300 jz int_signal
301 pushq %rdi /* keep the work mask across the call */
302 leaq 8(%rsp),%rdi # &ptregs -> arg1
303 call syscall_trace_leave
304 popq %rdi
305 btr $TIF_SYSCALL_TRACE,%edi /* clear the three bits just handled from the mask so they are not selected again */
306 btr $TIF_SYSCALL_AUDIT,%edi
307 btr $TIF_SINGLESTEP,%edi
308 jmp int_restore_rest
309
310 int_signal: /* entered from int_very_careful after SAVE_REST, with the work flags in edx */
311 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
312 jz 1f /* none of these flags set: skip the notify call */
313 movq %rsp,%rdi # &ptregs -> arg1
314 xorl %esi,%esi # oldset -> arg2
315 call do_notify_resume
316 1: movl $_TIF_NEED_RESCHED,%edi /* from here on only the reschedule flag is rechecked */
317 int_restore_rest:
318 RESTORE_REST /* pairs with the SAVE_REST in int_very_careful */
319 jmp int_with_check
320 CFI_ENDPROC
321
322 /*
323 * Certain special system calls that need to save a complete full stack frame.
324 */
325
326 .macro PTREGSCALL label,func,arg /* emit global stub label: func address in rax, frame pointer in arg, then jump to ptregscall_common */
327 .globl \label
328 \label:
329 leaq \func(%rip),%rax /* ptregscall_common calls through rax */
330 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
331 jmp ptregscall_common
332 .endm
333
334 PTREGSCALL stub_clone, sys_clone, %r8 /* frame pointer passed in r8 */
335 PTREGSCALL stub_fork, sys_fork, %rdi /* frame pointer passed in rdi */
336 PTREGSCALL stub_vfork, sys_vfork, %rdi /* frame pointer passed in rdi */
337 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx /* frame pointer passed in rdx */
338 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx /* frame pointer passed in rdx */
339 PTREGSCALL stub_iopl, sys_iopl, %rsi /* frame pointer passed in rsi */
340
341 ENTRY(ptregscall_common) /* in: rax = function to call; the return address is on the stack; the argument registers were set up by the stub */
342 CFI_STARTPROC
343 popq %r11 /* take the return address off the stack so the register frame is contiguous */
344 CFI_ADJUST_CFA_OFFSET -8
345 SAVE_REST
346 movq %r11, %r15 /* park the return address in r15 across the call */
347 FIXUP_TOP_OF_STACK %r11 /* r11 is scratch here */
348 call *%rax
349 RESTORE_TOP_OF_STACK %r11
350 movq %r15, %r11 /* return address back into r11 before RESTORE_REST */
351 RESTORE_REST
352 pushq %r11 /* put the return address back for ret */
353 CFI_ADJUST_CFA_OFFSET 8
354 ret
355 CFI_ENDPROC
356
357 ENTRY(stub_execve) /* same shape as ptregscall_common around sys_execve, plus an IRET exit when the task is flagged TIF_IA32 afterwards */
358 CFI_STARTPROC
359 popq %r11 /* take the return address off the stack */
360 CFI_ADJUST_CFA_OFFSET -8
361 SAVE_REST
362 movq %r11, %r15 /* park the return address in r15 across the call */
363 FIXUP_TOP_OF_STACK %r11
364 call sys_execve
365 GET_THREAD_INFO(%rcx)
366 bt $TIF_IA32,threadinfo_flags(%rcx)
367 jc exec_32bit /* TIF_IA32 set after the call: leave through the IRET path */
368 RESTORE_TOP_OF_STACK %r11
369 movq %r15, %r11
370 RESTORE_REST
371 push %r11 /* put the return address back for ret; NOTE(review): no CFI adjustment here, unlike ptregscall_common */
372 ret
373
374 exec_32bit:
375 CFI_ADJUST_CFA_OFFSET REST_SKIP
376 movq %rax,RAX(%rsp) /* sys_execve result -> RAX slot */
377 RESTORE_REST
378 jmp int_ret_from_sys_call
379 CFI_ENDPROC
380
381 /*
382 * sigreturn is special because it needs to restore all registers on return.
383 * This cannot be done with SYSRET, so use the IRET return path instead.
384 */
385 ENTRY(stub_rt_sigreturn)
386 CFI_STARTPROC
387 addq $8, %rsp /* discard the return address: this stub never returns to its caller; NOTE(review): no CFI adjustment accompanies this, unlike the popq in ptregscall_common */
388 SAVE_REST
389 movq %rsp,%rdi /* argument: frame pointer */
390 FIXUP_TOP_OF_STACK %r11 /* r11 is scratch here */
391 call sys_rt_sigreturn
392 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
393 RESTORE_REST
394 jmp int_ret_from_sys_call
395 CFI_ENDPROC
396
397 /*
398 * Interrupt entry/exit.
399 *
400 * Interrupt entry points save only callee clobbered registers in fast path.
401 *
402 * Entry runs with interrupts off.
403 */
404
405 /* 0(%rsp): interrupt number */
406 .macro interrupt func /* common interrupt prologue: save registers, swapgs if needed, possibly switch to the irq stack, call func with the frame pointer in rdi */
407 CFI_STARTPROC simple
408 CFI_DEF_CFA rsp,(SS-RDI)
409 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
410 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
411 cld
412 #ifdef CONFIG_DEBUG_INFO
413 SAVE_ALL
414 movq %rsp,%rdi
415 /*
416 * Setup a stack frame pointer. This allows gdb to trace
417 * back to the original stack.
418 */
419 movq %rsp,%rbp
420 CFI_DEF_CFA_REGISTER rbp
421 #else
422 SAVE_ARGS
423 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
424 #endif
425 testl $3,CS(%rdi)
426 je 1f /* low two bits of saved CS are zero: skip swapgs */
427 swapgs
428 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
429 movq %gs:pda_irqstackptr,%rax /* movq leaves the flags from the addl intact */
430 cmoveq %rax,%rsp /* switch to the irq stack only when the count just became zero */
431 pushq %rdi # save old stack
432 call \func
433 .endm
434
435 ENTRY(common_interrupt) /* device interrupt entry: prologue from the interrupt macro, handler is do_IRQ */
436 interrupt do_IRQ
437 /* 0(%rsp): oldrsp-ARGOFFSET */
438 ret_from_intr:
439 popq %rdi /* frame pointer pushed by the interrupt macro */
440 cli
441 subl $1,%gs:pda_irqcount /* undo the addl in the interrupt macro */
442 #ifdef CONFIG_DEBUG_INFO
443 movq RBP(%rdi),%rbp
444 #endif
445 leaq ARGOFFSET(%rdi),%rsp /* back onto the interrupted stack */
446 exit_intr:
447 GET_THREAD_INFO(%rcx)
448 testl $3,CS-ARGOFFSET(%rsp)
449 je retint_kernel /* low two bits of saved CS are zero: kernel-mode return path */
450
451 /* Interrupt came from user space */
452 /*
453 * Has a correct top of stack, but a partial stack frame
454 * %rcx: thread info. Interrupts off.
455 */
456 retint_with_reschedule:
457 movl $_TIF_WORK_MASK,%edi
458 retint_check:
459 movl threadinfo_flags(%rcx),%edx
460 andl %edi,%edx
461 jnz retint_careful /* pending work selected by the mask */
462 retint_swapgs:
463 cli
464 swapgs
465 retint_restore_args:
466 cli
467 RESTORE_ARGS 0,8,0
468 iret_label: /* this address has an __ex_table entry (fixup bad_iret) and is compared against in error_kernelspace */
469 iretq
470
471 .section __ex_table,"a"
472 .quad iret_label,bad_iret /* exception table pair: instruction address, fixup address */
473 .previous
474 .section .fixup,"ax"
475 /* force a signal here? this matches i386 behaviour */
476 /* running with kernel gs */
477 bad_iret:
478 movq $-9999,%rdi /* better code? */
479 jmp do_exit /* do_exit is entered with -9999 in rdi */
480 .previous
481
482 /* edi: workmask, edx: work */
483 retint_careful:
484 bt $TIF_NEED_RESCHED,%edx
485 jnc retint_signal /* bit clear: not a reschedule request */
486 sti
487 pushq %rdi /* keep the work mask across the call */
488 call schedule
489 popq %rdi
490 GET_THREAD_INFO(%rcx) /* retint_check expects thread info in rcx */
491 cli /* retint_check runs with interrupts off */
492 jmp retint_check
493
494 retint_signal: /* edx: work flags; reached from retint_careful when no reschedule is pending */
495 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
496 jz retint_swapgs /* none of these flags set: return to user space */
497 sti
498 SAVE_REST
499 movq $-1,ORIG_RAX(%rsp) /* presumably marks the frame as not coming from a system call -- confirm against the signal code */
500 xorq %rsi,%rsi # oldset
501 movq %rsp,%rdi # &pt_regs
502 call do_notify_resume
503 RESTORE_REST
504 cli
505 movl $_TIF_NEED_RESCHED,%edi /* from here on only the reschedule flag is rechecked */
506 GET_THREAD_INFO(%rcx)
507 jmp retint_check
508
509 #ifdef CONFIG_PREEMPT
510 /* Returning to kernel space. Check if we need preemption */
511 /* rcx: threadinfo. interrupts off. */
512 .p2align
513 retint_kernel:
514 cmpl $0,threadinfo_preempt_count(%rcx)
515 jnz retint_restore_args /* nonzero preempt count: do not preempt */
516 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
517 jnc retint_restore_args /* no reschedule requested */
518 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
519 jnc retint_restore_args /* bit 9 of the saved eflags is clear: interrupted code had interrupts off */
520 call preempt_schedule_irq
521 jmp exit_intr /* run the exit checks again */
522 #endif
523 CFI_ENDPROC
524
525 /*
526 * APIC interrupts. Each entry pushes num-256 into the interrupt number slot, runs the common prologue with func as handler and leaves through ret_from_intr.
527 */
528 .macro apicinterrupt num,func
529 pushq $\num-256
530 interrupt \func
531 jmp ret_from_intr
532 CFI_ENDPROC
533 .endm
534
535 ENTRY(thermal_interrupt) /* vector THERMAL_APIC_VECTOR */
536 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
537
538 #ifdef CONFIG_SMP
539 ENTRY(reschedule_interrupt) /* vector RESCHEDULE_VECTOR */
540 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
541
542 ENTRY(invalidate_interrupt) /* vector INVALIDATE_TLB_VECTOR */
543 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
544
545 ENTRY(call_function_interrupt) /* vector CALL_FUNCTION_VECTOR */
546 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
547 #endif
548
549 #ifdef CONFIG_X86_LOCAL_APIC
550 ENTRY(apic_timer_interrupt) /* vector LOCAL_TIMER_VECTOR */
551 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
552
553 ENTRY(error_interrupt) /* vector ERROR_APIC_VECTOR */
554 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
555
556 ENTRY(spurious_interrupt) /* vector SPURIOUS_APIC_VECTOR */
557 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
558 #endif
559
560 /*
561 * Exception entry points. zeroentry is for exceptions without a hardware error code: it pushes 0 in its place, then hands the handler address to error_entry in rax.
562 */
563 .macro zeroentry sym
564 pushq $0 /* push error code/oldrax */
565 pushq %rax /* push real oldrax to the rdi slot */
566 leaq \sym(%rip),%rax /* handler address for error_entry */
567 jmp error_entry
568 .endm
569
570 .macro errorentry sym /* for exceptions whose error code is already on the stack */
571 pushq %rax /* original rax goes where error_entry expects it (the rdi slot) */
572 leaq \sym(%rip),%rax /* handler address for error_entry */
573 jmp error_entry
574 .endm
575
576 /* error code is on the stack already */
577 /* handle NMI like exceptions that can happen everywhere */
578 .macro paranoidentry sym /* out: ebx = no-swapgs flag (1: no swapgs on exit, 0: swapgs was done here) */
579 SAVE_ALL
580 cld
581 movl $1,%ebx /* assume no swapgs is needed */
582 movl $MSR_GS_BASE,%ecx
583 rdmsr /* edx:eax = MSR_GS_BASE */
584 testl %edx,%edx
585 js 1f /* sign bit of the high half set: keep ebx = 1 and skip swapgs; presumably this means the kernel gs base is already active -- confirm */
586 swapgs
587 xorl %ebx,%ebx /* swapgs done here, so the exit path must undo it */
588 1: movq %rsp,%rdi /* arg1: frame pointer */
589 movq ORIG_RAX(%rsp),%rsi /* arg2: error code taken from the ORIG_RAX slot */
590 movq $-1,ORIG_RAX(%rsp)
591 call \sym
592 .endm
593
594 /*
595 * Exception entry point. This expects an error code/orig_rax on the stack
596 * and the exception handler in %rax. It saves all registers, does swapgs when needed and calls the handler with the frame pointer in rdi and the error code in rsi.
597 */
598 ENTRY(error_entry)
599 CFI_STARTPROC simple
600 CFI_DEF_CFA rsp,(SS-RDI)
601 CFI_REL_OFFSET rsp,(RSP-RDI)
602 CFI_REL_OFFSET rip,(RIP-RDI)
603 /* rdi slot contains rax, oldrax contains error code */
604 cld
605 subq $14*8,%rsp /* room for the 14 register slots below the rdi slot */
606 CFI_ADJUST_CFA_OFFSET (14*8)
607 movq %rsi,13*8(%rsp)
608 CFI_REL_OFFSET rsi,RSI
609 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
610 movq %rdx,12*8(%rsp)
611 CFI_REL_OFFSET rdx,RDX
612 movq %rcx,11*8(%rsp)
613 CFI_REL_OFFSET rcx,RCX
614 movq %rsi,10*8(%rsp) /* store rax */
615 CFI_REL_OFFSET rax,RAX
616 movq %r8, 9*8(%rsp)
617 CFI_REL_OFFSET r8,R8
618 movq %r9, 8*8(%rsp)
619 CFI_REL_OFFSET r9,R9
620 movq %r10,7*8(%rsp)
621 CFI_REL_OFFSET r10,R10
622 movq %r11,6*8(%rsp)
623 CFI_REL_OFFSET r11,R11
624 movq %rbx,5*8(%rsp)
625 CFI_REL_OFFSET rbx,RBX
626 movq %rbp,4*8(%rsp)
627 CFI_REL_OFFSET rbp,RBP
628 movq %r12,3*8(%rsp)
629 CFI_REL_OFFSET r12,R12
630 movq %r13,2*8(%rsp)
631 CFI_REL_OFFSET r13,R13
632 movq %r14,1*8(%rsp)
633 CFI_REL_OFFSET r14,R14
634 movq %r15,(%rsp)
635 CFI_REL_OFFSET r15,R15
636 xorl %ebx,%ebx /* ebx = 0: swapgs needed on exit unless error_kernelspace changes it */
637 testl $3,CS(%rsp)
638 je error_kernelspace /* low two bits of saved CS are zero: fault came from kernel mode */
639 error_swapgs:
640 swapgs
641 error_sti:
642 movq %rdi,RDI(%rsp) /* rdi is still the original value; store it in the slot that held rax until now */
643 movq %rsp,%rdi /* arg1: frame pointer */
644 movq ORIG_RAX(%rsp),%rsi /* get error code */
645 movq $-1,ORIG_RAX(%rsp)
646 call *%rax /* handler address supplied by zeroentry/errorentry */
647 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
648 error_exit: /* exception return: kernel-mode faults go to retint_kernel, user-mode faults check for pending work and then iret */
649 movl %ebx,%eax /* copy the flag to eax before RESTORE_REST, which presumably reloads rbx */
650 RESTORE_REST
651 cli
652 GET_THREAD_INFO(%rcx)
653 testl %eax,%eax
654 jne retint_kernel /* flag set: no swapgs needed, take the kernel-mode return path */
655 movl threadinfo_flags(%rcx),%edx
656 movl $_TIF_WORK_MASK,%edi
657 andl %edi,%edx
658 jnz retint_careful /* pending work: edi = workmask, edx = work, as retint_careful expects */
659 swapgs
660 RESTORE_ARGS 0,8,0
661 jmp iret_label /* use the shared iretq: only iret_label has an __ex_table entry and is recognised by error_kernelspace, so a faulting iret is handled */
662 CFI_ENDPROC
663
664 error_kernelspace: /* fault came from kernel mode: normally no swapgs, except for the known addresses checked below */
665 incl %ebx /* ebx was zeroed in error_entry, so it is now 1: no swapgs on exit */
666 /* There are two places in the kernel that can potentially fault with
667 usergs. Handle them here. The exception handlers after
668 iret run with kernel gs again, so don't set the user space flag.
669 B stepping K8s sometimes report a truncated RIP for IRET
670 exceptions returning to compat mode. Check for these here too. */
671 leaq iret_label(%rip),%rbp
672 cmpq %rbp,RIP(%rsp)
673 je error_swapgs /* faulted on the iretq at iret_label */
674 movl %ebp,%ebp /* zero extend */
675 cmpq %rbp,RIP(%rsp)
676 je error_swapgs /* same address with the upper 32 bits cleared (truncated RIP case) */
677 cmpq $gs_change,RIP(%rsp)
678 je error_swapgs /* faulted on the gs load in load_gs_index */
679 jmp error_sti /* ordinary kernel fault: continue without swapgs */
680
681 /* Reload gs selector with exception handling */
682 /* edi: new selector */
683 ENTRY(load_gs_index)
684 pushf /* save flags; popf below restores them, including the interrupt flag */
685 cli
686 swapgs
687 gs_change: /* this address has an __ex_table entry (fixup bad_gs) and is checked in error_kernelspace */
688 movl %edi,%gs
689 2: mfence /* workaround */
690 swapgs
691 popf
692 ret
693
694 .section __ex_table,"a"
695 .align 8
696 .quad gs_change,bad_gs /* exception table pair: instruction address, fixup address */
697 .previous
698 .section .fixup,"ax"
699 /* running with kernelgs */
700 bad_gs:
701 swapgs /* switch back to user gs */
702 xorl %eax,%eax
703 movl %eax,%gs /* load selector 0 instead of the one that faulted */
704 jmp 2b /* resume after the faulting load */
705 .previous
706
707 /*
708 * Create a kernel thread.
709 *
710 * C extern interface:
711 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
712 *
713 * asm input arguments:
714 * rdi: fn, rsi: arg, rdx: flags
715 */
716 ENTRY(kernel_thread)
717 CFI_STARTPROC
718 FAKE_STACK_FRAME $child_rip /* fake frame whose rip slot is child_rip */
719 SAVE_ALL
720
721 # rdi: flags, rsi: usp, rdx: will be &pt_regs
722 movq %rdx,%rdi
723 orq kernel_thread_flags(%rip),%rdi /* caller flags OR-ed with kernel_thread_flags */
724 movq $-1, %rsi /* usp argument is -1 */
725 movq %rsp, %rdx
726
727 xorl %r8d,%r8d /* fifth and sixth arguments are zero */
728 xorl %r9d,%r9d
729
730 # clone now
731 call do_fork
732 movq %rax,RAX(%rsp) /* do_fork result -> RAX slot, presumably reloaded into rax by RESTORE_ALL */
733 xorl %edi,%edi /* NOTE(review): rdi is presumably reloaded by the RESTORE_ALL below, which would make this a dead store -- confirm */
734
735 /*
736 * It isn't worth to check for reschedule here,
737 * so internally to the x86_64 port you can rely on kernel_thread()
738 * not to reschedule the child before returning, this avoids the need
739 * of hacks for example to fork off the per-CPU idle tasks.
740 * [Hopefully no generic code relies on the reschedule -AK]
741 */
742 RESTORE_ALL
743 UNFAKE_STACK_FRAME
744 ret
745 CFI_ENDPROC
746
747
748 child_rip:
749 /*
750 * Here we are in the child and the registers are set as they were
751 * at kernel_thread() invocation in the parent.
752 */
753 movq %rdi, %rax /* rax = fn */
754 movq %rsi, %rdi /* first argument = arg */
755 call *%rax /* fn(arg) */
756 # exit
757 xorq %rdi, %rdi /* do_exit is called with 0; the value returned by fn is not used */
758 call do_exit
759
760 /*
761 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
762 *
763 * C extern interface:
764 * extern long execve(char *name, char **argv, char **envp)
765 *
766 * asm input arguments:
767 * rdi: name, rsi: argv, rdx: envp
768 *
769 * We want to fallback into:
770 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
771 *
772 * do_sys_execve asm fallback arguments:
773 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
774 */
775 ENTRY(execve)
776 CFI_STARTPROC
777 FAKE_STACK_FRAME $0 /* fake frame with a zero rip slot */
778 SAVE_ALL
779 call sys_execve
780 movq %rax, RAX(%rsp) /* result -> RAX slot */
781 RESTORE_REST
782 testq %rax,%rax
783 je int_ret_from_sys_call /* result 0: leave through the IRET path using the frame on the stack */
784 RESTORE_ARGS /* nonzero result: unwind and return it to the caller */
785 UNFAKE_STACK_FRAME
786 ret
787 CFI_ENDPROC
788
789 ENTRY(page_fault) /* error code already on the stack (errorentry) */
790 errorentry do_page_fault
791
792 ENTRY(coprocessor_error) /* no error code on the stack; zeroentry pushes 0 */
793 zeroentry do_coprocessor_error
794
795 ENTRY(simd_coprocessor_error) /* no error code on the stack; zeroentry pushes 0 */
796 zeroentry do_simd_coprocessor_error
797
798 ENTRY(device_not_available) /* handler is math_state_restore */
799 zeroentry math_state_restore
800
801 /* runs on exception stack */
802 ENTRY(debug)
803 CFI_STARTPROC
804 pushq $0 /* placeholder in the error code slot */
805 CFI_ADJUST_CFA_OFFSET 8
806 paranoidentry do_debug
807 /* switch back to process stack to restore the state ptrace touched */
808 movq %rax,%rsp /* the value returned by do_debug becomes the stack pointer */
809 testl $3,CS(%rsp)
810 jnz paranoid_userspace /* returning to user mode: check for pending work */
811 jmp paranoid_exit
812 CFI_ENDPROC
813
814 /* runs on exception stack */
815 ENTRY(nmi)
816 CFI_STARTPROC
817 pushq $-1 /* placeholder in the error code slot */
818 CFI_ADJUST_CFA_OFFSET 8
819 paranoidentry do_nmi
820 /* ebx: no swapgs flag */
821 paranoid_exit: /* shared exit for the paranoidentry users; falls through from nmi */
822 testl %ebx,%ebx /* swapgs needed? */
823 jnz paranoid_restore
824 paranoid_swapgs:
825 cli
826 swapgs
827 paranoid_restore:
828 RESTORE_ALL 8
829 iretq /* NOTE(review): unlike iret_label, this iretq has no __ex_table entry -- confirm that is intended */
830 paranoid_userspace: /* entered from debug, double_fault and stack_segment when the saved CS is user mode */
831 cli
832 GET_THREAD_INFO(%rcx)
833 movl threadinfo_flags(%rcx),%edx
834 testl $_TIF_NEED_RESCHED,%edx
835 jnz paranoid_resched
836 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
837 jnz paranoid_signal
838 jmp paranoid_swapgs /* no work pending: swapgs and return */
839 paranoid_resched:
840 sti
841 call schedule
842 jmp paranoid_exit
843 paranoid_signal:
844 sti
845 xorl %esi,%esi /* oldset */
846 movq %rsp,%rdi /* &pt_regs */
847 call do_notify_resume
848 jmp paranoid_exit
849 CFI_ENDPROC
850
851 ENTRY(int3) /* all entries in this group have no error code on the stack; zeroentry pushes 0 */
852 zeroentry do_int3
853
854 ENTRY(overflow)
855 zeroentry do_overflow
856
857 ENTRY(bounds)
858 zeroentry do_bounds
859
860 ENTRY(invalid_op)
861 zeroentry do_invalid_op
862
863 ENTRY(coprocessor_segment_overrun)
864 zeroentry do_coprocessor_segment_overrun
865
866 ENTRY(reserved)
867 zeroentry do_reserved
868
869 /* runs on exception stack */
870 ENTRY(double_fault)
871 CFI_STARTPROC
872 paranoidentry do_double_fault /* nothing is pushed first: paranoidentry expects the error code on the stack already */
873 movq %rax,%rsp /* the value returned by the handler becomes the stack pointer */
874 testl $3,CS(%rsp)
875 jnz paranoid_userspace /* returning to user mode: check for pending work */
876 jmp paranoid_exit
877 CFI_ENDPROC
878
879 ENTRY(invalid_TSS) /* error code already on the stack (errorentry) */
880 errorentry do_invalid_TSS
881
882 ENTRY(segment_not_present) /* error code already on the stack (errorentry) */
883 errorentry do_segment_not_present
884
885 /* runs on exception stack */
886 ENTRY(stack_segment)
887 CFI_STARTPROC
888 paranoidentry do_stack_segment /* nothing is pushed first: paranoidentry expects the error code on the stack already */
889 movq %rax,%rsp /* the value returned by the handler becomes the stack pointer */
890 testl $3,CS(%rsp)
891 jnz paranoid_userspace /* returning to user mode: check for pending work */
892 jmp paranoid_exit
893 CFI_ENDPROC
894
895 ENTRY(general_protection) /* error code already on the stack (errorentry) */
896 errorentry do_general_protection
897
898 ENTRY(alignment_check) /* error code already on the stack (errorentry) */
899 errorentry do_alignment_check
900
901 ENTRY(divide_error) /* no error code on the stack; zeroentry pushes 0 */
902 zeroentry do_divide_error
903
904 ENTRY(spurious_interrupt_bug) /* no error code on the stack; zeroentry pushes 0 */
905 zeroentry do_spurious_interrupt_bug
906
907 #ifdef CONFIG_X86_MCE
908 /* runs on exception stack */
909 ENTRY(machine_check)
910 CFI_STARTPROC
911 pushq $0 /* placeholder in the error code slot */
912 CFI_ADJUST_CFA_OFFSET 8
913 paranoidentry do_machine_check
914 jmp paranoid_exit /* exits without the user-mode work check that debug, double_fault and stack_segment perform */
915 CFI_ENDPROC
916 #endif
917
918 ENTRY(call_debug) /* no error code on the stack; zeroentry pushes 0 */
919 zeroentry do_call_debug
920