-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address. ---##
-##--- dispatch-amd64.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*--- dispatch-amd64.S ---*/
+/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
/*--- The dispatch loop. ---*/
/*------------------------------------------------------------*/
-/* signature: UWord VG_(run_innerloop) ( void* guest_state ) */
+/*----------------------------------------------------*/
+/*--- Preamble (set everything up) ---*/
+/*----------------------------------------------------*/
+/* signature:
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+*/
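+
+/* Returns either a VG_TRC_* code (counter hit zero, fast-cache miss,
+   invariant check failed) or, if a translation changed the guest
+   state pointer, that new pointer value; the scheduler uses this to
+   decide what to do next.  do_profiling selects the dispatcher that
+   also bumps the per-block counters pointed to by VG_(tt_fastN). */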
+
+.text
.globl VG_(run_innerloop)
VG_(run_innerloop):
/* %rdi holds guest_state */
+ /* %rsi holds do_profiling */
/* ----- entry point to VG_(run_innerloop) ----- */
pushq %rbx
pushq %r13
pushq %r14
pushq %r15
- pushq %rdi
+ pushq %rdi /* guest_state */
- movq VG_(dispatch_ctr)@GOTPCREL(%rip), %rsi
- pushq (%rsi)
+ movq VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
+ movl (%r15), %r15d
+ pushq %r15
- /* 8(%rsp) holds cached copy of guest_state */
+ /* 8(%rsp) holds cached copy of guest_state ptr */
/* 0(%rsp) holds cached copy of VG_(dispatch_ctr) */
/* Set up the guest state pointer */
movq %rdi, %rbp

/* fetch %RIP into %rax */
movq OFFSET_amd64_RIP(%rbp), %rax

/* set dir flag to known value */
cld
- /* fall into main loop */
+ /* fall into main loop (the right one) */
+ cmpq $0, %rsi
+ je VG_(run_innerloop__dispatch_unprofiled)
+ jmp VG_(run_innerloop__dispatch_profiled)
+ /*NOTREACHED*/
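+
+ /* The choice is made once per call: each translation ends by jumping
+    straight back to whichever dispatcher invoked it, so a timeslice
+    runs either entirely profiled or entirely unprofiled. */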
+
+/*----------------------------------------------------*/
+/*--- NO-PROFILING (standard) dispatcher ---*/
+/*----------------------------------------------------*/
- /* Here, %rax is the only live (real) register. The entire
- simulated state is saved in the ThreadState. */
+.align 16
+.global VG_(run_innerloop__dispatch_unprofiled)
+VG_(run_innerloop__dispatch_unprofiled):
+ /* AT ENTRY: %rax is next guest addr, %rbp is possibly
+ modified guest state ptr */
+
+ /* Has the guest state pointer been messed with? If yes, exit. */
+ cmpq 8(%rsp), %rbp
+ jnz gsp_changed
-dispatch_boring:
/* save the jump address in the guest state */
movq %rax, OFFSET_amd64_RIP(%rbp)
/* Are we out of timeslice? If yes, defer to scheduler. */
subl $1, 0(%rsp)
jz counter_is_zero
/* try a fast lookup in the translation cache */
- movq %rax, %rbx
- andq $VG_TT_FAST_MASK, %rbx
- movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
- movq (%rcx,%rbx,8), %rcx
- cmpq %rax, (%rcx)
- jnz fast_lookup_failed
- /* increment bb profile counter */
- movq VG_(tt_fastN)@GOTPCREL(%rip), %rdx
- movq (%rdx,%rbx,8), %rdx
- incl (%rdx)
+ movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
+ movq %rax, %rbx
+ andq $VG_TT_FAST_MASK, %rbx
+ movq (%rcx,%rbx,8), %rcx
+ cmpq %rax, (%rcx)
+ jnz fast_lookup_failed
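+ /* C-level sketch of the lookup above, with 'ga' standing for the
+    guest address in %rax (entry layout inferred from this code, not
+    quoted from any header):
+        ULong* tce = VG_(tt_fast)[ga & VG_TT_FAST_MASK];
+        if (tce[0] != ga) goto fast_lookup_failed;
+ */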
/* Found a match. Call tce[1], which is 8 bytes along, since
each tce element is a 64-bit int. */
addq $8, %rcx
- call *%rcx
+ jmp *%rcx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_unprofiled). */
+ /*NOTREACHED*/
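+
+ /* Control gets back to the label above only via that jump from the
+    translation; nothing is pushed on the host stack per transfer, so
+    the loop's state lives entirely in %rax, %rbp and the two cached
+    slots at 0(%rsp) and 8(%rsp). */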
+
+/*----------------------------------------------------*/
+/*--- PROFILING dispatcher (can be much slower) ---*/
+/*----------------------------------------------------*/
+
+.align 16
+.global VG_(run_innerloop__dispatch_profiled)
+VG_(run_innerloop__dispatch_profiled):
+ /* AT ENTRY: %rax is next guest addr, %rbp is possibly
+ modified guest state ptr */
+
+ /* Has the guest state pointer been messed with? If yes, exit. */
+ cmpq 8(%rsp), %rbp
+ jnz gsp_changed
- /*
- %rax holds destination (original) address.
- %rbp indicates further details of the control transfer
- requested to the address in %rax.
-
- If rbp is unchanged (== * 8(%rsp)), just jump next to %rax.
+ /* save the jump address in the guest state */
+ movq %rax, OFFSET_amd64_RIP(%rbp)
- Otherwise fall out, back to the scheduler, and let it
- figure out what to do next.
- */
+ /* Are we out of timeslice? If yes, defer to scheduler. */
+ subl $1, 0(%rsp)
+ jz counter_is_zero
- cmpq 8(%rsp), %rbp
- jz dispatch_boring
+ /* try a fast lookup in the translation cache */
+ movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
+ movq %rax, %rbx
+ andq $VG_TT_FAST_MASK, %rbx
+ movq (%rcx,%rbx,8), %rcx
+ cmpq %rax, (%rcx)
+ jnz fast_lookup_failed
- jmp dispatch_exceptional
+ /* increment bb profile counter */
+ movq VG_(tt_fastN)@GOTPCREL(%rip), %rdx
+ movq (%rdx,%rbx,8), %rdx
+ addl $1, (%rdx)
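+ /* VG_(tt_fastN)[idx] is taken to be a pointer to the 32-bit
+    execution count of the block cached in the matching VG_(tt_fast)
+    slot: a parallel array, whose definition is not shown here. */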
+
+ /* Found a match. Call tce[1], which is 8 bytes along, since
+ each tce element is a 64-bit int. */
+ addq $8, %rcx
+ jmp *%rcx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_profiled). */
+ /*NOTREACHED*/
+
+/*----------------------------------------------------*/
+/*--- exit points ---*/
+/*----------------------------------------------------*/
+
+gsp_changed:
+ /* Someone messed with the gsp. Have to
+ defer to scheduler to resolve this. dispatch ctr
+ is not yet decremented, so no need to increment. */
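+ /* (%rbp now describes the control transfer the translation wants;
+    returning it instead of a VG_TRC_* code lets the scheduler work
+    out what to do with the destination address saved in %RIP.) */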
+ /* %RIP is NOT up to date here. First, need to write
+ %rax back to %RIP, but without trashing %rbp since
+ that holds the value we want to return to the scheduler.
+ Hence use %r15 transiently for the guest state pointer. */
+ movq 8(%rsp), %r15
+ movq %rax, OFFSET_amd64_RIP(%r15)
+ movq %rbp, %rax
+ jmp run_innerloop_exit
+ /*NOTREACHED*/
+
+counter_is_zero:
+ /* %RIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, 0(%rsp)
+ movq $VG_TRC_INNER_COUNTERZERO, %rax
+ jmp run_innerloop_exit
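+ /* VG_TRC_INNER_COUNTERZERO tells the scheduler that this timeslice
+    has been used up, so it can reschedule before re-entering. */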
+
+fast_lookup_failed:
+ /* %RIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, 0(%rsp)
+ movq $VG_TRC_INNER_FASTMISS, %rax
+ jmp run_innerloop_exit
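+ /* VG_TRC_INNER_FASTMISS means the fast cache has no entry for this
+    guest address; the scheduler is expected to fall back to the full
+    lookup, translating the block first if necessary, then re-enter. */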

/* All exits from the dispatcher go through here. %rax holds
   the return value. */
run_innerloop_exit:
/* We're leaving. Check that nobody messed with %mxcsr or %fpucw.
   We can't mess with %rax here as it holds the tentative return
   value, but any other register is OK. */
#if !defined(ENABLE_INNER)
/* This check fails when self-hosting, so skip it in that case */
pushq $0
fstcw (%rsp)
cmpl $0x027F, (%rsp)
- popq %r11 /* get rid of the word without trashing %eflags */
+ popq %r15 /* get rid of the word without trashing %eflags */
jnz invariant_violation
#endif
pushq $0
stmxcsr (%rsp)
andl $0xFFFFFFC0, (%rsp) /* mask out status flags */
cmpl $0x1F80, (%rsp)
- popq %r11
+ popq %r15
jnz invariant_violation
/* otherwise we're OK */
jmp run_innerloop_exit_REALLY

invariant_violation:
movq $VG_TRC_INVARIANT_FAILED, %rax
jmp run_innerloop_exit_REALLY
run_innerloop_exit_REALLY:
- movq VG_(dispatch_ctr)@GOTPCREL(%rip), %rsi
- popq (%rsi)
+
+ /* restore VG_(dispatch_ctr) */
+ popq %r14
+ movq VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
+ movl %r14d, (%r15)
+
popq %rdi
popq %r15
popq %r14
popq %r13
popq %rbx
ret
-/* Other ways of getting out of the inner loop. Placed out-of-line to
-   make it look cleaner.
-*/
-dispatch_exceptional:
- /* this is jumped to only, not fallen-through from above */
-
- /* save %rax in %RIP and defer to sched */
- movq 8(%rsp), %rdi
- movq %rax, OFFSET_amd64_RIP(%rdi)
- movq %rbp, %rax
- jmp run_innerloop_exit
-fast_lookup_failed:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, 0(%rsp)
- movq $VG_TRC_INNER_FASTMISS, %rax
- jmp run_innerloop_exit
-counter_is_zero:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, 0(%rsp)
- movq $VG_TRC_INNER_COUNTERZERO, %rax
- jmp run_innerloop_exit
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
-##--------------------------------------------------------------------##
-##--- end ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/