From: Julian Seward
Date: Thu, 15 Dec 2005 14:02:34 +0000 (+0000)
Subject: - x86 back end: change code generation convention, so that instead of
X-Git-Tag: svn/VALGRIND_3_2_3^2~160
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1dd039af9a3a779450a1683c2eff95f47cd7ca0f;p=thirdparty%2Fvalgrind.git

- x86 back end: change code generation convention, so that instead of
  dispatchers CALLing generated code which later RETs, dispatchers
  jump to generated code and it jumps back to the dispatcher.  This
  removes two memory references per translation run and by itself
  gives a measurable performance improvement on P4.

  As a result, there is new plumbing so that the caller of
  LibVEX_Translate can supply the address of the dispatcher to jump
  back to.

  This probably breaks all other targets.  Do not update.

- Administrative cleanup: LibVEX_Translate has an excessive number of
  arguments.  Remove them all and instead add a struct by which the
  arguments are supplied.  Add further comments about the meaning of
  some fields.

git-svn-id: svn://svn.valgrind.org/vex/trunk@1494
---

diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c
index 8471492e2c..ca946054f9 100644
--- a/VEX/priv/host-x86/hdefs.c
+++ b/VEX/priv/host-x86/hdefs.c
@@ -942,14 +942,16 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) {
          vex_printf("if (%%eflags.%s) { ",
                     showX86CondCode(i->Xin.Goto.cond));
       }
-      if (i->Xin.Goto.jk != Ijk_Boring) {
+      if (i->Xin.Goto.jk != Ijk_Boring
+          && i->Xin.Goto.jk != Ijk_Call
+          && i->Xin.Goto.jk != Ijk_Ret) {
          vex_printf("movl $");
          ppIRJumpKind(i->Xin.Goto.jk);
          vex_printf(",%%ebp ; ");
       }
       vex_printf("movl ");
       ppX86RI(i->Xin.Goto.dst);
-      vex_printf(",%%eax ; ret");
+      vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
       if (i->Xin.Goto.cond != Xcc_ALWAYS) {
          vex_printf(" }");
       }
@@ -1216,8 +1218,13 @@ void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
          return;
       case Xin_Goto:
          addRegUsage_X86RI(u, i->Xin.Goto.dst);
-         addHRegUse(u, HRmWrite, hregX86_EAX());
-         if (i->Xin.Goto.jk != Ijk_Boring)
+         addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
+         addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
+         if (i->Xin.Goto.jk != Ijk_Boring
+             && i->Xin.Goto.jk != Ijk_Call
+             && i->Xin.Goto.jk != Ijk_Ret)
+            /* note, this is irrelevant since ebp is not actually
+               available to the allocator. But still .. */
             addHRegUse(u, HRmWrite, hregX86_EBP());
          return;
       case Xin_CMov32:
@@ -1832,7 +1839,8 @@ static UChar* push_word_from_tags ( UChar* p, UShort tags )
    Note that buf is not the insn's final place, and therefore it is
    imperative to emit position-independent code. */
 
-Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, Bool mode64 )
+Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
+                    Bool mode64, void* dispatch )
 {
    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
@@ -2185,20 +2193,30 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, Bool mode64 )
       /* Get the destination address into %eax */
       if (i->Xin.Goto.dst->tag == Xri_Imm) {
-         /* movl $immediate, %eax ; ret */
+         /* movl $immediate, %eax */
          *p++ = 0xB8;
          p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
       } else {
          vassert(i->Xin.Goto.dst->tag == Xri_Reg);
-         /* movl %reg, %eax ; ret */
+         /* movl %reg, %eax */
          if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
         }
      }
-      /* ret */
-      *p++ = 0xC3;
+      /* Get the dispatcher address into %edx.
This has to happen + after the load of %eax since %edx might be carrying the value + destined for %eax immediately prior to this Xin_Goto. */ + vassert(sizeof(UInt) == sizeof(void*)); + vassert(dispatch != NULL); + /* movl $imm32, %edx */ + *p++ = 0xBA; + p = emit32(p, (UInt)dispatch); + + /* jmp *%edx */ + *p++ = 0xFF; + *p++ = 0xE2; /* Fix up the conditional jump, if there was one. */ if (i->Xin.Goto.cond != Xcc_ALWAYS) { diff --git a/VEX/priv/host-x86/hdefs.h b/VEX/priv/host-x86/hdefs.h index 28346c2a07..b6656ceb62 100644 --- a/VEX/priv/host-x86/hdefs.h +++ b/VEX/priv/host-x86/hdefs.h @@ -660,7 +660,8 @@ extern void ppX86Instr ( X86Instr*, Bool ); extern void getRegUsage_X86Instr ( HRegUsage*, X86Instr*, Bool ); extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool ); extern Bool isMove_X86Instr ( X86Instr*, HReg*, HReg* ); -extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*, Bool ); +extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*, + Bool, void* dispatch ); extern X86Instr* genSpill_X86 ( HReg rreg, Int offset, Bool ); extern X86Instr* genReload_X86 ( HReg rreg, Int offset, Bool ); extern void getAllocableRegs_X86 ( Int*, HReg** ); diff --git a/VEX/priv/main/vex_main.c b/VEX/priv/main/vex_main.c index fbdf8b694b..24b6fe3817 100644 --- a/VEX/priv/main/vex_main.c +++ b/VEX/priv/main/vex_main.c @@ -173,43 +173,7 @@ void LibVEX_Init ( /* Exported to library client. */ -VexTranslateResult LibVEX_Translate ( - /* The instruction sets we are translating from and to. */ - VexArch arch_guest, - VexArchInfo* archinfo_guest, - VexArch arch_host, - VexArchInfo* archinfo_host, - /* IN: the block to translate, and its guest address. */ - /* where are the actual bytes in the host's address space? */ - UChar* guest_bytes, - /* where do the bytes came from in the guest's aspace? */ - Addr64 guest_bytes_addr, - /* what guest entry point address do they correspond to? */ - Addr64 guest_bytes_addr_noredir, - /* Is it OK to chase into this guest address? */ - Bool (*chase_into_ok) ( Addr64 ), - /* OUT: which bits of guest code actually got translated */ - VexGuestExtents* guest_extents, - /* IN: a place to put the resulting code, and its size */ - UChar* host_bytes, - Int host_bytes_size, - /* OUT: how much of the output area is used. */ - Int* host_bytes_used, - /* IN: optionally, two instrumentation functions. */ - IRBB* (*instrument1) ( IRBB*, VexGuestLayout*, - Addr64, VexGuestExtents*, - IRType gWordTy, IRType hWordTy ), - IRBB* (*instrument2) ( IRBB*, VexGuestLayout*, - Addr64, VexGuestExtents*, - IRType gWordTy, IRType hWordTy ), - Bool cleanup_after_instrumentation, - /* IN: should this translation be self-checking? */ - Bool do_self_check, - /* IN: optionally, an access check function for guest code. 
*/ - Bool (*byte_accessible) ( Addr64 ), - /* IN: debug: trace vex activity at various points */ - Int traceflags -) +VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) { /* This the bundle of functions we need to do the back-end stuff (insn selection, reg-alloc, assembly) whilst being insulated @@ -224,7 +188,7 @@ VexTranslateResult LibVEX_Translate ( void (*ppInstr) ( HInstr*, Bool ); void (*ppReg) ( HReg ); HInstrArray* (*iselBB) ( IRBB*, VexArchInfo* ); - Int (*emit) ( UChar*, Int, HInstr*, Bool ); + Int (*emit) ( UChar*, Int, HInstr*, Bool, void* ); IRExpr* (*specHelper) ( HChar*, IRExpr** ); Bool (*preciseMemExnsFn) ( Int, Int ); @@ -263,7 +227,7 @@ VexTranslateResult LibVEX_Translate ( offB_TILEN = 0; mode64 = False; - vex_traceflags = traceflags; + vex_traceflags = vta->traceflags; vassert(vex_initdone); vexSetAllocModeTEMP_and_clear(); @@ -272,7 +236,7 @@ VexTranslateResult LibVEX_Translate ( /* First off, check that the guest and host insn sets are supported. */ - switch (arch_host) { + switch (vta->arch_host) { case VexArchX86: mode64 = False; @@ -286,12 +250,13 @@ VexTranslateResult LibVEX_Translate ( ppInstr = (void(*)(HInstr*, Bool)) ppX86Instr; ppReg = (void(*)(HReg)) ppHRegX86; iselBB = iselBB_X86; - emit = (Int(*)(UChar*,Int,HInstr*, Bool)) emit_X86Instr; + emit = emit_X86Instr; host_is_bigendian = False; host_word_type = Ity_I32; - vassert(archinfo_host->subarch == VexSubArchX86_sse0 - || archinfo_host->subarch == VexSubArchX86_sse1 - || archinfo_host->subarch == VexSubArchX86_sse2); + vassert(vta->archinfo_host.subarch == VexSubArchX86_sse0 + || vta->archinfo_host.subarch == VexSubArchX86_sse1 + || vta->archinfo_host.subarch == VexSubArchX86_sse2); + vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */ break; case VexArchAMD64: @@ -309,7 +274,8 @@ VexTranslateResult LibVEX_Translate ( emit = (Int(*)(UChar*,Int,HInstr*, Bool)) emit_AMD64Instr; host_is_bigendian = False; host_word_type = Ity_I64; - vassert(archinfo_host->subarch == VexSubArch_NONE); + vassert(vta->archinfo_host.subarch == VexSubArch_NONE); + vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */ break; case VexArchPPC32: @@ -327,9 +293,10 @@ VexTranslateResult LibVEX_Translate ( emit = (Int(*)(UChar*,Int,HInstr*,Bool)) emit_PPC32Instr; host_is_bigendian = True; host_word_type = Ity_I32; - vassert(archinfo_guest->subarch == VexSubArchPPC32_I - || archinfo_guest->subarch == VexSubArchPPC32_FI - || archinfo_guest->subarch == VexSubArchPPC32_VFI); + vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I + || vta->archinfo_guest.subarch == VexSubArchPPC32_FI + || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI); + vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */ break; case VexArchPPC64: @@ -347,8 +314,9 @@ VexTranslateResult LibVEX_Translate ( emit = (Int(*)(UChar*,Int,HInstr*, Bool)) emit_PPC32Instr; host_is_bigendian = True; host_word_type = Ity_I64; - vassert(archinfo_guest->subarch == VexSubArchPPC64_FI - || archinfo_guest->subarch == VexSubArchPPC64_VFI); + vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI + || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI); + vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */ break; default: @@ -356,7 +324,7 @@ VexTranslateResult LibVEX_Translate ( } - switch (arch_guest) { + switch (vta->arch_guest) { case VexArchX86: preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns; @@ -367,9 +335,9 @@ VexTranslateResult LibVEX_Translate ( guest_layout = &x86guest_layout; offB_TISTART = 
offsetof(VexGuestX86State,guest_TISTART); offB_TILEN = offsetof(VexGuestX86State,guest_TILEN); - vassert(archinfo_guest->subarch == VexSubArchX86_sse0 - || archinfo_guest->subarch == VexSubArchX86_sse1 - || archinfo_guest->subarch == VexSubArchX86_sse2); + vassert(vta->archinfo_guest.subarch == VexSubArchX86_sse0 + || vta->archinfo_guest.subarch == VexSubArchX86_sse1 + || vta->archinfo_guest.subarch == VexSubArchX86_sse2); vassert(0 == sizeof(VexGuestX86State) % 8); vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART ) == 4); vassert(sizeof( ((VexGuestX86State*)0)->guest_TILEN ) == 4); @@ -384,7 +352,7 @@ VexTranslateResult LibVEX_Translate ( guest_layout = &amd64guest_layout; offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART); offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN); - vassert(archinfo_guest->subarch == VexSubArch_NONE); + vassert(vta->archinfo_guest.subarch == VexSubArch_NONE); vassert(0 == sizeof(VexGuestAMD64State) % 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8); @@ -399,7 +367,7 @@ VexTranslateResult LibVEX_Translate ( guest_layout = &armGuest_layout; offB_TISTART = 0; /* hack ... arm has bitrot */ offB_TILEN = 0; /* hack ... arm has bitrot */ - vassert(archinfo_guest->subarch == VexSubArchARM_v4); + vassert(vta->archinfo_guest.subarch == VexSubArchARM_v4); break; case VexArchPPC32: @@ -411,9 +379,9 @@ VexTranslateResult LibVEX_Translate ( guest_layout = &ppc32Guest_layout; offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART); offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN); - vassert(archinfo_guest->subarch == VexSubArchPPC32_I - || archinfo_guest->subarch == VexSubArchPPC32_FI - || archinfo_guest->subarch == VexSubArchPPC32_VFI); + vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I + || vta->archinfo_guest.subarch == VexSubArchPPC32_FI + || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI); vassert(0 == sizeof(VexGuestPPC32State) % 8); vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4); vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN ) == 4); @@ -428,8 +396,8 @@ VexTranslateResult LibVEX_Translate ( guest_layout = &ppc64Guest_layout; offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART); offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN); - vassert(archinfo_guest->subarch == VexSubArchPPC64_FI - || archinfo_guest->subarch == VexSubArchPPC64_VFI); + vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI + || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI); vassert(0 == sizeof(VexGuestPPC64State) % 16); vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8); vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TILEN ) == 8); @@ -440,11 +408,11 @@ VexTranslateResult LibVEX_Translate ( } /* yet more sanity checks ... */ - if (arch_guest == arch_host) { + if (vta->arch_guest == vta->arch_host) { /* doesn't necessarily have to be true, but if it isn't it means we are simulating one flavour of an architecture a different flavour of the same architecture, which is pretty strange. 
*/ - vassert(archinfo_guest->subarch == archinfo_host->subarch); + vassert(vta->archinfo_guest.subarch == vta->archinfo_host.subarch); } vexAllocSanityCheck(); @@ -454,15 +422,15 @@ VexTranslateResult LibVEX_Translate ( " Front end " "------------------------\n\n"); - irbb = bb_to_IR ( guest_extents, + irbb = bb_to_IR ( vta->guest_extents, disInstrFn, - guest_bytes, - guest_bytes_addr, - chase_into_ok, + vta->guest_bytes, + vta->guest_bytes_addr, + vta->chase_into_ok, host_is_bigendian, - archinfo_guest, + &vta->archinfo_guest, guest_word_type, - do_self_check, + vta->do_self_check, offB_TISTART, offB_TILEN ); @@ -475,21 +443,21 @@ VexTranslateResult LibVEX_Translate ( return VexTransAccessFail; } - vassert(guest_extents->n_used >= 1 && guest_extents->n_used <= 3); - vassert(guest_extents->base[0] == guest_bytes_addr); - for (i = 0; i < guest_extents->n_used; i++) { - vassert(guest_extents->len[i] < 10000); /* sanity */ + vassert(vta->guest_extents->n_used >= 1 && vta->guest_extents->n_used <= 3); + vassert(vta->guest_extents->base[0] == vta->guest_bytes_addr); + for (i = 0; i < vta->guest_extents->n_used; i++) { + vassert(vta->guest_extents->len[i] < 10000); /* sanity */ } /* If debugging, show the raw guest bytes for this bb. */ if (0 || (vex_traceflags & VEX_TRACE_FE)) { - if (guest_extents->n_used > 1) { + if (vta->guest_extents->n_used > 1) { vex_printf("can't show code due to extents > 1\n"); } else { /* HACK */ - UChar* p = (UChar*)guest_bytes; - UInt guest_bytes_read = (UInt)guest_extents->len[0]; - vex_printf(". 0 %llx %u\n.", guest_bytes_addr, guest_bytes_read ); + UChar* p = (UChar*)vta->guest_bytes; + UInt guest_bytes_read = (UInt)vta->guest_extents->len[0]; + vex_printf(". 0 %llx %u\n.", vta->guest_bytes_addr, guest_bytes_read ); for (i = 0; i < guest_bytes_read; i++) vex_printf(" %02x", (Int)p[i] ); vex_printf("\n\n"); @@ -504,7 +472,7 @@ VexTranslateResult LibVEX_Translate ( /* Clean it up, hopefully a lot. */ irbb = do_iropt_BB ( irbb, specHelper, preciseMemExnsFn, - guest_bytes_addr ); + vta->guest_bytes_addr ); sanityCheckIRBB( irbb, "after initial iropt", True/*must be flat*/, guest_word_type ); @@ -519,16 +487,18 @@ VexTranslateResult LibVEX_Translate ( vexAllocSanityCheck(); /* Get the thing instrumented. */ - if (instrument1) - irbb = (*instrument1)(irbb, guest_layout, - guest_bytes_addr_noredir, guest_extents, - guest_word_type, host_word_type); + if (vta->instrument1) + irbb = vta->instrument1(irbb, guest_layout, + vta->guest_bytes_addr_noredir, + vta->guest_extents, + guest_word_type, host_word_type); vexAllocSanityCheck(); - if (instrument2) - irbb = (*instrument2)(irbb, guest_layout, - guest_bytes_addr_noredir, guest_extents, - guest_word_type, host_word_type); + if (vta->instrument2) + irbb = vta->instrument2(irbb, guest_layout, + vta->guest_bytes_addr_noredir, + vta->guest_extents, + guest_word_type, host_word_type); if (vex_traceflags & VEX_TRACE_INST) { vex_printf("\n------------------------" @@ -538,12 +508,12 @@ VexTranslateResult LibVEX_Translate ( vex_printf("\n"); } - if (instrument1 || instrument2) + if (vta->instrument1 || vta->instrument2) sanityCheckIRBB( irbb, "after instrumentation", True/*must be flat*/, guest_word_type ); /* Do a post-instrumentation cleanup pass. 
*/ - if (cleanup_after_instrumentation) { + if (vta->instrument1 || vta->instrument2) { do_deadcode_BB( irbb ); irbb = cprop_BB( irbb ); do_deadcode_BB( irbb ); @@ -576,7 +546,7 @@ VexTranslateResult LibVEX_Translate ( } /* HACK */ - if (0) { *host_bytes_used = 0; return VexTransOK; } + if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; } /* end HACK */ if (vex_traceflags & VEX_TRACE_VCODE) @@ -584,7 +554,7 @@ VexTranslateResult LibVEX_Translate ( " Instruction selection " "------------------------\n"); - vcode = iselBB ( irbb, archinfo_host ); + vcode = iselBB ( irbb, &vta->archinfo_host ); vexAllocSanityCheck(); @@ -622,7 +592,7 @@ VexTranslateResult LibVEX_Translate ( } /* HACK */ - if (0) { *host_bytes_used = 0; return VexTransOK; } + if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; } /* end HACK */ /* Assemble */ @@ -638,7 +608,7 @@ VexTranslateResult LibVEX_Translate ( ppInstr(rcode->arr[i], mode64); vex_printf("\n"); } - j = (*emit)( insn_bytes, 32, rcode->arr[i], mode64 ); + j = (*emit)( insn_bytes, 32, rcode->arr[i], mode64, vta->dispatch ); if (vex_traceflags & VEX_TRACE_ASM) { for (k = 0; k < j; k++) if (insn_bytes[k] < 16) @@ -647,18 +617,18 @@ VexTranslateResult LibVEX_Translate ( vex_printf("%x ", (UInt)insn_bytes[k]); vex_printf("\n\n"); } - if (out_used + j > host_bytes_size) { + if (out_used + j > vta->host_bytes_size) { vexSetAllocModeTEMP_and_clear(); vex_traceflags = 0; return VexTransOutputFull; } for (k = 0; k < j; k++) { - host_bytes[out_used] = insn_bytes[k]; + vta->host_bytes[out_used] = insn_bytes[k]; out_used++; } - vassert(out_used <= host_bytes_size); + vassert(out_used <= vta->host_bytes_size); } - *host_bytes_used = out_used; + *(vta->host_bytes_used) = out_used; vexAllocSanityCheck(); diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index e5f605ed57..31b3f7d8eb 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -300,45 +300,89 @@ typedef VexGuestExtents; +/* A structure to carry arguments for LibVEX_Translate. There are so + many of them, it seems better to have a structure. */ +typedef + struct { + /* IN: The instruction sets we are translating from and to. */ + VexArch arch_guest; + VexArchInfo archinfo_guest; + VexArch arch_host; + VexArchInfo archinfo_host; + + /* IN: the block to translate, and its guest address. */ + /* where are the actual bytes in the host's address space? */ + UChar* guest_bytes; + /* where do the bytes really come from in the guest's aspace? + This is the post-redirection guest address. */ + Addr64 guest_bytes_addr; + /* where do the bytes claim to come from in the guest address + space? (what guest entry point address do they correspond + to?) This is the pre-redirection guest address. */ + Addr64 guest_bytes_addr_noredir; + + /* Is it OK to chase into this guest address? May not be + NULL. */ + Bool (*chase_into_ok) ( Addr64 ); + + /* OUT: which bits of guest code actually got translated */ + VexGuestExtents* guest_extents; + + /* IN: a place to put the resulting code, and its size */ + UChar* host_bytes; + Int host_bytes_size; + /* OUT: how much of the output area is used. */ + Int* host_bytes_used; + + /* IN: optionally, two instrumentation functions. May be + NULL. */ + IRBB* (*instrument1) ( IRBB*, VexGuestLayout*, + Addr64, VexGuestExtents*, + IRType gWordTy, IRType hWordTy ); + IRBB* (*instrument2) ( IRBB*, VexGuestLayout*, + Addr64, VexGuestExtents*, + IRType gWordTy, IRType hWordTy ); + + /* IN: should this translation be self-checking? 
default: False */ + Bool do_self_check; + /* IN: debug: trace vex activity at various points */ + Int traceflags; + + /* IN: address of the dispatcher entry point. Describes the + place where generated code should jump to at the end of each + bb. + + At the end of each translation, the next guest address is + placed in the host's standard return register (x86: %eax, + amd64: %rax, ppc32: %r3, ppc64: %r3). Optionally, the guest + state pointer register (on host x86: %ebp; amd64: %rbp; + ppc32/64: r31) may be set to a VEX_TRC_ value to indicate any + special action required before the next block is run. + + Control is then passed back to the dispatcher (beyond Vex's + control; caller supplies this) in the following way: + + - On host archs which lack a link register (x86, amd64), by a + jump to the host address specified in 'dispatcher', which + must be non-NULL. + + - On host archs which have a link register (ppc32, ppc64), by + a branch to the link register (which is guaranteed to be + unchanged from whatever it was at entry to the + translation). 'dispatch' must be NULL. + + The aim is to get back and forth between translations and the + dispatcher without creating memory traffic to store return + addresses. + */ + void* dispatch; + } + VexTranslateArgs; + + extern -VexTranslateResult LibVEX_Translate ( - - /* The instruction sets we are translating from and to. */ - VexArch arch_guest, - VexArchInfo* archinfo_guest, - VexArch arch_host, - VexArchInfo* archinfo_host, - /* IN: the block to translate, and its guest address. */ - /* where are the actual bytes in the host's address space? */ - UChar* guest_bytes, - /* where do the bytes came from in the guest's aspace? */ - Addr64 guest_bytes_addr, - /* what guest entry point address do they correspond to? */ - Addr64 guest_bytes_addr_noredir, - /* Is it OK to chase into this guest address? */ - Bool (*chase_into_ok) ( Addr64 ), - /* OUT: which bits of guest code actually got translated */ - VexGuestExtents* guest_extents, - /* IN: a place to put the resulting code, and its size */ - UChar* host_bytes, - Int host_bytes_size, - /* OUT: how much of the output area is used. */ - Int* host_bytes_used, - /* IN: optionally, two instrumentation functions. */ - IRBB* (*instrument1) ( IRBB*, VexGuestLayout*, - Addr64, VexGuestExtents*, - IRType gWordTy, IRType hWordTy ), - IRBB* (*instrument2) ( IRBB*, VexGuestLayout*, - Addr64, VexGuestExtents*, - IRType gWordTy, IRType hWordTy ), - Bool cleanup_after_instrumentation, - /* IN: should this translation be self-checking? */ - Bool do_self_check, - /* IN: optionally, an access check function for guest code. */ - Bool (*byte_accessible) ( Addr64 ), - /* IN: debug: trace vex activity at various points */ - Int traceflags -); +VexTranslateResult LibVEX_Translate ( VexTranslateArgs* ); + /* A subtlety re interaction between self-checking translations and bb-chasing. The supplied chase_into_ok function should say NO @@ -369,7 +413,7 @@ extern void LibVEX_ShowStats ( void ); x86 ~~~ - Generated code should be entered using a CALL instruction. On + Generated code should be entered using a JMP instruction. On entry, %ebp should point to the guest state, and %esp should be a valid stack pointer. The generated code may change %eax, %ebx, %ecx, %edx, %esi, %edi, all the FP registers and control state, and @@ -380,9 +424,11 @@ extern void LibVEX_ShowStats ( void ); should still have those values (after masking off the lowest 6 bits of %mxcsr). If they don't, there is a bug in VEX-generated code. 
- Generated code returns to the scheduler using a RET instruction. + Generated code returns to the scheduler using a JMP instruction, to + the address specified in the .dispatch field of VexTranslateArgs. %eax (or %eax:%edx, if simulating a 64-bit target) will contain the - guest address of the next block to execute. + guest address of the next block to execute. %ebp may be changed + to a VEX_TRC_ value, otherwise it should be as it was at entry. CRITICAL ISSUES in x86 code generation. The only known critical issue is that the host FPU and SSE state is not properly saved @@ -392,6 +438,22 @@ extern void LibVEX_ShowStats ( void ); generated code, the generated code is likely to go wrong. This really should be fixed. + amd64 + ~~~~~ + Analogous to x86. + + ppc32 + ~~~~~ + On entry, guest state pointer is r31. .dispatch must be NULL. + Control is returned with a branch to the link register. Generated + code will not change lr. At return, r3 holds the next guest addr + (or r3:r4 ?). r31 may be may be changed to a VEX_TRC_ value, + otherwise it should be as it was at entry. + + ppc64 + ~~~~~ + Probably the same as ppc32. + ALL GUEST ARCHITECTURES ~~~~~~~~~~~~~~~~~~~~~~~ The architecture must contain two pseudo-registers, guest_TISTART diff --git a/VEX/test_main.c b/VEX/test_main.c index 73572609c7..c40a7f32b4 100644 --- a/VEX/test_main.c +++ b/VEX/test_main.c @@ -67,6 +67,7 @@ int main ( int argc, char** argv ) VexControl vcon; VexGuestExtents vge; VexArchInfo vai_x86, vai_amd64, vai_ppc32; + VexTranslateArgs vta; if (argc != 2) { fprintf(stderr, "usage: vex file.org\n"); @@ -133,44 +134,55 @@ int main ( int argc, char** argv ) vai_ppc32.subarch = VexSubArchPPC32_VFI; vai_ppc32.ppc32_cache_line_szB = 128; - for (i = 0; i < TEST_N_ITERS; i++) - tres - = LibVEX_Translate ( + /* ----- Set up args for LibVEX_Translate ----- */ #if 1 /* ppc32 -> ppc32 */ - VexArchPPC32, &vai_ppc32, - VexArchPPC32, &vai_ppc32, + vta.arch_guest = VexArchPPC32; + vta.archinfo_guest = vai_ppc32; + vta.arch_host = VexArchPPC32; + vta.archinfo_host = vai_ppc32; #endif #if 0 /* amd64 -> amd64 */ - VexArchAMD64, &vai_amd64, - VexArchAMD64, &vai_amd64, + vta.arch_guest = VexArchAMD64; + vta.archinfo_guest = vai_amd64; + vta.arch_host = VexArchAMD64; + vta.archinfo_host = vai_amd64; #endif #if 0 /* x86 -> x86 */ - VexArchX86, &vai_x86, - VexArchX86, &vai_x86, + vta.arch_guest = VexArchX86; + vta.archinfo_guest = vai_x86; + vta.arch_host = VexArchX86; + vta.archinfo_host = vai_x86; #endif - origbuf, (Addr64)orig_addr, (Addr64)orig_addr, - chase_into_not_ok, - &vge, - transbuf, N_TRANSBUF, &trans_used, + vta.guest_bytes = origbuf; + vta.guest_bytes_addr = (Addr64)orig_addr; + vta.guest_bytes_addr_noredir = (Addr64)orig_addr; + vta.chase_into_ok = chase_into_not_ok; + vta.guest_extents = &vge; + vta.host_bytes = transbuf; + vta.host_bytes_size = N_TRANSBUF; + vta.host_bytes_used = &trans_used; #if 1 /* no instrumentation */ - NULL, /* instrument1 */ - NULL, /* instrument2 */ - False, /* cleanup after instrument */ + vta.instrument1 = NULL; + vta.instrument2 = NULL; #endif #if 0 /* addrcheck */ - ac_instrument, /* instrument1 */ - NULL, /* instrument2 */ - False, /* cleanup after instrument */ + vta.instrument1 = ac_instrument; + vta.instrument2 = NULL; #endif #if 0 /* memcheck */ - mc_instrument, /* instrument1 */ - NULL, /* instrument2 */ - True, /* cleanup after instrument */ + vta.instrument1 = mc_instrument; + vta.instrument2 = NULL; +#endif + vta.do_self_check = False; + vta.traceflags = TEST_FLAGS; +#if 1 /* x86, amd64 
hosts */ + vta.dispatch = (void*)0x12345678; +#else /* ppc32, ppc64 hosts */ + vta.dispatch = NULL; #endif - False, /* do_self_check ? */ - NULL, /* access checker */ - TEST_FLAGS - ); + + for (i = 0; i < TEST_N_ITERS; i++) + tres = LibVEX_Translate ( &vta ); if (tres != VexTransOK) printf("\ntres = %d\n", (Int)tres);
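
For orientation, here is a minimal caller-side sketch of the new
struct-based interface, in the spirit of the test_main.c changes above.
It assumes an x86 host and a hypothetical dispatcher entry point
dispatch_asm supplied by the embedding program; the field names come
from the VexTranslateArgs declaration added to libvex.h, and LibVEX_Init
setup and error handling are omitted.

   #include "libvex.h"

   /* Hypothetical dispatcher entry point, provided (in assembly) by the
      embedder.  Under the new convention, x86/amd64 translations finish
      with "movl $dispatch_asm,%edx ; jmp *%edx" instead of "ret", with
      the next guest address already in %eax. */
   extern void dispatch_asm ( void );

   /* Never chase into other guest addresses in this sketch. */
   static Bool chase_into_not_ok ( Addr64 addr ) { return False; }

   static VexTranslateResult
   translate_one ( UChar* guest_code, Addr64 guest_addr,
                   UChar* out_buf, Int out_size, Int* out_used,
                   VexGuestExtents* vge, VexArchInfo* vai_x86 )
   {
      VexTranslateArgs vta;

      /* IN: instruction sets we translate from and to. */
      vta.arch_guest               = VexArchX86;
      vta.archinfo_guest           = *vai_x86;
      vta.arch_host                = VexArchX86;
      vta.archinfo_host            = *vai_x86;

      /* IN: the guest block -- its location in the host's address
         space, plus its post-redirection and pre-redirection guest
         addresses. */
      vta.guest_bytes              = guest_code;
      vta.guest_bytes_addr         = guest_addr;
      vta.guest_bytes_addr_noredir = guest_addr;
      vta.chase_into_ok            = chase_into_not_ok;
      vta.guest_extents            = vge;        /* OUT */

      /* IN: where the generated code goes; OUT: how much was used. */
      vta.host_bytes               = out_buf;
      vta.host_bytes_size          = out_size;
      vta.host_bytes_used          = out_used;

      /* No instrumentation, no self-checking, no tracing. */
      vta.instrument1              = NULL;
      vta.instrument2              = NULL;
      vta.do_self_check            = False;
      vta.traceflags               = 0;

      /* x86 host: must be non-NULL (jump-to-dispatcher scheme).
         On ppc32/ppc64 hosts this would instead be NULL
         (return to the dispatcher via the link register). */
      vta.dispatch                 = (void*)&dispatch_asm;

      return LibVEX_Translate ( &vta );
   }

The only genuinely new input relative to the old argument list is
dispatch; the remaining fields are the former positional arguments by
name, with cleanup_after_instrumentation (now implied by a non-NULL
instrument1/instrument2) and byte_accessible dropped.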