From: Julian Seward
Date: Fri, 20 Apr 2012 00:13:28 +0000 (+0000)
Subject: Add translation chaining support for ppc32 (tested) and to
X-Git-Tag: svn/VALGRIND_3_8_1^2~182^2~6
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3695f9b870ad04961750c1f4caee55c24d0ae8f0;p=thirdparty%2Fvalgrind.git

Add translation chaining support for ppc32 (tested) and to
a large extent for ppc64 (incomplete, untested) (VEX side)

git-svn-id: svn://svn.valgrind.org/vex/branches/TCHAIN@2289
---

diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h index 7c8dc8e611..b60766d6d2 100644 --- a/VEX/priv/guest_ppc_defs.h +++ b/VEX/priv/guest_ppc_defs.h @@ -48,7 +48,6 @@ bb_to_IR.h. */ extern DisResult disInstr_PPC ( IRSB* irbb, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c index 26ec86f419..7944f8201e 100644 --- a/VEX/priv/guest_ppc_helpers.c +++ b/VEX/priv/guest_ppc_helpers.c @@ -352,6 +352,11 @@ void LibVEX_GuestPPC64_put_XER ( UInt xer_native, void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) { Int i; + vex_state->host_EvC_FAILADDR = 0; + vex_state->host_EvC_COUNTER = 0; + vex_state->pad3 = 0; + vex_state->pad4 = 0; + vex_state->guest_GPR0 = 0; vex_state->guest_GPR1 = 0; vex_state->guest_GPR2 = 0; @@ -385,7 +390,6 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) vex_state->guest_GPR30 = 0; vex_state->guest_GPR31 = 0; - /* Initialise the vector state. */ # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0; @@ -484,6 +488,8 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) vex_state->guest_FPROUND = PPCrm_NEAREST; vex_state->guest_DFPROUND = PPCrm_NEAREST; + vex_state->pad1 = 0; + vex_state->pad2 = 0; vex_state->guest_VRSAVE = 0; @@ -503,6 +509,8 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) vex_state->guest_IP_AT_SYSCALL = 0; vex_state->guest_SPRG3_RO = 0; + + vex_state->padding = 0; } @@ -510,6 +518,9 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state ) { Int i; + vex_state->host_EvC_FAILADDR = 0; + vex_state->host_EvC_COUNTER = 0; + vex_state->pad0 = 0; vex_state->guest_GPR0 = 0; vex_state->guest_GPR1 = 0; vex_state->guest_GPR2 = 0; @@ -641,6 +652,8 @@ void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state ) vex_state->guest_FPROUND = PPCrm_NEAREST; vex_state->guest_DFPROUND = PPCrm_NEAREST; + vex_state->pad1 = 0; + vex_state->pad2 = 0; vex_state->guest_VRSAVE = 0; diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index d084e2e1f7..dd3afbb8ee 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -1500,23 +1500,23 @@ static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align ) if (mode64) { vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64); stmt( - IRStmt_Exit3( + IRStmt_Exit( binop(Iop_CmpNE64, binop(Iop_And64, mkexpr(addr), mkU64(align-1)), mkU64(0)), Ijk_SigBUS, - IRConst_U64( guest_CIA_curr_instr ) + IRConst_U64( guest_CIA_curr_instr ), OFFB_CIA ) ); } else { vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32); stmt( - IRStmt_Exit3( + IRStmt_Exit( binop(Iop_CmpNE32, binop(Iop_And32, mkexpr(addr), mkU32(align-1)), mkU32(0)), Ijk_SigBUS, - IRConst_U32( guest_CIA_curr_instr ) + IRConst_U32( guest_CIA_curr_instr ), OFFB_CIA ) ); } @@ -2690,10 +2690,10 @@ static void putGST_masked ( PPC_GST
reg, IRExpr* src, ULong mask ) so that Valgrind's dispatcher sees the warning. */ putGST( PPC_GST_EMWARN, mkU32(ew) ); stmt( - IRStmt_Exit3( + IRStmt_Exit( binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)), Ijk_EmWarn, - mkSzConst( ty, nextInsnAddr()) )); + mkSzConst( ty, nextInsnAddr()), OFFB_CIA )); } /* Ignore all other writes */ @@ -4975,9 +4975,9 @@ void generate_lsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 for (i = 0; i < maxBytes; i++) { /* if (nBytes < (i+1)) goto NIA; */ - stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), Ijk_Boring, - mkSzConst( ty, nextInsnAddr()) )); + mkSzConst( ty, nextInsnAddr()), OFFB_CIA )); /* when crossing into a new dest register, set it to zero. */ if ((i % 4) == 0) { rD++; if (rD == 32) rD = 0; @@ -5026,9 +5026,9 @@ void generate_stsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 for (i = 0; i < maxBytes; i++) { /* if (nBytes < (i+1)) goto NIA; */ - stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), Ijk_Boring, - mkSzConst( ty, nextInsnAddr() ) )); + mkSzConst( ty, nextInsnAddr() ), OFFB_CIA )); /* check for crossing into a new src register. */ if ((i % 4) == 0) { rS++; if (rS == 32) rS = 0; @@ -5250,6 +5250,7 @@ static Bool dis_branch ( UInt theInstr, /* The default what-next. Individual cases can override it. */ dres->whatNext = Dis_StopHere; + vassert(dres->jk_StopHere == Ijk_INVALID); switch (opc1) { case 0x12: // b (Branch, PPC32 p360) @@ -5282,8 +5283,8 @@ static Bool dis_branch ( UInt theInstr, dres->whatNext = Dis_ResteerU; dres->continueAt = tgt; } else { - irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring; - irsb->next = mkSzImm(ty, tgt); + dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring; + putGST( PPC_GST_CIA, mkSzImm(ty, tgt) ); } break; @@ -5301,7 +5302,7 @@ static Bool dis_branch ( UInt theInstr, cond_ok is either zero or nonzero, since that's the cheapest way to compute it. Anding them together gives a value which is either zero or non zero and so that's what we must test - for in the IRStmt_Exit3. */ + for in the IRStmt_Exit. */ assign( ctr_ok, branch_ctr_ok( BO ) ); assign( cond_ok, branch_cond_ok( BO, BI ) ); assign( do_branch, @@ -5316,13 +5317,13 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit3( + stmt( IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)), flag_LK ? Ijk_Call : Ijk_Boring, - mkSzConst(ty, tgt) ) ); - - irsb->jumpkind = Ijk_Boring; - irsb->next = e_nia; + mkSzConst(ty, tgt), OFFB_CIA ) ); + + dres->jk_StopHere = Ijk_Boring; + putGST( PPC_GST_CIA, e_nia ); break; case 0x13: @@ -5351,18 +5352,18 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit3( + stmt( IRStmt_Exit( binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)), Ijk_Boring, - c_nia )); + c_nia, OFFB_CIA )); if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) { make_redzone_AbiHint( vbi, lr_old, "b-ctr-l (indirect call)" ); } - irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring; - irsb->next = mkexpr(lr_old); + dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring; + putGST( PPC_GST_CIA, mkexpr(lr_old) ); break; case 0x010: { // bclr (Branch Cond.
to Link Register, PPC32 p365) @@ -5391,10 +5392,10 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit3( + stmt( IRStmt_Exit( binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)), Ijk_Boring, - c_nia )); + c_nia, OFFB_CIA )); if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) { make_redzone_AbiHint( vbi, lr_old, @@ -5404,8 +5405,8 @@ static Bool dis_branch ( UInt theInstr, /* blrl is pretty strange; it's like a return that sets the return address of its caller to the insn following this one. Mark it as a return. */ - irsb->jumpkind = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */ - irsb->next = mkexpr(lr_old); + dres->jk_StopHere = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */ + putGST( PPC_GST_CIA, mkexpr(lr_old) ); break; } default: @@ -5558,10 +5559,11 @@ static Bool do_trap ( UChar TO, if ((TO & b11100) == b11100 || (TO & b00111) == b00111) { /* Unconditional trap. Just do the exit without testing the arguments. */ - stmt( IRStmt_Exit3( + stmt( IRStmt_Exit( binop(opCMPEQ, const0, const0), Ijk_SigTRAP, - mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia) + mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia), + OFFB_CIA )); return True; /* unconditional trap */ } @@ -5601,10 +5603,11 @@ static Bool do_trap ( UChar TO, tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4); cond = binop(opOR, tmp, cond); } - stmt( IRStmt_Exit3( + stmt( IRStmt_Exit( binop(opCMPNE, cond, const0), Ijk_SigTRAP, - mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia) + mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia), + OFFB_CIA )); return False; /* not an unconditional trap */ } @@ -5652,9 +5655,9 @@ static Bool dis_trapi ( UInt theInstr, if (uncond) { /* If the trap shows signs of being unconditional, don't continue decoding past it. */ - irsb->next = mkSzImm( ty, nextInsnAddr() ); - irsb->jumpkind = Ijk_Boring; - dres->whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() )); + dres->jk_StopHere = Ijk_Boring; + dres->whatNext = Dis_StopHere; } return True; @@ -5706,9 +5709,9 @@ static Bool dis_trap ( UInt theInstr, if (uncond) { /* If the trap shows signs of being unconditional, don't continue decoding past it. */ - irsb->next = mkSzImm( ty, nextInsnAddr() ); - irsb->jumpkind = Ijk_Boring; - dres->whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() )); + dres->jk_StopHere = Ijk_Boring; + dres->whatNext = Dis_StopHere; } return True; @@ -5739,12 +5742,12 @@ static Bool dis_syslink ( UInt theInstr, /* It's important that all ArchRegs carry their up-to-date value at this point. So we declare an end-of-block here, which forces any TempRegs caching ArchRegs to be flushed. */ - irsb->next = abiinfo->guest_ppc_sc_continues_at_LR - ? getGST( PPC_GST_LR ) - : mkSzImm( ty, nextInsnAddr() ); - irsb->jumpkind = Ijk_Sys_syscall; + putGST( PPC_GST_CIA, abiinfo->guest_ppc_sc_continues_at_LR + ? getGST( PPC_GST_LR ) + : mkSzImm( ty, nextInsnAddr() )); - dres->whatNext = Dis_StopHere; + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Sys_syscall; return True; } @@ -6722,9 +6725,9 @@ static Bool dis_cache_manage ( UInt theInstr, /* be paranoid ... 
*/ stmt( IRStmt_MBE(Imbe_Fence) ); - irsb->jumpkind = Ijk_TInval; - irsb->next = mkSzImm(ty, nextInsnAddr()); - dres->whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr())); + dres->jk_StopHere = Ijk_TInval; + dres->whatNext = Dis_StopHere; break; } @@ -13572,7 +13575,6 @@ static UInt get_VSX60_opc2(UInt opc2_full) static DisResult disInstr_PPC_WRK ( - Bool put_IP, Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -13613,9 +13615,10 @@ DisResult disInstr_PPC_WRK ( delta = (Long)mkSzAddr(ty, (ULong)delta64); /* Set result defaults. */ - dres.whatNext = Dis_Continue; - dres.len = 0; - dres.continueAt = 0; + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; /* At least this is simple on PPC32: insns are all 4 bytes long, and 4-aligned. So just fish the whole thing out of memory right now @@ -13626,10 +13629,6 @@ DisResult disInstr_PPC_WRK ( DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr); - /* We may be asked to update the guest CIA before going further. */ - if (put_IP) - putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) ); - /* Spot "Special" instructions (see comment at top of file). */ { UChar* code = (UChar*)(guest_code + delta); @@ -13658,9 +13657,9 @@ DisResult disInstr_PPC_WRK ( /* %R3 = client_request ( %R4 ) */ DIP("r3 = client_request ( %%r4 )\n"); delta += 20; - irsb->next = mkSzImm( ty, guest_CIA_bbstart + delta ); - irsb->jumpkind = Ijk_ClientReq; - dres.whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, mkSzImm( ty, guest_CIA_bbstart + delta )); + dres.jk_StopHere = Ijk_ClientReq; + dres.whatNext = Dis_StopHere; goto decode_success; } else @@ -13678,9 +13677,9 @@ DisResult disInstr_PPC_WRK ( DIP("branch-and-link-to-noredir r11\n"); delta += 20; putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) ); - irsb->next = getIReg(11); - irsb->jumpkind = Ijk_NoRedir; - dres.whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, getIReg(11)); + dres.jk_StopHere = Ijk_NoRedir; + dres.whatNext = Dis_StopHere; goto decode_success; } else @@ -14252,9 +14251,9 @@ DisResult disInstr_PPC_WRK ( Bool ok = dis_int_ldst_str( theInstr, &stopHere ); if (!ok) goto decode_failure; if (stopHere) { - irsb->next = mkSzImm(ty, nextInsnAddr()); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()) ); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; } goto decode_success; } @@ -14601,16 +14600,28 @@ DisResult disInstr_PPC_WRK ( insn, but nevertheless be paranoid and update it again right now. */ putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) ); - irsb->next = mkSzImm(ty, guest_CIA_curr_instr); - irsb->jumpkind = Ijk_NoDecode; - dres.whatNext = Dis_StopHere; - dres.len = 0; + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_NoDecode; + dres.len = 0; return dres; } /* switch (opc) for the main (primary) opcode switch. */ decode_success: /* All decode successes end up here. */ + switch (dres.whatNext) { + case Dis_Continue: + putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr + 4)); + break; + case Dis_ResteerU: + case Dis_ResteerC: + putGST( PPC_GST_CIA, mkSzImm(ty, dres.continueAt)); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } DIP("\n"); if (dres.len == 0) { @@ -14633,7 +14644,6 @@ DisResult disInstr_PPC_WRK ( is located in host memory at &guest_code[delta]. 
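   An illustrative recap of the protocol this patch establishes for every
   block-ending case above, using the same names as the hunks (not itself
   part of the patch): the front end writes the next guest address into the
   CIA slot of the guest state and reports the jump kind through the
   DisResult, instead of assigning to irsb->next and irsb->jumpkind:

       old scheme:                           new scheme:
         irsb->next     = mkSzImm(ty,tgt);     putGST( PPC_GST_CIA, mkSzImm(ty,tgt) );
         irsb->jumpkind = Ijk_Boring;          dres->jk_StopHere = Ijk_Boring;
         dres->whatNext = Dis_StopHere;        dres->whatNext    = Dis_StopHere;

   The decode_success epilogue keeps CIA current for the Dis_Continue and
   Dis_Resteer cases, so the guest IP is always up to date when the block ends.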
*/ DisResult disInstr_PPC ( IRSB* irsb_IN, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -14677,8 +14687,7 @@ DisResult disInstr_PPC ( IRSB* irsb_IN, guest_CIA_curr_instr = mkSzAddr(ty, guest_IP); guest_CIA_bbstart = mkSzAddr(ty, guest_IP - delta); - dres = disInstr_PPC_WRK ( put_IP, - resteerOkFn, resteerCisOk, callback_opaque, + dres = disInstr_PPC_WRK ( resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo ); return dres; diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index 556538f8df..dc1860bc6e 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -4488,7 +4488,7 @@ Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, } -/* How big is an event check? See case for Ain_EvCheck in +/* How big is an event check? See case for ARMin_EvCheck in emit_ARMInstr just above. That crosschecks what this returns, so we can tell if we're inconsistent. */ Int evCheckSzB_ARM ( void ) @@ -4569,7 +4569,7 @@ VexInvalRange unchainXDirect_ARM ( void* place_to_unchain, /* Patch the counter address into a profile inc point, as previously - created by the Xin_ProfInc case for emit_ARMInstr. */ + created by the ARMin_ProfInc case for emit_ARMInstr. */ VexInvalRange patchProfInc_ARM ( void* place_to_patch, ULong* location_of_counter ) { diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 9229e11cd8..c56f8a1e3e 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -838,13 +838,33 @@ PPCInstr* PPCInstr_Call ( PPCCondCode cond, vassert(0 == (argiregs & ~mask)); return i; } -PPCInstr* PPCInstr_Goto ( IRJumpKind jk, - PPCCondCode cond, PPCRI* dst ) { - PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); - i->tag = Pin_Goto; - i->Pin.Goto.cond = cond; - i->Pin.Goto.dst = dst; - i->Pin.Goto.jk = jk; +PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA, + PPCCondCode cond, Bool toFastEP ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_XDirect; + i->Pin.XDirect.dstGA = dstGA; + i->Pin.XDirect.amCIA = amCIA; + i->Pin.XDirect.cond = cond; + i->Pin.XDirect.toFastEP = toFastEP; + return i; +} +PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA, + PPCCondCode cond ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_XIndir; + i->Pin.XIndir.dstGA = dstGA; + i->Pin.XIndir.amCIA = amCIA; + i->Pin.XIndir.cond = cond; + return i; +} +PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA, + PPCCondCode cond, IRJumpKind jk ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_XAssisted; + i->Pin.XAssisted.dstGA = dstGA; + i->Pin.XAssisted.amCIA = amCIA; + i->Pin.XAssisted.cond = cond; + i->Pin.XAssisted.jk = jk; return i; } PPCInstr* PPCInstr_CMov ( PPCCondCode cond, @@ -1008,6 +1028,21 @@ PPCInstr* PPCInstr_Dfp128Binary(PPCFpOp op, HReg dst_hi, HReg dst_lo, return i; } +PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter, + PPCAMode* amFailAddr ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_EvCheck; + i->Pin.EvCheck.amCounter = amCounter; + i->Pin.EvCheck.amFailAddr = amFailAddr; + return i; +} +PPCInstr* PPCInstr_ProfInc ( void ) { + PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); + i->tag = Pin_ProfInc; + return i; +} + + /* Valid combo | fromI | int32 | syned | flt64 | -------------------------------------------- @@ -1371,26 +1406,53 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 ) vex_printf("] }"); break; } - case Pin_Goto: - vex_printf("goto: "); - if (i->Pin.Goto.cond.test != Pct_ALWAYS) { - vex_printf("if (%s) ", 
showPPCCondCode(i->Pin.Goto.cond)); - } - vex_printf("{ "); - if (i->Pin.Goto.jk != Ijk_Boring - && i->Pin.Goto.jk != Ijk_Call - && i->Pin.Goto.jk != Ijk_Ret) { - vex_printf("li %%r31,$"); - ppIRJumpKind(i->Pin.Goto.jk); - vex_printf(" ; "); + case Pin_XDirect: + vex_printf("(xDirect) "); + vex_printf("if (%s) { ", + showPPCCondCode(i->Pin.XDirect.cond)); + if (mode64) { + vex_printf("imm64 r30,0x%llx; ", i->Pin.XDirect.dstGA); + vex_printf("std r30,"); + } else { + vex_printf("imm32 r30,0x%llx; ", i->Pin.XDirect.dstGA); + vex_printf("stw r30,"); } - if (i->Pin.Goto.dst->tag == Pri_Imm) { - ppLoadImm(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Imm, - mode64); + ppPPCAMode(i->Pin.XDirect.amCIA); + vex_printf("; "); + if (mode64) { + vex_printf("imm64-fixed5 r30,$disp_cp_chain_me_to_%sEP; ", + i->Pin.XDirect.toFastEP ? "fast" : "slow"); } else { - ppMovReg(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Reg); + vex_printf("imm32-fixed2 r30,$disp_cp_chain_me_to_%sEP; ", + i->Pin.XDirect.toFastEP ? "fast" : "slow"); } - vex_printf(" ; blr }"); + vex_printf("mtctr r30; bctrl }"); + return; + case Pin_XIndir: + vex_printf("(xIndir) "); + vex_printf("if (%s) { ", + showPPCCondCode(i->Pin.XIndir.cond)); + vex_printf("%s ", mode64 ? "std" : "stw"); + ppHRegPPC(i->Pin.XIndir.dstGA); + vex_printf(","); + ppPPCAMode(i->Pin.XIndir.amCIA); + vex_printf("; "); + vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32"); + vex_printf("mtctr r30; bctr }"); + return; + case Pin_XAssisted: + vex_printf("(xAssisted) "); + vex_printf("if (%s) { ", + showPPCCondCode(i->Pin.XAssisted.cond)); + vex_printf("%s ", mode64 ? "std" : "stw"); + ppHRegPPC(i->Pin.XAssisted.dstGA); + vex_printf(","); + ppPPCAMode(i->Pin.XAssisted.amCIA); + vex_printf("; "); + vex_printf("li r31,$IRJumpKind_to_TRCVAL(%d); ", + (Int)i->Pin.XAssisted.jk); + vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32"); + vex_printf("mtctr r30; bctr }"); return; case Pin_CMov: vex_printf("cmov (%s) ", showPPCCondCode(i->Pin.CMov.cond)); @@ -1773,6 +1835,30 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 ) ppHRegPPC(i->Pin.Dfp128Binary.srcR_hi); return; + case Pin_EvCheck: + /* Note that the counter dec is 32 bit even in 64-bit mode. */ + vex_printf("(evCheck) "); + vex_printf("lwz r30,"); + ppPPCAMode(i->Pin.EvCheck.amCounter); + vex_printf("; addic. r30,r30,-1; "); + vex_printf("stw r30,"); + ppPPCAMode(i->Pin.EvCheck.amCounter); + vex_printf("; bge nofail; lwz r30,"); + ppPPCAMode(i->Pin.EvCheck.amFailAddr); + vex_printf("; mtctr r30; bctr; nofail:"); + return; + + case Pin_ProfInc: + if (mode64) { + vex_printf("(profInc) imm64 r30,$NotKnownYet;"); + vex_printf("ld r29,(r30); addi r29,r29,1; std r29,(r30)"); + } else { + vex_printf("(profInc) imm32 r30,$NotKnownYet;"); + vex_printf("lwz r29,4(r30); addic. r29,r29,1; stw r29,4(r30)"); + vex_printf("lwz r29,0(r30); addze r29,r29; stw r29,0(r30)"); + } + break; + default: vex_printf("\nppPPCInstr: No such tag(%d)\n", (Int)i->tag); vpanic("ppPPCInstr"); @@ -1871,17 +1957,21 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 ) and no other, as a destination temporary. */ return; } - case Pin_Goto: - addRegUsage_PPCRI(u, i->Pin.Goto.dst); - /* GPR3 holds destination address from Pin_Goto */ - addHRegUse(u, HRmWrite, hregPPC_GPR3(mode64)); - if (i->Pin.Goto.jk != Ijk_Boring - && i->Pin.Goto.jk != Ijk_Call - && i->Pin.Goto.jk != Ijk_Ret) - /* note, this is irrelevant since the guest state pointer - register is not actually available to the allocator. - But still .. 
*/ - addHRegUse(u, HRmWrite, GuestStatePtr(mode64)); + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is empty, + hence only (1) is relevant here. */ + case Pin_XDirect: + addRegUsage_PPCAMode(u, i->Pin.XDirect.amCIA); + return; + case Pin_XIndir: + addHRegUse(u, HRmRead, i->Pin.XIndir.dstGA); + addRegUsage_PPCAMode(u, i->Pin.XIndir.amCIA); + return; + case Pin_XAssisted: + addHRegUse(u, HRmRead, i->Pin.XAssisted.dstGA); + addRegUsage_PPCAMode(u, i->Pin.XAssisted.amCIA); return; case Pin_CMov: addRegUsage_PPCRI(u, i->Pin.CMov.src); @@ -2055,7 +2145,18 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Pin.Dfp128Binary.srcR_hi); addHRegUse(u, HRmRead, i->Pin.Dfp128Binary.srcR_lo); return; - + case Pin_EvCheck: + /* We expect both amodes only to mention the GSP (r31), so this + is in fact pointless, since GSP isn't allocatable, but + anyway.. */ + addRegUsage_PPCAMode(u, i->Pin.EvCheck.amCounter); + addRegUsage_PPCAMode(u, i->Pin.EvCheck.amFailAddr); + addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64)); /* also unavail to RA */ + return; + case Pin_ProfInc: + addHRegUse(u, HRmWrite, hregPPC_GPR29(mode64)); + addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64)); + return; default: ppPPCInstr(i, mode64); vpanic("getRegUsage_PPCInstr"); @@ -2109,8 +2210,16 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) return; case Pin_Call: return; - case Pin_Goto: - mapRegs_PPCRI(m, i->Pin.Goto.dst); + case Pin_XDirect: + mapRegs_PPCAMode(m, i->Pin.XDirect.amCIA); + return; + case Pin_XIndir: + mapReg(m, &i->Pin.XIndir.dstGA); + mapRegs_PPCAMode(m, i->Pin.XIndir.amCIA); + return; + case Pin_XAssisted: + mapReg(m, &i->Pin.XAssisted.dstGA); + mapRegs_PPCAMode(m, i->Pin.XAssisted.amCIA); return; case Pin_CMov: mapRegs_PPCRI(m, i->Pin.CMov.src); @@ -2266,7 +2375,16 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) mapReg(m, &i->Pin.Dfp128Binary.srcR_hi); mapReg(m, &i->Pin.Dfp128Binary.srcR_lo); return; - + case Pin_EvCheck: + /* We expect both amodes only to mention the GSP (r31), so this + is in fact pointless, since GSP isn't allocatable, but + anyway.. */ + mapRegs_PPCAMode(m, i->Pin.EvCheck.amCounter); + mapRegs_PPCAMode(m, i->Pin.EvCheck.amFailAddr); + return; + case Pin_ProfInc: + /* hardwires r29 and r30 -- nothing to modify. */ + return; default: ppPPCInstr(i, mode64); vpanic("mapRegs_PPCInstr"); @@ -2400,7 +2518,7 @@ static UInt vregNo ( HReg v ) return n; } -/* Emit 32bit instruction big-endianly */ +/* Emit an instruction big-endianly */ static UChar* emit32 ( UChar* p, UInt w32 ) { *p++ = toUChar((w32 >> 24) & 0x000000FF); @@ -2410,6 +2528,17 @@ static UChar* emit32 ( UChar* p, UInt w32 ) return p; } +/* Fetch an instruction big-endianly */ +static UInt fetch32 ( UChar* p ) +{ + UInt w32 = 0; + w32 |= ((0xFF & (UInt)p[0]) << 24); + w32 |= ((0xFF & (UInt)p[1]) << 16); + w32 |= ((0xFF & (UInt)p[2]) << 8); + w32 |= ((0xFF & (UInt)p[3]) << 0); + return w32; +} + /* The following mkForm[...] functions refer to ppc instruction forms as per PPC32 p576 */ @@ -2693,6 +2822,210 @@ static UChar* mkLoadImm ( UChar* p, UInt r_dst, ULong imm, Bool mode64 ) return p; } +/* A simplified version of mkLoadImm that always generates 2 or 5 + instructions (32 or 64 bits respectively) even if it could generate + fewer. 
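   For example, loading the small constant 0x44 into r30 still emits the
   full pair

       lis r30,0x0000          // r30 = 0
       ori r30,r30,0x0044      // r30 = 0x44

   where the variable-length mkLoadImm could get away with a single insn.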
This is needed for generating fixed sized patchable + sequences. */ +static UChar* mkLoadImm_EXACTLY2or5 ( UChar* p, + UInt r_dst, ULong imm, Bool mode64 ) +{ + vassert(r_dst < 0x20); + + if (!mode64) { + /* In 32-bit mode, make sure the top 32 bits of imm are a sign + extension of the bottom 32 bits. (Probably unnecessary.) */ + UInt u32 = (UInt)imm; + Int s32 = (Int)u32; + Long s64 = (Long)s32; + imm = (ULong)s64; + } + + if (!mode64) { + // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16) + p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF); + // ori r_dst, r_dst, (imm & 0xFFFF) + p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF); + + } else { + // full 64bit immediate load: 5 (five!) insns. + + // load high word + // lis r_dst, (imm>>48) & 0xFFFF + p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF); + + // ori r_dst, r_dst, (imm>>32) & 0xFFFF + p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF); + + // shift r_dst low word to high word => rldicr + p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1); + + // load low word + // oris r_dst, r_dst, (imm>>16) & 0xFFFF + p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF); + + // ori r_dst, r_dst, (imm) & 0xFFFF + p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF); + } + return p; +} + +/* Checks whether the sequence of bytes at p was indeed created + by mkLoadImm_EXACTLY2or5 with the given parameters. */ +static Bool isLoadImm_EXACTLY2or5 ( UChar* p_to_check, + UInt r_dst, ULong imm, Bool mode64 ) +{ + vassert(r_dst < 0x20); + + if (!mode64) { + /* In 32-bit mode, make sure the top 32 bits of imm are a sign + extension of the bottom 32 bits. (Probably unnecessary.) */ + UInt u32 = (UInt)imm; + Int s32 = (Int)u32; + Long s64 = (Long)s32; + imm = (ULong)s64; + } + + if (!mode64) { + UInt expect[2] = { 0, 0 }; + UChar* p = (UChar*)&expect[0]; + // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16) + p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF); + // ori r_dst, r_dst, (imm & 0xFFFF) + p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF); + vassert(p == (UChar*)&expect[2]); + + return fetch32(p_to_check + 0) == expect[0] + && fetch32(p_to_check + 4) == expect[1]; + + } else { + UInt expect[5] = { 0, 0, 0, 0, 0 }; + UChar* p = (UChar*)&expect[0]; + // full 64bit immediate load: 5 (five!) insns. + + // load high word + // lis r_dst, (imm>>48) & 0xFFFF + p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF); + + // ori r_dst, r_dst, (imm>>32) & 0xFFFF + p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF); + + // shift r_dst low word to high word => rldicr + p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1); + + // load low word + // oris r_dst, r_dst, (imm>>16) & 0xFFFF + p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF); + + // ori r_dst, r_dst, (imm) & 0xFFFF + p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF); + + vassert(p == (UChar*)&expect[5]); + + return fetch32(p_to_check + 0) == expect[0] + && fetch32(p_to_check + 4) == expect[1] + && fetch32(p_to_check + 8) == expect[2] + && fetch32(p_to_check + 12) == expect[3] + && fetch32(p_to_check + 16) == expect[4]; + } +} + + +/* Generate a machine-word sized load or store. Simplified version of + the Pin_Load and Pin_Store cases below. */ +static UChar* do_load_or_store_machine_word ( + UChar* p, Bool isLoad, + UInt reg, PPCAMode* am, Bool mode64 ) +{ + if (isLoad) { + UInt opc1, sz = mode64 ? 
8 : 4; + switch (am->tag) { + case Pam_IR: + if (mode64) { + vassert(0 == (am->Pam.IR.index & 3)); + } + switch (sz) { + case 4: opc1 = 32; vassert(!mode64); break; + case 8: opc1 = 58; vassert(mode64); break; + default: vassert(0); + } + p = doAMode_IR(p, opc1, reg, am, mode64); + break; + case Pam_RR: + /* we could handle this case, but we don't expect to ever + need to. */ + vassert(0); + default: + vassert(0); + } + } else /*store*/ { + UInt opc1, sz = mode64 ? 8 : 4; + switch (am->tag) { + case Pam_IR: + if (mode64) { + vassert(0 == (am->Pam.IR.index & 3)); + } + switch (sz) { + case 4: opc1 = 36; vassert(!mode64); break; + case 8: opc1 = 62; vassert(mode64); break; + default: vassert(0); + } + p = doAMode_IR(p, opc1, reg, am, mode64); + break; + case Pam_RR: + /* we could handle this case, but we don't expect to ever + need to. */ + vassert(0); + default: + vassert(0); + } + } + return p; +} + +/* Generate a 32-bit sized load or store. Simplified version of + do_load_or_store_machine_word above. */ +static UChar* do_load_or_store_word32 ( + UChar* p, Bool isLoad, + UInt reg, PPCAMode* am, Bool mode64 ) +{ + if (isLoad) { + UInt opc1; + switch (am->tag) { + case Pam_IR: + if (mode64) { + vassert(0 == (am->Pam.IR.index & 3)); + } + opc1 = 32; + p = doAMode_IR(p, opc1, reg, am, mode64); + break; + case Pam_RR: + /* we could handle this case, but we don't expect to ever + need to. */ + vassert(0); + default: + vassert(0); + } + } else /*store*/ { + UInt opc1; + switch (am->tag) { + case Pam_IR: + if (mode64) { + vassert(0 == (am->Pam.IR.index & 3)); + } + opc1 = 36; + p = doAMode_IR(p, opc1, reg, am, mode64); + break; + case Pam_RR: + /* we could handle this case, but we don't expect to ever + need to. */ + vassert(0); + default: + vassert(0); + } + } + return p; +} + /* Move r_dst to r_src */ static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src ) { @@ -2753,18 +3086,19 @@ static UChar* mkFormVA ( UChar* p, UInt opc1, UInt r1, UInt r2, /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is - imperative to emit position-independent code. - - Note, dispatch should always be NULL since ppc32/64 backends - use a call-return scheme to get from the dispatcher to generated - code and back. + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else leave + it unchanged. */ -Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, +Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, PPCInstr* i, Bool mode64, - void* dispatch_unassisted, void* dispatch_assisted ) + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ) { UChar* p = &buf[0]; - UChar* ptmp = p; vassert(nbuf >= 32); if (0) { @@ -3131,6 +3465,7 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, getRegUsage_PPCInstr above, %r10 is used as an address temp */ /* jump over the following insns if condition does not hold */ + UChar* ptmp = NULL; if (cond.test != Pct_ALWAYS) { /* jmp fwds if !condition */ /* don't know how many bytes to jump over yet... 
@@ -3159,75 +3494,175 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, goto done; } - case Pin_Goto: { - UInt trc = 0; - UChar r_ret = 3; /* Put target addr into %r3 */ - PPCCondCode cond = i->Pin.Goto.cond; - UInt r_dst; - ULong imm_dst; - - vassert(dispatch_unassisted == NULL); - vassert(dispatch_assisted == NULL); - - /* First off, if this is conditional, create a conditional - jump over the rest of it. */ - if (cond.test != Pct_ALWAYS) { - /* jmp fwds if !condition */ - /* don't know how many bytes to jump over yet... - make space for a jump instruction and fill in later. */ - ptmp = p; /* fill in this bit later */ + case Pin_XDirect: { + /* NB: what goes on here has to be very closely coordinated + with the chainXDirect_PPC and unchainXDirect_PPC below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations + can't use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + /* First off, if this is conditional, create a conditional jump + over the rest of it. Or at least, leave a space for it that + we will shortly fill in. */ + UChar* ptmp = NULL; + if (i->Pin.XDirect.cond.test != Pct_ALWAYS) { + vassert(i->Pin.XDirect.cond.flag != Pcf_NONE); + ptmp = p; p += 4; + } else { + vassert(i->Pin.XDirect.cond.flag == Pcf_NONE); } - // cond succeeds... - - /* If a non-boring, set GuestStatePtr appropriately. */ - switch (i->Pin.Goto.jk) { - case Ijk_ClientReq: trc = VEX_TRC_JMP_CLIENTREQ; break; - case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break; - case Ijk_Yield: trc = VEX_TRC_JMP_YIELD; break; - case Ijk_EmWarn: trc = VEX_TRC_JMP_EMWARN; break; - case Ijk_EmFail: trc = VEX_TRC_JMP_EMFAIL; break; - case Ijk_MapFail: trc = VEX_TRC_JMP_MAPFAIL; break; - case Ijk_NoDecode: trc = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break; - case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break; - case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break; - case Ijk_SigBUS: trc = VEX_TRC_JMP_SIGBUS; break; - case Ijk_Ret: - case Ijk_Call: - case Ijk_Boring: - break; - default: - ppIRJumpKind(i->Pin.Goto.jk); - vpanic("emit_PPCInstr.Pin_Goto: unknown jump kind"); + /* Update the guest CIA. */ + /* imm32/64 r30, dstGA */ + if (!mode64) vassert(0 == (((ULong)i->Pin.XDirect.dstGA) >> 32)); + p = mkLoadImm(p, /*r*/30, (ULong)i->Pin.XDirect.dstGA, mode64); + /* stw/std r30, amCIA */ + p = do_load_or_store_machine_word( + p, False/*!isLoad*/, + /*r*/30, i->Pin.XDirect.amCIA, mode64 + ); + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling + to) backs up the return address, so as to find the address of + the first patchable byte. So: don't change the number of + instructions (32-bit: 4, 64-bit: 7) below. */ + /* imm32/64-fixed r30, VG_(disp_cp_chain_me_to_{slowEP,fastEP} */ + void* disp_cp_chain_me + = i->Pin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = mkLoadImm_EXACTLY2or5( + p, /*r*/30, Ptr_to_ULong(disp_cp_chain_me), mode64); + /* mtctr r30 */ + p = mkFormXFX(p, /*r*/30, 9, 467); + /* bctrl */ + p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1); + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. 
*/ + if (i->Pin.XDirect.cond.test != Pct_ALWAYS) { + Int delta = p - ptmp; + vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3)); + /* bc !ct,cf,delta */ + mkFormB(ptmp, invertCondTest(i->Pin.XDirect.cond.test), + i->Pin.XDirect.cond.flag, (delta>>2), 0, 0); + } + goto done; + } + + case Pin_XIndir: { + /* We're generating transfers that could lead indirectly to a + chain-me, so we need to be sure this is actually allowed -- + no-redir translations are not allowed to reach normal + translations without going through the scheduler. That means + no XDirects or XIndirs out from no-redir translations. + Hence: */ + vassert(disp_cp_xindir != NULL); + + /* First off, if this is conditional, create a conditional jump + over the rest of it. Or at least, leave a space for it that + we will shortly fill in. */ + UChar* ptmp = NULL; + if (i->Pin.XIndir.cond.test != Pct_ALWAYS) { + vassert(i->Pin.XIndir.cond.flag != Pcf_NONE); + ptmp = p; + p += 4; + } else { + vassert(i->Pin.XIndir.cond.flag == Pcf_NONE); } - if (trc !=0) { - vassert(trc < 0x10000); - /* addi r31,0,trc */ - p = mkFormD(p, 14, 31, 0, trc); // p += 4 + + /* Update the guest CIA. */ + /* stw/std r-dstGA, amCIA */ + p = do_load_or_store_machine_word( + p, False/*!isLoad*/, + iregNo(i->Pin.XIndir.dstGA, mode64), + i->Pin.XIndir.amCIA, mode64 + ); + + /* imm32/64 r30, VG_(disp_cp_xindir) */ + p = mkLoadImm(p, /*r*/30, (ULong)Ptr_to_ULong(disp_cp_xindir), mode64); + /* mtctr r30 */ + p = mkFormXFX(p, /*r*/30, 9, 467); + /* bctr */ + p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0); + + /* Fix up the conditional jump, if there was one. */ + if (i->Pin.XIndir.cond.test != Pct_ALWAYS) { + Int delta = p - ptmp; + vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3)); + /* bc !ct,cf,delta */ + mkFormB(ptmp, invertCondTest(i->Pin.XIndir.cond.test), + i->Pin.XIndir.cond.flag, (delta>>2), 0, 0); } + goto done; + } - /* Get the destination address into %r_ret */ - if (i->Pin.Goto.dst->tag == Pri_Imm) { - imm_dst = i->Pin.Goto.dst->Pri.Imm; - p = mkLoadImm(p, r_ret, imm_dst, mode64); // p += 4|8|20 + case Pin_XAssisted: { + /* First off, if this is conditional, create a conditional jump + over the rest of it. Or at least, leave a space for it that + we will shortly fill in. */ + UChar* ptmp = NULL; + if (i->Pin.XAssisted.cond.test != Pct_ALWAYS) { + vassert(i->Pin.XAssisted.cond.flag != Pcf_NONE); + ptmp = p; + p += 4; } else { - vassert(i->Pin.Goto.dst->tag == Pri_Reg); - r_dst = iregNo(i->Pin.Goto.dst->Pri.Reg, mode64); - p = mkMoveReg(p, r_ret, r_dst); // p += 4 + vassert(i->Pin.XAssisted.cond.flag == Pcf_NONE); } - - /* blr */ - p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 16, 0); // p += 4 + + /* Update the guest CIA. 
*/ + /* stw/std r-dstGA, amCIA */ + p = do_load_or_store_machine_word( + p, False/*!isLoad*/, + iregNo(i->Pin.XAssisted.dstGA, mode64), + i->Pin.XAssisted.amCIA, mode64 + ); + + /* imm32/64 r31, $magic_number */ + UInt trcval = 0; + switch (i->Pin.XAssisted.jk) { + case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; + case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; + //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; + //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; + case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; + //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; + case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; + case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; + //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. */ + //case Ijk_Ret: + //case Ijk_Call: + /* fallthrough */ + default: + ppIRJumpKind(i->Pin.XAssisted.jk); + vpanic("emit_PPCInstr.Pin_XAssisted: unexpected jump kind"); + } + vassert(trcval != 0); + p = mkLoadImm(p, /*r*/31, trcval, mode64); + + /* imm32/64 r30, VG_(disp_cp_xassisted) */ + p = mkLoadImm(p, /*r*/30, + (ULong)Ptr_to_ULong(disp_cp_xassisted), mode64); + /* mtctr r30 */ + p = mkFormXFX(p, /*r*/30, 9, 467); + /* bctr */ + p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0); /* Fix up the conditional jump, if there was one. */ - if (cond.test != Pct_ALWAYS) { + if (i->Pin.XAssisted.cond.test != Pct_ALWAYS) { Int delta = p - ptmp; - vassert(delta >= 12 && delta <= 32); + vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3)); /* bc !ct,cf,delta */ - mkFormB(ptmp, invertCondTest(cond.test), - cond.flag, delta>>2, 0, 0); + mkFormB(ptmp, invertCondTest(i->Pin.XAssisted.cond.test), + i->Pin.XAssisted.cond.flag, (delta>>2), 0, 0); } goto done; } @@ -3242,6 +3677,7 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, cond = i->Pin.CMov.cond; /* branch (if cond fails) over move instrs */ + UChar* ptmp = NULL; if (cond.test != Pct_ALWAYS) { /* don't know how many bytes to jump over yet... make space for a jump instruction and fill in later. */ @@ -4129,6 +4565,86 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, goto done; } + case Pin_EvCheck: { + /* This requires a 32-bit dec/test in both 32- and 64-bit + modes. */ + /* We generate: + lwz r30, amCounter + addic. r30, r30, -1 + stw r30, amCounter + bge nofail + lwz/ld r30, amFailAddr + mtctr r30 + bctr + nofail: + */ + UChar* p0 = p; + /* lwz r30, amCounter */ + p = do_load_or_store_word32(p, True/*isLoad*/, /*r*/30, + i->Pin.EvCheck.amCounter, mode64); + /* addic.
r30,r30,-1 */ + p = emit32(p, 0x37DEFFFF); + /* stw r30, amCounter */ + p = do_load_or_store_word32(p, False/*!isLoad*/, /*r*/30, + i->Pin.EvCheck.amCounter, mode64); + /* bge nofail */ + p = emit32(p, 0x40800010); + /* lwz/ld r30, amFailAddr */ + p = do_load_or_store_machine_word(p, True/*isLoad*/, /*r*/30, + i->Pin.EvCheck.amFailAddr, mode64); + /* mtctr r30 */ + p = mkFormXFX(p, /*r*/30, 9, 467); + /* bctr */ + p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0); + /* nofail: */ + + /* Crosscheck */ + vassert(evCheckSzB_PPC() == (UChar*)p - (UChar*)p0); + goto done; + } + + case Pin_ProfInc: { + /* We generate: + (ctrP is unknown now, so use 0x65556555(65556555) in the + expectation that a later call to LibVEX_patchProfCtr + will be used to fill in the immediate fields once the + right value is known.) + 32-bit: + imm32-exactly r30, 0x65556555 + lwz r29, 4(r30) + addic. r29, r29, 1 + stw r29, 4(r30) + lwz r29, 0(r30) + addze r29, r29 + stw r29, 0(r30) + 64-bit: + imm64-exactly r30, 0x6555655565556555 + ld r29, 0(r30) + add r29, r29, 1 + std r29, 0(r30) + */ + if (mode64) { + p = mkLoadImm_EXACTLY2or5( + p, /*r*/30, 0x6555655565556555ULL, True/*mode64*/); + p = emit32(p, 0xEBBE0000); + p = emit32(p, 0x7FBD0A14); + p = emit32(p, 0xFBBE0000); + } else { + p = mkLoadImm_EXACTLY2or5( + p, /*r*/30, 0x65556555ULL, False/*!mode64*/); + p = emit32(p, 0x83BE0004); + p = emit32(p, 0x37BD0001); + p = emit32(p, 0x93BE0004); + p = emit32(p, 0x83BE0000); + p = emit32(p, 0x7FBD0194); + p = emit32(p, 0x93BE0000); + } + /* Tell the caller .. */ + vassert(!(*is_profInc)); + *is_profInc = True; + goto done; + } + default: goto bad; } @@ -4144,6 +4660,147 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, return p - &buf[0]; } + +/* How big is an event check? See case for Pin_EvCheck in + emit_PPCInstr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. */ +Int evCheckSzB_PPC ( void ) +{ + return 28; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange chainXDirect_PPC ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to, + Bool mode64 ) +{ + /* What we're expecting to see is: + imm32/64-fixed r30, disp_cp_chain_me_to_EXPECTED + mtctr r30 + bctrl + viz + <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5> + 7F C9 03 A6 + 4E 80 04 21 + */ + UChar* p = (UChar*)place_to_chain; + vassert(0 == (3 & (HWord)p)); + vassert(isLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(disp_cp_chain_me_EXPECTED), + mode64)); + vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6); + vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800421); + /* And what we want to change it to is: + imm32/64-fixed r30, place_to_jump_to + mtctr r30 + bctr + viz + <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5> + 7F C9 03 A6 + 4E 80 04 20 + The replacement has the same length as the original. + */ + p = mkLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(place_to_jump_to), mode64); + p = emit32(p, 0x7FC903A6); + p = emit32(p, 0x4E800420); + + Int len = p - (UChar*)place_to_chain; + vassert(len == (mode64 ? 28 : 16)); /* stay sane */ + VexInvalRange vir = {(HWord)place_to_chain, len}; + return vir; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. 
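   Unchaining is the exact mirror image of chaining: only the loaded
   immediate and the final branch opcode change, so the replacement is
   always the same length as the sequence it overwrites. Schematically
   (encodings as in the asserts below):

       unchained (as first emitted):          chained:
         imm-fixed r30, disp_cp_chain_me        imm-fixed r30, place_to_jump_to
         mtctr r30       (7F C9 03 A6)          mtctr r30       (7F C9 03 A6)
         bctrl           (4E 80 04 21)          bctr            (4E 80 04 20)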
*/ +VexInvalRange unchainXDirect_PPC ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me, + Bool mode64 ) +{ + /* What we're expecting to see is: + imm32/64-fixed r30, place_to_jump_to_EXPECTED + mtctr r30 + bctr + viz + <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5> + 7F C9 03 A6 + 4E 80 04 20 + */ + UChar* p = (UChar*)place_to_unchain; + vassert(0 == (3 & (HWord)p)); + vassert(isLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(place_to_jump_to_EXPECTED), + mode64)); + vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6); + vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800420); + /* And what we want to change it to is: + imm32/64-fixed r30, disp_cp_chain_me + mtctr r30 + bctrl + viz + <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5> + 7F C9 03 A6 + 4E 80 04 21 + The replacement has the same length as the original. + */ + p = mkLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(disp_cp_chain_me), mode64); + p = emit32(p, 0x7FC903A6); + p = emit32(p, 0x4E800421); + + Int len = p - (UChar*)place_to_unchain; + vassert(len == (mode64 ? 28 : 16)); /* stay sane */ + VexInvalRange vir = {(HWord)place_to_unchain, len}; + return vir; +} + + +/* Patch the counter address into a profile inc point, as previously + created by the Pin_ProfInc case for emit_PPCInstr. */ +VexInvalRange patchProfInc_PPC ( void* place_to_patch, + ULong* location_of_counter, + Bool mode64 ) +{ + UChar* p = (UChar*)place_to_patch; + vassert(0 == (3 & (HWord)p)); + + Int len = 0; + if (mode64) { + vassert(isLoadImm_EXACTLY2or5(p, /*r*/30, + 0x6555655565556555ULL, True/*mode64*/)); + vassert(fetch32(p + 20) == 0xEBBE0000); + vassert(fetch32(p + 24) == 0x7FBD0A14); + vassert(fetch32(p + 28) == 0xFBBE0000); + p = mkLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(location_of_counter), + True/*mode64*/); + len = p - (UChar*)place_to_patch; + vassert(len == 20); + } else { + vassert(isLoadImm_EXACTLY2or5(p, /*r*/30, + 0x65556555ULL, False/*!mode64*/)); + vassert(fetch32(p + 8) == 0x83BE0004); + vassert(fetch32(p + 12) == 0x37BD0001); + vassert(fetch32(p + 16) == 0x93BE0004); + vassert(fetch32(p + 20) == 0x83BE0000); + vassert(fetch32(p + 24) == 0x7FBD0194); + vassert(fetch32(p + 28) == 0x93BE0000); + p = mkLoadImm_EXACTLY2or5(p, /*r*/30, + Ptr_to_ULong(location_of_counter), + False/*!mode64*/); + len = p - (UChar*)place_to_patch; + vassert(len == 8); + } + VexInvalRange vir = {(HWord)place_to_patch, len}; + return vir; +} + + /*---------------------------------------------------------------*/ /*--- end host_ppc_defs.c ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 9f6797bd63..6a8542bb6b 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -451,7 +451,9 @@ typedef Pin_MulL, /* widening multiply */ Pin_Div, /* div */ Pin_Call, /* call to address in register */ - Pin_Goto, /* conditional/unconditional jmp to dst */ + Pin_XDirect, /* direct transfer to GA */ + Pin_XIndir, /* indirect transfer to GA */ + Pin_XAssisted, /* assisted transfer to GA */ Pin_CMov, /* conditional move */ Pin_Load, /* zero-extending load a 8|16|32|64 bit value from mem */ Pin_LoadL, /* load-linked (lwarx/ldarx) 32|64 bit value from mem */ @@ -494,7 +496,9 @@ typedef Pin_Dfp64Unary, /* DFP64 unary op */ Pin_Dfp128nary, /* DFP128 unary op */ Pin_Dfp64Binary, /* DFP64 binary op */ - Pin_Dfp128Binary /* DFP128 binary op */ + Pin_Dfp128Binary, /* DFP128 binary op */ + Pin_EvCheck, /* Event check */ + 
Pin_ProfInc /* 64-bit profile counter increment */ } PPCInstrTag; @@ -586,13 +590,30 @@ typedef Addr64 target; UInt argiregs; } Call; - /* Pseudo-insn. Goto dst, on given condition (which could be - Pct_ALWAYS). */ - struct { + /* Update the guest CIA value, then exit requesting to chain + to it. May be conditional. Use of Addr64 in order to cope + with 64-bit hosts. */ + struct { + Addr64 dstGA; /* next guest address */ + PPCAMode* amCIA; /* amode in guest state for CIA */ + PPCCondCode cond; /* can be ALWAYS */ + Bool toFastEP; /* chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + PPCAMode* amCIA; + PPCCondCode cond; /* can be ALWAYS */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + PPCAMode* amCIA; + PPCCondCode cond; /* can be ALWAYS */ IRJumpKind jk; - PPCCondCode cond; - PPCRI* dst; - } Goto; + } XAssisted; /* Mov src to dst on the given condition, which may not be the bogus Pct_ALWAYS. */ struct { @@ -820,6 +841,17 @@ typedef HReg srcR_hi; HReg srcR_lo; } Dfp128Binary; + + struct { + PPCAMode* amCounter; + PPCAMode* amFailAddr; + } EvCheck; + struct { + /* No fields. The address of the counter to inc is + installed later, post-translation, by patching it in, + as it is not known at translation time. */ + } ProfInc; + } Pin; } PPCInstr; @@ -834,7 +866,12 @@ extern PPCInstr* PPCInstr_Unary ( PPCUnaryOp op, HReg dst, HReg src ); extern PPCInstr* PPCInstr_MulL ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg ); extern PPCInstr* PPCInstr_Div ( Bool extended, Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_Call ( PPCCondCode, Addr64, UInt ); -extern PPCInstr* PPCInstr_Goto ( IRJumpKind, PPCCondCode cond, PPCRI* dst ); +extern PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA, + PPCCondCode cond, Bool toFastEP ); +extern PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA, + PPCCondCode cond ); +extern PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA, + PPCCondCode cond, IRJumpKind jk ); extern PPCInstr* PPCInstr_CMov ( PPCCondCode, HReg dst, PPCRI* src ); extern PPCInstr* PPCInstr_Load ( UChar sz, HReg dst, PPCAMode* src, Bool mode64 ); @@ -883,6 +920,9 @@ extern PPCInstr* PPCInstr_Dfp64Binary ( PPCFpOp op, HReg dst, HReg srcL, HReg srcR ); extern PPCInstr* PPCInstr_Dfp128Binary( PPCFpOp op, HReg dst_hi, HReg dst_lo, HReg srcR_hi, HReg srcR_lo ); +extern PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter, + PPCAMode* amFailAddr ); +extern PPCInstr* PPCInstr_ProfInc ( void ); extern void ppPPCInstr(PPCInstr*, Bool mode64); @@ -892,10 +932,13 @@ extern void ppPPCInstr(PPCInstr*, Bool mode64); extern void getRegUsage_PPCInstr ( HRegUsage*, PPCInstr*, Bool mode64 ); extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64); extern Bool isMove_PPCInstr ( PPCInstr*, HReg*, HReg* ); -extern Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr*, +extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, PPCInstr* i, Bool mode64, - void* dispatch_unassisted, - void* dispatch_assisted ); + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ); extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); @@ -903,9 +946,37 @@ extern void genReload_PPC ( /*OUT*/HInstr** i1, 
/*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); extern void getAllocableRegs_PPC ( Int*, HReg**, Bool mode64 ); -extern HInstrArray* iselSB_PPC ( IRSB*, VexArch, - VexArchInfo*, - VexAbiInfo* ); +extern HInstrArray* iselSB_PPC ( IRSB*, + VexArch, + VexArchInfo*, + VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ); + +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and + host_EvC_COUNTER. */ +extern Int evCheckSzB_PPC ( void ); + +/* Perform a chaining and unchaining of an XDirect jump. */ +extern VexInvalRange chainXDirect_PPC ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to, + Bool mode64 ); + +extern VexInvalRange unchainXDirect_PPC ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me, + Bool mode64 ); + +/* Patch the counter location into an existing ProfInc point. */ +extern VexInvalRange patchProfInc_PPC ( void* place_to_patch, + ULong* location_of_counter, + Bool mode64 ); + #endif /* ndef __VEX_HOST_PPC_DEFS_H */ diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c index 78b2a08b58..04b88126a6 100644 --- a/VEX/priv/host_ppc_isel.c +++ b/VEX/priv/host_ppc_isel.c @@ -218,17 +218,20 @@ static IRExpr* bind ( Int binder ) - A mapping from IRTemp to HReg. This tells the insn selector which virtual register(s) are associated with each IRTemp - temporary. This is computed before insn selection starts, and - does not change. We expect this mapping to map precisely the - same set of IRTemps as the type mapping does. + temporary. This is computed before insn selection starts, and + does not change. We expect this mapping to map precisely the + same set of IRTemps as the type mapping does. - - vregmap holds the primary register for the IRTemp. - - vregmapHI holds the secondary register for the IRTemp, + - vregmapLo holds the primary register for the IRTemp. + - vregmapMedLo holds the secondary register for the IRTemp, if any is needed. That's only for Ity_I64 temps in 32 bit mode or Ity_I128 temps in 64-bit mode. - - - The name of the vreg in which we stash a copy of the link reg, - so helper functions don't kill it. + - vregmapMedHi is only for dealing with Ity_I128 temps in + 32 bit mode. It holds bits 95:64 (Intel numbering) + of the IRTemp. + - vregmapHi is also only for dealing with Ity_I128 temps + in 32 bit mode. It holds the most significant bits + (127:96 in Intel numbering) of the IRTemp. - The code array, that is, the insns selected so far. @@ -247,11 +250,20 @@ static IRExpr* bind ( Int binder ) described in set_FPU_rounding_mode below. - A VexMiscInfo*, needed for knowing how to generate - function calls for this target + function calls for this target. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any + insn in this block. Is set at the start and does not change. + This is used for detecting jumps which are definitely + forward-edges from this block, and therefore can be made + (chained) to the fast entry point of the destination, thereby + avoiding the destination's event check. */ typedef struct { + /* Constant -- are set at the start and do not change. 
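      (For the max_ga field below: the forwards-edge test it enables boils
      down to a single unsigned compare, as in this hypothetical
      condensation of the Ist_Exit and iselNext logic later on, with dstGA
      standing for the constant jump target pulled out of the IRConst:

          Bool toFastEP = dstGA > env->max_ga;   // past the end of this block

      the real code performs the compare at Addr32 or Addr64 width
      according to mode64.)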
*/ IRTypeEnv* type_env; // 64-bit mode 32-bit mode HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0] @@ -260,20 +272,21 @@ typedef HReg* vregmapHi; // unused highest 32-bits [127:96] Int n_vregmap; - HReg savedLR; - - HInstrArray* code; - - Int vreg_ctr; - /* 27 Jan 06: Not currently used, but should be */ UInt hwcaps; Bool mode64; - IRExpr* previous_rm; - VexAbiInfo* vbi; + + Bool chainingAllowed; + Addr64 max_ga; + + /* These are modified as we go along. */ + HInstrArray* code; + Int vreg_ctr; + + IRExpr* previous_rm; } ISelEnv; @@ -4545,18 +4558,60 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /* --------- EXIT --------- */ case Ist_Exit: { - PPCRI* ri_dst; - PPCCondCode cc; - IRConstTag tag = stmt->Ist.Exit.dst->tag; - if (!mode64 && (tag != Ico_U32)) + IRConst* dst = stmt->Ist.Exit.dst; + if (!mode64 && dst->tag != Ico_U32) vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value"); - if (mode64 && (tag != Ico_U64)) + if (mode64 && dst->tag != Ico_U64) vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value"); - ri_dst = iselWordExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst)); - cc = iselCondCode(env,stmt->Ist.Exit.guard); - addInstr(env, PPCInstr_RdWrLR(True, env->savedLR)); - addInstr(env, PPCInstr_Goto(stmt->Ist.Exit.jk, cc, ri_dst)); - return; + + PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); + PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP, + hregPPC_GPR31(mode64)); + + /* Case: boring transfer to known address */ + if (stmt->Ist.Exit.jk == Ijk_Boring + || stmt->Ist.Exit.jk == Ijk_Call + /* || stmt->Ist.Exit.jk == Ijk_Ret */) { + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = mode64 + ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga) + : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga); + if (0) vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, PPCInstr_XDirect( + mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64 + : (Addr64)stmt->Ist.Exit.dst->Ico.U32, + amCIA, cc, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring)); + } + return; + } + + /* Case: assisted transfer to arbitrary address */ + switch (stmt->Ist.Exit.jk) { + //case Ijk_MapFail: + //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: + case Ijk_NoDecode: case Ijk_SigBUS: case Ijk_SigTRAP: + { + HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, + stmt->Ist.Exit.jk)); + return; + } + default: + break; + } + + /* Do we ever expect to see any other kind? 
*/ + goto stmt_fail; } default: break; @@ -4571,21 +4626,91 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /*--- ISEL: Basic block terminators (Nexts) ---*/ /*---------------------------------------------------------*/ -static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) +static void iselNext ( ISelEnv* env, + IRExpr* next, IRJumpKind jk, Int offsIP ) { - PPCCondCode cond; - PPCRI* ri; if (vex_traceflags & VEX_TRACE_VCODE) { - vex_printf("\n-- goto {"); + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); ppIRJumpKind(jk); - vex_printf("} "); - ppIRExpr(next); - vex_printf("\n"); + vex_printf( "\n"); + } + + PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE ); + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32)); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address */ + PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64)); + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = env->mode64 + ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga) + : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga); + if (0) vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, PPCInstr_XDirect( + env->mode64 ? (Addr64)cdst->Ico.U64 + : (Addr64)cdst->Ico.U32, + amCIA, always, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselWordExpr_R(env, next); + addInstr(env, PPCInstr_XAssisted(r, amCIA, always, + Ijk_Boring)); + } + return; + } } - cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE ); - ri = iselWordExpr_RI(env, next); - addInstr(env, PPCInstr_RdWrLR(True, env->savedLR)); - addInstr(env, PPCInstr_Goto(jk, cond, ri)); + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { + HReg r = iselWordExpr_R(env, next); + PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64)); + if (env->chainingAllowed) { + addInstr(env, PPCInstr_XIndir(r, amCIA, always)); + } else { + addInstr(env, PPCInstr_XAssisted(r, amCIA, always, + Ijk_Boring)); + } + return; + } + default: + break; + } + + /* Case: some other kind of transfer to any address */ + switch (jk) { + case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode: + case Ijk_EmWarn: case Ijk_SigTRAP: case Ijk_TInval: + case Ijk_NoRedir: + //case Ijk_Sys_int128: + //case Ijk_Yield: + { + HReg r = iselWordExpr_R(env, next); + PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64)); + addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk)); + return; + } + default: + break; + } + + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + vassert(0); // are we expecting any other kind? } @@ -4593,20 +4718,29 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) /*--- Insn selector top-level ---*/ /*---------------------------------------------------------*/ -/* Translate an entire BS to ppc code. */ -HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host, +/* Translate an entire SB to ppc code. 
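   Every selected block now ends in exactly one of three terminator
   flavours, chosen by iselNext above. As a summary (not code from the
   patch):

       next is a constant, jk is Boring/Call:
           chainingAllowed ? XDirect(dstGA, toFastEP) : XAssisted(r, Ijk_Boring)
       next in a register, jk is Boring/Ret/Call:
           chainingAllowed ? XIndir(r) : XAssisted(r, Ijk_Boring)
       anything else (syscall, ClientReq, NoDecode, EmWarn, SigTRAP, TInval, NoRedir):
           XAssisted(r, jk)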
@@ -4593,20 +4718,29 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
 /*--- Insn selector top-level                            ---*/
 /*---------------------------------------------------------*/
 
-/* Translate an entire BS to ppc code. */
-HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
+/* Translate an entire SB to ppc code. */
+HInstrArray* iselSB_PPC ( IRSB* bb,
+                          VexArch arch_host,
                           VexArchInfo* archinfo_host,
-                          VexAbiInfo* vbi )
+                          VexAbiInfo* vbi,
+                          Int offs_Host_EvC_Counter,
+                          Int offs_Host_EvC_FailAddr,
+                          Bool chainingAllowed,
+                          Bool addProfInc,
+                          Addr64 max_ga )
 {
-   Int      i, j;
-   HReg     hregLo, hregMedLo, hregMedHi, hregHi;
-   ISelEnv* env;
-   UInt     hwcaps_host = archinfo_host->hwcaps;
-   Bool     mode64 = False;
-   UInt     mask32, mask64;
+   Int       i, j;
+   HReg      hregLo, hregMedLo, hregMedHi, hregHi;
+   ISelEnv*  env;
+   UInt      hwcaps_host = archinfo_host->hwcaps;
+   Bool      mode64 = False;
+   UInt      mask32, mask64;
+   PPCAMode *amCounter, *amFailAddr;
+
    vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
    mode64 = arch_host == VexArchPPC64;
+   if (mode64) vassert(max_ga <= 0xFFFFFFFFULL);
 
    /* do some sanity checks */
    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
@@ -4643,15 +4777,20 @@ HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
    env->n_vregmap = bb->tyenv->types_used;
    env->vregmapLo    = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
    env->vregmapMedLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
-   if (!mode64) {
+   if (mode64) {
+      env->vregmapMedHi = NULL;
+      env->vregmapHi    = NULL;
+   } else {
       env->vregmapMedHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
       env->vregmapHi    = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
    }
 
    /* and finally ... */
-   env->hwcaps      = hwcaps_host;
-   env->previous_rm = NULL;
-   env->vbi         = vbi;
+   env->chainingAllowed = chainingAllowed;
+   env->max_ga          = max_ga;
+   env->hwcaps          = hwcaps_host;
+   env->previous_rm     = NULL;
+   env->vbi             = vbi;
 
    /* For each IR temporary, allocate a suitably-kinded virtual
       register. */
@@ -4698,16 +4837,24 @@ HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
    }
    env->vreg_ctr = j;
 
-   /* Keep a copy of the link reg, so helper functions don't kill it. */
-   env->savedLR = newVRegI(env);
-   addInstr(env, PPCInstr_RdWrLR(False, env->savedLR));
+   /* The very first instruction must be an event check. */
+   amCounter  = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
+   amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
+   addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
+
+   /* Possibly a block counter increment (for profiling).  At this
+      point we don't know the address of the counter, so just pretend
+      it is zero.  It will have to be patched later, but before this
+      translation is used, by a call to LibVEX_patchProfCtr. */
+   if (addProfInc) {
+      addInstr(env, PPCInstr_ProfInc());
+   }
 
    /* Ok, finally we can iterate over the statements. */
    for (i = 0; i < bb->stmts_used; i++)
-      if (bb->stmts[i])
-         iselStmt(env,bb->stmts[i]);
+      iselStmt(env, bb->stmts[i]);
 
-   iselNext(env,bb->next,bb->jumpkind);
+   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
 
    /* record the number of vregs we used. */
    env->code->n_vregs = env->vreg_ctr;
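The PPCInstr_EvCheck emitted at the head of every translation decrements
the counter held in the guest state and, when it goes negative, leaves the
translation through the stored fail address so the scheduler regains
control. A rough C model of that behaviour, under the assumption that the
two fields sit at the fixed offsets named above; GuestStateHdr32 and
dispatchEvCheck are illustrative names, not VEX API:

   #include <stdint.h>

   typedef struct {
      uint32_t host_EvC_FAILADDR;  /* where to go when the check fires */
      uint32_t host_EvC_COUNTER;   /* events remaining before that */
      /* ... guest registers follow, 16-aligned ... */
   } GuestStateHdr32;

   /* Returns the address to continue at: 0 means carry on with the
      translation, otherwise the fail address (back to the scheduler). */
   static uint32_t dispatchEvCheck ( GuestStateHdr32* gst )
   {
      /* Decrement the counter; if it goes negative, leave the
         translation via FAILADDR so the scheduler can deliver
         signals, switch threads, etc. */
      if ((int32_t)--gst->host_EvC_COUNTER < 0)
         return gst->host_EvC_FAILADDR;
      return 0;
   }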
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index b0f03dea69..460f443d46 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -314,7 +314,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
          break;
-#if 0
+
       case VexArchPPC32:
          mode64       = False;
          getAllocableRegs_PPC ( &n_available_real_regs,
@@ -327,16 +327,14 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
         ppInstr      = (void(*)(HInstr*,Bool)) ppPPCInstr;
         ppReg        = (void(*)(HReg)) ppHRegPPC;
         iselSB       = iselSB_PPC;
-         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+         emit         = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                                void*,void*,void*,void*))
                         emit_PPCInstr;
          host_is_bigendian = True;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
-         /* return-to-dispatcher scheme */
-         vassert(vta->dispatch_unassisted == NULL);
-         vassert(vta->dispatch_assisted == NULL);
          break;
-
+#if 0
       case VexArchPPC64:
          mode64       = True;
          getAllocableRegs_PPC ( &n_available_real_regs,
@@ -444,23 +442,27 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN   ) == 8);
          vassert(sizeof( ((VexGuestAMD64State*)0)->guest_NRADDR  ) == 8);
          break;
-#if 0
+
       case VexArchPPC32:
-         preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_PPC;
-         specHelper       = guest_ppc32_spechelper;
-         guest_sizeB      = sizeof(VexGuestPPC32State);
-         guest_word_type  = Ity_I32;
-         guest_layout     = &ppc32Guest_layout;
-         offB_TISTART     = offsetof(VexGuestPPC32State,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestPPC32State,guest_TILEN);
+         preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
+         disInstrFn       = disInstr_PPC;
+         specHelper       = guest_ppc32_spechelper;
+         guest_sizeB      = sizeof(VexGuestPPC32State);
+         guest_word_type  = Ity_I32;
+         guest_layout     = &ppc32Guest_layout;
+         offB_TISTART     = offsetof(VexGuestPPC32State,guest_TISTART);
+         offB_TILEN       = offsetof(VexGuestPPC32State,guest_TILEN);
+         offB_GUEST_IP    = offsetof(VexGuestPPC32State,guest_CIA);
+         szB_GUEST_IP     = sizeof( ((VexGuestPPC32State*)0)->guest_CIA );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestPPC32State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestPPC32State) % 16);
          vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4);
          vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN   ) == 4);
          vassert(sizeof( ((VexGuestPPC32State*)0)->guest_NRADDR  ) == 4);
          break;
-
+#if 0
       case VexArchPPC64:
          preciseMemExnsFn = guest_ppc64_state_requires_precise_mem_exns;
          disInstrFn       = disInstr_PPC;
@@ -830,6 +832,10 @@ VexInvalRange LibVEX_Chain ( VexArch arch_host,
          chainXDirect = chainXDirect_ARM; break;
       case VexArchS390X:
         chainXDirect = chainXDirect_S390; break;
+      case VexArchPPC32:
+         return chainXDirect_PPC(place_to_chain,
+                                 disp_cp_chain_me_EXPECTED,
+                                 place_to_jump_to, False/*!mode64*/);
       default:
          vassert(0);
    }
@@ -855,6 +861,10 @@ VexInvalRange LibVEX_UnChain ( VexArch arch_host,
          unchainXDirect = unchainXDirect_ARM; break;
       case VexArchS390X:
         unchainXDirect = unchainXDirect_S390; break;
+      case VexArchPPC32:
+         return unchainXDirect_PPC(place_to_unchain,
+                                   place_to_jump_to_EXPECTED,
+                                   disp_cp_chain_me, False/*!mode64*/);
       default:
          vassert(0);
    }
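LibVEX_Chain rewrites a translation's XDirect exit in place, turning the
call to disp_cp_chain_me into a direct jump to place_to_jump_to;
LibVEX_UnChain reverses it. Both return a VexInvalRange describing the
patched bytes, which the caller must flush from the instruction cache. A
sketch of how a client might drive this; chain_one_edge and flush_icache
are assumed client-side names, not part of the patch:

   #include "libvex.h"   /* LibVEX_Chain, VexInvalRange, VexArchPPC32 */

   extern void flush_icache ( void* start, unsigned long len );  /* assumed */

   static void chain_one_edge ( void* host_code_of_exit,
                                void* disp_cp_chain_me,
                                void* target_translation )
   {
      VexInvalRange vir = LibVEX_Chain( VexArchPPC32,
                                        host_code_of_exit,
                                        disp_cp_chain_me,
                                        target_translation );
      /* The patched bytes must leave the i-cache before the
         translation is re-entered. */
      if (vir.len > 0)
         flush_icache( (void*)vir.start, vir.len );
   }

Note that the PPC32 cases return directly instead of going through the
shared function pointer, since chainXDirect_PPC and friends take an extra
mode64 flag that the generic dispatch does not carry.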
@@ -878,6 +888,8 @@ Int LibVEX_evCheckSzB ( VexArch arch_host )
          cached = evCheckSzB_ARM(); break;
       case VexArchS390X:
          cached = evCheckSzB_S390(); break;
+      case VexArchPPC32:
+         cached = evCheckSzB_PPC(); break;
       default:
          vassert(0);
    }
@@ -899,6 +911,9 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
          patchProfInc = patchProfInc_ARM; break;
       case VexArchS390X:
          patchProfInc = patchProfInc_S390; break;
+      case VexArchPPC32:
+         return patchProfInc_PPC(place_to_patch,
+                                 location_of_counter, False/*!mode64*/);
       default:
          vassert(0);
    }
diff --git a/VEX/pub/libvex_guest_ppc32.h b/VEX/pub/libvex_guest_ppc32.h
index d848029d09..99bec3caee 100644
--- a/VEX/pub/libvex_guest_ppc32.h
+++ b/VEX/pub/libvex_guest_ppc32.h
@@ -48,6 +48,12 @@
 typedef
    struct {
+      /* Event check fail addr and counter. */
+      /*   0 */ UInt host_EvC_FAILADDR;
+      /*   4 */ UInt host_EvC_COUNTER;
+      /*   8 */ UInt pad3;
+      /*  12 */ UInt pad4;
+      /* Add 16 to all the numbers below.  Sigh. */
       /* General Purpose Registers */
       /*   0 */ UInt guest_GPR0;
       /*   4 */ UInt guest_GPR1;
diff --git a/VEX/pub/libvex_guest_ppc64.h b/VEX/pub/libvex_guest_ppc64.h
index e086c02cfd..c3c02924f7 100644
--- a/VEX/pub/libvex_guest_ppc64.h
+++ b/VEX/pub/libvex_guest_ppc64.h
@@ -86,6 +86,12 @@ vrsave Non-volatile 32-bit register
 typedef
    struct {
+      /* Event check fail addr, counter, and padding to make GPR0 16
+         aligned. */
+      /*   0 */ ULong host_EvC_FAILADDR;
+      /*   8 */ UInt  host_EvC_COUNTER;
+      /*  12 */ UInt  pad0;
+      /* Add 16 to all of the offsets below .. */
      /* General Purpose Registers */
      /*   0 */ ULong guest_GPR0;
      /*   8 */ ULong guest_GPR1;
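Both guest-state headers establish the same invariant: the event-check
fields fill exactly the first 16 bytes, so guest_GPR0 still starts
16-aligned. Purely illustrative compile-time checks of that layout, using
the standard negative-array-size idiom; these typedefs are not part of the
patch:

   #include <stddef.h>
   #include "libvex_guest_ppc32.h"
   #include "libvex_guest_ppc64.h"

   /* Each typedef fails to compile if its condition is false. */
   typedef char chk32_evc_at_0   [offsetof(VexGuestPPC32State, host_EvC_FAILADDR) == 0  ? 1 : -1];
   typedef char chk32_ctr_at_4   [offsetof(VexGuestPPC32State, host_EvC_COUNTER)  == 4  ? 1 : -1];
   typedef char chk32_gpr0_at_16 [offsetof(VexGuestPPC32State, guest_GPR0)        == 16 ? 1 : -1];

   typedef char chk64_evc_at_0   [offsetof(VexGuestPPC64State, host_EvC_FAILADDR) == 0  ? 1 : -1];
   typedef char chk64_ctr_at_8   [offsetof(VexGuestPPC64State, host_EvC_COUNTER)  == 8  ? 1 : -1];
   typedef char chk64_gpr0_at_16 [offsetof(VexGuestPPC64State, guest_GPR0)        == 16 ? 1 : -1];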