vassert(cond != Acc_ALWAYS);
return i;
}
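+/* Construct a conditional load: if 'cond' holds, load 'szB' (4 or 8) bytes
+   from 'addr' into 'dst'; otherwise 'dst' is left unchanged.  A hypothetical
+   use from instruction selection might look like
+      AMD64Instr_CLoad(Acc_NZ, 8, AMD64AMode_IR(0, hregAMD64_RBP()), dst);
+   (illustrative only; the real call sites are not shown in this hunk). */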
+AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+ AMD64AMode* addr, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_CLoad;
+ i->Ain.CLoad.cond = cond;
+ i->Ain.CLoad.szB = szB;
+ i->Ain.CLoad.addr = addr;
+ i->Ain.CLoad.dst = dst;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_MovxLQ;
vex_printf(",");
ppHRegAMD64(i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
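+      /* Prints something like "if (%rflags.nz) { movq 0x10(%rbp), %r11 }";
+         the condition and operand text come from showAMD64CondCode and
+         ppAMD64AMode, so the exact form may differ. */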
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.CLoad.cond));
+      vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
+      ppAMD64AMode(i->Ain.CLoad.addr);
+      vex_printf(", ");
+ (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.CLoad.dst);
+ vex_printf(" }");
+ return;
case Ain_MovxLQ:
vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
+ addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
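+      /* dst is Modify, not Write: the load only happens if the condition
+         holds, so dst's old value must survive when it doesn't. */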
+ addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
+ return;
case Ain_MovxLQ:
addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
mapReg(m, &i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
+ mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
+ mapReg(m, &i->Ain.CLoad.dst);
+ return;
case Ain_MovxLQ:
mapReg(m, &i->Ain.MovxLQ.src);
mapReg(m, &i->Ain.MovxLQ.dst);
}
case Ain_Call: {
- if (i->Ain.Call.cond != Acc_ALWAYS
- && i->Ain.Call.rloc.pri != RLPri_None) {
- /* The call might not happen (it isn't unconditional) and it
- returns a result. In this case we will need to generate a
- control flow diamond to put 0x555..555 in the return
- register(s) in the case where the call doesn't happen. If
- this ever becomes necessary, maybe copy code from the ARM
- equivalent. Until that day, just give up. */
- goto bad;
- }
- /* As per detailed comment for Ain_Call in
- getRegUsage_AMD64Instr above, %r11 is used as an address
- temporary. */
- /* jump over the following two insns if the condition does not
- hold */
- Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
- if (i->Ain.Call.cond != Acc_ALWAYS) {
- *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
- *p++ = shortImm ? 10 : 13;
- /* 10 or 13 bytes in the next two insns */
- }
- if (shortImm) {
- /* 7 bytes: movl sign-extend(imm32), %r11 */
- *p++ = 0x49;
- *p++ = 0xC7;
- *p++ = 0xC3;
- p = emit32(p, (UInt)i->Ain.Call.target);
+ /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
+ above, %r11 is used as an address temporary. */
+ /* If we don't need to do any fixup actions in the case that the
+ call doesn't happen, just do the simple thing and emit
+ straight-line code. This is usually the case. */
+ if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
+ || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
+ /* jump over the following two insns if the condition does
+ not hold */
+ Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
+ if (i->Ain.Call.cond != Acc_ALWAYS) {
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+ *p++ = shortImm ? 10 : 13;
+ /* 10 or 13 bytes in the next two insns */
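+            /* (that is: 7-byte movl + 3-byte call, or 10-byte movabsq
+               + 3-byte call) */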
+ }
+ if (shortImm) {
+ /* 7 bytes: movl sign-extend(imm32), %r11 */
+ *p++ = 0x49;
+ *p++ = 0xC7;
+ *p++ = 0xC3;
+ p = emit32(p, (UInt)i->Ain.Call.target);
+ } else {
+ /* 10 bytes: movabsq $target, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, i->Ain.Call.target);
+ }
+ /* 3 bytes: call *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
} else {
- /* 10 bytes: movabsq $target, %r11 */
+ Int delta;
+ /* Complex case. We have to generate an if-then-else diamond. */
+ // before:
+ // j{!cond} else:
+ // movabsq $target, %r11
+ // call* %r11
+ // preElse:
+ // jmp after:
+ // else:
+ // movabsq $0x5555555555555555, %rax // possibly
+ // movq %rax, %rdx // possibly
+ // after:
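+         // ("possibly": which of the last two insns get emitted depends
+         //  on rloc.pri; see the switch over it below.)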
+
+ // before:
+ UChar* pBefore = p;
+
+ // j{!cond} else:
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ // movabsq $target, %r11
*p++ = 0x49;
*p++ = 0xBB;
p = emit64(p, i->Ain.Call.target);
+
+ // call* %r11
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
+
+ // preElse:
+ UChar* pPreElse = p;
+
+ // jmp after:
+ *p++ = 0xEB;
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ // else:
+ UChar* pElse = p;
+
+ /* Do the 'else' actions */
+ switch (i->Ain.Call.rloc.pri) {
+ case RLPri_Int:
+ // movabsq $0x5555555555555555, %rax
+ *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+ break;
+ case RLPri_2Int:
+ vassert(0); //ATC
+ // movabsq $0x5555555555555555, %rax
+ *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+ // movq %rax, %rdx
+               *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
+               break;
+ case RLPri_None: case RLPri_INVALID: default:
+ vassert(0);
+ }
+
+ // after:
+ UChar* pAfter = p;
+
+ // Fix up the branch offsets. The +2s in the offset
+ // calculations are there because x86 requires conditional
+ // branches to have their offset stated relative to the
+ // instruction immediately following the branch insn. And in
+ // both cases the branch insns are 2 bytes long.
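+         // In this diamond the first delta is always 15 (10-byte movabsq
+         // + 3-byte call + 2-byte jmp), and the second is 10 for the
+         // RLPri_Int case.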
+
+ // First, the "j{!cond} else:" at pBefore.
+ delta = (Int)(Long)(pElse - (pBefore + 2));
+ vassert(delta >= 0 && delta < 100/*arbitrary*/);
+ *(pBefore+1) = (UChar)delta;
+
+ // And secondly, the "jmp after:" at pPreElse.
+ delta = (Int)(Long)(pAfter - (pPreElse + 2));
+ vassert(delta >= 0 && delta < 100/*arbitrary*/);
+ *(pPreElse+1) = (UChar)delta;
}
- /* 3 bytes: call *%r11 */
- *p++ = 0x41;
- *p++ = 0xFF;
- *p++ = 0xD3;
goto done;
}
}
break;
+ case Ain_CLoad: {
+ vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
+
+ /* Only 32- or 64-bit variants are allowed. */
+ vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* jmp fwds if !condition */
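+      /* (xor-ing the condition code with 1 gives its logical complement,
+         since the Acc_ codes mirror the hardware encoding, in which
+         conditions come in even/odd complementary pairs) */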
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ /* Now the load. Either a normal 64 bit load or a normal 32 bit
+ load, which, by the default zero-extension rule, zeroes out
+ the upper half of the destination, as required. */
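+      /* clearWBit drops the REX.W bit, so the 0x8B opcode below becomes
+         the 32-bit form of the load. */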
+ rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+ *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+
+ /* Fix up the conditional branch */
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ goto done;
+ }
+
case Ain_MovxLQ:
/* No, _don't_ ask me why the sense of the args has to be
different in the S vs Z case. I don't know. */
Ain_XIndir, /* indirect transfer to GA */
Ain_XAssisted, /* assisted transfer to GA */
Ain_CMov64, /* conditional move */
+ Ain_CLoad, /* cond. load to int reg, 32 bit ZX or 64 bit only */
Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
Ain_Store, /* store 32/16/8 bit value in memory */
AMD64RM* src;
HReg dst;
} CMov64;
+ /* conditional load to int reg, 32 bit ZX or 64 bit only.
+ cond may not be Acc_ALWAYS. */
+ struct {
+ AMD64CondCode cond;
+ UChar szB; /* 4 or 8 only */
+ AMD64AMode* addr;
+ HReg dst;
+ } CLoad;
/* reg-reg move, sx-ing/zx-ing top half */
struct {
Bool syned;
extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
AMD64CondCode cond, IRJumpKind jk );
extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+ AMD64AMode* addr, HReg dst );
extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
AMD64AMode* src, HReg dst );