From: Julian Seward
Date: Thu, 20 Feb 2014 17:43:38 +0000 (+0000)
Subject: First pass at implementation of load/store exclusive and
X-Git-Tag: svn/VALGRIND_3_10_1^2~147
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dcf70155079dc47d3ebca65c711710db5eb32ea8;p=thirdparty%2Fvalgrind.git

First pass at implementation of load/store exclusive and
load/store exclusive w/ load-acquire/store-release:

   LD{,A}X{R,RH,RB}
   ST{,L}X{R,RH,RB}

git-svn-id: svn://svn.valgrind.org/vex/trunk@2823
---
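(For context, not part of the patch: the guest-side idiom these instructions implement is the
exclusive-access retry pair. A minimal sketch of one acquire attempt, assuming GCC inline asm
on an AArch64 target; try_lock and the register choices are illustrative only.)

   #include <stdio.h>

   /* One lock-acquire attempt via an exclusive pair.  Callers would
      retry (or block) when it returns 0. */
   static int try_lock(volatile unsigned* lock)
   {
      unsigned old, fail;
      __asm__ __volatile__(
         "ldaxr %w0, [%2]      \n\t"   /* load-acquire exclusive            */
         "stxr  %w1, %w3, [%2] \n\t"   /* store exclusive; %w1 = 0: success */
         : "=&r"(old), "=&r"(fail)
         : "r"(lock), "r"(1u)
         : "memory");
      return old == 0 && fail == 0;    /* acquired iff free and SC succeeded */
   }

   int main(void)
   {
      volatile unsigned lock = 0;
      printf("first  attempt: %d\n", try_lock(&lock));  /* expect 1 */
      printf("second attempt: %d\n", try_lock(&lock));  /* expect 0 */
      return 0;
   }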
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index e386ca1179..31ad6f1478 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -575,6 +575,31 @@ static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
    return res;
 }
 
+/* U-widen 8/16/32/64 bit int expr to 64. */
+static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
+{
+   switch (srcTy) {
+      case Ity_I64: return e;
+      case Ity_I32: return unop(Iop_32Uto64, e);
+      case Ity_I16: return unop(Iop_16Uto64, e);
+      case Ity_I8:  return unop(Iop_8Uto64, e);
+      default: vpanic("widenUto64(arm64)");
+   }
+}
+
+/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
+   of these combinations make sense. */
+static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
+{
+   switch (dstTy) {
+      case Ity_I64: return e;
+      case Ity_I32: return unop(Iop_64to32, e);
+      case Ity_I16: return unop(Iop_64to16, e);
+      case Ity_I8:  return unop(Iop_64to8, e);
+      default: vpanic("narrowFrom64(arm64)");
+   }
+}
+
 
 /*------------------------------------------------------------*/
 /*--- Helpers for accessing guest registers.                ---*/
@@ -3990,25 +4015,19 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
    /* ---------- LD1/ST1 (single structure, post index) ---------- */
    /* 31        23
-      0100 1100 1001 1111 0111 11 N T   ST1 {vT.2d},  [xN|SP], #16
-      0100 1100 1101 1111 0111 11 N T   LD1 {vT.2d},  [xN|SP], #16
-      0100 1100 1001 1111 0111 10 N T   ST1 {vT.4s},  [xN|SP], #16
-      0100 1100 1101 1111 0111 10 N T   LD1 {vT.4s},  [xN|SP], #16
-      0100 1100 1001 1111 0111 01 N T   ST1 {vT.8h},  [xN|SP], #16
-      0100 1100 1101 1111 0111 01 N T   LD1 {vT.8h},  [xN|SP], #16
-      ..
+      0100 1100 1001 1111 0111 11 N T   ST1 {vT.2d},  [xN|SP], #16
+      0100 1100 1101 1111 0111 11 N T   LD1 {vT.2d},  [xN|SP], #16
+      0100 1100 1001 1111 0111 10 N T   ST1 {vT.4s},  [xN|SP], #16
+      0100 1100 1101 1111 0111 10 N T   LD1 {vT.4s},  [xN|SP], #16
+      0100 1100 1001 1111 0111 01 N T   ST1 {vT.8h},  [xN|SP], #16
+      0100 1100 1101 1111 0111 01 N T   LD1 {vT.8h},  [xN|SP], #16
+      0100 1100 1001 1111 0111 00 N T   ST1 {vT.16b}, [xN|SP], #16
       0100 1100 1101 1111 0111 00 N T   LD1 {vT.16b}, [xN|SP], #16
       Note that #16 is implied and cannot be any other value.
       FIXME does this assume that the host is little endian?
    */
-   if ( (insn & 0xFFFFFC00) == 0x4C9F7C00 // ST1 {vT.2d},  [xN|SP], #16
-      || (insn & 0xFFFFFC00) == 0x4CDF7C00 // LD1 {vT.2d},  [xN|SP], #16
-      || (insn & 0xFFFFFC00) == 0x4C9F7800 // ST1 {vT.4s},  [xN|SP], #16
-      || (insn & 0xFFFFFC00) == 0x4CDF7800 // LD1 {vT.4s},  [xN|SP], #16
-      || (insn & 0xFFFFFC00) == 0x4C9F7400 // ST1 {vT.8h},  [xN|SP], #16
-      || (insn & 0xFFFFFC00) == 0x4CDF7400 // LD1 {vT.8h},  [xN|SP], #16
-      /* */
-      || (insn & 0xFFFFFC00) == 0x4CDF7000 // LD1 {vT.16b}, [xN|SP], #16
+   if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
+      || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
@@ -4051,56 +4070,61 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
      return True;
   }
 
-   /* FIXME Temporary hacks to get through ld.so FIXME */
-
-   /* -------------------- LD{A}XR -------------------- */
-   /* FIXME: this is a hack; needs real atomicity stuff. */
-   /* 31 29     20 19 9 4
-      1x(size) 001000010 1 1111 1 11111 n t   LDAXR Rt, [Xn|SP]
-      1x(size) 001000010 1 1111 0 11111 n t   LDXR  Rt, [Xn|SP]
+   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
+   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
+   /* 31 29     23  20      14    9 4
+      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
+      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
+      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
+      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
    */
-   if (INSN(29,20) == BITS10(0,0,1,0,0,0,0,1,0,1)
-       && (INSN(19,10) == BITS10(1,1,1,1,1,1,1,1,1,1)
-           || INSN(19,10) == BITS10(1,1,1,1,0,1,1,1,1,1))
-       && INSN(31,31) == 1) {
-      Bool is64 = INSN(30,30) == 1;
-      Bool isA  = INSN(15,15) == 1;
-      UInt nn   = INSN(9,5);
-      UInt tt   = INSN(4,0);
-      if (is64) {
-         putIReg64orZR(tt, loadLE(Ity_I64, getIReg64orSP(nn)));
-      } else {
-         putIReg32orZR(tt, loadLE(Ity_I32, getIReg64orSP(nn)));
-      }
-      DIP("ld%sxr %s, [%s]\n",
-          isA ? "s" : "", nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
-      return True;
-   }
-
-   /* -------------------- ST{L}XR -------------------- */
-   /* FIXME: this is a hack; needs real atomicity stuff. */
-   /* 31 29     20 15 14    9 4
-      1x(size) 001000000 s 0 11111 n t   STXR  Ws, Rt, [Xn|SP]
-      1x(size) 001000000 s 1 11111 n t   STLXR Ws, Rt, [Xn|SP]
-      with the result coding that Ws == 0 iff the store succeeded
-   */
-   if (INSN(29,21) == BITS9(0,0,1,0,0,0,0,0,0)
-       && INSN(14,10) == BITS5(1,1,1,1,1) && INSN(31,31) == 1) {
-      Bool is64 = INSN(30,30) == 1;
-      UInt ss   = INSN(20,16);
-      Bool isL  = INSN(15,15) == 1;
-      UInt nn   = INSN(9,5);
-      UInt tt   = INSN(4,0);
-      if (is64) {
-         storeLE(getIReg64orSP(nn), getIReg64orZR(tt));
-      } else {
-         storeLE(getIReg64orSP(nn), getIReg32orZR(tt));
-      }
-      putIReg32orZR(ss, mkU32(0));
-      DIP("st%sxr %s, %s, [%s]\n",
-          isL ? "s" : "",
-          nameIReg32orZR(ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
-      return True;
"s" : "", - nameIReg32orZR(ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn)); - return True; + if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) + && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) + && INSN(14,10) == BITS5(1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); + Bool isLD = INSN(22,22) == 1; + Bool isAcqOrRel = INSN(15,15) == 1; + UInt ss = INSN(20,16); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + + vassert(szBlg2 < 4); + UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ + IRType ty = integerIRTypeOfSize(szB); + const HChar* suffix[4] = { "rb", "rh", "r", "r" }; + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); + /* FIXME generate check that ea is szB-aligned */ + + if (isLD && ss == BITS5(1,1,1,1,1)) { + IRTemp res = newTemp(ty); + stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); + putIReg64orZR(tt, widenUto64(ty, mkexpr(res))); + if (isAcqOrRel) { + stmt(IRStmt_MBE(Imbe_Fence)); + } + DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + return True; + } + if (!isLD) { + if (isAcqOrRel) { + stmt(IRStmt_MBE(Imbe_Fence)); + } + IRTemp res = newTemp(Ity_I1); + IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt)); + stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); + /* IR semantics: res is 1 if store succeeds, 0 if it fails. + Need to set rS to 1 on failure, 0 on success. */ + putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), + mkU64(1))); + DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2], + nameIRegOrZR(False, ss), + nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn)); + return True; + } + /* else fall through */ } vex_printf("ARM64 front end: load_store\n"); @@ -4307,8 +4331,8 @@ Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn) return True; } /* Cases for FPSR - 0xD51B44 001 Rt MSR fpcr, rT - 0xD53B44 001 Rt MSR rT, fpcr + 0xD51B44 001 Rt MSR fpsr, rT + 0xD53B44 001 Rt MSR rT, fpsr */ if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/ || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) { diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 44773b3a70..1c347680bd 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -1442,24 +1442,28 @@ ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR, i->ARM64in.Mul.op = op; return i; } -//ZZ ARMInstr* ARMInstr_Mul ( ARMMulOp op ) { -//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); -//ZZ i->tag = ARMin_Mul; -//ZZ i->ARMin.Mul.op = op; -//ZZ return i; -//ZZ } -//ZZ ARMInstr* ARMInstr_LdrEX ( Int szB ) { -//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); -//ZZ i->tag = ARMin_LdrEX; -//ZZ i->ARMin.LdrEX.szB = szB; -//ZZ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); -//ZZ return i; -//ZZ } -//ZZ ARMInstr* ARMInstr_StrEX ( Int szB ) { -//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); -//ZZ i->tag = ARMin_StrEX; -//ZZ i->ARMin.StrEX.szB = szB; -//ZZ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); +ARM64Instr* ARM64Instr_LdrEX ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_LdrEX; + i->ARM64in.LdrEX.szB = szB; + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; +} +ARM64Instr* ARM64Instr_StrEX ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_StrEX; + i->ARM64in.StrEX.szB = szB; + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; +} +ARM64Instr* ARM64Instr_MFence ( void ) { + ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); + i->tag = ARM64in_MFence; + return 
i; +} +//ZZ ARM64Instr* ARM64Instr_CLREX( void ) { +//ZZ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr)); +//ZZ i->tag = ARM64in_CLREX; //ZZ return i; //ZZ } ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) { @@ -1674,16 +1678,6 @@ ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op, //ZZ i->ARMin.VCvtID.src = src; //ZZ return i; //ZZ } -//ZZ ARMInstr* ARMInstr_MFence ( void ) { -//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); -//ZZ i->tag = ARMin_MFence; -//ZZ return i; -//ZZ } -//ZZ ARMInstr* ARMInstr_CLREX( void ) { -//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); -//ZZ i->tag = ARMin_CLREX; -//ZZ return i; -//ZZ } //ZZ ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) { //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); //ZZ i->tag = ARMin_NLdStD; @@ -2061,28 +2055,37 @@ void ppARM64Instr ( ARM64Instr* i ) { vex_printf(", "); ppHRegARM64(i->ARM64in.Mul.argR); return; -//ZZ case ARMin_LdrEX: { -//ZZ const HChar* sz = ""; -//ZZ switch (i->ARMin.LdrEX.szB) { -//ZZ case 1: sz = "b"; break; case 2: sz = "h"; break; -//ZZ case 8: sz = "d"; break; case 4: break; -//ZZ default: vassert(0); -//ZZ } -//ZZ vex_printf("ldrex%s %sr2, [r4]", -//ZZ sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : ""); -//ZZ return; -//ZZ } -//ZZ case ARMin_StrEX: { -//ZZ const HChar* sz = ""; -//ZZ switch (i->ARMin.StrEX.szB) { -//ZZ case 1: sz = "b"; break; case 2: sz = "h"; break; -//ZZ case 8: sz = "d"; break; case 4: break; -//ZZ default: vassert(0); -//ZZ } -//ZZ vex_printf("strex%s r0, %sr2, [r4]", -//ZZ sz, i->ARMin.StrEX.szB == 8 ? "r3:" : ""); + + case ARM64in_LdrEX: { + const HChar* sz = " "; + switch (i->ARM64in.LdrEX.szB) { + case 1: sz = "b"; break; + case 2: sz = "h"; break; + case 4: case 8: break; + default: vassert(0); + } + vex_printf("ldxr%s %c2, [x4]", + sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w'); + return; + } + case ARM64in_StrEX: { + const HChar* sz = " "; + switch (i->ARM64in.StrEX.szB) { + case 1: sz = "b"; break; + case 2: sz = "h"; break; + case 4: case 8: break; + default: vassert(0); + } + vex_printf("stxr%s w0, %c2, [x4]", + sz, i->ARM64in.StrEX.szB == 8 ? 
@@ -2061,28 +2055,37 @@ void ppARM64Instr ( ARM64Instr* i ) {
          vex_printf(", ");
          ppHRegARM64(i->ARM64in.Mul.argR);
          return;
-//ZZ       case ARMin_LdrEX: {
-//ZZ          const HChar* sz = "";
-//ZZ          switch (i->ARMin.LdrEX.szB) {
-//ZZ             case 1: sz = "b"; break; case 2: sz = "h"; break;
-//ZZ             case 8: sz = "d"; break; case 4: break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          vex_printf("ldrex%s %sr2, [r4]",
-//ZZ                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
-//ZZ          return;
-//ZZ       }
-//ZZ       case ARMin_StrEX: {
-//ZZ          const HChar* sz = "";
-//ZZ          switch (i->ARMin.StrEX.szB) {
-//ZZ             case 1: sz = "b"; break; case 2: sz = "h"; break;
-//ZZ             case 8: sz = "d"; break; case 4: break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          vex_printf("strex%s r0, %sr2, [r4]",
-//ZZ                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
+
+      case ARM64in_LdrEX: {
+         const HChar* sz = " ";
+         switch (i->ARM64in.LdrEX.szB) {
+            case 1: sz = "b"; break;
+            case 2: sz = "h"; break;
+            case 4: case 8: break;
+            default: vassert(0);
+         }
+         vex_printf("ldxr%s %c2, [x4]",
+                    sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
+         return;
+      }
+      case ARM64in_StrEX: {
+         const HChar* sz = " ";
+         switch (i->ARM64in.StrEX.szB) {
+            case 1: sz = "b"; break;
+            case 2: sz = "h"; break;
+            case 4: case 8: break;
+            default: vassert(0);
+         }
+         vex_printf("stxr%s w0, %c2, [x4]",
+                    sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
+         return;
+      }
+      case ARM64in_MFence:
+         vex_printf("(mfence) dsb sy; dmb sy; isb");
+         return;
+//ZZ       case ARM64in_CLREX:
+//ZZ          vex_printf("clrex");
 //ZZ          return;
-//ZZ       }
       case ARM64in_VLdStS:
          if (i->ARM64in.VLdStS.isLoad) {
            vex_printf("ldr ");
@@ -2319,12 +2322,6 @@ void ppARM64Instr ( ARM64Instr* i ) {
 //ZZ          ppHRegARM(i->ARMin.VCvtID.src);
 //ZZ          return;
 //ZZ       }
-//ZZ       case ARMin_MFence:
-//ZZ          vex_printf("(mfence) dsb sy; dmb sy; isb");
-//ZZ          return;
-//ZZ       case ARMin_CLREX:
-//ZZ          vex_printf("clrex");
-//ZZ          return;
 //ZZ       case ARMin_NLdStD:
 //ZZ          if (i->ARMin.NLdStD.isLoad)
 //ZZ             vex_printf("vld1.32 {");
@@ -2641,18 +2638,18 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
       addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
       addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
       return;
-//ZZ       case ARMin_LdrEX:
-//ZZ          addHRegUse(u, HRmRead, hregARM_R4());
-//ZZ          addHRegUse(u, HRmWrite, hregARM_R2());
-//ZZ          if (i->ARMin.LdrEX.szB == 8)
-//ZZ             addHRegUse(u, HRmWrite, hregARM_R3());
-//ZZ          return;
-//ZZ       case ARMin_StrEX:
-//ZZ          addHRegUse(u, HRmRead, hregARM_R4());
-//ZZ          addHRegUse(u, HRmWrite, hregARM_R0());
-//ZZ          addHRegUse(u, HRmRead, hregARM_R2());
-//ZZ          if (i->ARMin.StrEX.szB == 8)
-//ZZ             addHRegUse(u, HRmRead, hregARM_R3());
+      case ARM64in_LdrEX:
+         addHRegUse(u, HRmRead, hregARM64_X4());
+         addHRegUse(u, HRmWrite, hregARM64_X2());
+         return;
+      case ARM64in_StrEX:
+         addHRegUse(u, HRmRead, hregARM64_X4());
+         addHRegUse(u, HRmWrite, hregARM64_X0());
+         addHRegUse(u, HRmRead, hregARM64_X2());
+         return;
+      case ARM64in_MFence:
+         return;
+//ZZ       case ARMin_CLREX:
 //ZZ          return;
       case ARM64in_VLdStS:
         addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
@@ -2781,10 +2778,6 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
 //ZZ          return;
-//ZZ       case ARMin_MFence:
-//ZZ          return;
-//ZZ       case ARMin_CLREX:
-//ZZ          return;
 //ZZ       case ARMin_NLdStD:
 //ZZ          if (i->ARMin.NLdStD.isLoad)
 //ZZ             addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
@@ -2954,11 +2947,13 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
       i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
       i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
       break;
-//ZZ       case ARMin_Mul:
-//ZZ          return;
-//ZZ       case ARMin_LdrEX:
-//ZZ          return;
-//ZZ       case ARMin_StrEX:
+      case ARM64in_LdrEX:
+         return;
+      case ARM64in_StrEX:
+         return;
+      case ARM64in_MFence:
+         return;
+//ZZ       case ARMin_CLREX:
 //ZZ          return;
       case ARM64in_VLdStS:
          i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
@@ -3058,10 +3053,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
 //ZZ          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
 //ZZ          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
 //ZZ          return;
-//ZZ       case ARMin_MFence:
-//ZZ          return;
-//ZZ       case ARMin_CLREX:
-//ZZ          return;
 //ZZ       case ARMin_NLdStD:
 //ZZ          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
 //ZZ          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
@@ -4564,36 +4555,48 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          }
          goto bad;
       }
-//ZZ       case ARMin_LdrEX: {
-//ZZ          /* E1D42F9F   ldrexb r2, [r4]
-//ZZ             E1F42F9F   ldrexh r2, [r4]
-//ZZ             E1942F9F   ldrex  r2, [r4]
-//ZZ             E1B42F9F   ldrexd r2, r3, [r4]
-//ZZ          */
-//ZZ          switch (i->ARMin.LdrEX.szB) {
-//ZZ             case 1: *p++ = 0xE1D42F9F; goto done;
-//ZZ             case 2: *p++ = 0xE1F42F9F; goto done;
-//ZZ             case 4: *p++ = 0xE1942F9F; goto done;
-//ZZ             case 8: *p++ = 0xE1B42F9F; goto done;
-//ZZ             default: break;
-//ZZ          }
-//ZZ          goto bad;
-//ZZ       }
-//ZZ       case ARMin_StrEX: {
-//ZZ          /* E1C40F92   strexb r0, r2, [r4]
-//ZZ             E1E40F92   strexh r0, r2, [r4]
-//ZZ             E1840F92   strex  r0, r2, [r4]
-//ZZ             E1A40F92   strexd r0, r2, r3, [r4]
-//ZZ          */
-//ZZ          switch (i->ARMin.StrEX.szB) {
-//ZZ             case 1: *p++ = 0xE1C40F92; goto done;
-//ZZ             case 2: *p++ = 0xE1E40F92; goto done;
-//ZZ             case 4: *p++ = 0xE1840F92; goto done;
-//ZZ             case 8: *p++ = 0xE1A40F92; goto done;
-//ZZ             default: break;
-//ZZ          }
-//ZZ          goto bad;
-//ZZ       }
+      case ARM64in_LdrEX: {
+         /* 085F7C82   ldxrb w2, [x4]
+            485F7C82   ldxrh w2, [x4]
+            885F7C82   ldxr  w2, [x4]
+            C85F7C82   ldxr  x2, [x4]
+         */
+         switch (i->ARM64in.LdrEX.szB) {
+            case 1: *p++ = 0x085F7C82; goto done;
+            case 2: *p++ = 0x485F7C82; goto done;
+            case 4: *p++ = 0x885F7C82; goto done;
+            case 8: *p++ = 0xC85F7C82; goto done;
+            default: break;
+         }
+         goto bad;
+      }
+      case ARM64in_StrEX: {
+         /* 08007C82   stxrb w0, w2, [x4]
+            48007C82   stxrh w0, w2, [x4]
+            88007C82   stxr  w0, w2, [x4]
+            C8007C82   stxr  w0, x2, [x4]
+         */
+         switch (i->ARM64in.StrEX.szB) {
+            case 1: *p++ = 0x08007C82; goto done;
+            case 2: *p++ = 0x48007C82; goto done;
+            case 4: *p++ = 0x88007C82; goto done;
+            case 8: *p++ = 0xC8007C82; goto done;
+            default: break;
+         }
+         goto bad;
+      }
+      case ARM64in_MFence: {
+         *p++ = 0xD5033F9F; /* DSB sy */
+         *p++ = 0xD5033FBF; /* DMB sy */
+         *p++ = 0xD5033FDF; /* ISB */
+         goto done;
+      }
+      //case ARM64in_CLREX: {
+      //   //ATC, but believed to be correct
+      //   goto bad;
+      //   *p++ = 0xD5033F5F; /* clrex */
+      //   goto done;
+      //}
       case ARM64in_VLdStS: {
          /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
             10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
@@ -5257,22 +5260,6 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
 //ZZ          /*UNREACHED*/
 //ZZ          vassert(0);
 //ZZ       }
-//ZZ       case ARMin_MFence: {
-//ZZ          // It's not clear (to me) how these relate to the ARMv7
-//ZZ          // versions, so let's just use the v7 versions as they
-//ZZ          // are at least well documented.
-//ZZ          //*p++ = 0xEE070F9A;   /* mcr 15,0,r0,c7,c10,4 (DSB) */
-//ZZ          //*p++ = 0xEE070FBA;   /* mcr 15,0,r0,c7,c10,5 (DMB) */
-//ZZ          //*p++ = 0xEE070F95;   /* mcr 15,0,r0,c7,c5,4 (ISB) */
-//ZZ          *p++ = 0xF57FF04F;   /* DSB sy */
-//ZZ          *p++ = 0xF57FF05F;   /* DMB sy */
-//ZZ          *p++ = 0xF57FF06F;   /* ISB */
-//ZZ          goto done;
-//ZZ       }
-//ZZ       case ARMin_CLREX: {
-//ZZ          *p++ = 0xF57FF01F;   /* clrex */
-//ZZ          goto done;
-//ZZ       }
 //ZZ       case ARMin_NLdStD: {
 //ZZ          UInt regD = dregNo(i->ARMin.NLdStD.dD);
 //ZZ          UInt regN, regM;
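(The hard-wired instruction words in the ARM64in_LdrEX/ARM64in_StrEX cases above can be
cross-checked against the ARMv8 exclusive-access layout, size(31:30) 001000(29:24) o2 L o1
Rs(20:16) o0 Rt2(14:10) Rn Rt. A quick self-contained check in plain C; the helper names
are illustrative, not VEX code.)

   #include <assert.h>
   #include <stdint.h>

   /* LDXR{B,H,,}: o2=0 L=1 o1=0 Rs=11111 o0=0 Rt2=11111. */
   static uint32_t ldxr(uint32_t szBlg2, uint32_t rn, uint32_t rt)
   {
      return (szBlg2 << 30) | (0x08u << 24) | (1u << 22)
             | (0x1Fu << 16) | (0x1Fu << 10) | (rn << 5) | rt;
   }

   /* STXR{B,H,,}: o2=0 L=0 o1=0 Rs=status register, o0=0 Rt2=11111. */
   static uint32_t stxr(uint32_t szBlg2, uint32_t rs, uint32_t rn, uint32_t rt)
   {
      return (szBlg2 << 30) | (0x08u << 24)
             | (rs << 16) | (0x1Fu << 10) | (rn << 5) | rt;
   }

   int main(void)
   {
      assert(ldxr(0, 4, 2) == 0x085F7C82u);    /* ldxrb w2, [x4]     */
      assert(ldxr(1, 4, 2) == 0x485F7C82u);    /* ldxrh w2, [x4]     */
      assert(ldxr(2, 4, 2) == 0x885F7C82u);    /* ldxr  w2, [x4]     */
      assert(ldxr(3, 4, 2) == 0xC85F7C82u);    /* ldxr  x2, [x4]     */
      assert(stxr(0, 0, 4, 2) == 0x08007C82u); /* stxrb w0, w2, [x4] */
      assert(stxr(3, 0, 4, 2) == 0xC8007C82u); /* stxr  w0, x2, [x4] */
      return 0;
   }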
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index c84117118b..c4e0e13651 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -525,8 +525,10 @@ typedef
       ARM64in_AddToSP,     /* move SP by small, signed constant */
       ARM64in_FromSP,      /* move SP to integer register */
       ARM64in_Mul,
-//ZZ       ARMin_LdrEX,
-//ZZ       ARMin_StrEX,
+      ARM64in_LdrEX,
+      ARM64in_StrEX,
+      ARM64in_MFence,
+//ZZ       ARMin_CLREX,
       /* ARM64in_V*: scalar ops involving vector registers */
       ARM64in_VLdStS,   /* 32-bit FP load/store, with imm offset */
       ARM64in_VLdStD,   /* 64-bit FP load/store, with imm offset */
@@ -552,8 +554,6 @@ typedef
 //ZZ       ARMin_VXferD,
 //ZZ       ARMin_VXferS,
 //ZZ       ARMin_VCvtID,
-//ZZ       ARMin_MFence,
-//ZZ       ARMin_CLREX,
 //ZZ       /* Neon */
 //ZZ       ARMin_NLdStD,
 //ZZ       ARMin_NUnary,
@@ -712,22 +712,23 @@ typedef
          HReg argR;
          ARM64MulOp op;
       } Mul;
-//ZZ       /* LDREX{,H,B} r2, [r4] and
-//ZZ          LDREXD r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
-//ZZ          Again, hardwired registers since this is not performance
-//ZZ          critical, and there are possibly constraints on the
-//ZZ          registers that we can't express in the register allocator.*/
-//ZZ       struct {
-//ZZ          Int  szB; /* 1, 2, 4 or 8 */
-//ZZ       } LdrEX;
-//ZZ       /* STREX{,H,B} r0, r2, [r4] and
-//ZZ          STREXD r0, r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
-//ZZ          r0 = SC( [r4] = r2 )       (8, 16, 32 bit transfers)
-//ZZ          r0 = SC( [r4] = r3:r2)     (64 bit transfers)
-//ZZ          Ditto comment re fixed registers. */
+      /* LDXR{,H,B} x2, [x4] */
+      struct {
+         Int  szB; /* 1, 2, 4 or 8 */
+      } LdrEX;
+      /* STXR{,H,B} w0, x2, [x4] */
+      struct {
+         Int  szB; /* 1, 2, 4 or 8 */
+      } StrEX;
+      /* Mem fence.  An insn which fences all loads and stores as
+         much as possible before continuing.  On ARM64 we emit the
+         sequence "dsb sy ; dmb sy ; isb sy", which is probably
+         total nuclear overkill, but better safe than sorry. */
+      struct {
+      } MFence;
+//ZZ       /* A CLREX instruction. */
 //ZZ       struct {
-//ZZ          Int  szB; /* 1, 2, 4 or 8 */
-//ZZ       } StrEX;
+//ZZ       } CLREX;
       /* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */
       /* 32-bit Fp load/store */
       struct {
@@ -882,20 +883,6 @@ typedef
 //ZZ          HReg dst;
 //ZZ          HReg src;
 //ZZ       } VCvtID;
-//ZZ       /* Mem fence.  An insn which fences all loads and stores as
-//ZZ          much as possible before continuing.  On ARM we emit the
-//ZZ          sequence
-//ZZ             mcr 15,0,r0,c7,c10,4 (DSB)
-//ZZ             mcr 15,0,r0,c7,c10,5 (DMB)
-//ZZ             mcr 15,0,r0,c7,c5,4 (ISB)
-//ZZ          which is probably total overkill, but better safe than
-//ZZ          sorry.
-//ZZ       */
-//ZZ       struct {
-//ZZ       } MFence;
-//ZZ       /* A CLREX instruction. */
-//ZZ       struct {
-//ZZ       } CLREX;
 //ZZ       /* Neon data processing instruction:  3 registers of the same
 //ZZ          length */
 //ZZ       struct {
@@ -1020,10 +1007,10 @@ extern ARM64Instr* ARM64Instr_AddToSP ( Int simm );
 extern ARM64Instr* ARM64Instr_FromSP  ( HReg dst );
 extern ARM64Instr* ARM64Instr_Mul     ( HReg dst, HReg argL, HReg argR,
                                         ARM64MulOp op );
-
-//ZZ extern ARMInstr* ARMInstr_Mul      ( ARMMulOp op );
-//ZZ extern ARMInstr* ARMInstr_LdrEX    ( Int szB );
-//ZZ extern ARMInstr* ARMInstr_StrEX    ( Int szB );
+extern ARM64Instr* ARM64Instr_LdrEX   ( Int szB );
+extern ARM64Instr* ARM64Instr_StrEX   ( Int szB );
+extern ARM64Instr* ARM64Instr_MFence  ( void );
+//ZZ extern ARMInstr* ARMInstr_CLREX    ( void );
 extern ARM64Instr* ARM64Instr_VLdStS  ( Bool isLoad, HReg sD, HReg rN,
                                         UInt uimm12 /* 0 .. 16380, 0 % 4 */ );
 extern ARM64Instr* ARM64Instr_VLdStD  ( Bool isLoad, HReg dD, HReg rN,
@@ -1052,8 +1039,6 @@ extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
 //ZZ extern ARMInstr* ARMInstr_VXferS   ( Bool toS, HReg fD, HReg rLo );
 //ZZ extern ARMInstr* ARMInstr_VCvtID   ( Bool iToD, Bool syned,
 //ZZ                                      HReg dst, HReg src );
-//ZZ extern ARMInstr* ARMInstr_MFence   ( void );
-//ZZ extern ARMInstr* ARMInstr_CLREX    ( void );
 //ZZ extern ARMInstr* ARMInstr_NLdStD   ( Bool isLoad, HReg, ARMAModeN* );
 //ZZ extern ARMInstr* ARMInstr_NUnary   ( ARMNeonUnOp, HReg, HReg, UInt, Bool );
 //ZZ extern ARMInstr* ARMInstr_NUnaryS  ( ARMNeonUnOpS, ARMNRS*, ARMNRS*,
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 0c8d2315b5..a17ab75ba3 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -2157,17 +2157,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 //ZZ          }
 //ZZ          return rLo;
 //ZZ       }
-//ZZ
-//ZZ       case Iop_1Uto32:
-//ZZ          /* 1Uto32(tmp).  Since I1 values generated into registers
-//ZZ             are guaranteed to have value either only zero or one,
-//ZZ             we can simply return the value of the register in this
-//ZZ             case. */
-//ZZ          if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
-//ZZ             HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
-//ZZ             return dst;
-//ZZ          }
-//ZZ          /* else fall through */
+
+         case Iop_1Uto64:
+            /* 1Uto64(tmp). */
+            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
+               ARM64RIL* one = mb_mkARM64RIL_I(1);
+               HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
+               HReg dst = newVRegI(env);
+               vassert(one);
+               addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
+               return dst;
+            }
+            /* else fall through */
+            break; // RM when 1Uto8 is implemented
 //ZZ       case Iop_1Uto8: {
 //ZZ          HReg dst  = newVRegI(env);
 //ZZ          ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
@@ -6611,109 +6613,83 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
       break;
    }
 
-//ZZ    /* --------- Load Linked and Store Conditional --------- */
-//ZZ    case Ist_LLSC: {
-//ZZ       if (stmt->Ist.LLSC.storedata == NULL) {
-//ZZ          /* LL */
-//ZZ          IRTemp res = stmt->Ist.LLSC.result;
-//ZZ          IRType ty  = typeOfIRTemp(env->type_env, res);
-//ZZ          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
-//ZZ             Int  szB = 0;
-//ZZ             HReg r_dst = lookupIRTemp(env, res);
-//ZZ             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-//ZZ             switch (ty) {
-//ZZ                case Ity_I8:  szB = 1; break;
-//ZZ                case Ity_I16: szB = 2; break;
-//ZZ                case Ity_I32: szB = 4; break;
-//ZZ                default:      vassert(0);
-//ZZ             }
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
-//ZZ             addInstr(env, ARMInstr_LdrEX(szB));
-//ZZ             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
-//ZZ             return;
-//ZZ          }
-//ZZ          if (ty == Ity_I64) {
-//ZZ             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
-//ZZ             addInstr(env, ARMInstr_LdrEX(8));
-//ZZ             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
-//ZZ                move it into a result register pair.  On a NEON capable
-//ZZ                CPU, the result register will be a 64 bit NEON
-//ZZ                register, so we must move it there instead. */
-//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
-//ZZ                HReg dst = lookupIRTemp(env, res);
-//ZZ                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
-//ZZ                                              hregARM_R2()));
-//ZZ             } else {
-//ZZ                HReg r_dst_hi, r_dst_lo;
-//ZZ                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
-//ZZ                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
-//ZZ                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
-//ZZ             }
-//ZZ             return;
-//ZZ          }
-//ZZ          /*NOTREACHED*/
-//ZZ          vassert(0);
-//ZZ       } else {
-//ZZ          /* SC */
-//ZZ          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
-//ZZ          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
-//ZZ             Int  szB = 0;
-//ZZ             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
-//ZZ             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-//ZZ             switch (tyd) {
-//ZZ                case Ity_I8:  szB = 1; break;
-//ZZ                case Ity_I16: szB = 2; break;
-//ZZ                case Ity_I32: szB = 4; break;
-//ZZ                default:      vassert(0);
-//ZZ             }
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
-//ZZ             addInstr(env, ARMInstr_StrEX(szB));
-//ZZ          } else {
-//ZZ             vassert(tyd == Ity_I64);
-//ZZ             /* This is really ugly.  There is no is/is-not NEON
-//ZZ                decision akin to the case for LL, because iselInt64Expr
-//ZZ                fudges this for us, and always gets the result into two
-//ZZ                GPRs even if this means moving it from a NEON
-//ZZ                register. */
-//ZZ             HReg rDhi, rDlo;
-//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
-//ZZ             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
-//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
-//ZZ             addInstr(env, ARMInstr_StrEX(8));
-//ZZ          }
-//ZZ          /* now r0 is 1 if failed, 0 if success.  Change to IR
-//ZZ             conventions (0 is fail, 1 is success).  Also transfer
-//ZZ             result to r_res. */
-//ZZ          IRTemp   res   = stmt->Ist.LLSC.result;
-//ZZ          IRType   ty    = typeOfIRTemp(env->type_env, res);
-//ZZ          HReg     r_res = lookupIRTemp(env, res);
-//ZZ          ARMRI84* one   = ARMRI84_I84(1,0);
-//ZZ          vassert(ty == Ity_I1);
-//ZZ          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
-//ZZ          /* And be conservative -- mask off all but the lowest bit */
-//ZZ          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
-//ZZ          return;
-//ZZ       }
-//ZZ       break;
-//ZZ    }
-//ZZ
-//ZZ    /* --------- MEM FENCE --------- */
-//ZZ    case Ist_MBE:
-//ZZ       switch (stmt->Ist.MBE.event) {
-//ZZ          case Imbe_Fence:
-//ZZ             addInstr(env, ARMInstr_MFence());
-//ZZ             return;
+   /* --------- Load Linked and Store Conditional --------- */
+   case Ist_LLSC: {
+      if (stmt->Ist.LLSC.storedata == NULL) {
+         /* LL */
+         IRTemp res = stmt->Ist.LLSC.result;
+         IRType ty  = typeOfIRTemp(env->type_env, res);
+         if (ty == Ity_I64 || ty == Ity_I32
+             || ty == Ity_I16 || ty == Ity_I8) {
+            Int  szB = 0;
+            HReg r_dst = lookupIRTemp(env, res);
+            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            switch (ty) {
+               case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
+               case Ity_I32: szB = 4; break;
+               case Ity_I64: szB = 8; break;
+               default:      vassert(0);
+            }
+            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
+            addInstr(env, ARM64Instr_LdrEX(szB));
+            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
+            return;
+         }
+         goto stmt_fail;
+      } else {
+         /* SC */
+         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
+         if (tyd == Ity_I64 || tyd == Ity_I32
+             || tyd == Ity_I16 || tyd == Ity_I8) {
+            Int  szB = 0;
+            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            switch (tyd) {
+               case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
+               case Ity_I32: szB = 4; break;
+               case Ity_I64: szB = 8; break;
+               default:      vassert(0);
+            }
+            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
+            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
+            addInstr(env, ARM64Instr_StrEX(szB));
+         } else {
+            goto stmt_fail;
+         }
+         /* now w0 is 1 if failed, 0 if success.  Change to IR
+            conventions (0 is fail, 1 is success).  Also transfer
+            result to r_res. */
+         IRTemp    res   = stmt->Ist.LLSC.result;
+         IRType    ty    = typeOfIRTemp(env->type_env, res);
+         HReg      r_res = lookupIRTemp(env, res);
+         ARM64RIL* one   = mb_mkARM64RIL_I(1);
+         vassert(ty == Ity_I1);
+         vassert(one);
+         addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
+                                        ARM64lo_XOR));
+         /* And be conservative -- mask off all but the lowest bit. */
+         addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
+                                        ARM64lo_AND));
+         return;
+      }
+      break;
+   }
+
+   /* --------- MEM FENCE --------- */
+   case Ist_MBE:
+      switch (stmt->Ist.MBE.event) {
+         case Imbe_Fence:
+            addInstr(env, ARM64Instr_MFence());
+            return;
 //ZZ          case Imbe_CancelReservation:
 //ZZ             addInstr(env, ARMInstr_CLREX());
 //ZZ             return;
-//ZZ          default:
-//ZZ             break;
-//ZZ       }
-//ZZ       break;
+         default:
+            break;
+      }
+      break;
 
       /* --------- INSTR MARK --------- */
       /* Doesn't generate any executable code ... */
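(On the strength of ARM64in_MFence: the dsb sy / dmb sy / isb triple is deliberately much
stronger than Imbe_Fence strictly requires; for comparison, a C11 sequentially-consistent
fence on AArch64 typically compiles to a single dmb ish. A minimal illustration, assuming
a C11 toolchain; not VEX code.)

   #include <stdatomic.h>

   int main(void)
   {
      /* Typically a single "dmb ish" on AArch64; the backend's
         dsb sy / dmb sy / isb sequence subsumes it. */
      atomic_thread_fence(memory_order_seq_cst);
      return 0;
   }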
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 87b0b63cee..f37a161d11 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -4483,6 +4483,17 @@ Int sizeofIRType ( IRType ty )
    }
 }
 
+IRType integerIRTypeOfSize ( Int szB )
+{
+   switch (szB) {
+      case 8: return Ity_I64;
+      case 4: return Ity_I32;
+      case 2: return Ity_I16;
+      case 1: return Ity_I8;
+      default: vpanic("integerIRTypeOfSize");
+   }
+}
+
 IRExpr* mkIRExpr_HWord ( HWord hw )
 {
    vassert(sizeof(void*) == sizeof(HWord));
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 39d45e4f72..7acd298128 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -242,6 +242,10 @@ extern void ppIRType ( IRType );
 /* Get the size (in bytes) of an IRType */ 
 extern Int sizeofIRType ( IRType );
 
+/* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively.  Asserts on
+   any other input. */
+extern IRType integerIRTypeOfSize ( Int szB );
+
 
 /* ------------------ Endianness ------------------ */
 
@@ -2862,12 +2866,12 @@ extern Bool isPlausibleIRType ( IRType ty );
 /*---------------------------------------------------------------*/
 /*--- IR injection                                            ---*/
 /*---------------------------------------------------------------*/
+
 void vex_inject_ir(IRSB *, IREndness);
 
 #endif /* ndef __LIBVEX_IR_H */
 
-
 /*---------------------------------------------------------------*/
 /*--- libvex_ir.h                                             ---*/
 /*---------------------------------------------------------------*/