From: Julian Seward
Date: Mon, 11 Jul 2011 11:43:38 +0000 (+0000)
Subject: Complete the implementation of ARM atomic ops: {LD,ST}REX{,B,H,D} in
X-Git-Tag: svn/VALGRIND_3_7_0^2~52
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=775a45280c4366d5571e5bbd72da5dda9877050b;p=thirdparty%2Fvalgrind.git

Complete the implementation of ARM atomic ops: {LD,ST}REX{,B,H,D} in
both ARM and Thumb encodings, for NEON and non-NEON capable backends.
Bug 266035 comments 4, 43, 51.  Derived from patches by Jeff Brown,
Igor Saenko and Dr. David Alan Gilbert.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2172
---

diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
index bc05bc8c01..5fdd1dd679 100644
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -13370,52 +13370,107 @@ DisResult disInstr_ARM_WRK (
    /* -- ARMv6 instructions                                    -- */
    /* ----------------------------------------------------------- */
 
-   /* --------------------- ldrex, strex --------------------- */
-
-   // LDREX
-   if (0x01900F9F == (insn & 0x0FF00FFF)) {
-      UInt rT = INSN(15,12);
-      UInt rN = INSN(19,16);
-      if (rT == 15 || rN == 15) {
-         /* undecodable; fall through */
+   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
+
+   // LDREXD, LDREX, LDREXH, LDREXB
+   if (0x01900F9F == (insn & 0x0F900FFF)) {
+      UInt   rT    = INSN(15,12);
+      UInt   rN    = INSN(19,16);
+      IRType ty    = Ity_INVALID;
+      IROp   widen = Iop_INVALID;
+      HChar* nm    = NULL;
+      Bool   valid = True;
+      switch (INSN(22,21)) {
+         case 0: nm = "";  ty = Ity_I32; break;
+         case 1: nm = "d"; ty = Ity_I64; break;
+         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
+         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
+         default: vassert(0);
+      }
+      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+         if (rT == 15 || rN == 15)
+            valid = False;
       } else {
+         vassert(ty == Ity_I64);
+         if ((rT & 1) == 1 || rT == 14 || rN == 15)
+            valid = False;
+      }
+      if (valid) {
          IRTemp res;
          /* make unconditional */
          if (condT != IRTemp_INVALID) {
-            mk_skip_over_A32_if_cond_is_false( condT );
-            condT = IRTemp_INVALID;
+           mk_skip_over_A32_if_cond_is_false( condT );
+           condT = IRTemp_INVALID;
          }
          /* Ok, now we're unconditional.  Do the load. */
-         res = newTemp(Ity_I32);
+         res = newTemp(ty);
+         // FIXME: assumes little-endian guest
         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
                           NULL/*this is a load*/) );
-         putIRegA(rT, mkexpr(res), IRTemp_INVALID, Ijk_Boring);
-         DIP("ldrex%s r%u, [r%u]\n", nCC(INSN_COND), rT, rN);
+         if (ty == Ity_I64) {
+            // FIXME: assumes little-endian guest
+            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
+                           IRTemp_INVALID, Ijk_Boring);
+            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
+                           IRTemp_INVALID, Ijk_Boring);
+            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rT+0, rT+1, rN);
+         } else {
+            putIRegA(rT, widen == Iop_INVALID
+                            ? mkexpr(res) : unop(widen, mkexpr(res)),
+                         IRTemp_INVALID, Ijk_Boring);
+            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
+         }
          goto decode_success;
       }
-      /* fall through */
+      /* undecodable; fall through */
    }
 
-   // STREX
-   if (0x01800F90 == (insn & 0x0FF00FF0)) {
-      UInt rT = INSN(3,0);
-      UInt rN = INSN(19,16);
-      UInt rD = INSN(15,12);
-      if (rT == 15 || rN == 15 || rD == 15
-          || rD == rT || rD == rN) {
-         /* undecodable; fall through */
+   // STREXD, STREX, STREXH, STREXB
+   if (0x01800F90 == (insn & 0x0F900FF0)) {
+      UInt   rT     = INSN(3,0);
+      UInt   rN     = INSN(19,16);
+      UInt   rD     = INSN(15,12);
+      IRType ty     = Ity_INVALID;
+      IROp   narrow = Iop_INVALID;
+      HChar* nm     = NULL;
+      Bool   valid  = True;
+      switch (INSN(22,21)) {
+         case 0: nm = "";  ty = Ity_I32; break;
+         case 1: nm = "d"; ty = Ity_I64; break;
+         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
+         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
+         default: vassert(0);
+      }
+      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+         if (rD == 15 || rN == 15 || rT == 15
+             || rD == rN || rD == rT)
+            valid = False;
       } else {
-         IRTemp resSC1, resSC32;
-
+         vassert(ty == Ity_I64);
+         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
+             || rD == rN || rD == rT || rD == rT+1)
+            valid = False;
+      }
+      if (valid) {
+         IRTemp resSC1, resSC32, data;
          /* make unconditional */
         if (condT != IRTemp_INVALID) {
            mk_skip_over_A32_if_cond_is_false( condT );
            condT = IRTemp_INVALID;
         }
-         /* Ok, now we're unconditional.  Do the store. */
+         data = newTemp(ty);
+         assign(data,
+                ty == Ity_I64
+                   // FIXME: assumes little-endian guest
+                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
+                   : narrow == Iop_INVALID
+                      ? getIRegA(rT)
+                      : unop(narrow, getIRegA(rT)));
          resSC1 = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), getIRegA(rT)) );
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
 
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
@@ -13425,7 +13480,13 @@ DisResult disInstr_ARM_WRK (
         putIRegA(rD, mkexpr(resSC32),
                      IRTemp_INVALID, Ijk_Boring);
 
-         DIP("strex%s r%u, r%u, [r%u]\n", nCC(INSN_COND), rD, rT, rN);
+         if (ty == Ity_I64) {
+            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
+         } else {
+            DIP("strex%s%s r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rD, rT, rN);
+         }
         goto decode_success;
      }
      /* fall through */
@@ -17771,6 +17832,49 @@ DisResult disInstr_THUMB_WRK (
       }
    }
 
+   /* --------------- (T1) LDREX{B,H} --------------- */
+   if (INSN0(15,4) == 0xE8D
+       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      Bool isH = INSN1(11,0) == 0xF5F;
+      if (!isBadRegT(rT) && rN != 15) {
+         IRTemp res;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         res = newTemp(isH ? Ity_I16 : Ity_I8);
+         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
+                           NULL/*this is a load*/ ));
+         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
+                      IRTemp_INVALID);
+         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
+         goto decode_success;
+      }
+   }
+
+   /* --------------- (T1) LDREXD --------------- */
+   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rT2 = INSN1(11,8);
+      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
+         IRTemp res;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         res = newTemp(Ity_I64);
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
+                           NULL/*this is a load*/ ));
+         // FIXME: assumes little-endian guest
+         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
+         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
+         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
+         goto decode_success;
+      }
+   }
+
    /* ----------------- (T1) STREX ----------------- */
    if (INSN0(15,4) == 0xE84) {
       UInt rN   = INSN0(3,0);
@@ -17780,30 +17884,84 @@ DisResult disInstr_THUMB_WRK (
       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
           && rD != rN && rD != rT) {
          IRTemp resSC1, resSC32;
-
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
-
         /* Ok, now we're unconditional.  Do the store. */
         resSC1 = newTemp(Ity_I1);
         stmt( IRStmt_LLSC(Iend_LE,
                           resSC1,
                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                           getIRegT(rT)) );
-
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
         resSC32 = newTemp(Ity_I32);
         assign(resSC32,
                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
-
         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
         goto decode_success;
      }
   }
 
+   /* --------------- (T1) STREX{B,H} --------------- */
+   if (INSN0(15,4) == 0xE8C
+       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rD  = INSN1(3,0);
+      Bool isH = INSN1(11,4) == 0xF5;
+      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
+          && rD != rN && rD != rT) {
+         IRTemp resSC1, resSC32;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         /* Ok, now we're unconditional.  Do the store. */
+         resSC1 = newTemp(Ity_I1);
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
+                           unop(isH ? Iop_32to16 : Iop_32to8,
+                                getIRegT(rT))) );
+         /* Set rD to 1 on failure, 0 on success.  Currently we have
+            resSC1 == 0 on failure, 1 on success. */
+         resSC32 = newTemp(Ity_I32);
+         assign(resSC32,
+                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
+         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
+         goto decode_success;
+      }
+   }
+
+   /* ---------------- (T1) STREXD ---------------- */
+   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rT2 = INSN1(11,8);
+      UInt rD  = INSN1(3,0);
+      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
+          && rN != 15 && rD != rN && rD != rT && rD != rT) {
+         IRTemp resSC1, resSC32, data;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         /* Ok, now we're unconditional.  Do the store. */
+         resSC1 = newTemp(Ity_I1);
+         data = newTemp(Ity_I64);
+         // FIXME: assumes little-endian guest
+         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
+         /* Set rD to 1 on failure, 0 on success.  Currently we have
+            resSC1 == 0 on failure, 1 on success. */
+         resSC32 = newTemp(Ity_I32);
+         assign(resSC32,
+                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
+         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
+         goto decode_success;
+      }
+   }
 
    /* -------------- v7 barrier insns -------------- */
    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
       /* XXX this isn't really right, is it?  The generated IR does
diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c
index fd3719756b..fb8df5c38c 100644
--- a/VEX/priv/host_arm_defs.c
+++ b/VEX/priv/host_arm_defs.c
@@ -1206,14 +1206,14 @@ ARMInstr* ARMInstr_LdrEX ( Int szB ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag                 = ARMin_LdrEX;
    i->ARMin.LdrEX.szB     = szB;
-   vassert(szB == 4 || szB == 1);
+   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
 ARMInstr* ARMInstr_StrEX ( Int szB ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag                 = ARMin_StrEX;
    i->ARMin.StrEX.szB     = szB;
-   vassert(szB == 4 || szB == 1);
+   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
@@ -1603,16 +1603,28 @@ void ppARMInstr ( ARMInstr* i ) {
            vex_printf("r1:r0, r2, r3");
         }
         return;
-      case ARMin_LdrEX:
-         vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
-                                : i->ARMin.LdrEX.szB == 2 ? "h" : "");
-         vex_printf("r0, [r1]");
+      case ARMin_LdrEX: {
+         HChar* sz = "";
+         switch (i->ARMin.LdrEX.szB) {
+            case 1: sz = "b"; break; case 2: sz = "h"; break;
+            case 8: sz = "d"; break; case 4: break;
+            default: vassert(0);
+         }
+         vex_printf("ldrex%s %sr2, [r4]",
+                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
         return;
-      case ARMin_StrEX:
-         vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
-                                : i->ARMin.StrEX.szB == 2 ? "h" : "");
-         vex_printf("r0, r1, [r2]");
+      }
+      case ARMin_StrEX: {
+         HChar* sz = "";
+         switch (i->ARMin.StrEX.szB) {
+            case 1: sz = "b"; break; case 2: sz = "h"; break;
+            case 8: sz = "d"; break; case 4: break;
+            default: vassert(0);
+         }
+         vex_printf("strex%s r0, %sr2, [r4]",
+                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
         return;
+      }
      case ARMin_VLdStD:
         if (i->ARMin.VLdStD.isLoad) {
            vex_printf("fldd ");
@@ -1989,13 +2001,17 @@ void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
         addHRegUse(u, HRmWrite, hregARM_R1());
         return;
      case ARMin_LdrEX:
-         addHRegUse(u, HRmWrite, hregARM_R0());
-         addHRegUse(u, HRmRead, hregARM_R1());
+         addHRegUse(u, HRmRead, hregARM_R4());
+         addHRegUse(u, HRmWrite, hregARM_R2());
+         if (i->ARMin.LdrEX.szB == 8)
+            addHRegUse(u, HRmWrite, hregARM_R3());
         return;
      case ARMin_StrEX:
+         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
-         addHRegUse(u, HRmRead, hregARM_R1());
         addHRegUse(u, HRmRead, hregARM_R2());
+         if (i->ARMin.StrEX.szB == 8)
+            addHRegUse(u, HRmRead, hregARM_R3());
         return;
      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
@@ -2959,27 +2975,31 @@ Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
         goto bad;
      }
      case ARMin_LdrEX: {
-         /* E1910F9F   ldrex    r0, [r1]
-            E1F10F9F   ldrexh   r0, [r1]
-            E1D10F9F   ldrexb   r0, [r1]
+         /* E1D42F9F   ldrexb   r2, [r4]
+            E1F42F9F   ldrexh   r2, [r4]
+            E1942F9F   ldrex    r2, [r4]
+            E1B42F9F   ldrexd   r2, r3, [r4]
         */
         switch (i->ARMin.LdrEX.szB) {
-            case 4: *p++ = 0xE1910F9F; goto done;
-            //case 2: *p++ = 0xE1F10F9F; goto done;
-            case 1: *p++ = 0xE1D10F9F; goto done;
+            case 1: *p++ = 0xE1D42F9F; goto done;
+            case 2: *p++ = 0xE1F42F9F; goto done;
+            case 4: *p++ = 0xE1942F9F; goto done;
+            case 8: *p++ = 0xE1B42F9F; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_StrEX: {
-         /* E1820F91   strex    r0, r1, [r2]
-            E1E20F91   strexh   r0, r1, [r2]
-            E1C20F91   strexb   r0, r1, [r2]
+         /* E1C40F92   strexb   r0, r2, [r4]
+            E1E40F92   strexh   r0, r2, [r4]
+            E1840F92   strex    r0, r2, [r4]
+            E1A40F92   strexd   r0, r2, r3, [r4]
         */
         switch (i->ARMin.StrEX.szB) {
-            case 4: *p++ = 0xE1820F91; goto done;
-            //case 2: *p++ = 0xE1E20F91; goto done;
-            case 1: *p++ = 0xE1C20F91; goto done;
+            case 1: *p++ = 0xE1C40F92; goto done;
+            case 2: *p++ = 0xE1E40F92; goto done;
+            case 4: *p++ = 0xE1840F92; goto done;
+            case 8: *p++ = 0xE1A40F92; goto done;
            default: break;
         }
         goto bad;
diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h
index b96ec3ab23..92bdbe06f9 100644
--- a/VEX/priv/host_arm_defs.h
+++ b/VEX/priv/host_arm_defs.h
@@ -709,18 +709,21 @@ typedef
         struct {
            ARMMulOp op;
         } Mul;
-         /* LDREX{,H,B} r0, [r1]
+         /* LDREX{,H,B} r2, [r4]  and
+            LDREXD r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
            Again, hardwired registers since this is not performance critical,
            and there are possibly constraints on the registers that
            we can't express in the register allocator.*/
         struct {
-            Int  szB; /* currently only 4 is allowed */
+            Int  szB; /* 1, 2, 4 or 8 */
         } LdrEX;
-         /* STREX{,H,B} r0, r1, [r2]
-            r0 = SC( [r2] = r1 )
+         /* STREX{,H,B} r0, r2, [r4]  and
+            STREXD r0, r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
+            r0 = SC( [r4] = r2 )      (8, 16, 32 bit transfers)
+            r0 = SC( [r4] = r3:r2)    (64 bit transfers)
            Ditto comment re fixed registers. */
         struct {
-            Int  szB; /* currently only 4 is allowed */
+            Int  szB; /* 1, 2, 4 or 8 */
         } StrEX;
         /* VFP INSTRUCTIONS */
         /* 64-bit Fp load/store */
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c
index d4d9c86ff5..35c4c65b6b 100644
--- a/VEX/priv/host_arm_isel.c
+++ b/VEX/priv/host_arm_isel.c
@@ -211,8 +211,8 @@ static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
 
-static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
-static ARMAModeN* iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
+static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
 
 static ARMRI84*    iselIntExpr_RI84_wrk
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
@@ -5820,50 +5820,86 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
         /* LL */
         IRTemp res = stmt->Ist.LLSC.result;
         IRType ty  = typeOfIRTemp(env->type_env, res);
-         if (ty == Ity_I32 || ty == Ity_I8) {
+         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            Int  szB   = 0;
            HReg r_dst = lookupIRTemp(env, res);
            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (ty) {
               case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               default:      vassert(0);
            }
-            addInstr(env, mk_iMOVds_RR(hregARM_R1(), raddr));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
            addInstr(env, ARMInstr_LdrEX(szB));
-            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R0()));
+            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
            return;
         }
-         /* else fall thru; is unhandled */
+         if (ty == Ity_I64) {
+            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
+            addInstr(env, ARMInstr_LdrEX(8));
+            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
+               move it into a result register pair.  On a NEON capable
+               CPU, the result register will be a 64 bit NEON
+               register, so we must move it there instead. */
+            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+               HReg dst = lookupIRTemp(env, res);
+               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
+                                                        hregARM_R2()));
+            } else {
+               HReg r_dst_hi, r_dst_lo;
+               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
+               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
+               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
+            }
+            return;
+         }
+         /*NOTREACHED*/
+         vassert(0);
      } else {
         /* SC */
-         IRTemp res = stmt->Ist.LLSC.result;
-         IRType ty  = typeOfIRTemp(env->type_env, res);
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
-         vassert(ty == Ity_I1);
-         if (tyd == Ity_I32 || tyd == Ity_I8) {
-            Int  szB = 0;
-            HReg r_res = lookupIRTemp(env, res);
-            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
-            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-            ARMRI84* one = ARMRI84_I84(1,0);
+         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+            Int  szB = 0;
+            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (tyd) {
               case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               default:      vassert(0);
            }
-            addInstr(env, mk_iMOVds_RR(hregARM_R1(), rD));
-            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rA));
+            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
            addInstr(env, ARMInstr_StrEX(szB));
-            /* now r0 is 1 if failed, 0 if success.  Change to IR
-               conventions (0 is fail, 1 is success).  Also transfer
-               result to r_res. */
-            addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
-            /* And be conservative -- mask off all but the lowest bit */
-            addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
-            return;
-         }
-         /* else fall thru; is unhandled */
+         } else {
+            vassert(tyd == Ity_I64);
+            /* This is really ugly.  There is no is/is-not NEON
+               decision akin to the case for LL, because iselInt64Expr
+               fudges this for us, and always gets the result into two
+               GPRs even if this means moving it from a NEON
+               register. */
+            HReg rDhi, rDlo;
+            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
+            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
+            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
+            addInstr(env, ARMInstr_StrEX(8));
+         }
+         /* now r0 is 1 if failed, 0 if success.  Change to IR
+            conventions (0 is fail, 1 is success).  Also transfer
+            result to r_res. */
+         IRTemp   res   = stmt->Ist.LLSC.result;
+         IRType   ty    = typeOfIRTemp(env->type_env, res);
+         HReg     r_res = lookupIRTemp(env, res);
+         ARMRI84* one   = ARMRI84_I84(1,0);
+         vassert(ty == Ity_I1);
+         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
+         /* And be conservative -- mask off all but the lowest bit */
+         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
+         return;
      }
      break;
   }
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index f5ac5fc94c..f44ac87367 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -3344,14 +3344,16 @@ void tcStmt ( IRSB* bb, IRStmt* stmt, IRType gWordTy )
         tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result);
         if (stmt->Ist.LLSC.storedata == NULL) {
            /* it's a LL */
-            if (tyRes != Ity_I64 && tyRes != Ity_I32 && tyRes != Ity_I8)
+            if (tyRes != Ity_I64 && tyRes != Ity_I32
+                && tyRes != Ity_I16 && tyRes != Ity_I8)
               sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus");
         } else {
            /* it's a SC */
            if (tyRes != Ity_I1)
               sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1");
            tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata);
-            if (tyData != Ity_I64 && tyData != Ity_I32 && tyData != Ity_I8)
+            if (tyData != Ity_I64 && tyData != Ity_I32
+                && tyData != Ity_I16 && tyData != Ity_I8)
               sanityCheckFail(bb,stmt,
                               "Ist.LLSC(SC).result :: storedata bogus");
         }