From: Julian Seward Date: Mon, 9 Mar 2020 08:18:09 +0000 (+0100) Subject: Bug 415136 - ARMv8.1 Compare-and-Swap instructions are not supported. X-Git-Tag: VALGRIND_3_16_0~83 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2281c8c8675969537dc734237b60157e8a0fe0d6;p=thirdparty%2Fvalgrind.git Bug 415136 - ARMv8.1 Compare-and-Swap instructions are not supported. This commit implements ARMv8.1 CAS instructions. It does not contain test cases; those will be in a subsequent commit. Patch by Assad Hashmi. --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index aae3c6f52b..c8bfd1888f 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -6864,6 +6864,49 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, return True; } + /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */ + /* 31 29 22 21 20 15 14 9 4 + sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}{,B,H} Rs, Rt, [Xn|SP] + */ + if (INSN(29,23) == BITS7(0,0,1,0,0,0,1) + && INSN(21,21) == 1 + && INSN(14,10) == BITS5(1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); + Bool isAcq = INSN(22,22) == 1; + Bool isRel = INSN(15,15) == 1; + UInt ss = INSN(20,16); + UInt nn = INSN(9,5); + UInt tt = INSN(4,0); + + const HChar* suffix[4] = { "b", "h", "", "" }; + + UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */ + IRType ty = integerIRTypeOfSize(szB); + Bool is64 = szB == 8; + + IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss)); + IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt)); + + if (isAcq) + stmt(IRStmt_MBE(Imbe_Fence)); + + // Store the result back if LHS remains unchanged in memory. + IRTemp old = newTemp(ty); + stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old, + Iend_LE, getIReg64orSP(nn), + /*expdHi*/NULL, exp, + /*dataHi*/NULL, new)) ); + + if (isRel) + stmt(IRStmt_MBE(Imbe_Fence)); + + putIReg64orZR(ss, widenUto64(ty, mkexpr(old))); + DIP("cas%s%s%s %s, %s, [%s]\n", + isAcq ? "a" : "", isRel ? 
"l" : "", suffix[szBlg2], + nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn)); + return True; + } + if (sigill_diag) { vex_printf("ARM64 front end: load_store\n"); } diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 33acae9462..13a61b0bd4 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -3852,9 +3852,14 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, } case ARM64in_CAS: { /* This isn't simple. For an explanation see the comment in - host_arm64_defs.h on the the definition of ARM64Instr case - CAS. */ + host_arm64_defs.h on the definition of ARM64Instr case CAS. + + NOTE: We could place "loop:" after mov/and but then we need + an additional scratch register. + */ /* Generate: + + loop: -- one of: mov x8, x5 // AA0503E8 and x8, x5, #0xFFFFFFFF // 92407CA8 @@ -3872,13 +3877,13 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, bne out // 54000061 -- one of: - stxr w1, x7, [x3] // C8017C67 - stxr w1, w7, [x3] // 88017C67 - stxrh w1, w7, [x3] // 48017C67 - stxrb w1, w7, [x3] // 08017C67 + stxr w8, x7, [x3] // C8087C67 + stxr w8, w7, [x3] // 88087C67 + stxrh w8, w7, [x3] // 48087C67 + stxrb w8, w7, [x3] // 08087C67 -- always: - eor x1, x5, x1 // CA0100A1 + cbnz w8, loop // 35FFFF68 out: */ switch (i->ARM64in.CAS.szB) { @@ -3897,12 +3902,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, *p++ = 0xEB08003F; *p++ = 0x54000061; switch (i->ARM64in.CAS.szB) { - case 8: *p++ = 0xC8017C67; break; - case 4: *p++ = 0x88017C67; break; - case 2: *p++ = 0x48017C67; break; - case 1: *p++ = 0x08017C67; break; + case 8: *p++ = 0xC8087C67; break; + case 4: *p++ = 0x88087C67; break; + case 2: *p++ = 0x48087C67; break; + case 1: *p++ = 0x08087C67; break; } - *p++ = 0xCA0100A1; + *p++ = 0x35FFFF68; goto done; } case ARM64in_MFence: { diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index db500565d6..63cf2bb790 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ 
-679,6 +679,7 @@ typedef Uses x8 as scratch (but that's not allocatable). Hence: RD x3, x5, x7; WR x1 + loop: (szB=8) mov x8, x5 (szB=4) and x8, x5, #0xFFFFFFFF (szB=2) and x8, x5, #0xFFFF @@ -690,10 +691,10 @@ typedef bne after -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value -- attempt to store - stxr w1, x7, [x3] + stxr w8, x7, [x3] -- if store successful, x1==0, so the eor is "x1 := x5" - -- if store failed, x1==1, so the eor makes x1 != x5 - eor x1, x5, x1 + -- if store failed, branch back and try again. + cbnz w8, loop after: */ struct { diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 4fa50b9819..d19b19fcf8 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -3988,14 +3988,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) addInstr(env, ARM64Instr_CAS(sz)); /* Now we have the lowest szB bytes of x1 are either equal to the lowest szB bytes of x5, indicating success, or they - aren't, indicating failure. The IR semantics actually - require us to return the old value at the location, - regardless of success or failure, but in the case of - failure it's not clear how to do this, since - ARM64Instr_CAS can't provide that. Instead we'll just - return the relevant bit of x1, since that's at least - guaranteed to be different from the lowest bits of x5 on - failure. */ + aren't, indicating failure. */ HReg rResult = hregARM64_X1(); switch (sz) { case 8: break;