return True;
}
+ /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
+ /* 31 29 22 21 20 15 14 9 4
+ sz 0010001 A 1 s R 11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
+ */
+ if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
+ && INSN(21,21) == 1
+ && INSN(14,10) == BITS5(1,1,1,1,1)) {
+ UInt szBlg2 = INSN(31,30);
+ Bool isAcq = INSN(22,22) == 1;
+ Bool isRel = INSN(15,15) == 1;
+ UInt ss = INSN(20,16);
+ UInt nn = INSN(9,5);
+ UInt tt = INSN(4,0);
+
+ const HChar* suffix[4] = { "b", "h", "", "" };
+
+ UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
+ IRType ty = integerIRTypeOfSize(szB);
+ Bool is64 = szB == 8;
+
+ IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
+ IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
+
+ if (isAcq)
+ stmt(IRStmt_MBE(Imbe_Fence));
+
+ // Compare-and-swap: store 'new' only if memory still equals 'exp';
+ // 'old' receives the prior memory value whether or not the store happens.
+ IRTemp old = newTemp(ty);
+ stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
+ Iend_LE, getIReg64orSP(nn),
+ /*expdHi*/NULL, exp,
+ /*dataHi*/NULL, new)) );
+
+ if (isRel)
+ stmt(IRStmt_MBE(Imbe_Fence));
+
+ putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
+ DIP("cas%s%s%s %s, %s, [%s]\n",
+ isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
+ nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
+ return True;
+ }
+
if (sigill_diag) {
vex_printf("ARM64 front end: load_store\n");
}
}
case ARM64in_CAS: {
/* This isn't simple. For an explanation see the comment in
- host_arm64_defs.h on the the definition of ARM64Instr case
- CAS. */
+ host_arm64_defs.h on the definition of ARM64Instr case CAS.
+
+ NOTE: We could place "loop:" after mov/and but then we need
+ an additional scratch register.
+ */
/* Generate:
+
+ loop:
-- one of:
mov x8, x5 // AA0503E8
and x8, x5, #0xFFFFFFFF // 92407CA8
bne out // 54000061
-- one of:
- stxr w1, x7, [x3] // C8017C67
- stxr w1, w7, [x3] // 88017C67
- stxrh w1, w7, [x3] // 48017C67
- stxrb w1, w7, [x3] // 08017C67
+ stxr w8, x7, [x3] // C8087C67
+ stxr w8, w7, [x3] // 88087C67
+ stxrh w8, w7, [x3] // 48087C67
+ stxrb w8, w7, [x3] // 08087C67
-- always:
- eor x1, x5, x1 // CA0100A1
+ cbnz w8, loop // 35FFFF68
out:
*/
switch (i->ARM64in.CAS.szB) {
*p++ = 0xEB08003F;
*p++ = 0x54000061;
switch (i->ARM64in.CAS.szB) {
- case 8: *p++ = 0xC8017C67; break;
- case 4: *p++ = 0x88017C67; break;
- case 2: *p++ = 0x48017C67; break;
- case 1: *p++ = 0x08017C67; break;
+ case 8: *p++ = 0xC8087C67; break;
+ case 4: *p++ = 0x88087C67; break;
+ case 2: *p++ = 0x48087C67; break;
+ case 1: *p++ = 0x08087C67; break;
}
- *p++ = 0xCA0100A1;
+ *p++ = 0x35FFFF68;
goto done;
}
case ARM64in_MFence: {
Uses x8 as scratch (but that's not allocatable).
Hence: RD x3, x5, x7; WR x1
+ loop:
(szB=8) mov x8, x5
(szB=4) and x8, x5, #0xFFFFFFFF
(szB=2) and x8, x5, #0xFFFF
bne after
-- if branch taken, failure; x1[8*szB-1 : 0] holds old value
-- attempt to store
- stxr w1, x7, [x3]
+ stxr w8, x7, [x3]
- -- if store successful, x1==0, so the eor is "x1 := x5"
- -- if store failed, x1==1, so the eor makes x1 != x5
- eor x1, x5, x1
+ -- if store successful, w8 == 0 and x1 holds the old value
+ -- if store failed, w8 != 0; branch back and try again.
+ cbnz w8, loop
after:
*/
struct {
addInstr(env, ARM64Instr_CAS(sz));
/* Now we have the lowest szB bytes of x1 are either equal to
the lowest szB bytes of x5, indicating success, or they
- aren't, indicating failure. The IR semantics actually
- require us to return the old value at the location,
- regardless of success or failure, but in the case of
- failure it's not clear how to do this, since
- ARM64Instr_CAS can't provide that. Instead we'll just
- return the relevant bit of x1, since that's at least
- guaranteed to be different from the lowest bits of x5 on
- failure. */
+ aren't, indicating failure. */
HReg rResult = hregARM64_X1();
switch (sz) {
case 8: break;