arm64: implement the ARMv8.1-LSE Compare-and-Swap Pair instructions
(CASP, CASPA, CASPL, CASPAL) in the front end, back end and instruction
selector. Implementation only; tests to follow.
Patch by Assad Hashmi <assad.hashmi@linaro.org>.
return True;
}
+ /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
+ /* 31 30 29      23 22 21 20 16 15 14  10 9 5 4 0
+    0  sz 0010000    A  1  s     R  11111   n   t
+    CASP{,A}{,L} <Rs>, <R(s+1)>, <Rt>, <R(t+1)>, [<Xn|SP>{,#0}]
+ */
+ if (INSN(31,31) == 0
+ && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
+ && INSN(21,21) == 1
+ && INSN(14,10) == BITS5(1,1,1,1,1)) {
+ UInt is64 = INSN(30,30);
+ Bool isAcq = INSN(22,22) == 1;
+ Bool isRel = INSN(15,15) == 1;
+ UInt ss = INSN(20,16);
+ UInt nn = INSN(9,5);
+ UInt tt = INSN(4,0);
+
+ if ((ss & 0x1) || (tt & 0x1)) {
+ /* Rs and Rt must be even, since each names a register pair;
+ odd values are UNDEFINED, so fall through. */
+ } else {
+ IRExpr *expLo = getIRegOrZR(is64, ss);
+ IRExpr *expHi = getIRegOrZR(is64, ss + 1);
+ IRExpr *newLo = getIRegOrZR(is64, tt);
+ IRExpr *newHi = getIRegOrZR(is64, tt + 1);
+ IRTemp oldLo = newTemp(is64 ? Ity_I64 : Ity_I32);
+ IRTemp oldHi = newTemp(is64 ? Ity_I64 : Ity_I32);
+
+ /* For release semantics, fence before the CAS: earlier
+ accesses must not be reordered past the store. */
+ if (isRel)
+ stmt(IRStmt_MBE(Imbe_Fence));
+
+ stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
+ Iend_LE, getIReg64orSP(nn),
+ expHi, expLo,
+ newHi, newLo)) );
+
+ /* For acquire semantics, fence after the CAS: later
+ accesses must not be reordered ahead of the load. */
+ if (isAcq)
+ stmt(IRStmt_MBE(Imbe_Fence));
+
+ putIRegOrZR(is64, ss, mkexpr(oldLo));
+ putIRegOrZR(is64, ss+1, mkexpr(oldHi));
+ DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
+ isAcq ? "a" : "", isRel ? "l" : "",
+ nameIRegOrZR(is64, ss), nameIRegOrZR(is64, ss+1),
+ nameIRegOrZR(is64, tt), nameIRegOrZR(is64, tt+1),
+ nameIReg64orSP(nn));
+ return True;
+ }
+ }
+
if (sigill_diag) {
vex_printf("ARM64 front end: load_store\n");
}
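As a quick sanity check on the decode case above: the word 0x48207C82
should disassemble as "casp x0, x1, x2, x3, [x4]". The stand-alone sketch
below re-extracts the fields; the FIELD macro, the chosen example encoding
and the program itself are this note's own illustration, not part of the
patch:

   #include <stdint.h>
   #include <stdio.h>

   /* Stand-in for VEX's INSN(hi,lo): extract bits hi..lo of w. */
   #define FIELD(w, hi, lo) (((w) >> (lo)) & ((1u << ((hi)-(lo)+1)) - 1u))

   int main(void)
   {
      uint32_t w = 0x48207C82;  /* believed: casp x0, x1, x2, x3, [x4] */
      if (FIELD(w,31,31) == 0 && FIELD(w,29,23) == 0x10  /* 0010000 */
          && FIELD(w,21,21) == 1 && FIELD(w,14,10) == 0x1F) {
         printf("is64=%u acq=%u rel=%u s=%u n=%u t=%u\n",
                FIELD(w,30,30), FIELD(w,22,22), FIELD(w,15,15),
                FIELD(w,20,16), FIELD(w,9,5), FIELD(w,4,0));
         /* expect: is64=1 acq=0 rel=0 s=0 n=4 t=2 */
      }
      return 0;
   }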
vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
return i;
}
+ARM64Instr* ARM64Instr_CASP ( Int szB ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_CASP;
+ i->ARM64in.CASP.szB = szB;
+ vassert(szB == 8 || szB == 4);
+ return i;
+}
ARM64Instr* ARM64Instr_MFence ( void ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
i->tag = ARM64in_MFence;
vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
return;
}
+ case ARM64in_CASP: {
+ vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB);
+ return;
+ }
case ARM64in_MFence:
vex_printf("(mfence) dsb sy; dmb sy; isb");
return;
/* Pointless to state this since X8 is not available to RA. */
addHRegUse(u, HRmWrite, hregARM64_X8());
break;
+ case ARM64in_CASP:
+ addHRegUse(u, HRmRead, hregARM64_X2());
+ addHRegUse(u, HRmRead, hregARM64_X4());
+ addHRegUse(u, HRmRead, hregARM64_X5());
+ addHRegUse(u, HRmRead, hregARM64_X6());
+ addHRegUse(u, HRmRead, hregARM64_X7());
+ addHRegUse(u, HRmWrite, hregARM64_X0());
+ addHRegUse(u, HRmWrite, hregARM64_X1());
+ addHRegUse(u, HRmWrite, hregARM64_X9());
+ addHRegUse(u, HRmWrite, hregARM64_X8());
+ break;
case ARM64in_MFence:
return;
case ARM64in_ClrEX:
return;
case ARM64in_CAS:
return;
+ case ARM64in_CASP:
+ return;
case ARM64in_MFence:
return;
case ARM64in_ClrEX:
*p++ = 0x35FFFF68;
goto done;
}
+ case ARM64in_CASP: {
+ /* Generate:
+ CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
+
+ Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
+ Xn: memory address
+ -> X2 (INPUT)
+ Xs, X(s+1): values to be compared with value read from address
+ -> X4,X5 (INPUTS)
+ -> X0,X1 (OUTPUTS) loaded from memory and compared with
+ scratch registers X8,X9 (CLOBBERED) which contain
+ contents of X4,X5
+ Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
+ -> X6,X7 (INPUTS)
+
+ loop:
+ -- two of:
+ mov x8, x4 // AA0403E8
+ mov x9, x5 // AA0503E9
+ and x8, x4, #0xFFFFFFFF // 92407C88
+ and x9, x5, #0xFFFFFFFF // 92407CA9
+
+ -- one of:
+ ldxp x0,x1, [x2] // C87F0440
+ ldxp w0,w1, [x2] // 887F0440
+
+ -- always:
+ cmp x0, x8 // EB08001F
+ bne out // 540000A1 (b.ne #20 <out>)
+ cmp x1, x9 // EB09003F
+ bne out // 54000061 (b.ne #12 <out>)
+
+ -- one of (status in w8, which is dead by this point; w1 must
+ not be used, else a successful stxp would zero the oldHi
+ result in x1):
+ stxp w8, x6, x7, [x2] // C8281C46
+ stxp w8, w6, w7, [x2] // 88281C46
+
+ -- always:
+ cbnz w8, loop // 35FFFF08 (cbnz w8, #-32 <loop>)
+ out:
+ */
+ switch (i->ARM64in.CASP.szB) {
+ case 8: *p++ = 0xAA0403E8; *p++ = 0xAA0503E9; break;
+ case 4: *p++ = 0x92407C88; *p++ = 0x92407CA9; break;
+ default: vassert(0);
+ }
+ switch (i->ARM64in.CASP.szB) {
+ case 8: *p++ = 0xC87F0440; break;
+ case 4: *p++ = 0x887F0440; break;
+ default: vassert(0);
+ }
+ *p++ = 0xEB08001F;
+ *p++ = 0x540000A1;
+ *p++ = 0xEB09003F;
+ *p++ = 0x54000061;
+ switch (i->ARM64in.CASP.szB) {
+ case 8: *p++ = 0xC8281C46; break;
+ case 4: *p++ = 0x88281C46; break;
+ default: vassert(0);
+ }
+ *p++ = 0x35FFFF08;
+ goto done;
+ }
case ARM64in_MFence: {
*p++ = 0xD5033F9F; /* DSB sy */
*p++ = 0xD5033FBF; /* DMB sy */
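Because the loop is a fixed sequence of nine 4-byte words (loop: at byte 0,
the two b.ne's at bytes 16 and 24, cbnz at byte 32, out: at byte 36), the
hard-coded branch words can be cross-checked by rebuilding them from those
offsets. A minimal sketch, assuming only the standard A64 imm19 layout for
B.cond and CBNZ; the helper names are illustrative, not VEX functions:

   #include <stdint.h>
   #include <stdio.h>

   /* B.NE <label>: 0101 0100 | imm19 | 0 | 0001, offset in bytes. */
   static uint32_t enc_bne(int32_t off)
   { return 0x54000001u | (((uint32_t)(off / 4) & 0x7FFFFu) << 5); }

   /* CBNZ Wt, <label>: 0011 0101 | imm19 | Rt, offset in bytes. */
   static uint32_t enc_cbnz_w(uint32_t rt, int32_t off)
   { return 0x35000000u | (((uint32_t)(off / 4) & 0x7FFFFu) << 5) | rt; }

   int main(void)
   {
      printf("%08X\n", enc_bne(36 - 16));       /* 540000A1 */
      printf("%08X\n", enc_bne(36 - 24));       /* 54000061 */
      printf("%08X\n", enc_cbnz_w(8, 0 - 32));  /* 35FFFF08 */
      return 0;
   }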
ARM64in_LdrEX,
ARM64in_StrEX,
ARM64in_CAS,
+ ARM64in_CASP,
ARM64in_MFence,
ARM64in_ClrEX,
/* ARM64in_V*: scalar ops involving vector registers */
struct {
Int szB; /* 1, 2, 4 or 8 */
} CAS;
+ struct {
+ Int szB; /* 4 or 8 */
+ } CASP;
/* Mem fence. An insn which fences all loads and stores as
much as possible before continuing. On ARM64 we emit the
sequence "dsb sy ; dmb sy ; isb sy", which is probably
extern ARM64Instr* ARM64Instr_LdrEX ( Int szB );
extern ARM64Instr* ARM64Instr_StrEX ( Int szB );
extern ARM64Instr* ARM64Instr_CAS ( Int szB );
+extern ARM64Instr* ARM64Instr_CASP ( Int szB );
extern ARM64Instr* ARM64Instr_MFence ( void );
extern ARM64Instr* ARM64Instr_ClrEX ( void );
extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN,
addInstr(env, ARM64Instr_MovI(rOld, rResult));
return;
}
+ else {
+ /* Paired register CAS, i.e. CASP */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ switch (ty) {
+ case Ity_I64: sz = 8; break;
+ case Ity_I32: sz = 4; break;
+ default: goto unhandled_cas;
+ }
+ HReg rAddr = iselIntExpr_R(env, cas->addr);
+
+ HReg rExpd0 = iselIntExpr_R(env, cas->expdLo);
+ vassert(cas->expdHi != NULL);
+ HReg rExpd1 = iselIntExpr_R(env, cas->expdHi);
+
+ HReg rData0 = iselIntExpr_R(env, cas->dataLo);
+ vassert(cas->dataHi != NULL);
+ HReg rData1 = iselIntExpr_R(env, cas->dataHi);
+
+ addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rAddr));
+
+ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rExpd0));
+ addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd1));
+
+ addInstr(env, ARM64Instr_MovI(hregARM64_X6(), rData0));
+ addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData1));
+
+ addInstr(env, ARM64Instr_CASP(sz));
+
+ HReg rResult0 = hregARM64_X0();
+ HReg rResult1 = hregARM64_X1();
+ switch (sz) {
+ case 8: break;
+ case 4: rResult0 = widen_z_32_to_64(env, rResult0);
+ rResult1 = widen_z_32_to_64(env, rResult1);
+ break;
+ default: vassert(0);
+ }
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ addInstr(env, ARM64Instr_MovI(rOldLo, rResult0));
+ addInstr(env, ARM64Instr_MovI(rOldHi, rResult1));
+ return;
+ }
unhandled_cas:
break;
}
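The commit message notes that tests will follow. In the meantime the new
path can be spot-checked by comparing native hardware against Valgrind's
emulation with a throwaway program such as the sketch below. It assumes
GCC or Clang with -march=armv8.1-a on ARMv8.1 hardware; the register
pinning via local register variables is this example's own device, not
part of the patch:

   #include <inttypes.h>
   #include <stdio.h>

   int main(void)
   {
      /* CASP requires the address be aligned to the pair size. */
      uint64_t mem[2] __attribute__((aligned(16))) = { 1, 2 };
      register uint64_t expLo __asm__("x4") = 1;  /* expected values */
      register uint64_t expHi __asm__("x5") = 2;
      register uint64_t newLo __asm__("x6") = 3;  /* replacement values */
      register uint64_t newHi __asm__("x7") = 4;
      __asm__ __volatile__(
         "casp x4, x5, x6, x7, [%4]"
         : "+r"(expLo), "+r"(expHi)
         : "r"(newLo), "r"(newHi), "r"(mem)
         : "memory");
      /* On a successful swap, x4,x5 return the old contents and mem
         becomes {3,4}, so this should print: old=1,2  mem=3,4 */
      printf("old=%" PRIu64 ",%" PRIu64 "  mem=%" PRIu64 ",%" PRIu64 "\n",
             expLo, expHi, mem[0], mem[1]);
      return 0;
   }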