Int off = offsetQRegLane(qregNo, laneTy, laneNo);
switch (laneTy) {
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- case Ity_F64:
+ case Ity_F64: case Ity_F32:
break;
default:
vassert(0); // Other cases are ATC
}
+/* Duplicates the src element exactly so as to fill a V128 value. Only
+ handles src types of F64 and F32. */
+static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
+{
+ IRTemp res = newTemp(Ity_V128);
+ if (srcTy == Ity_F64) {
+ IRTemp i64 = newTemp(Ity_I64);
+ assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
+ assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
+ return res;
+ }
+ if (srcTy == Ity_F32) {
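+ /* Move the F32 bits into the low half of an I64, then OR in a copy
+    shifted left by 32, so the element sits in both 32-bit halves;
+    64HLtoV128 below then repeats that across the whole vector. */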
+ IRTemp i64a = newTemp(Ity_I64);
+ assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
+ IRTemp i64b = newTemp(Ity_I64);
+ assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
+ mkexpr(i64a)));
+ assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
+ return res;
+ }
+ vassert(0);
+}
+
+
/*------------------------------------------------------------*/
/*--- FP comparison helpers ---*/
/*------------------------------------------------------------*/
static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31    28    23   21 20 19 15     11   9 4
+    0 Q U 01111 size L  M  m  opcode H  0 n d
+    Decode fields are: u,size,opcode
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,31) != 0
+     || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
+ return False;
+ }
+ UInt bitQ = INSN(30,30);
+ UInt bitU = INSN(29,29);
+ UInt size = INSN(23,22);
+ UInt bitL = INSN(21,21);
+ UInt bitM = INSN(20,20);
+ UInt mmLO4 = INSN(19,16);
+ UInt opcode = INSN(15,12);
+ UInt bitH = INSN(11,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ UInt mm = (bitM << 4) | mmLO4;
+ vassert(size < 4);
+
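+ /* The by-element FMUL case below splats the selected scalar lane
+    across a full V128 (math_DUP_TO_V128) and then uses an ordinary
+    vector multiply. */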
+ if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
+ /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
+ if (bitQ == 0 && size == X11) return False; // implied 1d case
+ Bool isD = (size & 1) == 1;
+ UInt index;
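+ /* The lane index is H:L for S-sized elements but just H for D-sized
+    ones. */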
+ if (!isD) index = (bitH << 1) | bitL;
+ else if (isD && bitL == 0) index = bitH;
+ else return False; // sz:L == x11 => unallocated encoding
+ vassert(index < (isD ? 2 : 4));
+ IRType ity = isD ? Ity_F64 : Ity_F32;
+ IRTemp elem = newTemp(ity);
+ assign(elem, getQRegLane(mm, index, ity));
+ IRTemp dupd = math_DUP_TO_V128(elem, ity);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQReg128(nn), mkexpr(dupd)));
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+ : mkexpr(res));
+ const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+ DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
+ nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
+ return True;
+ }
+
return False;
# undef INSN
}
vassert(laneNo <= 1);
return i;
}
+ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
+ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+ i->tag = ARM64in_VXfromDorS;
+ i->ARM64in.VXfromDorS.rX = rX;
+ i->ARM64in.VXfromDorS.rDorS = rDorS;
+ i->ARM64in.VXfromDorS.fromD = fromD;
+ return i;
+}
ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
i->tag = ARM64in_VMov;
ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
return;
case ARM64in_VXfromQ:
- vex_printf("mov ");
+ vex_printf("fmov ");
ppHRegARM64(i->ARM64in.VXfromQ.rX);
vex_printf(", ");
ppHRegARM64(i->ARM64in.VXfromQ.rQ);
vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
return;
+ case ARM64in_VXfromDorS:
+ vex_printf("fmov ");
+ ppHRegARM64(i->ARM64in.VXfromDorS.rX);
+ vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
+ ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
+ vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
+ return;
case ARM64in_VMov: {
UChar aux = '?';
switch (i->ARM64in.VMov.szB) {
addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
return;
+ case ARM64in_VXfromDorS:
+ addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
+ addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
+ return;
case ARM64in_VMov:
addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
i->ARM64in.VXfromQ.rQ
= lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
return;
+ case ARM64in_VXfromDorS:
+ i->ARM64in.VXfromDorS.rX
+ = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
+ i->ARM64in.VXfromDorS.rDorS
+ = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
+ return;
case ARM64in_VMov:
i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
#define X00000 BITS8(0,0,0, 0,0,0,0,0)
#define X00001 BITS8(0,0,0, 0,0,0,0,1)
+#define X00110 BITS8(0,0,0, 0,0,1,1,0)
#define X00111 BITS8(0,0,0, 0,0,1,1,1)
#define X01000 BITS8(0,0,0, 0,1,0,0,0)
#define X10000 BITS8(0,0,0, 1,0,0,0,0)
goto done;
}
+ case ARM64in_VXfromDorS: {
+ /* 000 11110001 00110 000000 n d FMOV Wd, Sn
+ 100 11110011 00110 000000 n d FMOV Xd, Dn
+ */
+ UInt dd = iregNo(i->ARM64in.VXfromDorS.rX);
+ UInt nn = dregNo(i->ARM64in.VXfromDorS.rDorS);
+ Bool fromD = i->ARM64in.VXfromDorS.fromD;
+ vassert(dd < 31);
+ *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
+ fromD ? X11110011 : X11110001,
+ X00110, X000000, nn, dd);
+ goto done;
+ }
+
case ARM64in_VMov: {
/* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
000 11110 01 10000 00 10000 n d FMOV Dd, Dn
ARM64in_VDfromX, /* Move an Xreg to a Dreg */
ARM64in_VQfromXX, /* Move 2 Xregs to a Qreg */
ARM64in_VXfromQ, /* Move half a Qreg to an Xreg */
+ ARM64in_VXfromDorS, /* Move Dreg or Sreg(ZX) to an Xreg */
ARM64in_VMov, /* vector reg-reg move, 16, 8 or 4 bytes */
/* infrastructure */
ARM64in_EvCheck, /* Event check */
HReg rQ;
UInt laneNo; /* either 0 or 1 */
} VXfromQ;
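+ /* Move the contents of a D or S register to an X register; the
+    S case is zero-extended to 64 bits. */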
+ struct {
+ HReg rX;
+ HReg rDorS;
+ Bool fromD;
+ } VXfromDorS;
/* MOV dst, src -- reg-reg move for vector registers */
struct {
UInt szB; // 16=mov qD,qS; 8=mov dD,dS; 4=mov sD,sS
extern ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX );
extern ARM64Instr* ARM64Instr_VQfromXX( HReg rQ, HReg rXhi, HReg rXlo );
extern ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo );
+extern ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD );
extern ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
return dst;
}
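+ /* Reinterpreting an F64 or F32 as an integer is a bit-for-bit move
+    from the FP register bank to the integer bank, done with the new
+    VXfromDorS instruction. */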
+ case Iop_ReinterpF64asI64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
+ return dst;
+ }
+ case Iop_ReinterpF32asI32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
+ return dst;
+ }
case Iop_1Sto32:
case Iop_1Sto64: {
/* As with the iselStmt case for 'tmp:I1 = expr', we could