From: Julian Seward
Date: Thu, 12 Jun 2014 13:16:01 +0000 (+0000)
Subject: Implement FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[].
X-Git-Tag: svn/VALGRIND_3_10_1^2~97
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ec6971bfd348e93019069b9c9f6e565d622dfbb;p=thirdparty%2Fvalgrind.git

Implement FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[].

git-svn-id: svn://svn.valgrind.org/vex/trunk@2874
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index cd83232bec..91cd6dc3fc 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -1112,7 +1112,7 @@ static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
    Int off = offsetQRegLane(qregNo, laneTy, laneNo);
    switch (laneTy) {
       case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
-      case Ity_F64:
+      case Ity_F64: case Ity_F32:
          break;
       default:
          vassert(0); // Other cases are ATC
@@ -1688,6 +1688,30 @@ static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
 }
 
 
+/* Duplicates the src element exactly so as to fill a V128 value. Only
+   handles src types of F64 and F32. */
+static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
+{
+   IRTemp res = newTemp(Ity_V128);
+   if (srcTy == Ity_F64) {
+      IRTemp i64 = newTemp(Ity_I64);
+      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
+      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
+      return res;
+   }
+   if (srcTy == Ity_F32) {
+      IRTemp i64a = newTemp(Ity_I64);
+      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
+      IRTemp i64b = newTemp(Ity_I64);
+      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
+                                   mkexpr(i64a)));
+      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
+      return res;
+   }
+   vassert(0);
+}
+
+
 /*------------------------------------------------------------*/
 /*--- FP comparison helpers                                ---*/
 /*------------------------------------------------------------*/
@@ -6842,7 +6866,53 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31    28    23   21 20 19 15     11   9 4
+      0 Q U 01111 size L  M  m  opcode H 0 n d
+      Decode fields are: u,size,opcode
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
+      return False;
+   }
+   UInt bitQ   = INSN(30,30);
+   UInt bitU   = INSN(29,29);
+   UInt size   = INSN(23,22);
+   UInt bitL   = INSN(21,21);
+   UInt bitM   = INSN(20,20);
+   UInt mmLO4  = INSN(19,16);
+   UInt opcode = INSN(15,12);
+   UInt bitH   = INSN(11,11);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+   UInt mm     = (bitM << 4) | mmLO4;
+   vassert(size < 4);
+
+   if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
+      /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isD = (size & 1) == 1;
+      UInt index;
+      if      (!isD)             index = (bitH << 1) | bitL;
+      else if (isD && bitL == 0) index = bitH;
+      else return False; // sz:L == x11 => unallocated encoding
+      vassert(index < (isD ? 2 : 4));
+      IRType ity  = isD ? Ity_F64 : Ity_F32;
+      IRTemp elem = newTemp(ity);
+      assign(elem, getQRegLane(mm, index, ity));
+      IRTemp dupd = math_DUP_TO_V128(elem, ity);
+      IRTemp res  = newTemp(Ity_V128);
+      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
+                        mkexpr(mk_get_IR_rounding_mode()),
+                        getQReg128(nn), mkexpr(dupd)));
+      putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+                               : mkexpr(res));
+      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+      DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
+          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 3bc52172f8..aa88f7bfef 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1809,6 +1809,14 @@ ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
    vassert(laneNo <= 1);
    return i;
 }
+ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+   i->tag                      = ARM64in_VXfromDorS;
+   i->ARM64in.VXfromDorS.rX    = rX;
+   i->ARM64in.VXfromDorS.rDorS = rDorS;
+   i->ARM64in.VXfromDorS.fromD = fromD;
+   return i;
+}
 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
    i->tag = ARM64in_VMov;
@@ -2472,12 +2480,19 @@ void ppARM64Instr ( ARM64Instr* i ) {
          ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
          return;
       case ARM64in_VXfromQ:
-         vex_printf("mov ");
+         vex_printf("fmov ");
          ppHRegARM64(i->ARM64in.VXfromQ.rX);
          vex_printf(", ");
          ppHRegARM64(i->ARM64in.VXfromQ.rQ);
          vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
          return;
+      case ARM64in_VXfromDorS:
+         vex_printf("fmov ");
+         ppHRegARM64(i->ARM64in.VXfromDorS.rX);
+         vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
+         ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
+         vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
+         return;
       case ARM64in_VMov: {
          UChar aux = '?';
          switch (i->ARM64in.VMov.szB) {
@@ -2865,6 +2880,10 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
          addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
          return;
+      case ARM64in_VXfromDorS:
+         addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
+         addHRegUse(u, HRmRead,  i->ARM64in.VXfromDorS.rDorS);
+         return;
       case ARM64in_VMov:
          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
@@ -3144,6 +3163,12 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.VXfromQ.rQ
            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
          return;
+      case ARM64in_VXfromDorS:
+         i->ARM64in.VXfromDorS.rX
+           = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
+         i->ARM64in.VXfromDorS.rDorS
+           = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
+         return;
       case ARM64in_VMov:
          i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
         i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
@@ -3349,6 +3374,7 @@ static inline UChar qregNo ( HReg r )
 
 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
+#define X00110   BITS8(0,0,0, 0,0,1,1,0)
 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
 #define X10000   BITS8(0,0,0, 1,0,0,0,0)
@@ -6294,6 +6320,20 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          goto done;
       }
 
+      case ARM64in_VXfromDorS: {
+         /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
+            100 11110011 00110 000000 n d     FMOV Xd, Dn
+         */
+         UInt dd    = iregNo(i->ARM64in.VXfromDorS.rX);
+         UInt nn    = dregNo(i->ARM64in.VXfromDorS.rDorS);
+         Bool fromD = i->ARM64in.VXfromDorS.fromD;
+         vassert(dd < 31);
+         *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
+                              fromD ? X11110011 : X11110001,
+                              X00110, X000000, nn, dd);
+         goto done;
+      }
+
       case ARM64in_VMov: {
          /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
             000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 99e9b063d9..437834e5f1 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -598,6 +598,7 @@ typedef
       ARM64in_VDfromX,     /* Move an Xreg to a Dreg */
       ARM64in_VQfromXX,    /* Move 2 Xregs to a Qreg */
       ARM64in_VXfromQ,     /* Move half a Qreg to an Xreg */
+      ARM64in_VXfromDorS,  /* Move Dreg or Sreg(ZX) to an Xreg */
       ARM64in_VMov,        /* vector reg-reg move, 16, 8 or 4 bytes */
       /* infrastructure */
       ARM64in_EvCheck,     /* Event check */
@@ -990,6 +991,11 @@ typedef
             HReg rQ;
             UInt laneNo; /* either 0 or 1 */
          } VXfromQ;
+         struct {
+            HReg rX;
+            HReg rDorS;
+            Bool fromD;
+         } VXfromDorS;
          /* MOV dst, src -- reg-reg move for vector registers */
          struct {
            UInt szB; // 16=mov qD,qS; 8=mov dD,dS; 4=mov sD,sS
@@ -1084,6 +1090,7 @@ extern ARM64Instr* ARM64Instr_VImmQ ( HReg, UShort );
 extern ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX );
 extern ARM64Instr* ARM64Instr_VQfromXX( HReg rQ, HReg rXhi, HReg rXlo );
 extern ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo );
+extern ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD );
 extern ARM64Instr* ARM64Instr_VMov    ( UInt szB, HReg dst, HReg src );
 
 extern ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 470df6bd93..792d0747bc 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -2109,6 +2109,18 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
             return dst;
          }
+         case Iop_ReinterpF64asI64: {
+            HReg dst = newVRegI(env);
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
+            return dst;
+         }
+         case Iop_ReinterpF32asI32: {
+            HReg dst = newVRegI(env);
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
+            return dst;
+         }
          case Iop_1Sto32: case Iop_1Sto64: {
             /* As with the iselStmt case for 'tmp:I1 = expr', we could
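
Note (not part of the commit): the sketch below is a minimal plain-C model of the semantics the new decoder front-ends, i.e. FMUL (vector, by element), where one lane of the second source is broadcast across all lanes (the job of math_DUP_TO_V128 in the patch) before a lane-wise multiply (Iop_Mul32Fx4 / Iop_Mul64Fx2). It shows only the 4s case, ignores rounding-mode handling, and the names fmul_4s_by_elem, vn, vm, index are hypothetical and do not appear in the patch. The lane-index computation follows the patch: for S lanes, index = H:L.

   /* Illustrative sketch only: models FMUL Vd.4S, Vn.4S, Vm.S[index]. */
   #include <stdio.h>

   /* Broadcast lane 'index' of vm across all four lanes, then multiply
      lane-wise -- mirrors getQRegLane + math_DUP_TO_V128 + Iop_Mul32Fx4. */
   static void fmul_4s_by_elem(float dst[4], const float vn[4],
                               const float vm[4], unsigned index)
   {
      float elem = vm[index];
      for (unsigned i = 0; i < 4; i++)
         dst[i] = vn[i] * elem;
   }

   int main(void)
   {
      unsigned bitH = 1, bitL = 0;           /* as decoded from the insn   */
      unsigned index = (bitH << 1) | bitL;   /* S-lane case: index = H:L   */
      float vn[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
      float vm[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
      float vd[4];
      fmul_4s_by_elem(vd, vn, vm, index);
      printf("%.1f %.1f %.1f %.1f\n", vd[0], vd[1], vd[2], vd[3]);
      return 0;                              /* prints 30.0 60.0 90.0 120.0 */
   }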