git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
arm64: implement:
author     Julian Seward <jseward@acm.org>
           Sun, 24 Aug 2014 20:36:14 +0000 (20:36 +0000)
committer  Julian Seward <jseward@acm.org>
           Sun, 24 Aug 2014 20:36:14 +0000 (20:36 +0000)
  {zip,uzp,trn}{1,2} (vector)
  urecpe, ursqrte (vector)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2933
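
For orientation, the lane semantics of the six permutes, as a scalar model
(exposition only, not code from this commit; lanes is the element count of
the vector arrangement):

   #include <stdint.h>

   /* Scalar model of the six permutes.  ZIP interleaves vector halves,
      UZP de-interleaves even/odd lanes, TRN transposes adjacent lane
      pairs.  The *2 variants (shown in comments) read the odd/upper
      lanes instead. */
   static void zip1(uint32_t* d, const uint32_t* n, const uint32_t* m,
                    int lanes)
   {
      for (int i = 0; i < lanes/2; i++) {
         d[2*i]   = n[i];            /* zip2: n[lanes/2 + i] */
         d[2*i+1] = m[i];            /* zip2: m[lanes/2 + i] */
      }
   }
   static void uzp1(uint32_t* d, const uint32_t* n, const uint32_t* m,
                    int lanes)
   {
      for (int i = 0; i < lanes/2; i++) {
         d[i]           = n[2*i];    /* uzp2: n[2*i + 1] */
         d[lanes/2 + i] = m[2*i];    /* uzp2: m[2*i + 1] */
      }
   }
   static void trn1(uint32_t* d, const uint32_t* n, const uint32_t* m,
                    int lanes)
   {
      for (int i = 0; i < lanes/2; i++) {
         d[2*i]   = n[2*i];          /* trn2: n[2*i + 1] */
         d[2*i+1] = m[2*i];          /* trn2: m[2*i + 1] */
      }
   }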

VEX/priv/guest_arm64_toIR.c
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm64_isel.c

index 837abddbe09377df8f3512ffa6187b731d52036a..d0db663d665b0756ebfda3d07df0e33c413aa132 100644 (file)
@@ -6937,7 +6937,99 @@ Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31  29     23   21 20 15 14     11 9 4
+      0 q 001110 size 0  m  0  opcode 10 n d
+      Decode fields: opcode
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(29,24) != BITS6(0,0,1,1,1,0)
+       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
+      return False;
+   }
+   UInt bitQ   = INSN(30,30);
+   UInt size   = INSN(23,22);
+   UInt mm     = INSN(20,16);
+   UInt opcode = INSN(14,12);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+
+   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
+      /* -------- 001 UZP1 std7_std7_std7 -------- */
+      /* -------- 101 UZP2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool   isUZP1 = opcode == BITS3(0,0,1);
+      IROp   op     = isUZP1 ? mkVecCATEVENLANES(size)
+                             : mkVecCATODDLANES(size);
+      IRTemp preL = newTempV128();
+      IRTemp preR = newTempV128();
+      IRTemp res  = newTempV128();
+      if (bitQ == 0) {
+         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
+                                                  getQReg128(nn)));
+         assign(preR, mkexpr(preL));
+      } else {
+         assign(preL, getQReg128(mm));
+         assign(preR, getQReg128(nn));
+      }
+      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
+      /* -------- 010 TRN1 std7_std7_std7 -------- */
+      /* -------- 110 TRN2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool   isTRN1 = opcode == BITS3(0,1,0);
+      IROp   op1    = isTRN1 ? mkVecCATEVENLANES(size)
+                             : mkVecCATODDLANES(size);
+      IROp op2 = mkVecINTERLEAVEHI(size);
+      IRTemp srcM = newTempV128();
+      IRTemp srcN = newTempV128();
+      IRTemp res  = newTempV128();
+      assign(srcM, getQReg128(mm));
+      assign(srcN, getQReg128(nn));
+      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
+                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
+      /* -------- 011 ZIP1 std7_std7_std7 -------- */
+      /* -------- 111 ZIP2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool   isZIP1 = opcode == BITS3(0,1,1);
+      IROp   op     = isZIP1 ? mkVecINTERLEAVELO(size)
+                             : mkVecINTERLEAVEHI(size);
+      IRTemp preL = newTempV128();
+      IRTemp preR = newTempV128();
+      IRTemp res  = newTempV128();
+      if (bitQ == 0 && !isZIP1) {
+         assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
+         assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
+      } else {
+         assign(preL, getQReg128(mm));
+         assign(preR, getQReg128(nn));
+      }
+      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
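
Worth noting how the compositions above land on the target semantics.  On 4s
lanes (lane 3 at the most significant end, and assuming VEX's usual lane
conventions: CatEvenLanes32x4(a,b) = [a2 a0 b2 b0], InterleaveHI32x4(a,b) =
[a3 b3 a2 b2]) the TRN1 path computes

   CatEvenLanes32x4(m, m)   = [m2 m0 m2 m0]
   CatEvenLanes32x4(n, n)   = [n2 n0 n2 n0]
   InterleaveHI32x4(.., ..) = [m2 n2 m0 n0]   /* == TRN1 Vd.4s, Vn.4s, Vm.4s */

and the CatOddLanes variant yields [m3 n3 m1 n1], i.e. TRN2.  UZP and ZIP map
directly onto CatEven/CatOdd and InterleaveLO/HI; only the 64-bit (bitQ == 0)
forms need a pre-shuffle, because the live data sits in the low half of each
Q register.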
@@ -10056,6 +10148,21 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
+      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
+      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
+      Bool isREC = bitU == 0;
+      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
+      IRTemp res = newTempV128();
+      assign(res, unop(op, getQReg128(nn)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr);
+      return True;
+   }
+
    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
       /* -------- 0,0x,11101: SCVTF -------- */
       /* -------- 1,0x,11101: UCVTF -------- */
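
So the guest side maps the unsigned-estimate instructions directly onto VEX's
existing Iop_RecipEst32Ux4 / Iop_RSqrtEst32Ux4, with math_MAYBE_ZERO_HI64
clearing the upper 64 bits for the 2s arrangement.  Roughly, as a sketch of
the IR produced (not literal tool output):

   urecpe v2.2s, v7.2s
      t0      = RecipEst32Ux4( GET:V128(q7) )
      PUT(q2) = ZeroHI64ofV128( t0 )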
index df9b4270daffbe786b99548f85153cefb3feb170..66beb81ba08cc07440f2c17fb720f9ea713ff365 100644 (file)
@@ -722,29 +722,31 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
 {
    switch (op) {
-      case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d";  return;
-      case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s";  return;
-      case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d";  return;
-      case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s";  return;
-      case ARM64vecu_NOT:      *nm = "not  "; *ar = "all"; return;
-      case ARM64vecu_ABS64x2:  *nm = "abs  "; *ar = "2d";  return;
-      case ARM64vecu_ABS32x4:  *nm = "abs  "; *ar = "4s";  return;
-      case ARM64vecu_ABS16x8:  *nm = "abs  "; *ar = "8h";  return;
-      case ARM64vecu_ABS8x16:  *nm = "abs  "; *ar = "16b"; return;
-      case ARM64vecu_CLS32x4:  *nm = "cls  "; *ar = "4s";  return;
-      case ARM64vecu_CLS16x8:  *nm = "cls  "; *ar = "8h";  return;
-      case ARM64vecu_CLS8x16:  *nm = "cls  "; *ar = "16b"; return;
-      case ARM64vecu_CLZ32x4:  *nm = "clz  "; *ar = "4s";  return;
-      case ARM64vecu_CLZ16x8:  *nm = "clz  "; *ar = "8h";  return;
-      case ARM64vecu_CLZ8x16:  *nm = "clz  "; *ar = "16b"; return;
-      case ARM64vecu_CNT8x16:  *nm = "cnt  "; *ar = "16b"; return;
-      case ARM64vecu_RBIT:     *nm = "rbit "; *ar = "16b"; return;
-      case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
-      case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
-      case ARM64vecu_REV328H:  *nm = "rev32"; *ar = "8h";  return;
-      case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
-      case ARM64vecu_REV648H:  *nm = "rev64"; *ar = "8h";  return;
-      case ARM64vecu_REV644S:  *nm = "rev64"; *ar = "4s";  return;
+      case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
+      case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
+      case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
+      case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
+      case ARM64vecu_NOT:         *nm = "not  ";   *ar = "all"; return;
+      case ARM64vecu_ABS64x2:     *nm = "abs  ";   *ar = "2d";  return;
+      case ARM64vecu_ABS32x4:     *nm = "abs  ";   *ar = "4s";  return;
+      case ARM64vecu_ABS16x8:     *nm = "abs  ";   *ar = "8h";  return;
+      case ARM64vecu_ABS8x16:     *nm = "abs  ";   *ar = "16b"; return;
+      case ARM64vecu_CLS32x4:     *nm = "cls  ";   *ar = "4s";  return;
+      case ARM64vecu_CLS16x8:     *nm = "cls  ";   *ar = "8h";  return;
+      case ARM64vecu_CLS8x16:     *nm = "cls  ";   *ar = "16b"; return;
+      case ARM64vecu_CLZ32x4:     *nm = "clz  ";   *ar = "4s";  return;
+      case ARM64vecu_CLZ16x8:     *nm = "clz  ";   *ar = "8h";  return;
+      case ARM64vecu_CLZ8x16:     *nm = "clz  ";   *ar = "16b"; return;
+      case ARM64vecu_CNT8x16:     *nm = "cnt  ";   *ar = "16b"; return;
+      case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
+      case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
+      case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
+      case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
+      case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
+      case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
+      case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
+      case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
+      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
       default: vpanic("showARM64VecUnaryOp");
    }
 }
@@ -2548,6 +2550,7 @@ static inline UChar qregNo ( HReg r )
 #define X101110  BITS8(0,0, 1,0,1,1,1,0)
 #define X110000  BITS8(0,0, 1,1,0,0,0,0)
 #define X110001  BITS8(0,0, 1,1,0,0,0,1)
+#define X110010  BITS8(0,0, 1,1,0,0,1,0)
 #define X110100  BITS8(0,0, 1,1,0,1,0,0)
 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
@@ -4605,6 +4608,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             010 01110 00 1 00000 000010 n d  REV64 Vd.16b, Vn.16b
             010 01110 01 1 00000 000010 n d  REV64 Vd.8h, Vn.8h
             010 01110 10 1 00000 000010 n d  REV64 Vd.4s, Vn.4s
+
+            010 01110 10 1 00001 110010 n d  URECPE Vd.4s, Vn.4s
+            011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
          */
          UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
          UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -4678,6 +4684,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             case ARM64vecu_REV644S:
                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
                break;
+            case ARM64vecu_URECPE32x4:
+               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
+               break;
+            case ARM64vecu_URSQRTE32x4:
+               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
+               break;
             default:
                goto bad;
          }
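
As a quick check on the two new emit cases: packing the comment's fields with
the same 3/8/5/6/5/5 split reproduces the architectural encoding.  A
standalone sketch (pack_3_8_5_6_5_5 is a stand-in mirroring the file's
X_3_8_5_6_5_5 macro):

   #include <assert.h>
   #include <stdint.h>

   /* Pack six fields of 3+8+5+6+5+5 = 32 bits, high to low. */
   static uint32_t pack_3_8_5_6_5_5(uint32_t f1, uint32_t f2, uint32_t f3,
                                    uint32_t f4, uint32_t f5, uint32_t f6)
   {
      return (f1 << 29) | (f2 << 21) | (f3 << 16)
             | (f4 << 10) | (f5 << 5) | f6;
   }

   int main(void)
   {
      /* URECPE v0.4s, v1.4s: 010 01110101 00001 110010, n=1, d=0 */
      uint32_t w = pack_3_8_5_6_5_5(0x2, 0x75, 0x01, 0x32, 1, 0);
      assert(w == 0x4EA1C820u);  /* architectural encoding of this insn */
      return 0;
   }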
index e100b0fc2c681c623b0808d7a9c34f2c2beec1b0..a2aa7ac16ca37cc3f730caeb6cd471cefb10fea6 100644 (file)
@@ -386,7 +386,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecmo_SUQADD64x2=335, ARM64vecmo_SUQADD32x4,
+      ARM64vecmo_SUQADD64x2=300, ARM64vecmo_SUQADD32x4,
       ARM64vecmo_SUQADD16x8,     ARM64vecmo_SUQADD8x16,
       ARM64vecmo_USQADD64x2,     ARM64vecmo_USQADD32x4,
       ARM64vecmo_USQADD16x8,     ARM64vecmo_USQADD8x16,
@@ -396,7 +396,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecu_FNEG64x2=300, ARM64vecu_FNEG32x4,
+      ARM64vecu_FNEG64x2=350, ARM64vecu_FNEG32x4,
       ARM64vecu_FABS64x2,     ARM64vecu_FABS32x4,
       ARM64vecu_NOT,
       ARM64vecu_ABS64x2,      ARM64vecu_ABS32x4,
@@ -408,13 +408,15 @@ typedef
       ARM64vecu_REV1616B,
       ARM64vecu_REV3216B,     ARM64vecu_REV328H,
       ARM64vecu_REV6416B,     ARM64vecu_REV648H,      ARM64vecu_REV644S,
+      ARM64vecu_URECPE32x4,
+      ARM64vecu_URSQRTE32x4,
       ARM64vecu_INVALID
    }
    ARM64VecUnaryOp;
 
 typedef
    enum {
-      ARM64vecshi_USHR64x2=350, ARM64vecshi_USHR32x4,
+      ARM64vecshi_USHR64x2=400, ARM64vecshi_USHR32x4,
       ARM64vecshi_USHR16x8,     ARM64vecshi_USHR8x16,
       ARM64vecshi_SSHR64x2,     ARM64vecshi_SSHR32x4,
       ARM64vecshi_SSHR16x8,     ARM64vecshi_SSHR8x16,
@@ -441,7 +443,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecna_XTN=400,
+      ARM64vecna_XTN=450,
       ARM64vecna_SQXTN,
       ARM64vecna_UQXTN,
       ARM64vecna_SQXTUN,
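
The base-value changes in this header (300/350/400/450 across the four hunks
above) appear simply to re-space the opcode enums so their value ranges stay
disjoint now that the unary enum has grown; presumably that keeps any raw op
value seen in a dump attributable to a single enum:

   300..  ARM64vecmo_*   (saturating-accumulate ops)
   350..  ARM64vecu_*    (vector unary ops, two entries larger now)
   400..  ARM64vecshi_*  (vector shift ops)
   450..  ARM64vecna_*   (vector narrowing ops)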
index 70c80732efcd7fbe68d7478f61250192640e6601..0da86c507710b4ae0c6031d095993776c404800f 100644 (file)
@@ -2213,35 +2213,39 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          case Iop_Reverse8sIn16_x8:
          case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
          case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
-         case Iop_Reverse32sIn64_x2: 
+         case Iop_Reverse32sIn64_x2:
+         case Iop_RecipEst32Ux4:
+         case Iop_RSqrtEst32Ux4:
          {
             HReg res = newVRegV(env);
             HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
             ARM64VecUnaryOp op = ARM64vecu_INVALID;
             switch (e->Iex.Unop.op) {
-               case Iop_NotV128:  op = ARM64vecu_NOT;      break;
-               case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
-               case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
-               case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
-               case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
-               case Iop_Abs64x2:  op = ARM64vecu_ABS64x2;  break;
-               case Iop_Abs32x4:  op = ARM64vecu_ABS32x4;  break;
-               case Iop_Abs16x8:  op = ARM64vecu_ABS16x8;  break;
-               case Iop_Abs8x16:  op = ARM64vecu_ABS8x16;  break;
-               case Iop_Cls32x4:  op = ARM64vecu_CLS32x4;  break;
-               case Iop_Cls16x8:  op = ARM64vecu_CLS16x8;  break;
-               case Iop_Cls8x16:  op = ARM64vecu_CLS8x16;  break;
-               case Iop_Clz32x4:  op = ARM64vecu_CLZ32x4;  break;
-               case Iop_Clz16x8:  op = ARM64vecu_CLZ16x8;  break;
-               case Iop_Clz8x16:  op = ARM64vecu_CLZ8x16;  break;
-               case Iop_Cnt8x16:  op = ARM64vecu_CNT8x16;  break;
-               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;     break;
-               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B; break;
-               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B; break;
-               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;  break;
-               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B; break;
-               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;  break;
-               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;  break;
+               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
+               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
+               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
+               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
+               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
+               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
+               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
+               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
+               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
+               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
+               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
+               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
+               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
+               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
+               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
+               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
+               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
+               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
+               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
+               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
+               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
+               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
+               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
+               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
+               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
                default: vassert(0);
             }
             addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
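
Taken together the four files complete the pipeline for the new estimate
instructions; schematically (names as in the hunks above):

   guest (guest_arm64_toIR.c):  urecpe vD.4s, vN.4s
                                  --> unop(Iop_RecipEst32Ux4, getQReg128(nn))
   isel  (host_arm64_isel.c):   Iop_RecipEst32Ux4
                                  --> ARM64Instr_VUnaryV(ARM64vecu_URECPE32x4, res, arg)
   emit  (host_arm64_defs.c):   ARM64vecu_URECPE32x4
                                  --> 0x4EA1C800 | (vN << 5) | vD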