From: Julian Seward
Date: Thu, 19 Jun 2014 22:20:47 +0000 (+0000)
Subject: Implement: dup_{d_d[], s_s[], h_h[], b_b[]}, ext
X-Git-Tag: svn/VALGRIND_3_10_1^2~91
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3317aa4a75948266a5e9c9dbb1dbfbc4b3d3aad4;p=thirdparty%2Fvalgrind.git

Implement: dup_{d_d[], s_s[], h_h[], b_b[]}, ext

git-svn-id: svn://svn.valgrind.org/vex/trunk@2880
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 63186fee26..07d19c9013 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -5555,7 +5555,58 @@ void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
 static
 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31  29     23  21 20 15 14  10 9 4
+      0 q 101110 op2 0  m  0 imm4 0  n d
+      Decode fields: op2
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(29,24) != BITS6(1,0,1,1,1,0)
+       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
+      return False;
+   }
+   UInt bitQ = INSN(30,30);
+   UInt op2  = INSN(23,22);
+   UInt mm   = INSN(20,16);
+   UInt imm4 = INSN(14,11);
+   UInt nn   = INSN(9,5);
+   UInt dd   = INSN(4,0);
+
+   if (op2 == BITS2(0,0)) {
+      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
+      IRTemp sHi = newTemp(Ity_V128);
+      IRTemp sLo = newTemp(Ity_V128);
+      IRTemp res = newTemp(Ity_V128);
+      assign(sHi, getQReg128(mm));
+      assign(sLo, getQReg128(nn));
+      if (bitQ == 1) {
+         if (imm4 == 0) {
+            assign(res, mkexpr(sLo));
+         } else {
+            vassert(imm4 <= 15);
+            assign(res,
+                   binop(Iop_OrV128,
+                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
+                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
+         }
+         putQReg128(dd, mkexpr(res));
+         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
+      } else {
+         if (imm4 >= 8) return False;
+         if (imm4 == 0) {
+            assign(res, mkexpr(sLo));
+         } else {
+            assign(res,
+                   binop(Iop_ShrV128,
+                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
+                         mkU8(8 * imm4)));
+         }
+         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
+      }
+      return True;
+   }
+   return False;
 #  undef INSN
 }
 
@@ -6115,7 +6166,66 @@ Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31 28       20   15 14   10 9 4
+      01 op 11110000 imm5 0 imm4 1  n d
+      Decode fields: op,imm4
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,30) != BITS2(0,1)
+       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
+       || INSN(15,15) != 0 || INSN(10,10) != 1) {
+      return False;
+   }
+   UInt bitOP = INSN(29,29);
+   UInt imm5  = INSN(20,16);
+   UInt imm4  = INSN(14,11);
+   UInt nn    = INSN(9,5);
+   UInt dd    = INSN(4,0);
+
+   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
+      /* -------- 0,0000 DUP (element, scalar) -------- */
+      IRTemp w0     = newTemp(Ity_I64);
+      const HChar* arTs = "??";
+      IRType laneTy = Ity_INVALID;
+      UInt   laneNo = 16; /* invalid */
+      if (imm5 & 1) {
+         arTs   = "b";
+         laneNo = (imm5 >> 1) & 15;
+         laneTy = Ity_I8;
+         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 2) {
+         arTs   = "h";
+         laneNo = (imm5 >> 2) & 7;
+         laneTy = Ity_I16;
+         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 4) {
+         arTs   = "s";
+         laneNo = (imm5 >> 3) & 3;
+         laneTy = Ity_I32;
+         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 8) {
+         arTs   = "d";
+         laneNo = (imm5 >> 4) & 1;
+         laneTy = Ity_I64;
+         assign(w0, getQRegLane(nn, laneNo, laneTy));
+      }
+      else {
+         /* invalid; leave laneTy unchanged. */
+      }
+      /* */
+      if (laneTy != Ity_INVALID) {
+         vassert(laneNo < 16);
+         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
+         DIP("dup %s, %s.%s[%u]\n",
+             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
+         return True;
+      }
+      /* else fall through */
+   }
+   return False;
 #  undef INSN
 }
 
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index d7445399ac..8da0d30d8b 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1690,6 +1690,16 @@ ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
    vassert(amt > 0 && amt <= maxSh);
    return i;
 }
+ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+   i->tag                 = ARM64in_VExtV;
+   i->ARM64in.VExtV.dst   = dst;
+   i->ARM64in.VExtV.srcLo = srcLo;
+   i->ARM64in.VExtV.srcHi = srcHi;
+   i->ARM64in.VExtV.amtB  = amtB;
+   vassert(amtB >= 1 && amtB <= 15);
+   return i;
+}
 //ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
 //ZZ    i->tag = ARMin_VAluS;
@@ -2335,6 +2345,16 @@ void ppARM64Instr ( ARM64Instr* i ) {
          vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
          return;
       }
+      case ARM64in_VExtV: {
+         vex_printf("ext ");
+         ppHRegARM64(i->ARM64in.VExtV.dst);
+         vex_printf(".16b, ");
+         ppHRegARM64(i->ARM64in.VExtV.srcLo);
+         vex_printf(".16b, ");
+         ppHRegARM64(i->ARM64in.VExtV.srcHi);
+         vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
+         return;
+      }
 //ZZ    case ARMin_VAluS:
 //ZZ       vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
 //ZZ       ppHRegARM(i->ARMin.VAluS.dst);
@@ -2816,6 +2836,10 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VShiftImmV.src);
          return;
+      case ARM64in_VExtV:
+         addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
+         addHRegUse(u, HRmRead,  i->ARM64in.VExtV.srcLo);
+         addHRegUse(u, HRmRead,  i->ARM64in.VExtV.srcHi);
 //ZZ       case ARMin_VAluS:
 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
@@ -3112,6 +3136,12 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.VShiftImmV.src
            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
          return;
+      case ARM64in_VExtV:
+         i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
+         i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
+         i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
+         return;
+
 //ZZ       case ARMin_VAluS:
 //ZZ          i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
 //ZZ          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
@@ -5410,8 +5440,8 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       case ARM64in_VShiftImmV: {
          /*
-            0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #sh
-            0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #sh
+            011 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #sh
+            010 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #sh
             where immh:immb
                = case T of
                     2d  | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
@@ -5419,7 +5449,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                     8h  | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
                     16b | sh in 1..7  -> let xxx = 8-sh in 0001:xxx
 
-            0q0 011110 immh immb 010101 n d  SHL Vd.T, Vn.T, #sh
+            010 011110 immh immb 010101 n d  SHL Vd.T, Vn.T, #sh
             where immh:immb
                = case T of
                     2d  | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
@@ -5487,8 +5517,6 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto done;
             }
             break;
 
-
-            /* 8x16 cases */
             case ARM64vecsh_SSHR8x16: syned = True;
             case ARM64vecsh_USHR8x16: /* fallthrough */
@@ -5507,12 +5535,26 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto done;
             }
             break;
-
             default:
                break;
          }
          goto bad;
       }
+      case ARM64in_VExtV: {
+         /*
+            011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
+            where imm4 = the shift amount, in bytes,
+                  Vn is low operand, Vm is high operand
+         */
+         UInt vD   = qregNo(i->ARM64in.VExtV.dst);
+         UInt vN   = qregNo(i->ARM64in.VExtV.srcLo);
+         UInt vM   = qregNo(i->ARM64in.VExtV.srcHi);
+         UInt imm4 = i->ARM64in.VExtV.amtB;
+         vassert(imm4 >= 1 && imm4 <= 15);
+         *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
+                              X000000 | (imm4 << 1), vN, vD);
+         goto done;
+      }
 //ZZ    case ARMin_VAluS: {
 //ZZ       UInt dN = fregNo(i->ARMin.VAluS.argL);
 //ZZ       UInt dD = fregNo(i->ARMin.VAluS.dst);
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 9b8491e7de..23be594c12 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -561,6 +561,7 @@ typedef
       ARM64in_VUnaryV,
       ARM64in_VNarrowV,
       ARM64in_VShiftImmV,
+      ARM64in_VExtV,
 //ZZ       ARMin_VAluS,
 //ZZ       ARMin_VCMovD,
 //ZZ       ARMin_VCMovS,
@@ -855,6 +856,12 @@ typedef
             HReg src;
             UInt amt;
          } VShiftImmV;
+         struct {
+            HReg dst;
+            HReg srcLo;
+            HReg srcHi;
+            UInt amtB;
+         } VExtV;
 //ZZ          /* 32-bit FP binary arithmetic */
 //ZZ          struct {
 //ZZ             ARMVfpOp op;
@@ -1051,6 +1058,8 @@ extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
 extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src );
 extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
                                            HReg dst, HReg src, UInt amt );
+extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
+                                      HReg srcLo, HReg srcHi, UInt amtB );
 //ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
 //ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
 //ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 9aa03372e8..7e85082af0 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -5626,6 +5626,49 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          /* else fall out; this is unhandled */
          break;
       }
+
+      case Iop_ShlV128:
+      case Iop_ShrV128: {
+         Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
+         /* This is tricky.  Generate an EXT instruction with zeroes in
+            the high operand (shift right) or low operand (shift left).
+            Note that we can only slice in the EXT instruction at a byte
+            level of granularity, so the shift amount needs careful
+            checking. */
+         IRExpr* argL = e->Iex.Binop.arg1;
+         IRExpr* argR = e->Iex.Binop.arg2;
+         if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+            UInt amt = argR->Iex.Const.con->Ico.U8;
+            Bool amtOK = False;
+            switch (amt) {
+               case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
+               case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
+               case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
+                  amtOK = True; break;
+            }
+            /* We could also deal with amt==0 by copying the source to
+               the destination, but there's no need for that so far. */
+            if (amtOK) {
+               HReg src = iselV128Expr(env, argL);
+               HReg srcZ = newVRegV(env);
+               addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
+               UInt immB = amt / 8;
+               vassert(immB >= 1 && immB <= 15);
+               HReg dst = newVRegV(env);
+               if (isSHR) {
+                  addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
+                                                      immB));
+               } else {
+                  addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
+                                                      16 - immB));
+               }
+               return dst;
+            }
+         }
+         /* else fall out; this is unhandled */
+         break;
+      }
+
 //ZZ       case Iop_CmpGT8Ux16:
 //ZZ       case Iop_CmpGT16Ux8:
 //ZZ       case Iop_CmpGT32Ux4: {
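
For reference (not part of the patch): the front-end IR above, the ARM64in_VExtV encoding case, and the new Iop_ShlV128/Iop_ShrV128 selection all rely on the same byte-window view of EXT, namely that the result is 16 consecutive bytes of the 32-byte concatenation Vm:Vn, starting at byte imm4, with Vn supplying the low half. The following standalone C sketch models that semantics and checks the isel mapping (a 128-bit shift right by 8*n bits equals EXT with a zero high operand and amount n; a shift left by 8*n equals EXT with a zero low operand and amount 16-n). It is purely illustrative and independent of the VEX sources; all names in it are hypothetical.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of EXT Vd.16b, Vn.16b, Vm.16b, #imm4: result byte i is byte
   (imm4 + i) of the 32-byte value Vm:Vn, where Vn is the low half and
   byte 0 is the least significant byte. */
static void ext_16b ( uint8_t res[16], const uint8_t vn[16],
                      const uint8_t vm[16], unsigned imm4 )
{
   uint8_t concat[32];
   memcpy(concat,      vn, 16);   /* low  half = Vn */
   memcpy(concat + 16, vm, 16);   /* high half = Vm */
   for (unsigned i = 0; i < 16; i++)
      res[i] = concat[imm4 + i];
}

int main ( void )
{
   uint8_t src[16], zero[16] = {0}, shr[16], shl[16], viaExt[16];
   for (unsigned i = 0; i < 16; i++) src[i] = (uint8_t)(i + 1);
   for (unsigned n = 1; n <= 15; n++) {
      /* Byte-wise models of ShrV128/ShlV128 by 8*n bits. */
      for (unsigned i = 0; i < 16; i++) {
         shr[i] = (i + n < 16) ? src[i + n] : 0;
         shl[i] = (i >= n)     ? src[i - n] : 0;
      }
      ext_16b(viaExt, src, zero, n);        /* src is the low operand  */
      if (memcmp(shr, viaExt, 16) != 0) { printf("SHR mismatch, n=%u\n", n); return 1; }
      ext_16b(viaExt, zero, src, 16 - n);   /* src is the high operand */
      if (memcmp(shl, viaExt, 16) != 0) { printf("SHL mismatch, n=%u\n", n); return 1; }
   }
   printf("EXT-based V128 shift mapping agrees for all byte amounts 1..15\n");
   return 0;
}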
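Also for reference, and again only an illustrative sketch rather than part of the patch: the DUP (element, scalar) decode in dis_AdvSIMD_scalar_copy derives the lane size from the lowest set bit of imm5 and the lane number from the bits above it. A tiny C program that prints the resulting mapping for every imm5 value:

#include <stdio.h>

int main ( void )
{
   /* Mirrors the imm5 tests above: bit 0 set -> b, else bit 1 -> h,
      else bit 2 -> s, else bit 3 -> d, else invalid. */
   for (unsigned imm5 = 0; imm5 < 32; imm5++) {
      if (imm5 & 1)
         printf("imm5=%2u -> b, lane %u\n", imm5, (imm5 >> 1) & 15);
      else if (imm5 & 2)
         printf("imm5=%2u -> h, lane %u\n", imm5, (imm5 >> 2) & 7);
      else if (imm5 & 4)
         printf("imm5=%2u -> s, lane %u\n", imm5, (imm5 >> 3) & 3);
      else if (imm5 & 8)
         printf("imm5=%2u -> d, lane %u\n", imm5, (imm5 >> 4) & 1);
      else
         printf("imm5=%2u -> invalid\n", imm5);
   }
   return 0;
}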