UInt laneSzB = 0;
switch (laneTy) {
case Ity_I8: laneSzB = 1; break;
- case Ity_I16: laneSzB = 2; break;
+ case Ity_F16: case Ity_I16: laneSzB = 2; break;
case Ity_F32: case Ity_I32: laneSzB = 4; break;
case Ity_F64: case Ity_I64: laneSzB = 8; break;
case Ity_V128: laneSzB = 16; break;
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
- case Ity_F32: case Ity_F64: case Ity_V128:
+ case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
break;
default:
vassert(0); // Other cases are probably invalid
Int off = offsetQRegLane(qregNo, ty, 0);
switch (ty) {
case Ity_I8:
- case Ity_I16:
+ case Ity_F16: case Ity_I16:
case Ity_I32: case Ity_I64:
case Ity_F32: case Ity_F64: case Ity_V128:
break;
switch (laneTy) {
case Ity_F64: case Ity_I64:
case Ity_I32: case Ity_F32:
- case Ity_I16:
+ case Ity_I16: case Ity_F16:
case Ity_I8:
break;
default:
Int off = offsetQRegLane(qregNo, laneTy, laneNo);
switch (laneTy) {
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- case Ity_F64: case Ity_F32:
+ case Ity_F64: case Ity_F32: case Ity_F16:
break;
default:
vassert(0); // Other cases are ATC
return True;
}
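+ /* Decode the opcode/size combination into ix, which selects one of
+    the five FCVT{N,M,A,P,Z} rounding variants handled below. */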
+ ix = 0; /*INVALID*/
+ switch (opcode) {
+ case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
+ case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
+ case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
+ default: break;
+ }
+ if (ix > 0) {
+ /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
+ /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
+ /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
+ /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
+ /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
+ /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
+ /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
+ /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
+ /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
+ /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
+ Bool is64 = (size & 1) == 1;
+ IRType tyF = is64 ? Ity_F64 : Ity_F32;
+ IRType tyI = is64 ? Ity_I64 : Ity_I32;
+ IRRoundingMode irrm = 8; /*impossible value, overwritten below*/
+ HChar ch = '?';
+ switch (ix) {
+ case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
+ case 2: ch = 'm'; irrm = Irrm_NegINF; break;
+ /* FCVTA* rounds to nearest with ties away from zero; VEX IR has
+    no such rounding mode, so Irrm_NEAREST (ties to even) is the
+    closest available approximation. */
+ case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
+ case 4: ch = 'p'; irrm = Irrm_PosINF; break;
+ case 5: ch = 'z'; irrm = Irrm_ZERO; break;
+ default: vassert(0);
+ }
+ IROp cvt = Iop_INVALID;
+ if (bitU == 1) {
+ cvt = is64 ? Iop_F64toI64U : Iop_F32toI32U;
+ } else {
+ cvt = is64 ? Iop_F64toI64S : Iop_F32toI32S;
+ }
+ IRTemp src = newTemp(tyF);
+ IRTemp res = newTemp(tyI);
+ assign(src, getQRegLane(nn, 0, tyF));
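+ /* These conversions use a rounding mode fixed by the instruction
+    encoding, not the one in FPCR, so irrm is supplied as an IR
+    constant. */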
+ assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
+ putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
+ if (!is64) {
+ putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
+ }
+ putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
+ HChar sOrD = is64 ? 'd' : 's';
+ DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
+ sOrD, dd, sOrD, nn);
+ return True;
+ }
+
if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
/* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
/* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
return True;
}
- if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
- /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
- IRTemp rm = mk_get_IR_rounding_mode();
- IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
- IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
- putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
- putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
+ /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
+ IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
+ IRTemp rm = mk_get_IR_rounding_mode();
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, i, srcTy));
+ }
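+ /* FCVTN writes the low half of Dd; FCVTN2 (bitQ == 1) writes the
+    high half, leaving the low half untouched. */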
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, nLanes * bitQ + i,
+ binop(opCvt, mkexpr(rm), mkexpr(src[i])));
+ }
if (bitQ == 0) {
putQRegLane(dd, 1, mkU64(0));
}
- DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
- nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
+ }
+
+ if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
+ /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
+ UInt nLanes = size == X00 ? 4 : 2;
+ IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
+ IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
+ IRTemp src[nLanes];
+ for (UInt i = 0; i < nLanes; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
+ }
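+ /* Widening F16->F32 / F32->F64 is exact, so no rounding mode is
+    needed and the conversion is a unop. */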
+ for (UInt i = 0; i < nLanes; i++) {
+ putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
+ }
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
return True;
}
/* -------- 01,000111: FCVT h_d -------- */
/* -------- 01,000100: FCVT s_d -------- */
/* 31 23 21 16 14 9 4
- 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
- --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
- --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
+ 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
+ --------- 11 ----- 01 --------- FCVT Dd, Hn
+ --------- 00 ----- 11 --------- FCVT Hd, Sn
--------- 00 ----- 01 --------- FCVT Dd, Sn
- --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
+ --------- 01 ----- 11 --------- FCVT Hd, Dn
--------- 01 ----- 00 --------- FCVT Sd, Dn
Rounding, when dst is smaller than src, is per the FPCR.
*/
UInt b2322 = ty;
UInt b1615 = opcode & BITS2(1,1);
- if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
- /* Convert S to D */
- IRTemp res = newTemp(Ity_F64);
- assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
- putQReg128(dd, mkV128(0x0000));
- putQRegLO(dd, mkexpr(res));
- DIP("fcvt %s, %s\n",
- nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
- return True;
- }
- if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
- /* Convert D to S */
- IRTemp res = newTemp(Ity_F32);
- assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
- getQRegLO(nn, Ity_F64)));
- putQReg128(dd, mkV128(0x0000));
- putQRegLO(dd, mkexpr(res));
- DIP("fcvt %s, %s\n",
- nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
- return True;
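+ /* Pack ty (bits 23:22) and opcode bits 16:15 into a four-bit key
+    identifying the source/destination precision pair. */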
+ switch ((b2322 << 2) | b1615) {
+ case BITS4(0,0,0,1): // S -> D
+ case BITS4(1,1,0,1): { // H -> D
+ Bool srcIsH = b2322 == BITS2(1,1);
+ IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(Ity_F64);
+ assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
+ getQRegLO(nn, srcTy)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
+ return True;
+ }
+ case BITS4(0,1,0,0): // D -> S
+ case BITS4(0,1,1,1): { // D -> H
+ Bool dstIsH = b1615 == BITS2(1,1);
+ IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(dstTy);
+ assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQRegLO(nn, Ity_F64)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
+ return True;
+ }
+ case BITS4(0,0,1,1): // S -> H
+ case BITS4(1,1,0,0): { // H -> S
+ Bool toH = b1615 == BITS2(1,1);
+ IRType srcTy = toH ? Ity_F32 : Ity_F16;
+ IRType dstTy = toH ? Ity_F16 : Ity_F32;
+ IRTemp res = newTemp(dstTy);
+ if (toH) {
+ assign(res, binop(Iop_F32toF16,
+ mkexpr(mk_get_IR_rounding_mode()),
+ getQRegLO(nn, srcTy)));
+ } else {
+ assign(res, unop(Iop_F16toF32,
+ getQRegLO(nn, srcTy)));
+ }
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLO(dd, mkexpr(res));
+ DIP("fcvt %s, %s\n",
+ nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
+ return True;
+ }
+ default:
+ break;
}
/* else unhandled */
return False;
---------------- 01 -------------- FCVTP-------- (round to +inf)
---------------- 10 -------------- FCVTM-------- (round to -inf)
---------------- 11 -------------- FCVTZ-------- (round to zero)
-
---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
vex_printf("(S-reg)");
}
+static void ppHRegARM64asHreg ( HReg reg ) {
+ ppHRegARM64(reg);
+ vex_printf("(H-reg)");
+}
+
/* --------- Condition codes, ARM64 encoding. --------- */
i->tag = ARM64in_MFence;
return i;
}
+ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg hD, HReg rN, UInt uimm12 ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VLdStH;
+ i->ARM64in.VLdStH.isLoad = isLoad;
+ i->ARM64in.VLdStH.hD = hD;
+ i->ARM64in.VLdStH.rN = rN;
+ i->ARM64in.VLdStH.uimm12 = uimm12;
+ vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+ return i;
+}
ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VLdStS;
+ i->tag = ARM64in_VLdStS;
i->ARM64in.VLdStS.isLoad = isLoad;
i->ARM64in.VLdStS.sD = sD;
i->ARM64in.VLdStS.rN = rN;
}
ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VLdStD;
+ i->tag = ARM64in_VLdStD;
i->ARM64in.VLdStD.isLoad = isLoad;
i->ARM64in.VLdStD.dD = dD;
i->ARM64in.VLdStD.rN = rN;
}
ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
- i->tag = ARM64in_VCvtSD;
+ i->tag = ARM64in_VCvtSD;
i->ARM64in.VCvtSD.sToD = sToD;
i->ARM64in.VCvtSD.dst = dst;
i->ARM64in.VCvtSD.src = src;
return i;
}
+ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VCvtHS;
+ i->ARM64in.VCvtHS.hToS = hToS;
+ i->ARM64in.VCvtHS.dst = dst;
+ i->ARM64in.VCvtHS.src = src;
+ return i;
+}
+ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+ i->tag = ARM64in_VCvtHD;
+ i->ARM64in.VCvtHD.hToD = hToD;
+ i->ARM64in.VCvtHD.dst = dst;
+ i->ARM64in.VCvtHD.src = src;
+ return i;
+}
ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
i->tag = ARM64in_VUnaryD;
case ARM64in_MFence:
vex_printf("(mfence) dsb sy; dmb sy; isb");
return;
+ case ARM64in_VLdStH:
+ if (i->ARM64in.VLdStH.isLoad) {
+ vex_printf("ldr ");
+ ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+ vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
+ ppHRegARM64(i->ARM64in.VLdStH.rN);
+ vex_printf(")");
+ } else {
+ vex_printf("str ");
+ vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
+ ppHRegARM64(i->ARM64in.VLdStH.rN);
+ vex_printf("), ");
+ ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
+ }
+ return;
case ARM64in_VLdStS:
if (i->ARM64in.VLdStS.isLoad) {
vex_printf("ldr ");
ppHRegARM64(i->ARM64in.VCvtSD.src);
}
return;
+ case ARM64in_VCvtHS:
+ vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
+ if (i->ARM64in.VCvtHS.hToS) {
+ ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
+ vex_printf(", ");
+ ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
+ } else {
+ ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
+ vex_printf(", ");
+ ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
+ }
+ return;
+ case ARM64in_VCvtHD:
+ vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
+ if (i->ARM64in.VCvtHD.hToD) {
+ ppHRegARM64(i->ARM64in.VCvtHD.dst);
+ vex_printf(", ");
+ ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
+ } else {
+ ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
+ vex_printf(", ");
+ ppHRegARM64(i->ARM64in.VCvtHD.src);
+ }
+ return;
case ARM64in_VUnaryD:
vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
ppHRegARM64(i->ARM64in.VUnaryD.dst);
return;
case ARM64in_MFence:
return;
+ case ARM64in_VLdStH:
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
+ if (i->ARM64in.VLdStH.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
+ }
+ return;
case ARM64in_VLdStS:
addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
if (i->ARM64in.VLdStS.isLoad) {
addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
return;
+ case ARM64in_VCvtHS:
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
+ return;
+ case ARM64in_VCvtHD:
+ addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
+ return;
case ARM64in_VUnaryD:
addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
return;
case ARM64in_MFence:
return;
+ case ARM64in_VLdStH:
+ i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
+ i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
+ return;
case ARM64in_VLdStS:
i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
return;
+ case ARM64in_VCvtHS:
+ i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
+ i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
+ return;
+ case ARM64in_VCvtHD:
+ i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
+ i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
+ return;
case ARM64in_VUnaryD:
i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
#define X11011000 BITS8(1,1,0,1,1,0,0,0)
#define X11011010 BITS8(1,1,0,1,1,0,1,0)
#define X11011110 BITS8(1,1,0,1,1,1,1,0)
+#define X11100010 BITS8(1,1,1,0,0,0,1,0)
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
#define X11110101 BITS8(1,1,1,1,0,1,0,1)
// *p++ = 0xD5033F5F; /* clrex */
// goto done;
//}
+ case ARM64in_VLdStH: {
+ /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
+ 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
+ */
+ UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
+ UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
+ UInt uimm12 = i->ARM64in.VLdStH.uimm12;
+ Bool isLD = i->ARM64in.VLdStH.isLoad;
+ vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
+ uimm12 >>= 1;
+ vassert(uimm12 < (1<<12));
+ vassert(hD < 32);
+ vassert(rN < 31);
+ *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
+ uimm12, rN, hD);
+ goto done;
+ }
case ARM64in_VLdStS: {
/* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
goto done;
}
case ARM64in_VCvtSD: {
- /* 31 23 21 16 14 9 4
+ /* 31 23 21 16 14 9 4
000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
Rounding, when dst is smaller than src, is per the FPCR.
}
goto done;
}
+ case ARM64in_VCvtHS: {
+ /* 31 23 21 16 14 9 4
+ 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
+ ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
+ Rounding, when dst is smaller than src, is per the FPCR.
+ */
+ UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
+ UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
+ if (i->ARM64in.VCvtHS.hToS) {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
+ } else {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
+ }
+ goto done;
+ }
+ case ARM64in_VCvtHD: {
+ /* 31 23 21 16 14 9 4
+ 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
+ ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
+ Rounding, when dst is smaller than src, is per the FPCR.
+ */
+ UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
+ UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
+ if (i->ARM64in.VCvtHD.hToD) {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
+ } else {
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
+ }
+ goto done;
+ }
case ARM64in_VUnaryD: {
/* 31 23 21 16 14 9 4
000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
ARM64in_StrEX,
ARM64in_MFence,
/* ARM64in_V*: scalar ops involving vector registers */
- ARM64in_VLdStS, /* 32-bit FP load/store, with imm offset */
- ARM64in_VLdStD, /* 64-bit FP load/store, with imm offset */
- ARM64in_VLdStQ,
+ ARM64in_VLdStH, /* ld/st to/from low 16 bits of vec reg, imm offset */
+ ARM64in_VLdStS, /* ld/st to/from low 32 bits of vec reg, imm offset */
+ ARM64in_VLdStD, /* ld/st to/from low 64 bits of vec reg, imm offset */
+ ARM64in_VLdStQ, /* ld/st to/from all 128 bits of vec reg, no offset */
ARM64in_VCvtI2F,
ARM64in_VCvtF2I,
- ARM64in_VCvtSD,
+ ARM64in_VCvtSD, /* scalar 32 bit FP <--> 64 bit FP */
+ ARM64in_VCvtHS, /* scalar 16 bit FP <--> 32 bit FP */
+ ARM64in_VCvtHD, /* scalar 16 bit FP <--> 64 bit FP */
ARM64in_VUnaryD,
ARM64in_VUnaryS,
ARM64in_VBinD,
struct {
} MFence;
/* --- INSTRUCTIONS INVOLVING VECTOR REGISTERS --- */
- /* 32-bit Fp load/store */
+ /* ld/st to/from low 16 bits of vec reg, imm offset */
+ struct {
+ Bool isLoad;
+ HReg hD;
+ HReg rN;
+ UInt uimm12; /* 0 .. 8190 inclusive, 0 % 2 */
+ } VLdStH;
+ /* ld/st to/from low 32 bits of vec reg, imm offset */
struct {
Bool isLoad;
HReg sD;
HReg rN;
UInt uimm12; /* 0 .. 16380 inclusive, 0 % 4 */
} VLdStS;
- /* 64-bit Fp load/store */
+ /* ld/st to/from low 64 bits of vec reg, imm offset */
struct {
Bool isLoad;
HReg dD;
HReg rN;
UInt uimm12; /* 0 .. 32760 inclusive, 0 % 8 */
} VLdStD;
- /* 128-bit Vector load/store. */
+ /* ld/st to/from all 128 bits of vec reg, no offset */
struct {
Bool isLoad;
HReg rQ; // data
UChar armRM; // ARM encoded RM:
// 00=nearest, 01=+inf, 10=-inf, 11=zero
} VCvtF2I;
- /* Convert between 32-bit and 64-bit FP values (both
- ways). (FCVT) */
+ /* Convert between 32-bit and 64-bit FP values (both ways). (FCVT) */
struct {
Bool sToD; /* True: F32->F64. False: F64->F32 */
HReg dst;
HReg src;
} VCvtSD;
+ /* Convert between 16-bit and 32-bit FP values (both ways). (FCVT) */
+ struct {
+ Bool hToS; /* True: F16->F32. False: F32->F16 */
+ HReg dst;
+ HReg src;
+ } VCvtHS;
+ /* Convert between 16-bit and 64-bit FP values (both ways). (FCVT) */
+ struct {
+ Bool hToD; /* True: F16->F64. False: F64->F16 */
+ HReg dst;
+ HReg src;
+ } VCvtHD;
/* 64-bit FP unary */
struct {
ARM64FpUnaryOp op;
extern ARM64Instr* ARM64Instr_LdrEX ( Int szB );
extern ARM64Instr* ARM64Instr_StrEX ( Int szB );
extern ARM64Instr* ARM64Instr_MFence ( void );
+extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg hD, HReg rN,
+ UInt uimm12 /* 0 .. 8190, 0 % 2 */ );
extern ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN,
UInt uimm12 /* 0 .. 16380, 0 % 4 */ );
extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
UChar armRM );
extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src );
+extern ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg );
#include "host_arm64_defs.h"
-//ZZ /*---------------------------------------------------------*/
-//ZZ /*--- ARMvfp control word stuff ---*/
-//ZZ /*---------------------------------------------------------*/
-//ZZ
-//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
-//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
-//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
-//ZZ this corresponds to a FPSCR value of zero.
-//ZZ
-//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
-//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
-//ZZ should be zero).
-//ZZ */
-//ZZ
-//ZZ #define DEFAULT_FPSCR 0
-
-
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselF16Expr ( ISelEnv* env, IRExpr* e );
+
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
return ARM64cc_NE;
}
+ /* --- patterns rooted at: CmpNEZ16 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ16) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
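+ /* Test only the low 16 bits; bits above 15 of r1 are not
+    guaranteed to be zero. */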
+ addInstr(env, ARM64Instr_Test(r1, xFFFF));
+ return ARM64cc_NE;
+ }
+
/* --- patterns rooted at: CmpNEZ64 --- */
if (e->tag == Iex_Unop
addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
return dst;
}
+ case Iop_1Sto16:
case Iop_1Sto32:
case Iop_1Sto64: {
/* As with the iselStmt case for 'tmp:I1 = expr', we could
addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
return dst;
}
+ case Iop_F16toF64: {
+ HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
+ return dst;
+ }
case Iop_I32UtoF64:
case Iop_I32StoF64: {
/* Rounding mode is not involved here, since the
addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
return dst;
}
+ case Iop_F16toF32: {
+ HReg src = iselF16Expr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
+ return dst;
+ }
default:
break;
}
HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
HReg dstS = newVRegD(env);
- addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
+ addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
return dstS;
}
case Iop_I32UtoF32:
}
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (16 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 16-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. Values are generated into HRcFlt64
+ registers despite the values themselves being Ity_F16s. */
+
+static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselF16Expr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F16);
+
+ if (e->tag == Iex_Get) {
+ Int offs = e->Iex.Get.offset;
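+ /* The offset limits mirror VLdStH's uimm12 field: byte offsets
+    0 .. 8190, 2-aligned. */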
+ if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
+ HReg rD = newVRegD(env);
+ HReg rN = get_baseblock_register();
+ addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
+ return rD;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_F32toF16: {
+ HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
+ set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+ HReg dstH = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
+ return dstH;
+ }
+ case Iop_F64toF16: {
+ HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
+ HReg dstH = newVRegD(env);
+ addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
+ return dstH;
+ }
+ default:
+ break;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselF16Expr_wrk");
+}
+
+
/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (256 bit) ---*/
/*---------------------------------------------------------*/
return;
}
if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
- HReg dD = iselFltExpr(env, stmt->Ist.Put.data);
+ HReg sD = iselFltExpr(env, stmt->Ist.Put.data);
+ HReg bbp = get_baseblock_register();
+ addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
+ return;
+ }
+ if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
+ HReg hD = iselF16Expr(env, stmt->Ist.Put.data);
HReg bbp = get_baseblock_register();
- addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
+ addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
return;
}
hreg = mkHReg(True, HRcInt64, 0, j++);
hregHI = mkHReg(True, HRcInt64, 0, j++);
break;
+ case Ity_F16: // we'll use HRcFlt64 regs for F16 too
case Ity_F32: // we'll use HRcFlt64 regs for F32 too
case Ity_F64:
hreg = mkHReg(True, HRcFlt64, 0, j++);