From: Julian Seward <jseward@acm.org>
Date: Sun, 8 Feb 2015 12:08:56 +0000 (+0000)
Subject: Implement all remaining FP min/max style instructions:
X-Git-Tag: svn/VALGRIND_3_11_0^2~95
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a32846c91aec8dfe5b8a9a3c4331e6a460111489;p=thirdparty%2Fvalgrind.git

Implement all remaining FP min/max style instructions:
  {FMAXNMV,FMINNMV,FMAXV,FMINV} s_4s
  {FMAXNMP,FMINNMP,FMAXP,FMINP} d_2d, s_2s
  {FMAXNM,FMINNM,FMAX,FMIN} 2d_2d_2d, 4s_4s_4s, 2s_2s_2s
  {FMAXNMP,FMINNMP,FMAXP,FMINP} 2d_2d_2d, 4s_4s_4s, 2s_2s_2s


git-svn-id: svn://svn.valgrind.org/vex/trunk@3087
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 41a353b9e6..65601cb78a 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -7292,6 +7292,7 @@ static IRTemp math_FOLDV ( IRTemp src, IROp op )
          assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
          return res;
       }
+      case Iop_Max32Fx4: case Iop_Min32Fx4:
       case Iop_Min32Sx4: case Iop_Min32Ux4:
       case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
          IRTemp x3210 = src;
@@ -8480,7 +8481,7 @@ Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
                      : mkexpr(tN1));
       IRTemp res = math_FOLDV(tN2, op);
       if (res == IRTemp_INVALID)
-         return False; /* means math_MINMAXV
+         return False; /* means math_FOLDV
                           doesn't handle this case yet */
       putQReg128(dd, mkexpr(res));
       const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
@@ -8491,6 +8492,26 @@ Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if ((size == X00 || size == X10)
+       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
+      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
+      /* -------- 0,10,01100: FMINNMV s_4s -------- */
+      /* -------- 1,00,01111: FMAXV   s_4s -------- */
+      /* -------- 1,10,01111: FMINV   s_4s -------- */
+      /* FMAXNMV, FMINNMV: FIXME -- KLUDGED: computed as plain FMAXV/FMINV */
+      if (bitQ == 0) return False; // Only 4s is allowed
+      Bool   isMIN = (size & 2) == 2;
+      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
+      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
+      IRTemp src = newTempV128();
+      assign(src, getQReg128(nn));
+      IRTemp res = math_FOLDV(src, opMXX);
+      putQReg128(dd, mkexpr(res));
+      DIP("%s%sv s%u, %u.4s\n",
+          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
+      return True;
+   }
+
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
    return False;
 #  undef INSN
@@ -9054,6 +9075,33 @@ Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 1
+       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
+      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
+      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
+      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
+      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
+      /* FMAXNMP, FMINNMP: FIXME -- KLUDGED: computed as plain FMAXP/FMINP */
+      Bool   isD   = (sz & 1) == 1;
+      Bool   isMIN = (sz & 2) == 2;
+      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
+      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
+      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
+      IRTemp src   = newTempV128();
+      IRTemp argL  = newTempV128();
+      IRTemp argR  = newTempV128();
+      assign(src, getQReg128(nn));
+      assign(argL, unop(opZHI, mkexpr(src)));
+      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src), 
+                                                    mkU8(isD ? 8 : 4))));
+      putQReg128(dd, unop(opZHI,
+                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
+      HChar c = isD ? 'd' : 's';
+      DIP("%s%sp %c%u, v%u.2%c\n",
+           isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -11086,6 +11134,28 @@ Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 0
+       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
+      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* FMAXNM, FMINNM: FIXME -- KLUDGED: computed as plain FMAX/FMIN */
+      Bool   isD   = (size & 1) == 1;
+      if (bitQ == 0 && isD) return False; // implied 1d case
+      Bool   isMIN = (size & 2) == 2;
+      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
+      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
+      IRTemp res   = newTempV128();
+      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
+          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
    if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
       /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
       /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
@@ -11216,6 +11286,37 @@ Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 1
+       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
+      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+      /* FMAXNMP, FMINNMP: FIXME -- KLUDGED: computed as plain FMAXP/FMINP */
+      Bool isD = (size & 1) == 1;
+      if (bitQ == 0 && isD) return False; // implied 1d case
+      Bool   isMIN = (size & 2) == 2;
+      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
+      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
+      IRTemp srcN  = newTempV128();
+      IRTemp srcM  = newTempV128();
+      IRTemp preL  = IRTemp_INVALID;
+      IRTemp preR  = IRTemp_INVALID;
+      assign(srcN, getQReg128(nn));
+      assign(srcM, getQReg128(mm));
+      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
+                                           srcM, srcN, isD, bitQ);
+      putQReg128(
+         dd, math_MAYBE_ZERO_HI64_fromE(
+                bitQ,
+                binop(opMXX, mkexpr(preL), mkexpr(preR))));
+      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+      DIP("%s%sp %s.%s, %s.%s, %s.%s\n", 
+          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
    if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
       /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
       Bool isD = size == X01;