Frontend

author Cerion Armour-Brown <cerion@valgrind.org>

Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)

committer Cerion Armour-Brown <cerion@valgrind.org>

Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)
author Cerion Armour-Brown <cerion@valgrind.org>
Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)
committer Cerion Armour-Brown <cerion@valgrind.org>
Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)
diff --git a/VEX/priv/guest-ppc32/toIR.c b/VEX/priv/guest-ppc32/toIR.c

index db08aa9d7580fb9fc9b903f76cf9fc635fe74070..cc3e7846cdc947f0d0ef4c60ad9f45d804f068f8 100644 (file)
--- a/VEX/priv/guest-ppc32/toIR.c
+++ b/VEX/priv/guest-ppc32/toIR.c
@@ -1386,7 +1386,7 @@ static IRExpr* /* :: Ity_I32 */ get_XER_CA ( void )
  
  
  /* Set the CR6 flags following an AltiVec compare operation. */
-static void set_AV_CR6 ( IRExpr* result )
+static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
  {
     /* CR6[0:3] = {all_ones, 0, all_zeros, 0}
        all_ones  = (v[0] && v[1] && v[2] && v[3])
@@ -1406,13 +1406,6 @@ static void set_AV_CR6 ( IRExpr* result )
     assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
     assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
  
-   assign( rOnes, unop(Iop_1Uto8,
-      binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
-            unop(Iop_V128to32,
-                 binop(Iop_AndV128,
-                       binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
-                       binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))))) );
-
     assign( rZeros, unop(Iop_1Uto8,
         binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
               unop(Iop_Not32,
@@ -1422,9 +1415,19 @@ static void set_AV_CR6 ( IRExpr* result )
                               binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
                    ))) );
  
-   putCR321( 6, binop(Iop_Or8,
-                      binop(Iop_Shl8, mkexpr(rOnes),  mkU8(3)),
-                      binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+   if (test_all_ones) {
+      assign( rOnes, unop(Iop_1Uto8,
+         binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+               unop(Iop_V128to32,
+                    binop(Iop_AndV128,
+                          binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+                          binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))))) );
+      putCR321( 6, binop(Iop_Or8,
+                         binop(Iop_Shl8, mkexpr(rOnes),  mkU8(3)),
+                         binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+   } else {
+      putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
+   }
     putCR0( 6, mkU8(0) );
  } 
  
@@ -5954,7 +5957,7 @@ static Bool dis_av_cmp ( UInt theInstr )
     putVReg( vD_addr, mkexpr(vD) );
  
     if (flag_Rc) {
-      set_AV_CR6( mkexpr(vD) );
+      set_AV_CR6( mkexpr(vD), True );
     }
     return True;
  }
@@ -6787,6 +6790,13 @@ static Bool dis_av_fp_arith ( UInt theInstr )
     UChar vC_addr  = toUChar((theInstr >>  6) & 0x1F); /* theInstr[6:10] */
     UInt  opc2=0;
  
+   IRTemp vA = newTemp(Ity_V128);
+   IRTemp vB = newTemp(Ity_V128);
+   IRTemp vC = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+   assign( vC, getVReg(vC_addr));
+
     if (opc1 != 0x4) {
        vex_printf("dis_av_fp_arith(PPC32)(instr)\n");
        return False;
@@ -6812,23 +6822,23 @@ static Bool dis_av_fp_arith ( UInt theInstr )
     switch (opc2) {
     case 0x00A: // vaddfp (Add FP, AV p137)
        DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) );
+      return True;
  
    case 0x04A: // vsubfp (Subtract FP, AV p261)
        DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) );
+      return True;
  
     case 0x40A: // vmaxfp (Maximum FP, AV p178)
        DIP("vmaxfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, binop(Iop_Max32Fx4, mkexpr(vA), mkexpr(vB)) );
+      return True;
  
     case 0x44A: // vminfp (Minimum FP, AV p187)
        DIP("vminfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, binop(Iop_Min32Fx4, mkexpr(vA), mkexpr(vB)) );
+      return True;
  
     default:
        break; // Fall through...
@@ -6843,13 +6853,13 @@ static Bool dis_av_fp_arith ( UInt theInstr )
     switch (opc2) {
     case 0x10A: // vrefp (Reciprocal Esimate FP, AV p228)
        DIP("vrefp v%d,v%d\n", vD_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, unop(Iop_Recip32Fx4, mkexpr(vB)) );
+      return True;
  
     case 0x14A: // vrsqrtefp (Reciprocal Square Root Estimate FP, AV p237)
        DIP("vrsqrtefp v%d,v%d\n", vD_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      putVReg( vD_addr, unop(Iop_RSqrt32Fx4, mkexpr(vB)) );
+      return True;
  
     case 0x18A: // vexptefp (2 Raised to the Exp Est FP, AV p173)
        DIP("vexptefp v%d,v%d\n", vD_addr, vB_addr);
@@ -6880,6 +6890,14 @@ static Bool dis_av_fp_cmp ( UInt theInstr )
     UChar flag_Rc  = toUChar((theInstr >> 10) & 0x1);  /* theInstr[10]    */
     UInt  opc2     =         (theInstr >>  0) & 0x3FF; /* theInstr[0:9]   */
  
+   Bool cmp_bounds = False;
+
+   IRTemp vA = newTemp(Ity_V128);
+   IRTemp vB = newTemp(Ity_V128);
+   IRTemp vD = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+
     if (opc1 != 0x4) {
        vex_printf("dis_av_fp_cmp(PPC32)(instr)\n");
        return False;
@@ -6888,28 +6906,61 @@ static Bool dis_av_fp_cmp ( UInt theInstr )
     switch (opc2) {
     case 0x0C6: // vcmpeqfp (Compare Equal-to FP, AV p159)
        DIP("vcmpeqfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) );
+      break;
  
     case 0x1C6: // vcmpgefp (Compare Greater-than-or-Equal-to FP, AV p163)
        DIP("vcmpgefp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) );
+      break;
  
     case 0x2C6: // vcmpgtfp (Compare Greater-than FP, AV p164)
        DIP("vcmpgtfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) );
+      break;
  
-   case 0x3C6: // vcmpbfp (Compare Bounds FP, AV p157)
+   case 0x3C6: { // vcmpbfp (Compare Bounds FP, AV p157)
+      IRTemp gt      = newTemp(Ity_V128);
+      IRTemp lt      = newTemp(Ity_V128);
+      IRTemp zeros   = newTemp(Ity_V128);
        DIP("vcmpbfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
+      cmp_bounds = True;
+      assign( zeros,   unop(Iop_Dup32x4, mkU32(0)) );
+
+      /* Note: this relies on the ppc backend's compare insns returning
+         zeroed lanes if either of the corresponding arg lanes is a NaN.
+
+         Perhaps better to have an irop Iop_isNan32Fx4, but then we'd
+         need this for the other compares too (vcmpeqfp etc)...
+         Better still, tighten down the spec for the compare irops.
+       */
+      assign( gt, unop(Iop_NotV128,
+                       binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) );
+      assign( lt, unop(Iop_NotV128,
+                       binop(Iop_CmpGE32Fx4, mkexpr(vA),
+                             binop(Iop_Sub32Fx4, mkexpr(zeros), mkexpr(vB)))) );
+
+      // finally, just shift gt,lt to correct position
+      assign( vD, binop(Iop_ShlN32x4,
+                        binop(Iop_OrV128,
+                              binop(Iop_AndV128, mkexpr(gt),
+                                    unop(Iop_Dup32x4, mkU32(0x2))),
+                              binop(Iop_AndV128, mkexpr(lt),
+                                    unop(Iop_Dup32x4, mkU32(0x1)))),
+                        mkU8(30)) );
+      break;
+   }
  
     default:
        vex_printf("dis_av_fp_cmp(PPC32)(opc2)\n");
        return False;
     }
+
+   putVReg( vD_addr, mkexpr(vD) );
+
+   if (flag_Rc) {
+      set_AV_CR6( mkexpr(vD), !cmp_bounds );
+   }
     return True;
  }
  
diff --git a/VEX/priv/host-ppc32/hdefs.c b/VEX/priv/host-ppc32/hdefs.c

index 1775f88a0fc90e309af858e9e77cacdba87194b7..ff7b2fc41fe989315aea04371767f6c0dfe2c96f 100644 (file)
--- a/VEX/priv/host-ppc32/hdefs.c
+++ b/VEX/priv/host-ppc32/hdefs.c
@@ -689,16 +689,26 @@ HChar* showPPC32AvOp ( PPC32AvOp op ) {
     case Pav_MRGHI:     return "vmrgh";    // b,h,w
     case Pav_MRGLO:     return "vmrgl";    // b,h,w
  
+   default: vpanic("showPPC32AvOp");
+   }
+}
+
+HChar* showPPC32AvFpOp ( PPC32AvFpOp op ) {   /* mnemonic string for an AV FP op; type now matches the hdefs.h prototype */
+   switch (op) {
     /* Floating Point Binary */
-   case Pav_ADDF:      return "vaddfp";
-   case Pav_SUBF:      return "vsubfp";
-   case Pav_MULF:      return "vmaddfp";
-   case Pav_MAXF:      return "vmaxfp";
-   case Pav_MINF:      return "vminfp";
-   case Pav_CMPEQF:    return "vcmpeqfp";
-   case Pav_CMPGTF:    return "vcmpgtfp";
-   case Pav_CMPGEF:    return "vcmpgefp";
+   case Pavfp_ADDF:      return "vaddfp";
+   case Pavfp_SUBF:      return "vsubfp";
+   case Pavfp_MULF:      return "vmaddfp";
+   case Pavfp_MAXF:      return "vmaxfp";
+   case Pavfp_MINF:      return "vminfp";
+   case Pavfp_CMPEQF:    return "vcmpeqfp";
+   case Pavfp_CMPGTF:    return "vcmpgtfp";
+   case Pavfp_CMPGEF:    return "vcmpgefp";
       
+   /* Floating Point Unary */
+   case Pavfp_RCPF:      return "vrefp";
+   case Pavfp_RSQRTF:    return "vrsqrtefp";
+
     default: vpanic("showPPC32AvOp");
     }
  }
@@ -931,8 +941,8 @@ PPC32Instr* PPC32Instr_AvLdSt ( Bool isLoad, UChar sz, HReg reg, PPC32AMode* add
     return i;
  }
  PPC32Instr* PPC32Instr_AvUnary ( PPC32AvOp op, HReg dst, HReg src ) {
-   PPC32Instr* i       = LibVEX_Alloc(sizeof(PPC32Instr));
-   i->tag              = Pin_AvUnary;
+   PPC32Instr* i      = LibVEX_Alloc(sizeof(PPC32Instr));
+   i->tag             = Pin_AvUnary;
     i->Pin.AvUnary.op  = op;
     i->Pin.AvUnary.dst = dst;
     i->Pin.AvUnary.src = src;
@@ -983,6 +993,14 @@ PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR
     i->Pin.AvBin32Fx4.srcR = srcR;
     return i;
  }
+PPC32Instr* PPC32Instr_AvUn32Fx4 ( PPC32AvOp op, HReg dst, HReg src ) {   /* build a Pin_AvUn32Fx4 (AV FP unary, 32Fx4) instruction */
+   PPC32Instr* i        = LibVEX_Alloc(sizeof(PPC32Instr));
+   i->tag               = Pin_AvUn32Fx4;
+   i->Pin.AvUn32Fx4.op  = op;   /* NOTE(review): this field is declared PPC32AvFpOp in hdefs.h while the parameter is PPC32AvOp -- change definition and prototype together */
+   i->Pin.AvUn32Fx4.dst = dst;
+   i->Pin.AvUn32Fx4.src = src;
+   return i;
+}
  PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ) {
     PPC32Instr* i      = LibVEX_Alloc(sizeof(PPC32Instr));
     i->tag             = Pin_AvPerm;
@@ -1401,6 +1419,12 @@ void ppPPC32Instr ( PPC32Instr* i )
        vex_printf(",");
        ppHRegPPC32(i->Pin.AvBin32Fx4.srcR);
        return;
+   case Pin_AvUn32Fx4:   /* AV FP unary: op is a PPC32AvFpOp, so print it with the FP-op printer */
+      vex_printf("%s ", showPPC32AvFpOp(i->Pin.AvUn32Fx4.op));
+      ppHRegPPC32(i->Pin.AvUn32Fx4.dst);
+      vex_printf(",");
+      ppHRegPPC32(i->Pin.AvUn32Fx4.src);
+      return;
     case Pin_AvPerm:
        vex_printf("vperm ");
        ppHRegPPC32(i->Pin.AvPerm.dst);
@@ -1660,13 +1684,17 @@ void getRegUsage_PPC32Instr ( HRegUsage* u, PPC32Instr* i )
        addHRegUse(u, HRmWrite, i->Pin.AvBin32x4.dst);
        addHRegUse(u, HRmRead,  i->Pin.AvBin32x4.srcL);
        addHRegUse(u, HRmRead,  i->Pin.AvBin32x4.srcR);
-      if (i->Pin.AvBin32x4.op == Pav_MULF)
-         addHRegUse(u, HRmWrite, hregPPC32_GPR29());
        return;
     case Pin_AvBin32Fx4:
        addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst);
        addHRegUse(u, HRmRead,  i->Pin.AvBin32Fx4.srcL);
        addHRegUse(u, HRmRead,  i->Pin.AvBin32Fx4.srcR);
+      if (i->Pin.AvBin32Fx4.op == Pavfp_MULF)
+         addHRegUse(u, HRmWrite, hregPPC32_GPR29());
+      return;
+   case Pin_AvUn32Fx4:
+      addHRegUse(u, HRmWrite, i->Pin.AvUn32Fx4.dst);
+      addHRegUse(u, HRmRead,  i->Pin.AvUn32Fx4.src);
        return;
     case Pin_AvPerm:
        addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst);
@@ -1837,6 +1865,10 @@ void mapRegs_PPC32Instr (HRegRemap* m, PPC32Instr* i)
        mapReg(m, &i->Pin.AvBin32Fx4.srcL);
        mapReg(m, &i->Pin.AvBin32Fx4.srcR);
        return;
+   case Pin_AvUn32Fx4:
+      mapReg(m, &i->Pin.AvUn32Fx4.dst);
+      mapReg(m, &i->Pin.AvUn32Fx4.src);
+      return;
     case Pin_AvPerm:
        mapReg(m, &i->Pin.AvPerm.dst);
        mapReg(m, &i->Pin.AvPerm.srcL);
@@ -2212,8 +2244,8 @@ static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
     return emit32(p, theInstr);
  }
  
-static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2, UInt Rc,
-                          UInt r3, UInt opc2 )
+static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
+                          UInt r3, UInt Rc, UInt opc2 )
  {
     UInt theInstr;
     vassert(opc1 < 0x40);
@@ -2915,8 +2947,8 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
           p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2 );
           break;
        default:
-       p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
-       break;
+         p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+         break;
        }
        goto done;
     }
@@ -3100,30 +3132,30 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
        UInt v_srcR = vregNo(i->Pin.AvBin32Fx4.srcR);
        switch (i->Pin.AvBin32Fx4.op) {
  
-      case Pav_ADDF:
+      case Pavfp_ADDF:
           p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10 );   // vaddfp
           break;
-      case Pav_SUBF:
+      case Pavfp_SUBF:
           p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74 );   // vsubfp
           break;
-      case Pav_MAXF:
+      case Pavfp_MAXF:
           p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034 ); // vmaxfp
           break;
-      case Pav_MINF:
+      case Pavfp_MINF:
           p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098 ); // vminfp
           break;
  
-      case Pav_MULF: {
+      case Pavfp_MULF: {
           /* Make a vmulfp from a vmaddfp:
              load -0.0 (0x8000_0000) to each 32-bit word of vB
              this makes the add a noop.
           */
           UInt vB = 29;                    // XXX: Using r29 for temp
-         UInt zero_simm = 0x80000000;
+         UInt konst = 0x1F;
  
           // Better way to load zero_imm?
           // vspltisw vB,0x1F   (0x1F => each word of vB)
-         p = mkFormVX( p, 4, vB, zero_simm, 0, 908 );
+         p = mkFormVX( p, 4, vB, konst, 0, 908 );
  
           // vslw vB,vB,vB  (each word of vB = (0x1F << 0x1F) = 0x80000000
           p = mkFormVX( p, 4, vB, vB, vB, 388 );
@@ -3132,14 +3164,14 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
           p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 );
           break;
        }
-      case Pav_CMPEQF:
+      case Pavfp_CMPEQF:
           p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 ); // vcmpeqfp
           break;
-      case Pav_CMPGTF:
-         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 710 ); // vcmpgtfp
+      case Pavfp_CMPGTF:
+         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 710 ); // vcmpgtfp
           break;
-      case Pav_CMPGEF:
-         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 454 ); // vcmpgefp
+      case Pavfp_CMPGEF:
+         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 454 ); // vcmpgefp
           break;
  
        default:
@@ -3148,6 +3180,20 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
        goto done;
     }
  
+   case Pin_AvUn32Fx4: {
+      UInt v_dst = vregNo(i->Pin.AvUn32Fx4.dst);
+      UInt v_src = vregNo(i->Pin.AvUn32Fx4.src);
+      UInt opc2;
+      switch (i->Pin.AvUn32Fx4.op) {
+      case Pavfp_RCPF:   opc2 =  266; break; // vrefp
+      case Pavfp_RSQRTF: opc2 =  330; break; // vrsqrtefp
+      default:
+         goto bad;
+      }
+      p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+      goto done;
+   }
+
     case Pin_AvPerm: {  // vperm
        UInt v_dst  = vregNo(i->Pin.AvPerm.dst);
        UInt v_srcL = vregNo(i->Pin.AvPerm.srcL);
diff --git a/VEX/priv/host-ppc32/hdefs.h b/VEX/priv/host-ppc32/hdefs.h

index a2c2ba4073ceb0d13162e116746c2d6d6e7391b7..248647f094d7696b67154422ed8d22504544b8ee 100644 (file)
--- a/VEX/priv/host-ppc32/hdefs.h
+++ b/VEX/priv/host-ppc32/hdefs.h
@@ -376,14 +376,10 @@ typedef
        Pav_UNPCKHPIX, Pav_UNPCKLPIX,
  
        /* Integer Binary */
-      Pav_AND, Pav_OR, Pav_XOR,   /* Bitwise */
-
+      Pav_AND, Pav_OR, Pav_XOR,            /* Bitwise */
        Pav_ADDU, Pav_QADDU, Pav_QADDS,
-
        Pav_SUBU, Pav_QSUBU, Pav_QSUBS,
-
        Pav_OMULU, Pav_OMULS, Pav_EMULU, Pav_EMULS,
-
        Pav_AVGU, Pav_AVGS,
        Pav_MAXU, Pav_MAXS,
        Pav_MINU, Pav_MINS,
@@ -400,18 +396,28 @@ typedef
  
        /* Merge */
        Pav_MRGHI, Pav_MRGLO,
+   }
+   PPC32AvOp;
+
+extern HChar* showPPC32AvOp ( PPC32AvOp );
+
+
+/* --------- */
+typedef
+   enum {
+      Pavfp_INVALID,
  
        /* Floating point binary */
-      Pav_ADDF, Pav_SUBF, Pav_MULF,
-      Pav_MAXF, Pav_MINF,
-      Pav_CMPEQF, Pav_CMPGTF, Pav_CMPGEF,
+      Pavfp_ADDF, Pavfp_SUBF, Pavfp_MULF,
+      Pavfp_MAXF, Pavfp_MINF,
+      Pavfp_CMPEQF, Pavfp_CMPGTF, Pavfp_CMPGEF,
  
-//..       /* Floating point unary */
-//..       Xsse_RCPF, Xsse_RSQRTF, Xsse_SQRTF,
+      /* Floating point unary */
+      Pavfp_RCPF, Pavfp_RSQRTF,
     }
-   PPC32AvOp;
+   PPC32AvFpOp;
  
-extern HChar* showPPC32AvOp ( PPC32AvOp );
+extern HChar* showPPC32AvFpOp ( PPC32AvFpOp );
  
  
  /* --------- */
@@ -453,6 +459,7 @@ typedef
        Pin_AvBin32x4,  /* AV binary, 32x4 */
  
        Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */
+      Pin_AvUn32Fx4,  /* AV FP unary,  32Fx4 */
  
        Pin_AvPerm,     /* AV permute (shuffle) */
        Pin_AvSel,      /* AV select */
@@ -672,11 +679,16 @@ typedef
              HReg      srcR;
           } AvBin32x4;
           struct {
-            PPC32AvOp op;
+            PPC32AvFpOp op;
              HReg      dst;
              HReg      srcL;
              HReg      srcR;
           } AvBin32Fx4;
+         struct {
+            PPC32AvFpOp op;
+            HReg      dst;
+            HReg      src;
+         } AvUn32Fx4;
           /* Perm,Sel,SlDbl,Splat are all weird AV permutations */
           struct {
              HReg dst;
@@ -752,6 +764,7 @@ extern PPC32Instr* PPC32Instr_AvBin8x16  ( PPC32AvOp op, HReg dst, HReg srcL, HR
  extern PPC32Instr* PPC32Instr_AvBin16x8  ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
  extern PPC32Instr* PPC32Instr_AvBin32x4  ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
  extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPC32Instr* PPC32Instr_AvUn32Fx4  ( PPC32AvOp op, HReg dst, HReg src );
  extern PPC32Instr* PPC32Instr_AvPerm     ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
  extern PPC32Instr* PPC32Instr_AvSel      ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
  extern PPC32Instr* PPC32Instr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg srcR );
diff --git a/VEX/priv/host-ppc32/isel.c b/VEX/priv/host-ppc32/isel.c

index 2f4e908062fc019599b5bf50a878992305fa3353..42f509b31451ca89dd0ef64d7c767b4918dccc1c 100644 (file)
--- a/VEX/priv/host-ppc32/isel.c
+++ b/VEX/priv/host-ppc32/isel.c
@@ -123,11 +123,11 @@ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
  //.. {
  //..    return IRExpr_Const(IRConst_U64(i));
  //.. }
-//.. 
-//.. static IRExpr* mkU32 ( UInt i )
-//.. {
-//..    return IRExpr_Const(IRConst_U32(i));
-//.. }
+
+static IRExpr* mkU32 ( UInt i )   /* wrap i as a 32-bit unsigned IR constant expression */
+{
+   return IRExpr_Const(IRConst_U32(i));
+}
  
  static IRExpr* bind ( Int binder )
  {
@@ -135,8 +135,6 @@ static IRExpr* bind ( Int binder )
  }
  
  
-
-
  /*---------------------------------------------------------*/
  /*--- ISelEnv                                           ---*/
  /*---------------------------------------------------------*/
@@ -838,6 +836,30 @@ static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
  }
  
  
+/* For each 32-bit lane of vSrc: result lane = (lane is a NaN) ? all 1s : all 0s */
+static HReg isNan ( ISelEnv* env, HReg vSrc )
+{
+   vassert(hregClass(vSrc) == HRcVec128);
+
+   HReg zeros   = mk_AvDuplicateRI(env, mkU32(0));
+   HReg msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000));
+   HReg msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF));
+   HReg expt    = newVRegV(env);
+   HReg mnts    = newVRegV(env);
+   HReg vIsNan  = newVRegV(env); 
+
+   /* 32-bit float => sign(1) | exponent(8) | mantissa(23)
+      NaN => exponent all ones and mantissa != 0; the two tests are ANDed */
+
+   addInstr(env, PPC32Instr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
+   addInstr(env, PPC32Instr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
+   addInstr(env, PPC32Instr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
+   addInstr(env, PPC32Instr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
+   addInstr(env, PPC32Instr_AvBinary(Pav_AND, vIsNan, expt, mnts));
+   return vIsNan;
+}
+
+
  /*---------------------------------------------------------*/
  /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
  /*---------------------------------------------------------*/
@@ -2978,35 +3000,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
  //..          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
  //..          return dst;
  //..       }
-//.. 
-//..       case Iop_CmpNEZ32x4: {
-//..          /* Sigh, we have to generate lousy code since this has to
-//..             work on SSE1 hosts */
-//..          /* basically, the idea is: for each lane:
-//..                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
-//..                sbbl %r, %r               (now %r = 1Sto32(CF))
-//..                movl %r, lane
-//..          */
-//..          Int       i;
-//..          X86AMode* am;
-//..          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
-//..          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
-//..          HReg      dst  = newVRegV(env);
-//..          HReg      r32  = newVRegI(env);
-//..          sub_from_esp(env, 16);
-//..          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
-//..          for (i = 0; i < 4; i++) {
-//..             am = X86AMode_IR(i*4, hregX86_ESP());
-//..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
-//..             addInstr(env, X86Instr_Unary32(Xun_NEG, X86RM_Reg(r32)));
-//..             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
-//..             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
-//..          }
-//..          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
-//..          add_to_esp(env, 16);
-//..          return dst;
-//..       }
-//.. 
+
        case Iop_CmpNEZ8x16: {
           HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
           HReg zero = newVRegV(env);
@@ -3061,18 +3055,18 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
  //..          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
  //..          return dst;
  //..       }
-//.. 
-//..       case Iop_Recip32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
-//..       case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
+
+      case Iop_Recip32Fx4: op = Pavfp_RCPF;   goto do_32Fx4_unary;
+      case Iop_RSqrt32Fx4: op = Pavfp_RSQRTF; goto do_32Fx4_unary;
  //..       case Iop_Sqrt32Fx4:  op = Xsse_SQRTF;  goto do_32Fx4_unary;
-//..       do_32Fx4_unary:
-//..       {
-//..          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
-//..          HReg dst = newVRegV(env);
-//..          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
-//..          return dst;
-//..       }
-//.. 
+      do_32Fx4_unary:
+      {
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg dst = newVRegV(env);
+         addInstr(env, PPC32Instr_AvUn32Fx4(op, dst, arg));
+         return dst;
+      }
+
  //..       case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
  //..       case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
  //..       case Iop_Sqrt64Fx2:  op = Xsse_SQRTF;  goto do_64Fx2_unary;
@@ -3237,24 +3231,44 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
           return dst;
        }
  
-//..       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
-//..       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
-//..       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
-//..       case Iop_Add32Fx4:   op = Xsse_ADDF;   goto do_32Fx4;
+      case Iop_Add32Fx4:   op = Pavfp_ADDF;   goto do_32Fx4;
+      case Iop_Sub32Fx4:   op = Pavfp_SUBF;   goto do_32Fx4;
+      case Iop_Max32Fx4:   op = Pavfp_MAXF;   goto do_32Fx4;
+      case Iop_Min32Fx4:   op = Pavfp_MINF;   goto do_32Fx4;
+      case Iop_Mul32Fx4:   op = Pavfp_MULF;   goto do_32Fx4;
  //..       case Iop_Div32Fx4:   op = Xsse_DIVF;   goto do_32Fx4;
-//..       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
-//..       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
-//..       case Iop_Mul32Fx4:   op = Xsse_MULF;   goto do_32Fx4;
-//..       case Iop_Sub32Fx4:   op = Xsse_SUBF;   goto do_32Fx4;
-//..       do_32Fx4:
-//..       {
-//..          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
-//..          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
-//..          HReg dst = newVRegV(env);
-//..          addInstr(env, mk_vMOVsd_RR(argL, dst));
-//..          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
-//..          return dst;
-//..       }
+      case Iop_CmpEQ32Fx4: op = Pavfp_CMPEQF; goto do_32Fx4;
+      case Iop_CmpGT32Fx4: op = Pavfp_CMPGTF; goto do_32Fx4;
+      case Iop_CmpGE32Fx4: op = Pavfp_CMPGEF; goto do_32Fx4;
+//..       case Iop_CmpLT32Fx4:
+      do_32Fx4:
+      {
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst = newVRegV(env);
+         addInstr(env, PPC32Instr_AvBin32Fx4(op, dst, argL, argR));
+         return dst;
+      }
+
+      case Iop_CmpLE32Fx4: {
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst = newVRegV(env);
+         
+         /* stay consistent with native ppc compares:
+            if a left/right lane holds a nan, return zeros for that lane
+            so: le == NOT(gt OR isNan)
+          */
+         HReg isNanLR = newVRegV(env);
+         HReg isNanL = isNan(env, argL);
+         HReg isNanR = isNan(env, argR);
+         addInstr(env, PPC32Instr_AvBinary(Pav_OR, isNanLR, isNanL, isNanR));
+
+         addInstr(env, PPC32Instr_AvBin32Fx4(Pavfp_CMPGTF, dst, argL, argR));
+         addInstr(env, PPC32Instr_AvBinary(Pav_OR, dst, dst, isNanLR));
+         addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst));
+         return dst;
+      }
  
  //..       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
  //..       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
author	Cerion Armour-Brown <cerion@valgrind.org>
	Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)
committer	Cerion Armour-Brown <cerion@valgrind.org>
	Mon, 14 Nov 2005 00:44:47 +0000 (00:44 +0000)
VEX/priv/guest-ppc32/toIR.c		patch \| blob \| blame \| history
VEX/priv/host-ppc32/hdefs.c		patch \| blob \| blame \| history
VEX/priv/host-ppc32/hdefs.h		patch \| blob \| blame \| history
VEX/priv/host-ppc32/isel.c		patch \| blob \| blame \| history